def test_out_of_band_retrieve(self):
    """Verify that an out-of-band retrieve returns the ingested time values."""
    # Set up the environment: a simple dataset with ingestion running
    stream_id, route, _stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    # Fill the dataset and wait for it to catch up (40 is passed as the target)
    self.publish_fake_data(stream_id, route)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    # Retrieve the data directly through the service class
    oob_granule = DataRetrieverService.retrieve_oob(dataset_id)
    record_dict = RecordDictionaryTool.load_from_granule(oob_granule)

    # Every retrieved 'time' value must match 0..39 element-wise
    retrieved_times = record_dict['time']
    expected_times = np.arange(40)
    times_match = retrieved_times == expected_times
    self.assertTrue(times_match.all())
def _get_highcharts_data(self, data_product_id='', visualization_parameters=None):
    """Retrieve the data of a data product as a JSON list of chart series.

    @param data_product_id str
    @param visualization_parameters dict-like (used as a dict below); the keys
           read here are 'parameters', 'start_time', 'end_time',
           'stride_time' and 'use_direct_access'
    @retval jsonp_visualization_data str
    @throws BadRequest if data_product_id is missing
    @throws NotFound if no dataset or stream definition is associated with
            the data product
    """
    # Serialized as an empty JSON list whenever there is no data to return
    empty_hc = []

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # The query stays None unless visualization parameters were supplied
    query = None
    if visualization_parameters:
        query = {}

        # 'time' is always requested alongside any explicitly named parameters.
        # NOTE: this appends to the caller's list in place; the same
        # visualization_parameters object is later passed to the transform.
        if ('parameters' in visualization_parameters
                and len(visualization_parameters['parameters']) > 0):
            requested = visualization_parameters['parameters']
            if 'time' not in requested:
                requested.append('time')
            query['parameters'] = requested

        # Times are only coerced to int here; no time-base conversion is done
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except (TypeError, ValueError):
                # There are some (rare) situations where the AJAX request has
                # 'null' in the request, e.g.
                # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                # A null raises TypeError and a non-numeric string raises
                # ValueError; both fall back to a stride of 1.
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = int(visualization_parameters['use_direct_access']) == 1

    # get the dataset_id and stream definition associated with the
    # data_product. Need them to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")

    stream_def_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasStreamDefinition, id_only=True)
    if not stream_def_ids:
        raise NotFound('Could not find stream definition associated with data product')
    stream_def_id = stream_def_ids[0]

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(
            ds_ids[0], query=query, delivery_format=stream_def_id)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(
            ds_ids[0], query=query, delivery_format=stream_def_id)

    # If there is no data, return an empty list
    if retrieved_granule is None:
        return simplejson.dumps(empty_hc)

    # send the granule through the transform to get the chart data
    hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
        'highcharts', id_only=True)
    hc_stream_def = self.clients.pubsub_management.create_stream_definition(
        'HighCharts_out', parameter_dictionary_id=hc_pdict_id)
    hc_data_granule = VizTransformHighChartsAlgorithm.execute(
        retrieved_granule, params=hc_stream_def, config=visualization_parameters)

    if hc_data_granule is None:
        return simplejson.dumps(empty_hc)

    hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)

    # Copy every series into a plain dict, converting its "data" entry with
    # .tolist() (presumably a numpy array, per the original note) so that the
    # result can be serialized to JSON
    hc_data_np = get_safe(hc_rdt, "hc_data")[0]
    hc_data = []
    for series in hc_data_np:
        plain_series = {}
        for key in series:
            value = series[key]
            plain_series[key] = value.tolist() if key == "data" else value
        hc_data.append(plain_series)

    # return the json version of the series list
    return json.dumps(hc_data)
def _get_google_dt(self, data_product_id='', visualization_parameters=None):
    """Retrieve the data of a data product as a Google DataTable JSON string.

    @param data_product_id str
    @param visualization_parameters dict-like (used as a dict below); the keys
           read here are 'parameters', 'start_time', 'end_time',
           'stride_time' and 'use_direct_access'
    @retval jsonp_visualization_data str
    @throws BadRequest if data_product_id is missing
    @throws NotFound if no dataset or stream definition is associated with
            the data product
    """
    # A table holding only the time column is returned when there is no data
    empty_gdt = gviz_api.DataTable([('time', 'datetime', 'time')])

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # The query stays None unless visualization parameters were supplied
    query = None
    if visualization_parameters:
        query = {}

        # 'time' is always requested alongside any explicitly named parameters.
        # NOTE: this appends to the caller's list in place; the same
        # visualization_parameters object is later passed to the transform.
        if ('parameters' in visualization_parameters
                and len(visualization_parameters['parameters']) > 0):
            requested = visualization_parameters['parameters']
            if 'time' not in requested:
                requested.append('time')
            query['parameters'] = requested

        # Times are only coerced to int here; no time-base conversion is done
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except (TypeError, ValueError):
                # There are some (rare) situations where the AJAX request has
                # 'null' in the request, e.g.
                # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                # A null raises TypeError and a non-numeric string raises
                # ValueError; both fall back to a stride of 1.
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = int(visualization_parameters['use_direct_access']) == 1

    # get the dataset_id and stream definition associated with the
    # data_product. Need them to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")

    stream_def_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasStreamDefinition, id_only=True)
    if not stream_def_ids:
        raise NotFound('Could not find stream definition associated with data product')
    stream_def_id = stream_def_ids[0]

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(
            ds_ids[0], query=query, delivery_format=stream_def_id)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(
            ds_ids[0], query=query, delivery_format=stream_def_id)

    # If there is no data, return the empty table
    if retrieved_granule is None:
        return empty_gdt.ToJSon()

    # send the granule through the transform to get the google datatable
    gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
        'google_dt', id_only=True)
    gdt_stream_def = self.clients.pubsub_management.create_stream_definition(
        'gdt', parameter_dictionary_id=gdt_pdict_id)
    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(
        retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
    if gdt_data_granule is None:
        return empty_gdt.ToJSon()

    gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    gdt_components = get_safe(gdt_rdt, 'google_dt_components')
    gdt_component = gdt_components[0]
    temp_gdt_description = gdt_component["data_description"]
    temp_gdt_content = gdt_component["data_content"]

    # Rebuild the description with a 'datetime' time column first. The first
    # incoming entry is skipped, as are None entries and any entry named
    # 'time'; only 3- and 4-element descriptions are carried over.
    gdt_description = [('time', 'datetime', 'time')]
    for column in temp_gdt_description[1:]:
        if column is None or column[0] == 'time':
            continue
        if len(column) in (3, 4):
            gdt_description.append(tuple(column))

    # Rebuild the content, turning the leading 'float' time into a datetime
    gdt_content = []
    for row in temp_gdt_content:
        # sometimes there are inexplicable empty tuples in the content. Drop them
        if len(row) == 0:
            continue
        converted_row = [datetime.fromtimestamp(row[0])]
        converted_row.extend(row[1:])
        gdt_content.append(converted_row)

    # now generate the Google datatable out of the description and content
    gdt = gviz_api.DataTable(gdt_description)
    gdt.LoadData(gdt_content)

    # return the json version of the table
    return gdt.ToJSon()
def retrieve_stream(dataset_id="", query=None):
    """Return the result of DataRetrieverService.retrieve_oob for a dataset.

    @param dataset_id forwarded as the first positional argument
    @param query forwarded as the second positional argument
    @retval whatever retrieve_oob returns
    """
    retrieved = DataRetrieverService.retrieve_oob(dataset_id, query)
    return retrieved
def get_visualization_data(self, data_product_id='', visualization_parameters=None, callback='', tqx=""):
    """Retrieve the data of a data product as a Google DataTable JSON response.

    @param data_product_id str
    @param visualization_parameters dict-like (used as a dict below); the keys
           read here are 'parameters', 'start_time', 'end_time',
           'stride_time' and 'use_direct_access'
    @param callback str name used to wrap the response; '' returns the bare
           response
    @param tqx str of ';'-separated 'key:value' pairs; only 'reqId' is read
    @retval jsonp_visualization_data str, or None when no data is available
    @throws BadRequest if data_product_id is missing
    @throws NotFound if no dataset is associated with the data product
    """
    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # If a reqId was passed in tqx, extract it. partition() tolerates empty
    # segments (trailing ';') and values that themselves contain ':'.
    reqId = 0
    if tqx:
        for param in tqx.split(";"):
            key, separator, value = param.partition(":")
            if separator and key == 'reqId':
                reqId = value

    # The query stays None unless visualization parameters were supplied
    query = None
    if visualization_parameters:
        query = {}

        # 'time' is always requested alongside any explicitly named parameters.
        # NOTE: this appends to the caller's list in place; the same
        # visualization_parameters object is later passed to the transform.
        if ('parameters' in visualization_parameters
                and len(visualization_parameters['parameters']) > 0):
            requested = visualization_parameters['parameters']
            if 'time' not in requested:
                requested.append('time')
            query['parameters'] = requested

        # Times are only coerced to int here; no time-base conversion is done
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time: default to 1 when it is absent, null or non-numeric.
        # (The absent case used to run a comparison instead of an assignment,
        # which raised KeyError and never set the default.)
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except (TypeError, ValueError):
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = int(visualization_parameters['use_direct_access']) == 1

    # get the dataset_id associated with the data_product. Need it to do the
    # data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

    if retrieved_granule is None:
        return None

    # send the granule through the transform to get the google datatable
    gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
        'google_dt', id_only=True)
    gdt_stream_def = self.clients.pubsub_management.create_stream_definition(
        'gdt', parameter_dictionary_id=gdt_pdict_id)
    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(
        retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
    if gdt_data_granule is None:
        return None

    gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    gdt_components = get_safe(gdt_rdt, 'google_dt_components')
    gdt_component = gdt_components[0]
    temp_gdt_description = gdt_component["data_description"]
    temp_gdt_content = gdt_component["data_content"]

    # Rebuild the description with a 'datetime' time column first. The first
    # incoming entry is skipped, as are None entries and any entry named 'time'.
    gdt_description = [('time', 'datetime', 'time')]
    for column in temp_gdt_description[1:]:
        if column is not None and column[0] != 'time':
            gdt_description.append((column[0], column[1], column[2]))

    # Rebuild the content, turning the leading 'float' time into a datetime
    gdt_content = []
    for row in temp_gdt_content:
        # sometimes there are inexplicable empty tuples in the content. Drop them
        if len(row) == 0:
            continue

        converted_row = [datetime.fromtimestamp(row[0])]
        for idx in range(1, len(row)):
            cell = row[idx]
            # some silly numpy format won't go away so need to cast numbers to
            # floats. NOTE(review): this assumes row positions line up with
            # gdt_description, which holds only when no description entry was
            # dropped above - confirm against the transform's output.
            if gdt_description[idx][1] == 'number':
                if cell is None:
                    converted_row.append(0.0)
                else:
                    # Precision hardcoded for now. Needs to be on a per
                    # parameter basis
                    converted_row.append(round(float(cell), 5))
            else:
                converted_row.append(cell)

        gdt_content.append(converted_row)

    # now generate the Google datatable out of the description and content
    gdt = gviz_api.DataTable(gdt_description)
    gdt.LoadData(gdt_content)

    # return the json version of the table, wrapped in the callback if given
    if callback == '':
        return gdt.ToJSonResponse(req_id=reqId)
    return callback + "(\"" + gdt.ToJSonResponse(req_id=reqId) + "\")"
def _get_highcharts_data(self, data_product_id='', visualization_parameters=None):
    """Retrieve the data of a data product as a JSON list of chart series.

    @param data_product_id str
    @param visualization_parameters dict-like (used as a dict below); the keys
           read here are 'parameters', 'start_time', 'end_time',
           'stride_time' and 'use_direct_access'
    @retval jsonp_visualization_data str
    @throws BadRequest if data_product_id is missing
    @throws NotFound if no dataset or stream definition is associated with
            the data product
    """
    # Serialized as an empty JSON list whenever there is no data to return
    empty_hc = []

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # The query stays None unless visualization parameters were supplied
    query = None
    if visualization_parameters:
        query = {}

        # 'time' is always requested alongside any explicitly named parameters.
        # NOTE: this appends to the caller's list in place; the same
        # visualization_parameters object is later passed to the transform.
        if ('parameters' in visualization_parameters
                and len(visualization_parameters['parameters']) > 0):
            requested = visualization_parameters['parameters']
            if 'time' not in requested:
                requested.append('time')
            query['parameters'] = requested

        # Times are only coerced to int here; no time-base conversion is done
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except (TypeError, ValueError):
                # There are some (rare) situations where the AJAX request has
                # 'null' in the request, e.g.
                # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                # A null raises TypeError and a non-numeric string raises
                # ValueError; both fall back to a stride of 1.
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = int(visualization_parameters['use_direct_access']) == 1

    # get the dataset_id and stream definition associated with the
    # data_product. Need them to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound(
            "Could not find dataset associated with data product")

    stream_def_ids, _ = self.clients.resource_registry.find_objects(
        data_product_id, PRED.hasStreamDefinition, id_only=True)
    if not stream_def_ids:
        raise NotFound(
            'Could not find stream definition associated with data product'
        )
    stream_def_id = stream_def_ids[0]

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(
            ds_ids[0], query=query, delivery_format=stream_def_id)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(
            ds_ids[0], query=query, delivery_format=stream_def_id)

    # If there is no data, return an empty list
    if retrieved_granule is None:
        return simplejson.dumps(empty_hc)

    # send the granule through the transform to get the chart data
    hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
        'highcharts', id_only=True)
    hc_stream_def = self.clients.pubsub_management.create_stream_definition(
        'HighCharts_out', parameter_dictionary_id=hc_pdict_id)
    hc_data_granule = VizTransformHighChartsAlgorithm.execute(
        retrieved_granule,
        params=hc_stream_def,
        config=visualization_parameters)

    if hc_data_granule is None:
        return simplejson.dumps(empty_hc)

    hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)

    # Copy every series into a plain dict, converting its "data" entry with
    # .tolist() (presumably a numpy array, per the original note) so that the
    # result can be serialized to JSON
    hc_data_np = get_safe(hc_rdt, "hc_data")[0]
    hc_data = []
    for series in hc_data_np:
        plain_series = {}
        for key in series:
            value = series[key]
            plain_series[key] = value.tolist() if key == "data" else value
        hc_data.append(plain_series)

    # return the json version of the series list
    return json.dumps(hc_data)
def retrieve_stream(dataset_id='', query=None):
    """Return the result of DataRetrieverService.retrieve_oob for a dataset.

    @param dataset_id forwarded as the first positional argument
    @param query forwarded as the second positional argument
    @retval whatever retrieve_oob returns
    """
    retrieved = DataRetrieverService.retrieve_oob(dataset_id, query)
    return retrieved