def test_get_agg_topic_map(
        get_container_func,
        ports_config,
        query,
        agg_topics_collection,
        expected_agg_topic_map,
):
    """Seed a mongodb container via ``query`` and verify that
    ``mongoutils.get_agg_topic_map`` returns the expected map."""
    get_container, image = get_container_func
    with get_container(image, ports=ports_config["ports"],
                       env=ENV_MONGODB) as container:
        # Wait until mongod accepts connections before seeding data.
        wait_for_connection(container)
        query_database(container, query)
        client = mongo_client(ports_config["port_on_host"])
        actual_agg_topic_map = mongoutils.get_agg_topic_map(
            client, agg_topics_collection)
        assert actual_agg_topic_map == expected_agg_topic_map
def historian_setup(self):
    """Connect to mongo, ensure the data collection index exists and
    load the topic, meta and aggregate-topic lookup maps."""
    _log.debug("HISTORIAN SETUP")
    self._client = mongoutils.get_mongo_client(self._connection_params)
    database = self._client.get_default_database()

    # Unique compound index on (topic_id, ts) for the raw data collection.
    database[self._data_collection].create_index(
        [('topic_id', pymongo.DESCENDING), ('ts', pymongo.DESCENDING)],
        unique=True,
        background=True)

    topic_maps = mongoutils.get_topic_map(self._client,
                                          self._topic_collection)
    self._topic_id_map, self._topic_name_map = topic_maps
    self._load_meta_map()

    # The aggregate-topic collection is optional; fall back to an
    # empty map when it does not exist.
    if self._agg_topic_collection not in database.collection_names():
        _log.debug("no agg topics to load")
        self._agg_topic_id_map = {}
    else:
        _log.debug("found agg_topics_collection ")
        self._agg_topic_id_map = mongoutils.get_agg_topic_map(
            self._client, self._agg_topic_collection)
def historian_setup(self):
    """
    Set up the mongo connection and the in-memory lookup maps.

    Creates the mongo client with a minimum connection pool of 10,
    ensures the indexes on the data, hourly and daily collections
    exist (all index creation is skipped when the historian is
    readonly), and loads the topic id/name, meta and aggregate-topic
    maps.
    """
    _log.debug("HISTORIAN SETUP")
    self._client = mongoutils.get_mongo_client(self._connection_params,
                                               minPoolSize=10)
    _log.info("Mongo client created with min pool size {}".format(
        self._client.min_pool_size))
    db = self._client.get_default_database()
    col_list = db.collection_names()
    create_index1 = True  # compound (topic_id, ts) index on data
    create_index2 = True  # single (ts) index on data
    if self._readonly:
        # readonly connection must not attempt writes, including
        # index creation
        create_index1 = False
        create_index2 = False
    elif self._data_collection in col_list:
        # if data collection exists check if necessary indexes exist
        index_info = db[self._data_collection].index_information()
        # .values() instead of the py2-only .viewvalues(): identical
        # iteration semantics, and also runs on python 3
        index_list = [value['key'] for value in index_info.values()]
        index_new_list = []
        for index in index_list:
            keys = set()
            for key in index:
                # key is a (field, direction) pair; only the field
                # names matter for detecting an existing index
                keys.add(key[0])
            index_new_list.append(keys)
        # BUGFIX: original format string had a single {} for two
        # arguments, so the formatted list was dropped from the log
        _log.debug("Index list got from db is {}. formatted list is "
                   "{}".format(index_list, index_new_list))
        i1 = {'topic_id', 'ts'}
        if i1 in index_new_list:
            create_index1 = False
        i2 = {'ts'}
        if i2 in index_new_list:
            create_index2 = False
    # create data indexes if needed
    if create_index1:
        db[self._data_collection].create_index(
            [('topic_id', pymongo.DESCENDING), ('ts', pymongo.DESCENDING)],
            unique=True, background=True)
    if create_index2:
        db[self._data_collection].create_index(
            [('ts', pymongo.DESCENDING)], background=True)
    self._topic_id_map, self._topic_name_map = \
        mongoutils.get_topic_map(self._client, self._topic_collection)
    self._load_meta_map()
    # reuse col_list fetched above instead of a second round trip to
    # the server; no collection created since then can be the agg one
    if self._agg_topic_collection in col_list:
        _log.debug("found agg_topics_collection ")
        self._agg_topic_id_map = mongoutils.get_agg_topic_map(
            self._client, self._agg_topic_collection)
    else:
        _log.debug("no agg topics to load")
        self._agg_topic_id_map = {}
    if not self._readonly:
        # rollup collections: unique (topic_id, ts) plus an index on
        # last_updated_data
        db[self.HOURLY_COLLECTION].create_index(
            [('topic_id', pymongo.DESCENDING), ('ts', pymongo.DESCENDING)],
            unique=True, background=True)
        db[self.HOURLY_COLLECTION].create_index(
            [('last_updated_data', pymongo.DESCENDING)], background=True)
        db[self.DAILY_COLLECTION].create_index(
            [('topic_id', pymongo.DESCENDING), ('ts', pymongo.DESCENDING)],
            unique=True, background=True)
        db[self.DAILY_COLLECTION].create_index(
            [('last_updated_data', pymongo.DESCENDING)], background=True)
def query_historian(self, topic, start=None, end=None, agg_type=None,
                    agg_period=None, skip=0, count=None,
                    order="FIRST_TO_LAST"):
    """
    Returns the results of the query from the mongo database.

    This historian stores data to the nearest second. It will not
    store subsecond resolution data. This is an optimisation based
    upon storage for the database.

    Queries either the raw data collection, an aggregate collection
    named ``<agg_type>_<agg_period>`` (when both aggregation
    parameters are given), or a rolled-up data collection chosen by
    :py:meth:`verify_use_of_rolledup_data`. Per-topic queries are
    fanned out to worker threads via ``self.query_topic_data``.

    Please see
    :py:meth:`volttron.platform.agent.base_historian.BaseQueryHistorianAgent.query_historian`
    for input parameters and return value details
    """
    # start_time is only used for elapsed-time debug logging below.
    start_time = datetime.utcnow()
    collection_name = self._data_collection
    use_rolled_up_data = False
    query_start = start
    query_end = end

    # Normalize topic to a list; any other type yields an empty list
    # and hence an empty {} result.
    topics_list = []
    if isinstance(topic, str):
        topics_list.append(topic)
    elif isinstance(topic, list):
        topics_list = topic

    if agg_type and agg_period:
        # query aggregate data collection instead
        collection_name = agg_type + "_" + agg_period
    else:
        # Raw-data query: check whether a rolled-up collection can
        # serve this time range instead. query_start/query_end may be
        # adjusted accordingly; the originals (start/end) are still
        # passed to the workers alongside them.
        name, query_start, query_end = \
            self.verify_use_of_rolledup_data(start, end, topics_list)
        if name:
            collection_name = name
            use_rolled_up_data = True
    _log.debug("Using collection {} for query:".format(collection_name))

    multi_topic_query = len(topics_list) > 1
    topic_ids = []
    # Maps ObjectId(topic_id) -> requested topic name, so workers can
    # key results by the caller-supplied name.
    id_name_map = {}
    for topic in topics_list:
        # find topic if based on topic table entry
        topic_id = self._topic_id_map.get(topic.lower(), None)
        if agg_type:
            agg_type = agg_type.lower()
            # replace id from aggregate_topics table
            topic_id = self._agg_topic_id_map.get(
                (topic.lower(), agg_type, agg_period), None)
            if topic_id is None:
                # load agg topic id again as it might be a newly
                # configured aggregation
                self._agg_topic_id_map = mongoutils.get_agg_topic_map(
                    self._client, self._agg_topic_collection)
                topic_id = self._agg_topic_id_map.get(
                    (topic.lower(), agg_type, agg_period), None)
        if topic_id:
            topic_ids.append(topic_id)
            id_name_map[ObjectId(topic_id)] = topic
        else:
            _log.warn('No such topic {}'.format(topic))

    if not topic_ids:
        # None of the requested topics exist.
        return {}
    else:
        _log.debug("Found topic id for {} as {}".format(
            topics_list, topic_ids))

    # Sort direction: 1 = ascending (FIRST_TO_LAST), -1 = descending.
    order_by = 1
    if order == 'LAST_TO_FIRST':
        order_by = -1
    # Default page size when caller gives no count.
    if count is None:
        count = 100
    skip_count = 0
    if skip > 0:
        skip_count = skip

    # Shared result dict mutated by the worker threads.
    values = defaultdict(list)
    pool = ThreadPool(5)
    try:
        # Query for one topic at a time in a loop instead of topic_id
        # $in in order to apply $limit to each topic searched instead
        # of the combined result
        _log.debug("Spawning threads")
        pool.map(self.query_topic_data,
                 zip(topic_ids, repeat(id_name_map),
                     repeat(collection_name), repeat(start), repeat(end),
                     repeat(query_start), repeat(query_end), repeat(count),
                     repeat(skip_count), repeat(order_by),
                     repeat(use_rolled_up_data), repeat(values)))
        pool.close()
        pool.join()
        _log.debug("Time taken to load all values for all topics"
                   " {}".format(datetime.utcnow() - start_time))
        # _log.debug("Results got {}".format(values))
        return self.add_metadata_to_query_result(agg_type,
                                                 multi_topic_query,
                                                 topic, topic_ids, values)
    finally:
        # Ensure the pool is shut down even if pool.map raises;
        # close() is a no-op if already closed above.
        pool.close()
def get_agg_topic_map(self):
    """Load and return the aggregate topic map from the configured
    aggregate-topic collection (keys are presumably
    (topic, agg_type, agg_period) tuples — see callers)."""
    client = self.dbclient
    collection = self._agg_topic_collection
    return mongoutils.get_agg_topic_map(client, collection)
def query_historian(self, topic, start=None, end=None, agg_type=None,
                    agg_period=None, skip=0, count=None,
                    order="FIRST_TO_LAST"):
    """
    Returns the results of the query from the mongo database.

    This historian stores data to the nearest second. It will not
    store subsecond resolution data. This is an optimisation based
    upon storage for the database.

    Queries either the raw data collection or, when both ``agg_type``
    and ``agg_period`` are given, an aggregate collection named
    ``<agg_type>_<agg_period>``. One aggregation pipeline is run per
    matched topic so that skip/limit apply per topic.

    Please see
    :py:meth:`volttron.platform.agent.base_historian.BaseQueryHistorianAgent.query_historian`
    for input parameters and return value details
    """
    # start_time is only used for elapsed-time debug logging below.
    start_time = datetime.utcnow()
    collection_name = self._data_collection
    if agg_type and agg_period:
        # query aggregate data collection instead
        collection_name = agg_type + "_" + agg_period

    # Normalize topic to a list; any other type yields an empty list
    # and hence an empty {} result.
    topics_list = []
    if isinstance(topic, str):
        topics_list.append(topic)
    elif isinstance(topic, list):
        topics_list = topic

    topic_ids = []
    # Maps ObjectId(topic_id) -> requested topic name for keying the
    # result dict.
    id_name_map = {}
    for topic in topics_list:
        # find topic if based on topic table entry
        topic_id = self._topic_id_map.get(topic.lower(), None)
        if agg_type:
            agg_type = agg_type.lower()
            # replace id from aggregate_topics table
            topic_id = self._agg_topic_id_map.get(
                (topic.lower(), agg_type, agg_period), None)
            if topic_id is None:
                # load agg topic id again as it might be a newly
                # configured aggregation
                self._agg_topic_id_map = mongoutils.get_agg_topic_map(
                    self._client, self._agg_topic_collection)
                topic_id = self._agg_topic_id_map.get(
                    (topic.lower(), agg_type, agg_period), None)
        if topic_id:
            topic_ids.append(topic_id)
            id_name_map[ObjectId(topic_id)] = topic
        else:
            _log.warn('No such topic {}'.format(topic))

    if not topic_ids:
        # None of the requested topics exist.
        return {}
    else:
        _log.debug("Found topic id for {} as {}".format(
            topics_list, topic_ids))

    multi_topic_query = len(topic_ids) > 1
    db = self._client.get_default_database()

    # Build the timestamp range filter from start/end (inclusive).
    ts_filter = {}
    # Sort direction: 1 = ascending (FIRST_TO_LAST), -1 = descending.
    order_by = 1
    if order == 'LAST_TO_FIRST':
        order_by = -1
    if start is not None:
        ts_filter["$gte"] = start
    if end is not None:
        ts_filter["$lte"] = end
    # Default page size when caller gives no count.
    if count is None:
        count = 100
    skip_count = 0
    if skip > 0:
        skip_count = skip
    find_params = {}
    if ts_filter:
        find_params = {'ts': ts_filter}

    values = defaultdict(list)
    for x in topic_ids:
        # find_params is reused across iterations; only topic_id
        # changes per topic.
        find_params['topic_id'] = ObjectId(x)
        _log.debug("querying table with params {}".format(find_params))
        # NOTE(review): $skip precedes $sort here, so documents are
        # skipped in pre-sort order — confirm this is intended.
        # Timestamps are projected as ISO-8601 strings in UTC.
        pipeline = [{"$match": find_params},
                    {"$skip": skip_count},
                    {"$sort": {"ts": order_by}},
                    {"$limit": count},
                    {"$project": {
                        "_id": 0,
                        "timestamp": {
                            '$dateToString': {
                                'format': "%Y-%m-%dT%H:%M:%S.%L000+00:00",
                                "date": "$ts"}},
                        "value": 1}}]
        _log.debug("pipeline for agg query is {}".format(pipeline))
        cursor = db[collection_name].aggregate(pipeline)
        rows = list(cursor)
        _log.debug("Time after fetch {}".format(
            datetime.utcnow() - start_time))
        for row in rows:
            values[id_name_map[x]].append(
                (row['timestamp'], row['value']))
        _log.debug(
            "Time taken to load into values {}".format(
                datetime.utcnow() - start_time))
    _log.debug(
        "Time taken to load all values {}".format(
            datetime.utcnow() - start_time))

    if len(values) > 0:
        # If there are results add metadata if it is a query on a
        # single
        # topic
        if not multi_topic_query:
            # NOTE: subscripting dict.values() relies on Python 2
            # (py3 returns a non-indexable view).
            values = values.values()[0]
            if agg_type:
                # if aggregation is on single topic find the topic id
                # in the topics table.
                _log.debug("Single topic aggregate query. Try to get "
                           "metadata")
                # topic here is the leaked loop variable == the single
                # requested topic name.
                topic_id = self._topic_id_map.get(topic.lower(), None)
                if topic_id:
                    _log.debug("aggregation of a single topic, "
                               "found topic id in topic map. "
                               "topic_id={}".format(topic_id))
                    metadata = self._topic_meta.get(topic_id, {})
                else:
                    # if topic name does not have entry in topic_id_map
                    # it is a user configured aggregation_topic_name
                    # which denotes aggregation across multiple points
                    metadata = {}
            else:
                # this is a query on raw data, get metadata for
                # topic from topic_meta map
                _log.debug("Single topic regular query. Get "
                           "metadata from meta map")
                metadata = self._topic_meta.get(topic_ids[0], {})
            return {'values': values, 'metadata': metadata}
        else:
            _log.debug("return values without metadata for multi "
                       "topic query")
            return {'values': values}
    else:
        return {}
def query_historian(self, topic, start=None, end=None, agg_type=None,
                    agg_period=None, skip=0, count=None,
                    order="FIRST_TO_LAST"):
    """
    Returns the results of the query from the mongo database.

    This historian stores data to the nearest second. It will not
    store subsecond resolution data. This is an optimisation based
    upon storage for the database.

    Queries either the raw data collection or, when both ``agg_type``
    and ``agg_period`` are given, an aggregate collection named
    ``<agg_type>_<agg_period>``. One aggregation pipeline is run per
    matched topic so that skip/limit apply per topic.

    Please see
    :py:meth:`volttron.platform.agent.base_historian.BaseQueryHistorianAgent.query_historian`
    for input parameters and return value details
    """
    # start_time is only used for elapsed-time debug logging below.
    start_time = datetime.utcnow()
    collection_name = self._data_collection
    if agg_type and agg_period:
        # query aggregate data collection instead
        collection_name = agg_type + "_" + agg_period

    # Normalize topic to a list; any other type yields an empty list
    # and hence an empty {} result.
    topics_list = []
    if isinstance(topic, str):
        topics_list.append(topic)
    elif isinstance(topic, list):
        topics_list = topic

    topic_ids = []
    # Maps ObjectId(topic_id) -> requested topic name for keying the
    # result dict.
    id_name_map = {}
    for topic in topics_list:
        # find topic if based on topic table entry
        topic_id = self._topic_id_map.get(topic.lower(), None)
        if agg_type:
            agg_type = agg_type.lower()
            # replace id from aggregate_topics table
            topic_id = self._agg_topic_id_map.get(
                (topic.lower(), agg_type, agg_period), None)
            if topic_id is None:
                # load agg topic id again as it might be a newly
                # configured aggregation
                self._agg_topic_id_map = mongoutils.get_agg_topic_map(
                    self._client, self._agg_topic_collection)
                topic_id = self._agg_topic_id_map.get(
                    (topic.lower(), agg_type, agg_period), None)
        if topic_id:
            topic_ids.append(topic_id)
            id_name_map[ObjectId(topic_id)] = topic
        else:
            _log.warn('No such topic {}'.format(topic))

    if not topic_ids:
        # None of the requested topics exist.
        return {}
    else:
        _log.debug("Found topic id for {} as {}".format(
            topics_list, topic_ids))

    multi_topic_query = len(topic_ids) > 1
    db = self._client.get_default_database()

    # Build the timestamp range filter from start/end (inclusive).
    ts_filter = {}
    # Sort direction: 1 = ascending (FIRST_TO_LAST), -1 = descending.
    order_by = 1
    if order == 'LAST_TO_FIRST':
        order_by = -1
    if start is not None:
        ts_filter["$gte"] = start
    if end is not None:
        ts_filter["$lte"] = end
    # Default page size when caller gives no count.
    if count is None:
        count = 100
    skip_count = 0
    if skip > 0:
        skip_count = skip
    find_params = {}
    if ts_filter:
        find_params = {'ts': ts_filter}

    values = defaultdict(list)
    for x in topic_ids:
        # find_params is reused across iterations; only topic_id
        # changes per topic.
        find_params['topic_id'] = ObjectId(x)
        _log.debug("querying table with params {}".format(find_params))
        # NOTE(review): $skip precedes $sort here, so documents are
        # skipped in pre-sort order — confirm this is intended.
        # Timestamps are projected as ISO-8601 strings in UTC.
        pipeline = [{"$match": find_params},
                    {"$skip": skip_count},
                    {"$sort": {"ts": order_by}},
                    {"$limit": count},
                    {"$project": {
                        "_id": 0,
                        "timestamp": {
                            '$dateToString': {
                                'format': "%Y-%m-%dT%H:%M:%S.%L000+00:00",
                                "date": "$ts"}},
                        "value": 1}}]
        _log.debug("pipeline for agg query is {}".format(pipeline))
        cursor = db[collection_name].aggregate(pipeline)
        rows = list(cursor)
        _log.debug("Time after fetch {}".format(
            datetime.utcnow() - start_time))
        for row in rows:
            values[id_name_map[x]].append(
                (row['timestamp'], row['value']))
        _log.debug("Time taken to load into values {}".format(
            datetime.utcnow() - start_time))
    _log.debug("Time taken to load all values {}".format(
        datetime.utcnow() - start_time))

    if len(values) > 0:
        # If there are results add metadata if it is a query on a
        # single
        # topic
        if not multi_topic_query:
            # NOTE: subscripting dict.values() relies on Python 2
            # (py3 returns a non-indexable view).
            values = values.values()[0]
            if agg_type:
                # if aggregation is on single topic find the topic id
                # in the topics table.
                _log.debug("Single topic aggregate query. Try to get "
                           "metadata")
                # topic here is the leaked loop variable == the single
                # requested topic name.
                topic_id = self._topic_id_map.get(topic.lower(), None)
                if topic_id:
                    _log.debug("aggregation of a single topic, "
                               "found topic id in topic map. "
                               "topic_id={}".format(topic_id))
                    metadata = self._topic_meta.get(topic_id, {})
                else:
                    # if topic name does not have entry in topic_id_map
                    # it is a user configured aggregation_topic_name
                    # which denotes aggregation across multiple points
                    metadata = {}
            else:
                # this is a query on raw data, get metadata for
                # topic from topic_meta map
                _log.debug("Single topic regular query. Get "
                           "metadata from meta map")
                metadata = self._topic_meta.get(topic_ids[0], {})
            return {'values': values, 'metadata': metadata}
        else:
            _log.debug("return values without metadata for multi "
                       "topic query")
            return {'values': values}
    else:
        return {}