def _do_query_rewrite(self, q, fields_avail, pk):
    """
    generates a nested query that:
    * requests entity by PK
    * projects the missing fields

    q            -- query object; its `filters` and `mongo_query`
                    attributes are read
    fields_avail -- names of the fields regarded as available; filters
                    on these fields stay in the first query
    pk           -- lookup key used as the only spec key of the
                    second (nested) query

    returns the rendered 'cms_query_rewrite' template, or None if the
    nested query would be selected by exactly the same spec keys as `q`
    """
    # if the queries are the same, the rewrite is unsuccessful.
    # Checked before anything else so no query object is built in vain.
    spec_keys = set(q.mongo_query.get('spec') or {})
    if spec_keys == set([pk]):
        return None

    # split the filters in one pass: those on available fields stay in
    # the first query, the rest go to the nested one
    filters_first = []
    filters_nested = []
    for flt in q.filters:
        if get_filter_name(flt) in fields_avail:
            filters_first.append(flt)
        else:
            filters_nested.append(flt)

    # the first query must also project the PK; duplicates are dropped
    # while keeping the original order, so the rendered query is stable
    # between runs (iteration order of a set is arbitrary)
    grep_first = []
    for flt in filters_first + [pk]:
        if flt not in grep_first:
            grep_first.append(flt)

    q1_mongo = q.mongo_query.copy()
    q1_mongo['filters'] = {'grep': grep_first}

    q2_mongo = q.mongo_query.copy()
    # make DASQuery pass dataset wildcard check
    pk_to_replace = '/a/b/c' if pk == 'dataset.name' else '<PK>'
    q2_mongo['spec'] = {pk: pk_to_replace}
    q2_mongo['filters'] = {'grep': filters_nested}

    q1 = DASQuery(q1_mongo)
    q2 = DASQuery(q2_mongo)

    return self.render_template(
        'cms_query_rewrite',
        q1_str=self.convert2dasql(q1),
        q2_str=self.convert2dasql(q2),
        pk=pk,
        # user replaces this with PK from 1st query
        pk_to_replace=pk_to_replace,
        cli_docs=self.CLI_LINK)
def check_fields(self, dasquery):
    """
    if not all of required fields are available,
    check if they could be obtained through a nested query on PK.
    if so return a message to user.

    e.g. transforms:
        dataset dataset=/*/*reco*/* | grep dataset.name, dataset.nevents>3
    into:
        dataset dataset=/*/*reco*/* | grep dataset.name
    and:
        dataset dataset=<PK> | grep dataset.nevents>3

    returns:
    * None if the query has no filters, or if every filtered field is
      already available (no rewrite needed)
    * the message produced by _do_query_rewrite for the first PK that
      yields a non-empty one
    * False if no PK led to a rewrite
    """
    DEBUG = False
    if not dasquery.filters:
        return None

    fields_available = set(self.get_fields_in_query_result(dasquery))
    # TODO: shall we care about compound lookups?
    entity = dasquery.mongo_query['fields'][0]

    q_fieldset = set(get_filter_name(field) for field in dasquery.filters)
    q_fields_missing = q_fieldset - fields_available
    if not q_fields_missing:
        # no rewrite needed
        return None

    if DEBUG:
        pprint(['DASQUERY:', dasquery.query])
        pprint(['RESULT ENTITY:', entity])
        pprint(['FILTERS FOR DAS QUERY:', dasquery.filters])
        pprint(['PARAMS FOR DAS QUERY:', dasquery.params()])
        pprint(['Feilds available in Current Query:', fields_available])

    # only PKs that the current query already returns can be used
    pks_of_entity = list(
        set(self.cms_rep.dasmapping.mapkeys(entity)) & fields_available)

    # try any of PKs available
    for pk in pks_of_entity:
        # list of fields for given entity retrieved by PK
        # TODO: this is not differentiated by PK yet in schema adapter...
        fields_in_nested_by_pk = \
            self.schema_adapter.get_field_list_for_entity_by_pk(entity, pk)

        # rewritable only if all fields that are still missing
        # are available in query='entity PK=pk'
        # (q_fields_missing is known to be non-empty at this point)
        if not q_fields_missing.issubset(set(fields_in_nested_by_pk)):
            continue

        result = self._do_query_rewrite(dasquery, fields_available, pk)
        if result:
            # diagnostics go through the DEBUG switch like the rest
            if DEBUG:
                print('Rewrite OK')
            return result

    return False
def check_fields(self, dasquery):
    """
    if not all of required fields are available,
    check if they could be obtained through a nested query on PK.
    if so return a message to user.

    e.g. transforms:
        dataset dataset=/*/*reco*/* | grep dataset.name, dataset.nevents>3
    into:
        dataset dataset=/*/*reco*/* | grep dataset.name
    and:
        dataset dataset=<PK> | grep dataset.nevents>3

    returns:
    * None if the query has no filters, or if every filtered field is
      already available (no rewrite needed)
    * the message produced by _do_query_rewrite for the first PK that
      yields a non-empty one
    * False if no PK led to a rewrite
    """
    DEBUG = False
    if not dasquery.filters:
        return None

    fields_available = set(self.get_fields_in_query_result(dasquery))
    # TODO: shall we care about compound lookups?
    entity = dasquery.mongo_query['fields'][0]

    q_fieldset = set(get_filter_name(field) for field in dasquery.filters)
    q_fields_missing = q_fieldset - fields_available
    if not q_fields_missing:
        # no rewrite needed
        return None

    if DEBUG:
        pprint(['DASQUERY:', dasquery.query])
        pprint(['RESULT ENTITY:', entity])
        pprint(['FILTERS FOR DAS QUERY:', dasquery.filters])
        pprint(['PARAMS FOR DAS QUERY:', dasquery.params()])
        pprint(['Feilds available in Current Query:', fields_available])

    # only PKs that the current query already returns can be used
    pks_of_entity = list(
        set(self.cms_rep.dasmapping.mapkeys(entity)) & fields_available)

    # try any of PKs available
    for pk in pks_of_entity:
        # list of fields for given entity retrieved by PK
        # TODO: this is not differentiated by PK yet in schema adapter...
        fields_in_nested_by_pk = \
            self.schema_adapter.get_field_list_for_entity_by_pk(entity, pk)

        # rewritable only if all fields that are still missing
        # are available in query='entity PK=pk'
        # (q_fields_missing is known to be non-empty at this point)
        if not q_fields_missing.issubset(set(fields_in_nested_by_pk)):
            continue

        result = self._do_query_rewrite(dasquery, fields_available, pk)
        if result:
            # diagnostics go through the DEBUG switch like the rest
            if DEBUG:
                print('Rewrite OK')
            return result

    return False
def _do_query_rewrite(self, q, fields_avail, pk):
    """
    generates a nested query that:
    * requests entity by PK
    * projects the missing fields

    q            -- query object; its `filters` and `mongo_query`
                    attributes are read
    fields_avail -- names of the fields regarded as available; filters
                    on these fields stay in the first query
    pk           -- lookup key used as the only spec key of the
                    second (nested) query

    returns the rendered 'cms_query_rewrite' template, or None if the
    nested query would be selected by exactly the same spec keys as `q`
    """
    # if the queries are the same, the rewrite is unsuccessful.
    # Checked before anything else so no query object is built in vain.
    spec_keys = set(q.mongo_query.get('spec') or {})
    if spec_keys == set([pk]):
        return None

    # split the filters in one pass: those on available fields stay in
    # the first query, the rest go to the nested one
    filters_first = []
    filters_nested = []
    for flt in q.filters:
        if get_filter_name(flt) in fields_avail:
            filters_first.append(flt)
        else:
            filters_nested.append(flt)

    # the first query must also project the PK; duplicates are dropped
    # while keeping the original order, so the rendered query is stable
    # between runs (iteration order of a set is arbitrary)
    grep_first = []
    for flt in filters_first + [pk]:
        if flt not in grep_first:
            grep_first.append(flt)

    q1_mongo = q.mongo_query.copy()
    q1_mongo['filters'] = {'grep': grep_first}

    q2_mongo = q.mongo_query.copy()
    # make DASQuery pass dataset wildcard check
    pk_to_replace = '/a/b/c' if pk == 'dataset.name' else '<PK>'
    q2_mongo['spec'] = {pk: pk_to_replace}
    q2_mongo['filters'] = {'grep': filters_nested}

    q1 = DASQuery(q1_mongo)
    q2 = DASQuery(q2_mongo)

    return self.render_template(
        'cms_query_rewrite',
        q1_str=self.convert2dasql(q1),
        q2_str=self.convert2dasql(q2),
        pk=pk,
        # user replaces this with PK from 1st query
        pk_to_replace=pk_to_replace,
        cli_docs=self.CLI_LINK)