def _search_and_cache(tag_ids):
    """
    Return the SearchCache of objects tagged with every tag in tag_ids.

    A stored SearchCache whose key matches is returned as is. Otherwise a
    new one is created, linked to the given tags, and filled with raw SQL:
      * one tag: every tagged item carrying that tag, except items whose
        content type is SearchCache itself;
      * several tags: the elements of the cache for tag_ids[:-1] (built
        recursively) that also carry the last tag.

    Arguments:
        tag_ids == non-empty list of Tag IDs. An empty list raises
                   IndexError, and only after the new cache row has
                   already been saved.

    Note:
        To make this method more efficient, tags should be sorted by some
        of its attributes (e.g. by id itself). Presumably a consistent
        order lets the recursive lookups on tag_ids[:-1] reuse caches
        that already exist.
    """
    key = _normal_search_key(tag_ids)
    try:
        return SearchCache.objects.get(key=key)
    except SearchCache.DoesNotExist:
        # Only a genuine cache miss falls through; any other error (e.g. a
        # database failure) propagates instead of being mistaken for one.
        pass

    cache = SearchCache(key=key)
    cache.save()
    tags = Tag.objects.filter(id__in=tag_ids)
    cache.tags.set(*tags)  # Do not use set_tags() here.

    tag_id = tag_ids[-1]
    if len(tag_ids) > 1:
        # Narrow the result of the shorter search down by the last tag.
        recursion = _search_and_cache(tag_ids[:-1])
        query = 'INSERT INTO search_searchcacheelement (object_id, content_type_id, cache_id)' \
                ' SELECT A.object_id, A.content_type_id, %d FROM search_searchcacheelement AS A' \
                ' INNER JOIN tags_taggeditem AS B ON (A.object_id = B.object_id AND A.content_type_id = B.content_type_id)' \
                ' WHERE A.cache_id=%d AND B.tag_id=%d;' \
                % (cache.id, recursion.id, tag_id)
    else:
        # search shouldn't include itself
        cache_content_type = ContentType.objects.get_for_model(SearchCache)
        query = 'INSERT INTO search_searchcacheelement (object_id, content_type_id, cache_id)' \
                ' SELECT A.object_id, A.content_type_id, %d FROM tags_taggeditem AS A' \
                ' WHERE A.tag_id=%d AND A.content_type_id != %d;' \
                % (cache.id, tag_id, cache_content_type.id)

    cursor = connection.cursor()
    cursor.execute(query)
    transaction.commit_unless_managed()
    return cache
def reverse_search(input):
    """
    Find all objects whose tags are a subset of the given tags.

    Returns None when no tag is given, or when get_available_tags()
    returns a different number of tags than were given (presumably
    because some of them do not exist). Otherwise returns a SearchCache
    object, reusing a stored one when its key matches.

    Example:
        reverse_search(['imo', '1997'])
        --> SearchCache pointing to:
            --> Folder with filter tag 'imo'
            --> Folder with filter tag 'imo', '1997'
            (...)

    Examples of non matching objects:
        --> Folder with filter tag 'shortlist', '1997'
        --> Task with tags 'imo', '1997', 'geo'
    """
    requested = split_tags(input)
    if not requested:
        # No tag given: don't just return all objects.
        return None

    tags = get_available_tags(requested)
    if len(requested) != len(tags):
        return None

    tag_ids = [tag.id for tag in tags]
    key = _reverse_search_key(tag_ids)

    try:
        existing = SearchCache.objects.get(key=key)
    except SearchCache.DoesNotExist:
        pass
    else:
        return existing

    # Nothing stored yet: create the cache object first.
    cache = SearchCache(key=key)
    cache.save()
    cache.tags.set(*tags)  # Do not use set_tags here.

    # Select every (object, tag) pair of each object that carries at least
    # one of the given tags, skipping SearchCache's own content type.
    excluded_type_id = ContentType.objects.get_for_model(SearchCache).id
    id_list = ','.join(str(tag_id) for tag_id in tag_ids)
    query = 'SELECT DISTINCT A.object_id, A.content_type_id, A.tag_id FROM tags_taggeditem A' \
            ' INNER JOIN tags_taggeditem B' \
            ' ON (A.object_id = B.object_id AND A.content_type_id = B.content_type_id)' \
            ' WHERE B.tag_id IN (%s) AND B.content_type_id != %d' \
            % (id_list, excluded_type_id)

    # Fetch manually.
    cursor = connection.cursor()
    cursor.execute(query)

    # Collect the full tag set of each object, keyed by
    # (object_id, content_type_id).
    tags_by_object = defaultdict(set)
    for object_id, content_type_id, tag_id in cursor.fetchall():
        tags_by_object[(object_id, content_type_id)].add(tag_id)

    # Keep only the objects whose tags all belong to the given tags.
    allowed_ids = set(tag_ids)
    matching_elements = (
        SearchCacheElement(object_id=object_id,
                           content_type_id=content_type_id,
                           cache=cache)
        for (object_id, content_type_id), object_tags
        in tags_by_object.iteritems()
        if object_tags.issubset(allowed_ids)
    )
    SearchCacheElement.objects.bulk_create(matching_elements)
    return cache