def test12b_count(self):
    "Testing `Count` aggregate use with the `GeoManager` on non geo-fields. See #11087."
    # Exactly one author (Trevor Paglen) has more than one book, and his
    # annotation must be 3 (#11087); repeat via a `GeoValuesQuerySet` (#11489).
    prolific = Author.objects.annotate(num_books=Count('books')).filter(num_books__gt=1)
    self.assertEqual(1, len(prolific))
    self.assertEqual(3, prolific[0].num_books)
    prolific_values = Author.objects.values('name').annotate(
        num_books=Count('books')).filter(num_books__gt=1)
    self.assertEqual(1, len(prolific_values))
    self.assertEqual(3, prolific_values[0]['num_books'])
def test12b_count(self):
    "Testing `Count` aggregate on non geo-fields."
    # Only one author (Trevor Paglen) should match, with a book count of 3
    # (#11087); the same result must hold through values() (#11489).
    authors = Author.objects.annotate(num_books=Count("books")).filter(num_books__gt=1)
    self.assertEqual(1, len(authors))
    self.assertEqual(3, authors[0].num_books)
    by_name = Author.objects.values("name").annotate(
        num_books=Count("books"),
    ).filter(num_books__gt=1)
    self.assertEqual(1, len(by_name))
    self.assertEqual(3, by_name[0]["num_books"])
def test13c_count(self):
    "Testing `Count` aggregate with `.values()`. See #15305."
    # Annotating and then projecting with values() must keep the geometry
    # field usable as a GEOSGeometry.
    annotated = Location.objects.filter(id=5).annotate(num_cities=Count("city"))
    rows = annotated.values("id", "point", "num_cities")
    self.assertEqual(1, len(rows))
    row = rows[0]
    self.assertEqual(2, row["num_cities"])
    self.assertIsInstance(row["point"], GEOSGeometry)
def test13c_count(self):
    "Testing `Count` aggregate with `.values()`. See #15305."
    # One fluent chain: filter -> annotate -> project.
    qs = (Location.objects
          .filter(id=5)
          .annotate(num_cities=Count('city'))
          .values('id', 'point', 'num_cities'))
    self.assertEqual(1, len(qs))
    self.assertEqual(2, qs[0]['num_cities'])
    self.assertIsInstance(qs[0]['point'], GEOSGeometry)
def test12a_count(self):
    "Testing `Count` aggregate on geo-fields."
    # 'Fort Worth' shares its location with Dallas, so the Dallas location
    # must carry a city count of 2.
    dallas = City.objects.get(name='Dallas')
    annotated = Location.objects.annotate(num_cities=Count('city'))
    loc = annotated.get(id=dallas.location.id)
    self.assertEqual(2, loc.num_cities)
def test16_annotated_date_queryset(self):
    "Ensure annotated date querysets work if spatial backend is used. See #14648."
    # sorted() over the queryset replaces the old build-list/list()/.sort()
    # sequence; the queryset is iterated directly, no intermediate list.
    birth_years = sorted(
        dt.year
        for dt in Author.objects.annotate(num_books=Count('books')).dates('dob', 'year')
    )
    self.assertEqual([1950, 1974], birth_years)
def test12a_count(self):
    "Testing `Count` aggregate use with the `GeoManager` on geo-fields."
    # Create a new City, 'Fort Worth', that uses the same location as
    # Dallas. Only the side effect of create() matters, so the previously
    # unused `ftworth` binding is dropped.
    dallas = City.objects.get(name='Dallas')
    City.objects.create(name='Fort Worth', state='TX', location=dallas.location)
    # Count annotation should be 2 for the Dallas location now.
    loc = Location.objects.annotate(num_cities=Count('city')).get(id=dallas.location.id)
    self.assertEqual(2, loc.num_cities)
def test12b_count(self):
    "Testing `Count` aggregate use with the `GeoManager` on non geo-fields. See #11087."
    # Seed Book/Author data for the non-geo models that use GeoManager (#11087).
    tp = Author.objects.create(name='Trevor Paglen')
    for title in (
        'Torture Taxi',
        'I Could Tell You But Then You Would Have to be Destroyed by Me',
        'Blank Spots on the Map',
    ):
        Book.objects.create(title=title, author=tp)
    wp = Author.objects.create(name='William Patry')
    Book.objects.create(title='Patry on Copyright', author=wp)
    # Only Trevor Paglen has more than one book, and his count must be 3.
    # The same check is repeated through a `GeoValuesQuerySet` (#11489).
    prolific = Author.objects.annotate(num_books=Count('books')).filter(num_books__gt=1)
    self.assertEqual(1, len(prolific))
    self.assertEqual(3, prolific[0].num_books)
    prolific_values = Author.objects.values('name').annotate(
        num_books=Count('books')).filter(num_books__gt=1)
    self.assertEqual(1, len(prolific_values))
    self.assertEqual(3, prolific_values[0]['num_books'])
def top_lookups(self, schema_field, count):
    """
    Returns a list of {lookup, count} dictionaries representing the top
    Lookups for this QuerySet.

    `schema_field` supplies the attribute column to aggregate on;
    `count` caps the number of {lookup, count} entries returned.
    Entries with a zero item_count are dropped, and results are ordered
    by descending count.
    """
    # Fully-qualified column name used in the raw SQL fragments below.
    real_name = "db_attribute." + str(schema_field.real_name)
    if schema_field.is_many_to_many_lookup():
        clone = self.prepare_attribute_qs().filter(
            schema__id=schema_field.schema_id)
        # Word-boundary regex match of the lookup id inside the attribute
        # value. NOTE(review): '[[:<:]]'/'[[:>:]]' look like
        # PostgreSQL-specific word-boundary classes — confirm backend.
        clone = clone.extra(where=[
            real_name + " ~ ('[[:<:]]' || db_lookup.id || '[[:>:]]')"
        ])
        # We want to count the current queryset and get a single
        # row for injecting into the subsequent Lookup query, but
        # we don't want Django's aggregation support to
        # automatically group by fields that aren't relevant and
        # would cause multiple rows as a result. So we call
        # `values()' on a field that we're already filtering by,
        # in this case, schema, as essentially a harmless identify
        # function.
        clone = clone.values('schema').annotate(count=Count('schema'))
        # Inject the count subquery as a correlated select on each Lookup.
        qs = Lookup.objects.filter(schema_field__id=schema_field.id)
        qs = qs.extra(select={
            'lookup_id': 'id',
            'item_count': clone.values('count').query
        })
    else:
        # Single-valued lookup: group the attribute rows directly by the
        # raw column and count rows per distinct value.
        qs = self.prepare_attribute_qs().extra(
            select={'lookup_id': real_name})
        # Forcing group_by on the raw column; bypasses the ORM's automatic
        # grouping on the model's fields.
        qs.query.group_by = [real_name]
        qs = qs.values('lookup_id').annotate(item_count=Count('id'))
    # Keep only non-zero counts, sort descending, truncate to `count`.
    ids_and_counts = [(v['lookup_id'], v['item_count']) for v in qs.values(
        'lookup_id', 'item_count').order_by('-item_count')
        if v['item_count']][:count]
    # Resolve the surviving lookup ids to Lookup objects in one query.
    lookup_objs = Lookup.objects.in_bulk([i[0] for i in ids_and_counts])
    return [{
        'lookup': lookup_objs[i[0]],
        'count': i[1]
    } for i in ids_and_counts]
def getSpeciesMetadata(self):
    """ Returns metadata for all species at a specific genus """
    # Group by species_id within this genus; collect geometries, count
    # occurrences, and pick a representative scientific name.
    return self.QuerySet.filter(genus_id=self.id).values('species_id').annotate(
        points=Collect('geom'),
        ab=Count('species_id'),
        name=Min('scientific_name'),
    )
def getFamiliesMetadata(self):
    """ Returns metadata for all families of a specific class """
    # Group by family_id within this order; collect geometries, count
    # occurrences, and pick a representative family name.
    return self.QuerySet.filter(order_id=self.id).values('family_id').annotate(
        points=Collect('geom'),
        ab=Count('family_id'),
        name=Min('family'),
    )
def getOrdersMetadata(self):
    """ Returns metadata for all orders of a specific class """
    # Docstring corrected: the query filters by class_id and groups by
    # order_id, i.e. it yields orders of this class, not the reverse.
    orders = self.QuerySet.filter(class_id=self.id).values('order_id').annotate(
        points=Collect('geom'), ab=Count('order_id'), name=Min('_order'))
    return orders
def get_queryset(self):
    """Approved tags grouped by (dataset, tag), each with its row count."""
    approved = Tag.objects.filter(approved=True)
    return approved.values('dataset', 'tag').annotate(num_tags=Count('id'))
def getClassesMetadata(self):
    """ Returns metadata for all classes of a specific phylum """
    # Docstring corrected: the query filters by phylum_id and groups by
    # class_id, i.e. it yields classes of this phylum, not of an order.
    classes = self.QuerySet.filter(phylum_id=self.id).values('class_id').annotate(
        points=Collect('geom'), ab=Count('class_id'), name=Min('_class'))
    return classes
def getPhylaMetadata(self):
    """ Returns metadata for all phyla of a specific kingdom """
    # Group by phylum_id within this kingdom; collect geometries, count
    # occurrences, and pick a representative phylum name.
    return self.QuerySet.filter(kingdom_id=self.id).values('phylum_id').annotate(
        points=Collect('geom'),
        ab=Count('phylum_id'),
        name=Min('phylum'),
    )
def generate_clusters(cls, precision, operator=None):
    """
    Build and persist clusters at `precision` for the given operator.

    For 1 <= precision < MAX_CLUSTER_PRECISION_SIZE, clusters are merged
    from the already-existing clusters at precision + 1. At exactly
    MAX_CLUSTER_PRECISION_SIZE they are built directly from base stations.

    Raises ValueError when clusters already exist for this
    precision/operator pair, when no input rows are found, or when
    `precision` is outside [1, MAX_CLUSTER_PRECISION_SIZE].
    """
    if not cls.objects.filter(precision=precision, operator=operator).exists():
        # Generate clusters from smaller clusters
        if 1 <= precision < MAX_CLUSTER_PRECISION_SIZE:
            print("Generating clusters...")
            # Get smaller clusters and annotate the new geohash for the bigger clusters
            smaller_precision = precision + 1
            smaller_clusters = cls.objects.filter(precision=smaller_precision, operator=operator)\
                .annotate(bigger_geohash=GeoHash('point', precision=precision))
            # Group by bigger geohash
            clusters_hashes = smaller_clusters.values(
                'bigger_geohash').distinct()
            total = clusters_hashes.count()
            if not total:
                raise ValueError(
                    "No clusters found for precision {}".format(precision + 1))
            print("Saving data for {} clusters...".format(total))
            loop_counter = 0
            percentage = 0
            cluster_array = []  # batch buffer flushed every DATABASE_COMMIT_SIZE
            for cluster_dict in clusters_hashes:
                geohash = cluster_dict['bigger_geohash']
                # Get data from smaller clusters
                sub_clusters = smaller_clusters.filter(
                    bigger_geohash=geohash).values('point', 'count', 'data')
                # Total point count of the merged cluster.
                count = reduce((lambda acc, cl: acc + cl['count']), sub_clusters, 0)
                # Weighted centroid: each sub-cluster weighted by its count.
                point = Point(
                    reduce((lambda acc, cl: acc + (cl['point'].x * float(cl['count']))), sub_clusters, 0.0) / float(count),
                    reduce((lambda acc, cl: acc + (cl['point'].y * float(cl['count']))), sub_clusters, 0.0) / float(count))
                # Data is kept only for singleton clusters.
                data = '' if count != 1 else sub_clusters[0]['data']
                cluster = cls(point=point, precision=precision, count=count,
                              data=data, operator=operator)
                cluster_array.append(cluster)
                if len(cluster_array) >= DATABASE_COMMIT_SIZE:
                    cls.objects.bulk_create(cluster_array)
                    cluster_array = []
                loop_counter += 1
                # Print progress only when the integer percentage advances.
                prev_percentage = percentage
                percentage = 100 * loop_counter // total
                if percentage > prev_percentage:
                    print(" {}% done ({} clusters)".format(
                        percentage, loop_counter))
            # Flush any remaining partially-filled batch.
            if len(cluster_array) > 0:
                cls.objects.bulk_create(cluster_array)
            return
        # Generate clusters from base stations
        elif precision == MAX_CLUSTER_PRECISION_SIZE:
            print("Generating clusters...")
            # Add geohash to all base stations
            base_stations = BS_MODEL.objects.annotate(
                geohash=GeoHash('point', precision=precision))
            # Filter by operator
            if operator:
                mnc_list = [m.value for m in operator.mnc_set.all()]
                base_stations = base_stations.filter(mnc__in=mnc_list)
            # Group by geohash and get cluster MultiPoint and count
            clusters_values = base_stations.values('geohash').annotate(
                count=Count('point'), geom=Collect('point'))
            total = clusters_values.count()
            if not total:
                raise ValueError(
                    "No base stations found for precision {}".format(
                        precision))
            print("Saving data for {} clusters...".format(total))
            loop_counter = 0
            percentage = 0
            cluster_array = []  # batch buffer flushed every DATABASE_COMMIT_SIZE
            for cluster_dict in clusters_values:
                count = cluster_dict['count']
                # Centroid of the collected MultiPoint geometry.
                point = cluster_dict['geom'].centroid
                # Data is kept only for singleton clusters; the extra get()
                # fetches that single station's data.
                data = '' if count != 1 else base_stations.get(
                    geohash=cluster_dict['geohash']).data
                cluster = cls(point=point, precision=precision, count=count,
                              data=data, operator=operator)
                cluster_array.append(cluster)
                if len(cluster_array) >= DATABASE_COMMIT_SIZE:
                    cls.objects.bulk_create(cluster_array)
                    cluster_array = []
                loop_counter += 1
                # Print progress only when the integer percentage advances.
                prev_percentage = percentage
                percentage = 100 * loop_counter // total
                if percentage > prev_percentage:
                    print(" {}% done ({} clusters)".format(
                        percentage, loop_counter))
            # Flush any remaining partially-filled batch.
            if len(cluster_array) > 0:
                cls.objects.bulk_create(cluster_array)
            return
        else:
            raise ValueError(
                "precision must be in the [1, {}] interval".format(
                    MAX_CLUSTER_PRECISION_SIZE))
    else:
        operator_string = ' and operator {}'.format(
            operator) if operator else ''
        raise ValueError(
            "There are already clusters for precision {}{}".format(
                precision, operator_string))
def getGenusMetadata(self): """ Returns metadata for all genus of a specific family """ genera = self.QuerySet.filter(family_id=self.id).values('genus_id').annotate(points=Collect('geom'),ab=Count('genus_id'),name=Min('genus')) return genera
def test_event_url_model(self):
    # Migrated from doctest, does not seem to check something specific, it
    # looks more like a code example.
    with_url_counts = Event.objects.annotate(urls_nr=Count('urls'))
    self.assertFalse(with_url_counts.filter(urls_nr__gte=2))