def items(self, obj):
    # Note that items() returns "packed" tuples instead of objects.
    # This is necessary because we return NewsItems and blog entries,
    # plus different types of NewsItems (bunched vs. unbunched).

    # Limit the feed to all NewsItems published in the last five days.
    # We *do* include items from today in this query, but we'll filter
    # those later in this method so that only today's *uncollapsed* items
    # (schema.can_collapse=False) will be included in the feed. We don't
    # want today's *collapsed* items to be included, because more items
    # might be added to the database before the day is finished, and
    # that would result in the RSS item being updated multiple times,
    # which is annoying.
    # TODO: re-use ebpub.db.schemafilters for filtering here.
    # TODO: allow user control over date range.
    today_value = today()
    start_date = today_value - datetime.timedelta(days=5)
    # Include future stuff, useful for events.
    end_date = today_value + datetime.timedelta(days=5)

    qs = NewsItem.objects.select_related().by_request(self.request).filter(
        item_date__gte=start_date,
        item_date__lte=end_date).order_by('-item_date', 'schema__id', 'id')

    # Filter out ignored schemas -- those whose slugs are specified in
    # the "ignore" query-string parameter.
    if 'ignore' in self.request.GET:
        schema_slugs = self.request.GET['ignore'].split(',')
        qs = qs.exclude(schema__slug__in=schema_slugs)

    # Filter wanted schemas -- those whose slugs are specified in the
    # "only" query-string parameter.
    if 'only' in self.request.GET:
        schema_slugs = self.request.GET['only'].split(',')
        qs = qs.filter(schema__slug__in=schema_slugs)

    block_radius = self.request.GET.get('radius', BLOCK_RADIUS_DEFAULT)
    if block_radius not in BLOCK_RADIUS_CHOICES:
        raise Http404('Invalid radius')

    ni_list = list(self.newsitems_for_obj(obj, qs, block_radius))
    schema_list = list(set([ni.schema for ni in ni_list]))
    populate_attributes_if_needed(ni_list, schema_list)

    is_block = isinstance(obj, Block)

    # Note that this decorates the results by returning tuples instead of
    # NewsItems. This is necessary because we're bunching.
    for schema_group in bunch_by_date_and_schema(ni_list, today_value):
        schema = schema_group[0].schema
        if schema.can_collapse:
            yield ('newsitem', obj, schema, schema_group, is_block, block_radius)
        else:
            for newsitem in schema_group:
                yield ('newsitem', obj, schema, newsitem, is_block, block_radius)
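# Illustrative sketch only (not part of the original feed class): one way a
# feed rendering method could unpack the packed tuples yielded by items()
# above. The item_title() hook and the schema.plural_name / newsitem.title
# attributes are assumptions here; the tuple layout
# ('newsitem', obj, schema, payload, is_block, block_radius) comes from items().
def item_title(self, item):
    kind, place, schema, payload, is_block, block_radius = item
    if schema.can_collapse:
        # For collapsible schemas, payload is the bunched list of NewsItems
        # for a single day and schema.
        return u'%s: %s items' % (schema.plural_name, len(payload))
    # Otherwise payload is a single NewsItem.
    return payload.title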
def friendlydate(value):
    """
    A filter that takes a date and prefixes it with 'Today', 'Yesterday',
    or 'Tomorrow' if relevant, or the day of the week if it's within the
    past week; otherwise it just returns the date.

    Example (in template):

    .. code-block:: html+django

        {{ start_date|friendlydate }}

    Examples, in python:

    .. code-block:: python

        >>> import mock, datetime
        >>> with mock.patch('ebpub.db.templatetags.dateutils.today', lambda: datetime.date(2011, 8, 15)):
        ...     print friendlydate(datetime.date(2011, 8, 15))
        ...     print friendlydate(datetime.date(2011, 8, 16))
        ...     print friendlydate(datetime.date(2011, 8, 14))
        ...     print friendlydate(datetime.date(2011, 8, 13))
        ...     print friendlydate(datetime.date(2011, 8, 9))
        ...     print friendlydate(datetime.date(2011, 8, 8))
        ...
        Today August 15, 2011
        Tomorrow August 16, 2011
        Yesterday August 14, 2011
        Saturday August 13, 2011
        Tuesday August 9, 2011
        August 8, 2011
    """
    try:
        # Convert to a datetime.date, if it's a datetime.datetime.
        value = value.date()
    except AttributeError:
        pass
    # Using value.day because strftime('%d') is zero-padded and we don't want that.
    # TODO: parameterize format to allow i18n?
    formatted_date = value.strftime('%B ') + unicode(value.day) + value.strftime(', %Y')
    _today = today()
    if value == _today:
        return 'Today %s' % formatted_date
    elif value == _today - datetime.timedelta(1):
        return 'Yesterday %s' % formatted_date
    elif value == _today + datetime.timedelta(1):
        return 'Tomorrow %s' % formatted_date
    elif _today - value <= datetime.timedelta(6):
        return '%s %s' % (value.strftime('%A'), formatted_date)
    return formatted_date
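# Illustrative sketch only (not copied from this module): friendlydate() is
# written as a Django template filter, so the defining module presumably
# registers it with a template Library along these standard Django lines.
from django import template
register = template.Library()
register.filter('friendlydate', friendlydate)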
def save(self, old_record, list_record, detail_record):
    # Schema-specific attributes are stored separately from the core
    # NewsItem fields, so pull them out before validating the rest.
    attributes = list_record.pop('attributes', {})
    list_record.setdefault('schema', self.schema.id)
    if not old_record:
        # New records get date defaults if the scraper didn't provide them.
        list_record.setdefault('item_date', today())
        list_record.setdefault('pub_date', now())
    from ebpub.db.forms import NewsItemForm
    form = NewsItemForm(list_record, instance=old_record)
    if form.is_valid():
        return self.create_or_update(old_record, attributes, **form.cleaned_data)
    else:
        self.logger.info("Skipping due to validation failures:")
        for key, val in form.errors.items():
            self.logger.info("%s: %s" % (key, val.as_text()))
        raise SkipRecord(form.errors)
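# Illustrative sketch only: the sort of dict a scraper might pass to save()
# above. The specific NewsItem field names accepted by NewsItemForm (title,
# url, description, location_name, ...) are assumptions here, not taken from
# this snippet; 'attributes' is popped off and saved separately, as save()
# shows, and item_date/pub_date may be omitted for new records.
example_list_record = {
    'title': 'Branch library closed for repairs',
    'url': 'http://example.com/notices/123',
    'description': 'The Main St branch is closed until further notice.',
    'location_name': 'Main St and 1st Ave',
    'attributes': {'notice_type': 'closure'},
}
# self.save(old_record=None, list_record=example_list_record, detail_record=None)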
def update_aggregates(schema_id_or_slug, dry_run=False, reset=False):
    """
    Updates all Aggregate* tables for the given schema_id/slug,
    deleting/updating the existing records if necessary.

    If dry_run is True, then the records won't be updated -- only the
    SQL will be output.

    If reset is True, then all aggregates for this schema will be
    deleted before updating.
    """
    logger.info('... %s' % schema_id_or_slug)
    if not str(schema_id_or_slug).isdigit():
        schema_id = Schema.objects.get(slug=schema_id_or_slug).id
    else:
        schema_id = schema_id_or_slug
    cursor = connection.cursor()

    if reset and not dry_run:
        for aggmodel in (AggregateAll, AggregateDay, AggregateLocation,
                         AggregateLocationDay, AggregateFieldLookup):
            logger.info('... deleting all %s for schema %s'
                        % (aggmodel.__name__, schema_id_or_slug))
            aggmodel.objects.filter(schema__id=schema_id).delete()

    # AggregateAll
    cursor.execute("SELECT COUNT(*) FROM db_newsitem WHERE schema_id = %s",
                   (schema_id,))
    new_values = [{'total': row[0]} for row in cursor.fetchall()]
    smart_update(cursor, new_values, AggregateAll._meta.db_table,
                 ('total',), (), {'schema_id': schema_id}, dry_run=dry_run)

    # AggregateDay
    cursor.execute("""
        SELECT item_date, COUNT(*)
        FROM db_newsitem
        WHERE schema_id = %s
        GROUP BY 1""", (schema_id,))
    new_values = [{'date_part': row[0], 'total': row[1]}
                  for row in cursor.fetchall()]
    smart_update(cursor, new_values, AggregateDay._meta.db_table,
                 ('date_part', 'total'), ('date_part',),
                 {'schema_id': schema_id}, dry_run=dry_run)

    # AggregateLocationDay
    cursor.execute("""
        SELECT nl.location_id, ni.item_date, loc.location_type_id, COUNT(*)
        FROM db_newsitemlocation nl, db_newsitem ni, db_location loc
        WHERE nl.news_item_id = ni.id
            AND ni.schema_id = %s
            AND nl.location_id = loc.id
        GROUP BY 1, 2, 3""", (schema_id,))
    new_values = [{'location_id': row[0], 'date_part': row[1],
                   'location_type_id': row[2], 'total': row[3]}
                  for row in cursor.fetchall()]
    smart_update(cursor, new_values, AggregateLocationDay._meta.db_table,
                 ('location_id', 'date_part', 'location_type_id', 'total'),
                 ('location_id', 'date_part', 'location_type_id'),
                 {'schema_id': schema_id}, dry_run=dry_run)

    # AggregateLocation
    # This query is a bit clever -- we just sum up the totals created in a
    # previous aggregate. It's a helpful optimization, because otherwise
    # the location query is way too slow.
    # Note that we calculate the total for the last 30 days that had at least
    # one news item -- *NOT* the last 30 days, period.
    # We add date_part <= current_date here to keep sparse items in the future
    # from throwing off counts for the previous 30 days.
    cursor.execute("SELECT date_part FROM %s WHERE schema_id = %%s AND date_part <= current_date ORDER BY date_part DESC LIMIT 1"
                   % AggregateLocationDay._meta.db_table, (schema_id,))
    try:
        end_date = cursor.fetchone()[0]
    except TypeError:
        # If cursor.fetchone() is None, there are no records.
        pass
    else:
        # Note that BETWEEN is inclusive on both ends, so to get
        # AggregateLocationDays for e.g. 30 days, we'd need a timedelta of 29.
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA
        cursor.execute("""
            SELECT location_id, location_type_id, SUM(total)
            FROM %s
            WHERE schema_id = %%s
                AND date_part BETWEEN %%s AND %%s
            GROUP BY 1, 2""" % AggregateLocationDay._meta.db_table,
            (schema_id, start_date, end_date))
        new_values = [{'location_id': row[0], 'location_type_id': row[1],
                       'total': row[2]} for row in cursor.fetchall()]
        smart_update(cursor, new_values, AggregateLocation._meta.db_table,
                     ('location_id', 'location_type_id', 'total'),
                     ('location_id', 'location_type_id'),
                     {'schema_id': schema_id}, dry_run=dry_run)

    for sf in SchemaField.objects.filter(schema__id=schema_id, is_filter=True, is_lookup=True):
        try:
            end_date = NewsItem.objects.filter(
                schema__id=schema_id, item_date__lte=today()).values_list(
                'item_date', flat=True).order_by('-item_date')[0]
        except IndexError:
            continue  # There have been no NewsItems in the given date range.
        # Note BETWEEN is inclusive on both ends.
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA
        if sf.is_many_to_many_lookup():
            # AggregateFieldLookup
            cursor.execute("""
                SELECT id, (
                    SELECT COUNT(*) FROM db_attribute a, db_newsitem ni
                    WHERE a.news_item_id = ni.id
                        AND a.schema_id = %%s
                        AND ni.schema_id = %%s
                        AND a.%s ~ ('[[:<:]]' || db_lookup.id || '[[:>:]]')
                        AND ni.item_date BETWEEN %%s AND %%s
                )
                FROM db_lookup
                WHERE schema_field_id = %%s""" % sf.real_name,
                (schema_id, schema_id, start_date, end_date, sf.id))
            new_values = [{'lookup_id': row[0], 'total': row[1]}
                          for row in cursor.fetchall()]
            smart_update(cursor, new_values, AggregateFieldLookup._meta.db_table,
                         ('lookup_id', 'total'), ('lookup_id',),
                         {'schema_id': schema_id, 'schema_field_id': sf.id},
                         dry_run=dry_run)
        else:
            # AggregateFieldLookup
            cursor.execute("""
                SELECT a.%s, COUNT(*)
                FROM db_attribute a, db_newsitem ni
                WHERE a.news_item_id = ni.id
                    AND a.schema_id = %%s
                    AND ni.schema_id = %%s
                    AND %s IS NOT NULL
                    AND ni.item_date BETWEEN %%s AND %%s
                GROUP BY 1""" % (sf.real_name, sf.real_name),
                (schema_id, schema_id, start_date, end_date))
            new_values = [{'lookup_id': row[0], 'total': row[1]}
                          for row in cursor.fetchall()]
            smart_update(cursor, new_values, AggregateFieldLookup._meta.db_table,
                         ('lookup_id', 'total'), ('lookup_id',),
                         {'schema_id': schema_id, 'schema_field_id': sf.id},
                         dry_run=dry_run)

    transaction.commit_unless_managed()
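# Illustrative sketch only: driving update_aggregates() across every schema.
# The update_all_aggregates() helper name is an assumption; only
# update_aggregates(schema_id_or_slug, dry_run=..., reset=...) comes from the
# code above. A single schema can also be previewed without writing anything:
#   update_aggregates('crime', dry_run=True)
def update_all_aggregates(dry_run=False, reset=False):
    for schema_id in Schema.objects.values_list('id', flat=True):
        update_aggregates(schema_id, dry_run=dry_run, reset=reset)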