Example #1
    def items(self, obj):
        # Note that items() returns "packed" tuples instead of objects.
        # This is necessary because we return NewsItems and blog entries,
        # plus different types of NewsItems (bunched vs. unbunched).

        # Limit the feed to NewsItems published within the last five days.
        # We *do* include items from today in this query, but we'll filter
        # those later in this method so that only today's *uncollapsed* items
        # (schema.can_collapse=False) will be included in the feed. We don't
        # want today's *collapsed* items to be included, because more items
        # might be added to the database before the day is finished, and
        # that would result in the RSS item being updated multiple times, which
        # is annoying.

        # TODO: re-use ebpub.db.schemafilters for filtering here.

        # TODO: allow user control over date range
        today_value = today()
        start_date = today_value - datetime.timedelta(days=5)
        # Include future stuff, useful for events
        end_date = today_value + datetime.timedelta(days=5)

        qs = (
            NewsItem.objects.select_related()
            .by_request(self.request)
            .filter(item_date__gte=start_date, item_date__lte=end_date)
            .order_by("-item_date", "schema__id", "id")
        )

        # Filter out ignored schemas -- those whose slugs are specified in
        # the "ignore" query-string parameter.
        if "ignore" in self.request.GET:
            schema_slugs = self.request.GET["ignore"].split(",")
            qs = qs.exclude(schema__slug__in=schema_slugs)

        # Filter wanted schemas -- those whose slugs are specified in the
        # "only" query-string parameter.
        if "only" in self.request.GET:
            schema_slugs = self.request.GET["only"].split(",")
            qs = qs.filter(schema__slug__in=schema_slugs)

        block_radius = self.request.GET.get("radius", BLOCK_RADIUS_DEFAULT)
        if block_radius not in BLOCK_RADIUS_CHOICES:
            raise Http404("Invalid radius")
        ni_list = list(self.newsitems_for_obj(obj, qs, block_radius))
        schema_list = list(set([ni.schema for ni in ni_list]))
        populate_attributes_if_needed(ni_list, schema_list)

        is_block = isinstance(obj, Block)

        # Note that this decorates the results by returning tuples instead of
        # NewsItems. This is necessary because we're bunching.
        for schema_group in bunch_by_date_and_schema(ni_list, today_value):
            schema = schema_group[0].schema
            if schema.can_collapse:
                yield ("newsitem", obj, schema, schema_group, is_block, block_radius)
            else:
                for newsitem in schema_group:
                    yield ("newsitem", obj, schema, newsitem, is_block, block_radius)
Example #2
    def items(self, obj):
        # Note that items() returns "packed" tuples instead of objects.
        # This is necessary because we return NewsItems and blog entries,
        # plus different types of NewsItems (bunched vs. unbunched).

        # Limit the feed to NewsItems published within the last five days.
        # We *do* include items from today in this query, but we'll filter
        # those later in this method so that only today's *uncollapsed* items
        # (schema.can_collapse=False) will be included in the feed. We don't
        # want today's *collapsed* items to be included, because more items
        # might be added to the database before the day is finished, and
        # that would result in the RSS item being updated multiple times, which
        # is annoying.

        # TODO: re-use ebpub.db.schemafilters for filtering here.

        # TODO: allow user control over date range
        today_value = today()
        start_date = today_value - datetime.timedelta(days=5)
        # Include future stuff, useful for events
        end_date = today_value + datetime.timedelta(days=5)

        qs = NewsItem.objects.select_related().by_request(self.request).filter(
            item_date__gte=start_date,
            item_date__lte=end_date).order_by('-item_date', 'schema__id', 'id')

        # Filter out ignored schemas -- those whose slugs are specified in
        # the "ignore" query-string parameter.
        if 'ignore' in self.request.GET:
            schema_slugs = self.request.GET['ignore'].split(',')
            qs = qs.exclude(schema__slug__in=schema_slugs)

        # Filter wanted schemas -- those whose slugs are specified in the
        # "only" query-string parameter.
        if 'only' in self.request.GET:
            schema_slugs = self.request.GET['only'].split(',')
            qs = qs.filter(schema__slug__in=schema_slugs)

        block_radius = self.request.GET.get('radius', BLOCK_RADIUS_DEFAULT)
        if block_radius not in BLOCK_RADIUS_CHOICES:
            raise Http404('Invalid radius')
        ni_list = list(self.newsitems_for_obj(obj, qs, block_radius))
        schema_list = list(set([ni.schema for ni in ni_list]))
        populate_attributes_if_needed(ni_list, schema_list)

        is_block = isinstance(obj, Block)

        # Note that this decorates the results by returning tuples instead of
        # NewsItems. This is necessary because we're bunching.
        for schema_group in bunch_by_date_and_schema(ni_list, today_value):
            schema = schema_group[0].schema
            if schema.can_collapse:
                yield ('newsitem', obj, schema, schema_group, is_block,
                       block_radius)
            else:
                for newsitem in schema_group:
                    yield ('newsitem', obj, schema, newsitem, is_block,
                           block_radius)
Example #3
 def save(self, old_record, list_record, detail_record):
     attributes = list_record.pop('attributes', {})
     list_record.setdefault('schema', self.schema.id)
     if not old_record:
         list_record.setdefault('item_date', today())
         list_record.setdefault('pub_date', now())
     from ebpub.db.forms import NewsItemForm
     form = NewsItemForm(list_record, instance=old_record)
     if form.is_valid():
         return self.create_or_update(old_record, attributes,
                                      **form.cleaned_data)
     else:
         raise SkipRecord(form.errors)
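
save() raises SkipRecord to reject a single bad record without aborting the
whole scrape, so the calling loop is expected to catch it per record. A hedged
sketch of such a loop; update(), get_list_records(), and existing_record() are
hypothetical names for the scraper framework's hooks, not confirmed by the
code above:

    def update(self):
        for list_record in self.get_list_records():
            old_record = self.existing_record(list_record)  # hypothetical lookup
            try:
                self.save(old_record, list_record, detail_record=None)
            except SkipRecord as e:
                self.logger.info('Skipping record: %s' % e)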
Example #4
def friendlydate(value):
    """
    A filter that takes a date and prefixes it with 'Today', 'Yesterday',
    or 'Tomorrow' if relevant, or with the day of the week if it's within
    the past week; otherwise it returns just the date.

    Example (in template):

    .. code-block:: html+django

      {{ start_date|friendlydate }}

    Examples, in python:

    .. code-block:: python

      >>> import mock, datetime
      >>> with mock.patch('ebpub.db.templatetags.dateutils.today', lambda: datetime.date(2011, 8, 15)):
      ...     print friendlydate(datetime.date(2011, 8, 15))
      ...     print friendlydate(datetime.date(2011, 8, 16))
      ...     print friendlydate(datetime.date(2011, 8, 14))
      ...     print friendlydate(datetime.date(2011, 8, 13))
      ...     print friendlydate(datetime.date(2011, 8, 9))
      ...     print friendlydate(datetime.date(2011, 8, 8))
      ...
      Today August 15, 2011
      Tomorrow August 16, 2011
      Yesterday August 14, 2011
      Saturday August 13, 2011
      Tuesday August 9, 2011
      August 8, 2011
    """
    try:  # Convert to a datetime.date, if it's a datetime.datetime.
        value = value.date()
    except AttributeError:
        pass
    # Using value.day because strftime('%d') is zero-padded and we don't want that.
    # TODO: parameterize format to allow i18n?
    formatted_date = value.strftime('%B ') + unicode(
        value.day) + value.strftime(', %Y')
    _today = today()
    if value == _today:
        return 'Today %s' % formatted_date
    elif value == _today - datetime.timedelta(1):
        return 'Yesterday %s' % formatted_date
    elif value == _today + datetime.timedelta(1):
        return 'Tomorrow %s' % formatted_date
    elif _today - value <= datetime.timedelta(6):
        return '%s %s' % (value.strftime('%A'), formatted_date)
    return formatted_date
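
For the template usage in the docstring to work, the function must be
registered as a Django template filter. A minimal registration sketch using
the standard django.template Library pattern; in ebpub the filter would live
in the dateutils template-tag module, but the exact module layout here is an
assumption:

    from django import template

    register = template.Library()
    # Expose friendlydate under the same name used in templates.
    register.filter('friendlydate', friendlydate)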
Example #5
 def save(self, old_record, list_record, detail_record):
     attributes = list_record.pop('attributes', {})
     list_record.setdefault('schema', self.schema.id)
     if not old_record:
         list_record.setdefault('item_date', today())
         list_record.setdefault('pub_date', now())
     from ebpub.db.forms import NewsItemForm
     form = NewsItemForm(list_record, instance=old_record)
     if form.is_valid():
         return self.create_or_update(old_record, attributes,
                                      **form.cleaned_data)
     else:
         self.logger.info("Skipping due to validation failures:")
         for key, val in form.errors.items():
             self.logger.info("%s: %s" % (key, val.as_text()))
         raise SkipRecord(form.errors)
Example #6
def update_aggregates(schema_id_or_slug, dry_run=False, reset=False):
    """
    Updates all Aggregate* tables for the given schema_id/slug,
    deleting/updating the existing records if necessary.

    If dry_run is True, then the records won't be updated -- only the SQL
    will be output.

    If reset is True, then all aggregates for this schema will be deleted before
    updating.
    """
    logger.info("... %s" % schema_id_or_slug)
    if not str(schema_id_or_slug).isdigit():
        schema_id = Schema.objects.get(slug=schema_id_or_slug).id
    else:
        schema_id = schema_id_or_slug
    cursor = connection.cursor()

    if reset and not dry_run:
        for aggmodel in (AggregateAll, AggregateDay, AggregateLocation, AggregateLocationDay, AggregateFieldLookup):
            logger.info("... deleting all %s for schema %s" % (aggmodel.__name__, schema_id_or_slug))
            aggmodel.objects.filter(schema__id=schema_id).delete()

    # AggregateAll
    cursor.execute("SELECT COUNT(*) FROM db_newsitem WHERE schema_id = %s", (schema_id,))
    new_values = [{"total": row[0]} for row in cursor.fetchall()]
    smart_update(
        cursor, new_values, AggregateAll._meta.db_table, ("total",), (), {"schema_id": schema_id}, dry_run=dry_run
    )

    # AggregateDay
    cursor.execute(
        """
        SELECT item_date, COUNT(*)
        FROM db_newsitem
        WHERE schema_id = %s
        GROUP BY 1""",
        (schema_id,),
    )
    new_values = [{"date_part": row[0], "total": row[1]} for row in cursor.fetchall()]
    smart_update(
        cursor,
        new_values,
        AggregateDay._meta.db_table,
        ("date_part", "total"),
        ("date_part",),
        {"schema_id": schema_id},
        dry_run=dry_run,
    )

    # AggregateLocationDay
    cursor.execute(
        """
        SELECT nl.location_id, ni.item_date, loc.location_type_id, COUNT(*)
        FROM db_newsitemlocation nl, db_newsitem ni, db_location loc
        WHERE nl.news_item_id = ni.id
            AND ni.schema_id = %s
            AND nl.location_id = loc.id
        GROUP BY 1, 2, 3""",
        (schema_id,),
    )
    new_values = [
        {"location_id": row[0], "date_part": row[1], "location_type_id": row[2], "total": row[3]}
        for row in cursor.fetchall()
    ]
    smart_update(
        cursor,
        new_values,
        AggregateLocationDay._meta.db_table,
        ("location_id", "date_part", "location_type_id", "total"),
        ("location_id", "date_part", "location_type_id"),
        {"schema_id": schema_id},
        dry_run=dry_run,
    )

    # AggregateLocation
    # This query is a bit clever -- we just sum up the totals created in a
    # previous aggregate. It's a helpful optimization, because otherwise
    # the location query is way too slow.
    # Note that we calculate the total for the last 30 days that had at least
    # one news item -- *NOT* the last 30 days, period.
    # We add date_part <= current_date here to keep sparse items in the future
    # from throwing off counts for the previous 30 days.
    cursor.execute(
        "SELECT date_part FROM %s WHERE schema_id = %%s AND date_part <= current_date ORDER BY date_part DESC LIMIT 1"
        % AggregateLocationDay._meta.db_table,
        (schema_id,),
    )
    try:
        end_date = cursor.fetchone()[0]
    except TypeError:  # if cursor.fetchone() is None, there are no records.
        pass
    else:
        # Note that BETWEEN is inclusive on both ends, so to get
        # AggregateLocationDays for eg. 30 days, we'd need a timedelta of 29
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA
        cursor.execute(
            """
            SELECT location_id, location_type_id, SUM(total)
            FROM %s
            WHERE schema_id = %%s
                AND date_part BETWEEN %%s AND %%s
            GROUP BY 1, 2"""
            % AggregateLocationDay._meta.db_table,
            (schema_id, start_date, end_date),
        )
        new_values = [{"location_id": row[0], "location_type_id": row[1], "total": row[2]} for row in cursor.fetchall()]
        smart_update(
            cursor,
            new_values,
            AggregateLocation._meta.db_table,
            ("location_id", "location_type_id", "total"),
            ("location_id", "location_type_id"),
            {"schema_id": schema_id},
            dry_run=dry_run,
        )

    for sf in SchemaField.objects.filter(schema__id=schema_id, is_filter=True, is_lookup=True):
        try:
            end_date = (
                NewsItem.objects.filter(schema__id=schema_id, item_date__lte=today())
                .values_list("item_date", flat=True)
                .order_by("-item_date")[0]
            )
        except IndexError:
            continue  # There have been no NewsItems in the given date range.
        # Note BETWEEN is inclusive on both ends.
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA

        if sf.is_many_to_many_lookup():
            # AggregateFieldLookup
            cursor.execute(
                """
                SELECT id, (
                    SELECT COUNT(*) FROM db_attribute a, db_newsitem ni
                    WHERE a.news_item_id = ni.id
                        AND a.schema_id = %%s
                        AND ni.schema_id = %%s
                        AND a.%s ~ ('[[:<:]]' || db_lookup.id || '[[:>:]]')
                        AND ni.item_date BETWEEN %%s AND %%s
                )
                FROM db_lookup
                WHERE schema_field_id = %%s"""
                % sf.real_name,
                (schema_id, schema_id, start_date, end_date, sf.id),
            )
            new_values = [{"lookup_id": row[0], "total": row[1]} for row in cursor.fetchall()]
            smart_update(
                cursor,
                new_values,
                AggregateFieldLookup._meta.db_table,
                ("lookup_id", "total"),
                ("lookup_id",),
                {"schema_id": schema_id, "schema_field_id": sf.id},
                dry_run=dry_run,
            )
        else:
            # AggregateFieldLookup
            cursor.execute(
                """
                SELECT a.%s, COUNT(*)
                FROM db_attribute a, db_newsitem ni
                WHERE a.news_item_id = ni.id
                    AND a.schema_id = %%s
                    AND ni.schema_id = %%s
                    AND %s IS NOT NULL
                    AND ni.item_date BETWEEN %%s AND %%s
                GROUP BY 1"""
                % (sf.real_name, sf.real_name),
                (schema_id, schema_id, start_date, end_date),
            )
            new_values = [{"lookup_id": row[0], "total": row[1]} for row in cursor.fetchall()]
            smart_update(
                cursor,
                new_values,
                AggregateFieldLookup._meta.db_table,
                ("lookup_id", "total"),
                ("lookup_id",),
                {"schema_id": schema_id, "schema_field_id": sf.id},
                dry_run=dry_run,
            )

    transaction.commit_unless_managed()
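
update_aggregates() works on one schema at a time, so a periodic job that
keeps every schema fresh just loops over the known slugs. A hedged driver
sketch; only Schema and update_aggregates come from the code above, and the
wrapper itself is illustrative:

    def update_all_aggregates(dry_run=False, reset=False):
        # Recompute aggregates schema by schema; each call commits its own
        # work via transaction.commit_unless_managed().
        for slug in Schema.objects.values_list('slug', flat=True):
            update_aggregates(slug, dry_run=dry_run, reset=reset)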
Example #7
def update_aggregates(schema_id_or_slug, dry_run=False, reset=False):
    """
    Updates all Aggregate* tables for the given schema_id/slug,
    deleting/updating the existing records if necessary.

    If dry_run is True, then the records won't be updated -- only the SQL
    will be output.

    If reset is True, then all aggregates for this schema will be deleted before
    updating.
    """
    logger.info('... %s' % schema_id_or_slug)
    if not str(schema_id_or_slug).isdigit():
        schema_id = Schema.objects.get(slug=schema_id_or_slug).id
    else:
        schema_id = schema_id_or_slug
    cursor = connection.cursor()

    if reset and not dry_run:
        for aggmodel in (AggregateAll, AggregateDay, AggregateLocation,
                         AggregateLocationDay, AggregateFieldLookup):
            logger.info('... deleting all %s for schema %s' %
                        (aggmodel.__name__, schema_id_or_slug))
            aggmodel.objects.filter(schema__id=schema_id).delete()

    # AggregateAll
    cursor.execute("SELECT COUNT(*) FROM db_newsitem WHERE schema_id = %s",
                   (schema_id, ))
    new_values = [{'total': row[0]} for row in cursor.fetchall()]
    smart_update(cursor,
                 new_values,
                 AggregateAll._meta.db_table, ('total', ), (),
                 {'schema_id': schema_id},
                 dry_run=dry_run)

    # AggregateDay
    cursor.execute(
        """
        SELECT item_date, COUNT(*)
        FROM db_newsitem
        WHERE schema_id = %s
        GROUP BY 1""", (schema_id, ))
    new_values = [{
        'date_part': row[0],
        'total': row[1]
    } for row in cursor.fetchall()]
    smart_update(
        cursor,
        new_values,
        AggregateDay._meta.db_table,
        ('date_part', 'total'),
        ('date_part', ),
        {'schema_id': schema_id},
        dry_run=dry_run,
    )

    # AggregateLocationDay
    cursor.execute(
        """
        SELECT nl.location_id, ni.item_date, loc.location_type_id, COUNT(*)
        FROM db_newsitemlocation nl, db_newsitem ni, db_location loc
        WHERE nl.news_item_id = ni.id
            AND ni.schema_id = %s
            AND nl.location_id = loc.id
        GROUP BY 1, 2, 3""", (schema_id, ))
    new_values = [{
        'location_id': row[0],
        'date_part': row[1],
        'location_type_id': row[2],
        'total': row[3]
    } for row in cursor.fetchall()]
    smart_update(
        cursor,
        new_values,
        AggregateLocationDay._meta.db_table,
        ('location_id', 'date_part', 'location_type_id', 'total'),
        ('location_id', 'date_part', 'location_type_id'),
        {'schema_id': schema_id},
        dry_run=dry_run,
    )

    # AggregateLocation
    # This query is a bit clever -- we just sum up the totals created in a
    # previous aggregate. It's a helpful optimization, because otherwise
    # the location query is way too slow.
    # Note that we calculate the total for the last 30 days that had at least
    # one news item -- *NOT* the last 30 days, period.
    # We add date_part <= current_date here to keep sparse items in the future
    # from throwing off counts for the previous 30 days.
    cursor.execute("SELECT date_part FROM %s WHERE schema_id = %%s AND date_part <= current_date ORDER BY date_part DESC LIMIT 1" % \
        AggregateLocationDay._meta.db_table, (schema_id,))
    try:
        end_date = cursor.fetchone()[0]
    except TypeError:  # if cursor.fetchone() is None, there are no records.
        pass
    else:
        # Note that BETWEEN is inclusive on both ends, so to get
        # AggregateLocationDays for eg. 30 days, we'd need a timedelta of 29
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA
        cursor.execute(
            """
            SELECT location_id, location_type_id, SUM(total)
            FROM %s
            WHERE schema_id = %%s
                AND date_part BETWEEN %%s AND %%s
            GROUP BY 1, 2""" % AggregateLocationDay._meta.db_table,
            (schema_id, start_date, end_date))
        new_values = [{
            'location_id': row[0],
            'location_type_id': row[1],
            'total': row[2]
        } for row in cursor.fetchall()]
        smart_update(
            cursor,
            new_values,
            AggregateLocation._meta.db_table,
            ('location_id', 'location_type_id', 'total'),
            ('location_id', 'location_type_id'),
            {'schema_id': schema_id},
            dry_run=dry_run,
        )

    for sf in SchemaField.objects.filter(schema__id=schema_id,
                                         is_filter=True,
                                         is_lookup=True):
        try:
            end_date = NewsItem.objects.filter(
                schema__id=schema_id, item_date__lte=today()).values_list(
                    'item_date', flat=True).order_by('-item_date')[0]
        except IndexError:
            continue  # There have been no NewsItems in the given date range.
        # Note BETWEEN is inclusive on both ends.
        start_date = end_date - constants.DAYS_AGGREGATE_TIMEDELTA

        if sf.is_many_to_many_lookup():
            # AggregateFieldLookup
            cursor.execute(
                """
                SELECT id, (
                    SELECT COUNT(*) FROM db_attribute a, db_newsitem ni
                    WHERE a.news_item_id = ni.id
                        AND a.schema_id = %%s
                        AND ni.schema_id = %%s
                        AND a.%s ~ ('[[:<:]]' || db_lookup.id || '[[:>:]]')
                        AND ni.item_date BETWEEN %%s AND %%s
                )
                FROM db_lookup
                WHERE schema_field_id = %%s""" % sf.real_name,
                (schema_id, schema_id, start_date, end_date, sf.id))
            new_values = [{
                'lookup_id': row[0],
                'total': row[1]
            } for row in cursor.fetchall()]
            smart_update(
                cursor,
                new_values,
                AggregateFieldLookup._meta.db_table,
                ('lookup_id', 'total'),
                ('lookup_id', ),
                {
                    'schema_id': schema_id,
                    'schema_field_id': sf.id
                },
                dry_run=dry_run,
            )
        else:
            # AggregateFieldLookup
            cursor.execute(
                """
                SELECT a.%s, COUNT(*)
                FROM db_attribute a, db_newsitem ni
                WHERE a.news_item_id = ni.id
                    AND a.schema_id = %%s
                    AND ni.schema_id = %%s
                    AND %s IS NOT NULL
                    AND ni.item_date BETWEEN %%s AND %%s
                GROUP BY 1""" % (sf.real_name, sf.real_name),
                (schema_id, schema_id, start_date, end_date))
            new_values = [{
                'lookup_id': row[0],
                'total': row[1]
            } for row in cursor.fetchall()]
            smart_update(cursor,
                         new_values,
                         AggregateFieldLookup._meta.db_table,
                         ('lookup_id', 'total'), ('lookup_id', ), {
                             'schema_id': schema_id,
                             'schema_field_id': sf.id
                         },
                         dry_run=dry_run)

    transaction.commit_unless_managed()