Example #1
 def test_january(self):
     """Any date in January should map to January 1 as its quarter start."""
     for day in (1, 10):
         self.assertEqual(
             date(2018, 1, 1),
             get_start_of_quarter(date(2018, 1, day)),
         )
Example #2
 def test_january(self):
     """The quarter containing any January date begins on January 1."""
     expected = date(2018, 1, 1)
     self.assertEqual(expected, get_start_of_quarter(date(2018, 1, 1)))
     self.assertEqual(expected, get_start_of_quarter(date(2018, 1, 10)))
def upload_recap_data(options):
    """Upload RECAP data to Internet Archive.

    Runs forever, repeatedly querying for dockets that need uploading and
    fanning the work out as Celery tasks. Progress is checkpointed to Redis
    (``recap-docket-last-id``) so a restarted process resumes from the last
    docket it dispatched instead of starting over.

    :param options: A dict with at least ``queue`` (Celery queue name) and
        ``database`` (DB alias passed through to the upload task).
    :return: Never returns; loops until the process is killed.
    """
    q = options["queue"]
    database = options["database"]
    r = make_redis_interface("CACHE")
    redis_key = "recap-docket-last-id"
    # getset atomically reads the previous checkpoint and resets it to 0,
    # so we resume where a prior run left off.
    last_pk = r.getset(redis_key, 0)
    ds = (Docket.objects.filter(
        Q(ia_upload_failure_count__lte=3)
        | Q(ia_upload_failure_count=None),
        ia_needs_upload=True,
        source__in=Docket.RECAP_SOURCES,
        pk__gt=last_pk,
    ).order_by("pk").only("pk"))

    chunk_size = 100  # Small to save memory
    i = 0
    previous_i = None
    delay_count = 0
    t1 = now()
    logger.info("Sending recap dockets to Internet Archive")
    throttle = CeleryThrottle(queue_name=q, min_items=5)
    while True:
        # Start of quarter needs to be re-analyzed every time through the loop.
        # This ensures that if the quarter changes while this runs, we get the
        # new value.
        params = {
            "pk__gt": last_pk,
            "ia_date_first_change__lt": get_start_of_quarter(),
        }
        for d in ds.filter(**params)[:chunk_size]:
            throttle.maybe_wait()
            upload_recap_json.apply_async(args=(d.pk, database), queue=q)
            i += 1
            if i % 100 == 0:
                # Print a useful log line with the current upload rate.
                t2 = now()
                # Fix: timedelta.seconds holds only the sub-day component and
                # wraps every 24 hours; total_seconds() is the true elapsed
                # time, which matters for a job designed to run indefinitely.
                elapsed_minutes = (t2 - t1).total_seconds() / 60
                try:
                    rate = i / elapsed_minutes
                    logger.info("Uploaded %s dockets to IA so far (%.01f/m)",
                                i, rate)
                except ZeroDivisionError:
                    # First lap through can be completed in less than 1s.
                    pass
            # Checkpoint after every dispatch so a crash loses no progress.
            last_pk = d.pk
            r.set(redis_key, last_pk)

        # Detect if we've hit the end of the loop and reset it if so. We do
        # this by keeping track of the last_pk that we saw the last time the
        # for loop changed. If that PK doesn't change after the for loop has
        # run again, then we know we've hit the end of the loop and we should
        # reset it.
        empty_loop = i == previous_i
        if empty_loop:
            # i is the same as the last time the
            # for loop finished. Reset things.
            if last_pk == 0:
                # We went through the for loop a second time and still didn't
                # do anything. Stall with capped back off.
                delay_count += 1
                max_delay = 60 * 30  # Thirty minutes
                delay = min(delay_count * 60, max_delay)
                time.sleep(delay)
            else:
                delay_count = 0
                last_pk = 0
                r.set(redis_key, 0)
        else:
            previous_i = i
def upload_recap_data(options):
    """Upload RECAP data to Internet Archive.

    Runs forever, repeatedly querying for dockets that need uploading and
    dispatching each as a Celery task. Progress is checkpointed to Redis
    ('recap-docket-last-id') so a restarted process resumes from the last
    docket it dispatched instead of starting over.

    :param options: A dict with at least 'queue' (Celery queue name) and
        'database' (DB alias passed through to the upload task).
    :return: Never returns; loops until the process is killed.
    """
    q = options['queue']
    database = options['database']
    r = redis.StrictRedis(host=settings.REDIS_HOST,
                          port=settings.REDIS_PORT,
                          db=settings.REDIS_DATABASES['CACHE'])
    redis_key = 'recap-docket-last-id'
    # getset atomically reads the previous checkpoint and resets it to 0,
    # so we resume where a prior run left off.
    last_pk = r.getset(redis_key, 0)
    ds = Docket.objects.filter(
        Q(ia_upload_failure_count__lte=3) | Q(ia_upload_failure_count=None),
        ia_needs_upload=True,
        source__in=Docket.RECAP_SOURCES,
        pk__gt=last_pk,
    ).order_by('pk').only('pk')

    chunk_size = 100  # Small to save memory
    i = 0
    previous_i = None
    delay_count = 0
    t1 = now()
    logger.info("Sending recap dockets to Internet Archive")
    throttle = CeleryThrottle(queue_name=q, min_items=5)
    while True:
        # Start of quarter needs to be re-analyzed every time through the loop.
        # This ensures that if the quarter changes while this runs, we get the
        # new value.
        params = {
            'pk__gt': last_pk,
            'ia_date_first_change__lt': get_start_of_quarter(),
        }
        for d in ds.filter(**params)[:chunk_size]:
            throttle.maybe_wait()
            upload_recap_json.apply_async(args=(d.pk, database), queue=q)
            i += 1
            if i % 100 == 0:
                # Print a useful log line with the current upload rate.
                t2 = now()
                # Fix: timedelta.seconds holds only the sub-day component and
                # wraps every 24 hours; total_seconds() is the true elapsed
                # time, which matters for a job designed to run indefinitely.
                elapsed_minutes = (t2 - t1).total_seconds() / 60
                try:
                    rate = i / elapsed_minutes
                    logger.info("Uploaded %s dockets to IA so far (%.01f/m)",
                                i, rate)
                except ZeroDivisionError:
                    # First lap through can be completed in less than 1s.
                    pass
            # Checkpoint after every dispatch so a crash loses no progress.
            last_pk = d.pk
            r.set(redis_key, last_pk)

        # Detect if we've hit the end of the loop and reset it if so. We do
        # this by keeping track of the last_pk that we saw the last time the
        # for loop changed. If that PK doesn't change after the for loop has
        # run again, then we know we've hit the end of the loop and we should
        # reset it.
        empty_loop = i == previous_i
        if empty_loop:
            # i is the same as the last time the
            # for loop finished. Reset things.
            if last_pk == 0:
                # We went through the for loop a second time and still didn't
                # do anything. Stall with capped back off.
                delay_count += 1
                max_delay = 60 * 30  # Thirty minutes
                delay = min(delay_count * 60, max_delay)
                time.sleep(delay)
            else:
                delay_count = 0
                last_pk = 0
                r.set(redis_key, 0)
        else:
            previous_i = i
Example #5
 def test_december(self):
     """A December date falls in the quarter that begins on October 1."""
     result = get_start_of_quarter(date(2018, 12, 1))
     self.assertEqual(date(2018, 10, 1), result)
Example #6
 def test_december(self):
     """December belongs to the fourth quarter, which starts October 1."""
     self.assertEqual(date(2018, 10, 1),
                      get_start_of_quarter(date(2018, 12, 1)))