def test_january(self):
    """Any date in January maps to January 1 as the start of its quarter."""
    for day in (1, 10):
        self.assertEqual(
            date(2018, 1, 1),
            get_start_of_quarter(date(2018, 1, day)),
        )
def test_january(self):
    """Dates in January resolve to Jan 1 as the start of the quarter."""
    expected = date(2018, 1, 1)
    self.assertEqual(expected, get_start_of_quarter(date(2018, 1, 1)))
    self.assertEqual(expected, get_start_of_quarter(date(2018, 1, 10)))
def upload_recap_data(options):
    """Upload RECAP data to Internet Archive.

    Runs forever: repeatedly walks the queryset of dockets that need an IA
    upload in primary-key order, fanning each one out as a Celery task, and
    checkpoints its progress in Redis so a restart resumes where it left off.

    :param options: dict with at least "queue" (Celery queue name) and
        "database" (DB alias passed through to the upload task).
    """
    q = options["queue"]
    database = options["database"]
    # NOTE(review): make_redis_interface is a project helper; presumably it
    # returns a redis client bound to the "CACHE" database — confirm.
    r = make_redis_interface("CACHE")
    redis_key = "recap-docket-last-id"
    # GETSET atomically reads the previous checkpoint and resets it to 0, so
    # the stored value reflects this run from here on.
    last_pk = r.getset(redis_key, 0)
    # Dockets that need uploading and haven't failed too many times; ordered
    # by pk so the pk__gt cursor below can page through them.
    ds = (Docket.objects.filter( Q(ia_upload_failure_count__lte=3) | Q(ia_upload_failure_count=None), ia_needs_upload=True, source__in=Docket.RECAP_SOURCES, pk__gt=last_pk, ).order_by("pk").only("pk"))
    chunk_size = 100  # Small to save memory
    i = 0  # Total dockets enqueued this run.
    previous_i = None  # Value of i after the previous pass; detects empty passes.
    delay_count = 0  # Consecutive idle passes; drives the back-off below.
    t1 = now()
    logger.info("Sending recap dockets to Internet Archive")
    throttle = CeleryThrottle(queue_name=q, min_items=5)
    while True:
        # Start of quarter needs to be re-analyzed every time through the loop.
        # This ensures that if the quarter changes while this runs, we get the
        # new value.
        params = {
            "pk__gt": last_pk,
            "ia_date_first_change__lt": get_start_of_quarter(),
        }
        for d in ds.filter(**params)[:chunk_size]:
            # Block until the queue has room before enqueueing more work.
            throttle.maybe_wait()
            upload_recap_json.apply_async(args=(d.pk, database), queue=q)
            i += 1
            if i % 100 == 0:
                # Print a useful log line with expected finish date.
                t2 = now()
                elapsed_minutes = float((t2 - t1).seconds) / 60
                try:
                    rate = i / float(elapsed_minutes)
                    logger.info("Uploaded %s dockets to IA so far (%.01f/m)", i, rate)
                except ZeroDivisionError:
                    # First lap through can be completed in less than 1s.
                    pass
            # Advance the cursor and persist it so a crash/restart resumes here.
            last_pk = d.pk
            r.set(redis_key, last_pk)

        # Detect if we've hit the end of the loop and reset it if so. We do
        # this by keeping track of the last_pk that we saw the last time the
        # for loop changed. If that PK doesn't change after the for loop has
        # run again, then we know we've hit the end of the loop and we should
        # reset it.
        empty_loop = i == previous_i
        if empty_loop:
            # i is the same as the last time the
            # for loop finished. Reset things.
            if last_pk == 0:
                # We went through the for loop a second time and still didn't
                # do anything. Stall with capped back off.
                delay_count += 1
                max_delay = 60 * 30  # Thirty minutes
                # Back off linearly (60s per idle pass), capped at max_delay.
                delay = min(delay_count * 60, max_delay)
                time.sleep(delay)
            else:
                # First empty pass: rewind the cursor to 0 to sweep again from
                # the start (new uploadable dockets may have appeared).
                delay_count = 0
                last_pk = 0
                r.set(redis_key, 0)
        else:
            # Work happened this pass; remember the count for the next check.
            previous_i = i
def upload_recap_data(options):
    """Upload RECAP data to Internet Archive.

    Runs forever: repeatedly walks the queryset of dockets that need an IA
    upload in primary-key order, fanning each one out as a Celery task, and
    checkpoints its progress in Redis so a restart resumes where it left off.

    :param options: dict with at least 'queue' (Celery queue name) and
        'database' (DB alias passed through to the upload task).
    """
    q = options['queue']
    database = options['database']
    # Redis client against the CACHE database configured in settings.
    r = redis.StrictRedis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, db=settings.REDIS_DATABASES['CACHE'])
    redis_key = 'recap-docket-last-id'
    # GETSET atomically reads the previous checkpoint and resets it to 0, so
    # the stored value reflects this run from here on.
    last_pk = r.getset(redis_key, 0)
    # Dockets that need uploading and haven't failed too many times; ordered
    # by pk so the pk__gt cursor below can page through them.
    ds = Docket.objects.filter(
        Q(ia_upload_failure_count__lte=3) | Q(ia_upload_failure_count=None),
        ia_needs_upload=True,
        source__in=Docket.RECAP_SOURCES,
        pk__gt=last_pk,
    ).order_by('pk').only('pk')
    chunk_size = 100  # Small to save memory
    i = 0  # Total dockets enqueued this run.
    previous_i = None  # Value of i after the previous pass; detects empty passes.
    delay_count = 0  # Consecutive idle passes; drives the back-off below.
    t1 = now()
    logger.info("Sending recap dockets to Internet Archive")
    throttle = CeleryThrottle(queue_name=q, min_items=5)
    while True:
        # Start of quarter needs to be re-analyzed every time through the loop.
        # This ensures that if the quarter changes while this runs, we get the
        # new value.
        params = {
            'pk__gt': last_pk,
            'ia_date_first_change__lt': get_start_of_quarter(),
        }
        for d in ds.filter(**params)[:chunk_size]:
            # Block until the queue has room before enqueueing more work.
            throttle.maybe_wait()
            upload_recap_json.apply_async(args=(d.pk, database), queue=q)
            i += 1
            if i % 100 == 0:
                # Print a useful log line with expected finish date.
                t2 = now()
                elapsed_minutes = float((t2 - t1).seconds) / 60
                try:
                    rate = i / float(elapsed_minutes)
                    logger.info("Uploaded %s dockets to IA so far (%.01f/m)", i, rate)
                except ZeroDivisionError:
                    # First lap through can be completed in less than 1s.
                    pass
            # Advance the cursor and persist it so a crash/restart resumes here.
            last_pk = d.pk
            r.set(redis_key, last_pk)

        # Detect if we've hit the end of the loop and reset it if so. We do
        # this by keeping track of the last_pk that we saw the last time the
        # for loop changed. If that PK doesn't change after the for loop has
        # run again, then we know we've hit the end of the loop and we should
        # reset it.
        empty_loop = i == previous_i
        if empty_loop:
            # i is the same as the last time the
            # for loop finished. Reset things.
            if last_pk == 0:
                # We went through the for loop a second time and still didn't
                # do anything. Stall with capped back off.
                delay_count += 1
                max_delay = 60 * 30  # Thirty minutes
                # Back off linearly (60s per idle pass), capped at max_delay.
                delay = min(delay_count * 60, max_delay)
                time.sleep(delay)
            else:
                # First empty pass: rewind the cursor to 0 to sweep again from
                # the start (new uploadable dockets may have appeared).
                delay_count = 0
                last_pk = 0
                r.set(redis_key, 0)
        else:
            # Work happened this pass; remember the count for the next check.
            previous_i = i
def test_december(self):
    """A December date falls in the quarter beginning October 1."""
    result = get_start_of_quarter(date(2018, 12, 1))
    self.assertEqual(date(2018, 10, 1), result)
def test_december(self):
    """December belongs to the fourth quarter, which starts October 1."""
    expected = date(2018, 10, 1)
    self.assertEqual(expected, get_start_of_quarter(date(2018, 12, 1)))