Example #1
    def test_counter_taskset(self):
        increment_counter.count = 0
        ts = TaskSet(tasks=[
            increment_counter.s(),
            increment_counter.s(increment_by=2),
            increment_counter.s(increment_by=3),
            increment_counter.s(increment_by=4),
            increment_counter.s(increment_by=5),
            increment_counter.s(increment_by=6),
            increment_counter.s(increment_by=7),
            increment_counter.s(increment_by=8),
            increment_counter.s(increment_by=9),
        ])
        self.assertEqual(ts.total, 9)

        consumer = increment_counter.get_consumer()
        consumer.purge()
        consumer.close()
        taskset_res = ts.apply_async()
        subtasks = taskset_res.subtasks
        taskset_id = taskset_res.taskset_id
        consumer = increment_counter.get_consumer()
        for subtask in subtasks:
            m = consumer.queues[0].get().payload
            self.assertDictContainsSubset({'taskset': taskset_id,
                                           'task': increment_counter.name,
                                           'id': subtask.id}, m)
            increment_counter(
                    increment_by=m.get('kwargs', {}).get('increment_by'))
        self.assertEqual(increment_counter.count, sum(xrange(1, 10)))
Example #2
    def test_counter_taskset(self):
        increment_counter.count = 0
        ts = TaskSet(tasks=[
            increment_counter.s(),
            increment_counter.s(increment_by=2),
            increment_counter.s(increment_by=3),
            increment_counter.s(increment_by=4),
            increment_counter.s(increment_by=5),
            increment_counter.s(increment_by=6),
            increment_counter.s(increment_by=7),
            increment_counter.s(increment_by=8),
            increment_counter.s(increment_by=9),
        ])
        self.assertEqual(ts.total, 9)

        consumer = increment_counter.get_consumer()
        consumer.purge()
        consumer.close()
        taskset_res = ts.apply_async()
        subtasks = taskset_res.subtasks
        taskset_id = taskset_res.taskset_id
        consumer = increment_counter.get_consumer()
        for subtask in subtasks:
            m = consumer.queues[0].get().payload
            self.assertDictContainsSubset(
                {
                    'taskset': taskset_id,
                    'task': increment_counter.name,
                    'id': subtask.id
                }, m)
            increment_counter(
                increment_by=m.get('kwargs', {}).get('increment_by'))
        self.assertEqual(increment_counter.count, sum(range(1, 10)))
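
Examples #1 and #2 exercise the same test; the second is simply the Python 3 variant (range instead of xrange). For comparison, here is a minimal sketch of the same fan-out written with the group primitive that replaced TaskSet in later Celery releases; increment_counter is reused from the example above, everything else is an assumption.

# Hypothetical sketch only: the same fan-out expressed with celery.group,
# which superseded TaskSet.  Assumes increment_counter is a registered task.
from celery import group

job = group(increment_counter.s(increment_by=n) for n in range(1, 10))
result = job.apply_async()
result.join()  # wait for all nine subtasks to complete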
Example #3
def hotspotsRange_ts(start_time, stop_time, location, **kwargs):
    ''' Run over a range of timesteps at 5 minute intervals in between '''
    start = datetime.strptime(start_time, '%Y%m%d.%H%M%S')
    stop = datetime.strptime(stop_time, '%Y%m%d.%H%M%S')
    kwargs.update({'task_id': hotspotsRange.request.id})
    job = TaskSet(tasks=[cybercomq.gis.hotspotpysal.hotspots.subtask(
        args=(ts, location), kwargs=kwargs, queue="gis", track_started=True)
        for ts in date_range(start, stop)])
    job.apply_async()
    return '%s' % (hotspotsRange_ts.request.id)
Example #4
    def run(self, xlsx_file, record=None, *args, **kwargs):
        self.record = record

        self.update_state(state="INITIALIZING", meta=self.progress)

        csv_metadata = self.convert_excel_to_csv(xlsx_file)
        for csv_filename, headers, types in csv_metadata:
            self.load_csv_into_db(csv_filename, headers, types)

        clear_db()

        self.translate_data()

        for meta in csv_metadata:
            self.drop_csv_table(meta[0])

        cursor = connection.cursor()
        cursor.execute(
            "SELECT DISTINCT postcode FROM advisers_location"
        )
        postcodes = cursor.fetchall()

        self.total = len(postcodes)

        def chunks(n=1000):
            for i in xrange(0, len(postcodes), n):
                yield postcodes[i : i + n]

        self.update_count()

        tasks = []
        for chunk in chunks():
            t = GeocoderTask().subtask(args=(chunk,))
            tasks.append(t)
        ts = TaskSet(tasks=tasks)
        res = ts.apply_async()

        task_counts = {}
        task_errors = {}

        def update_task_process(task_id, result):
            task_counts[task_id] = result.get("count")
            task_errors[task_id] = result.get("errors")

        while res.completed_count() < len(tasks):
            [update_task_process(r.task_id, r.result) for r in res if r.result]

            count = sum(task_counts.values())
            errors = list(itertools.chain(*task_errors.values()))
            self.update_count(count, errors)
            time.sleep(1)

        cache.clear()
Example #5
def encode_video(cls, video_pk):
    '''
    Task to encode a ``Video`` into one or more ``VideoFile``s.
    '''
    try:
        video_obj = cls.objects.get(pk=video_pk)
    except cls.DoesNotExist:
        # video was removed
        return

    filecls = video_obj.videofile_set.model

    files = list(video_obj.videofile_set.all())

    if video_obj.is_encoded and all(vfile.is_encoded for vfile in files):
        # video has been processed
        return

    video = video_obj.video

    # a video is required to meet the requirements
    # to be encoded in a specific resolution
    if len(files) == 0:
        for setting in VideoSetting.objects.all():
            if video.width >= setting.width or video.height >= setting.height:
                vfile = filecls.objects.create(
                    original=video_obj, format=setting.format,
                    width=setting.width, height=setting.height)
                files.append(vfile)
                log.info('%r can be encoded to %r' % (video, setting))

    tasks = []
    for vfile in files:
        if vfile.is_encoded:
            continue
        if vfile.width > 600 or vfile.height > 300:
            encode_video_file.delay(filecls, vfile.pk)
        else:
            task = encode_video_file_quick.subtask(args=(filecls, vfile.pk))
            tasks.append(task)

    # create a taskset of the quick encodings
    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    result.save()

    # start the publish_video callback when the taskset completes
    callback = publish_video.subtask(args=(cls, video_pk))
    # max_tries has to be set otherwise it uses the default
    # check every 60 seconds if the set has completed
    join_taskset.delay(result.taskset_id, callback, interval=60, max_retries=300, propagate=False)
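
The join_taskset task delayed above is not part of this snippet. Below is a minimal sketch of what such a polling task might look like, assuming the old-style Celery API in which a saved taskset result can be rebuilt with TaskSetResult.restore(); the real project may implement it differently.

# Hypothetical sketch of the join_taskset task used above; not taken from the
# original project.  Assumes a result backend that supports
# TaskSetResult.save()/restore(), as used by result.save() above.
from celery.result import TaskSetResult
from celery.task import task


@task(max_retries=None)
def join_taskset(taskset_id, callback, interval=60, max_retries=None,
                 propagate=True):
    """Poll a saved taskset and fire `callback` once every subtask is done."""
    result = TaskSetResult.restore(taskset_id)
    if result.ready():
        return callback.delay(result.join(propagate=propagate))
    # Not finished yet: check again after `interval` seconds.
    join_taskset.retry(countdown=interval, max_retries=max_retries)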
Example #6
    def test_run(self):
        class Chord(chords.Chord):
            backend = Mock()

        body = dict()
        Chord()(TaskSet(add.subtask((i, i)) for i in xrange(5)), body)
        Chord()([add.subtask((i, i)) for i in xrange(5)], body)
        self.assertEqual(Chord.backend.on_chord_apply.call_count, 2)
Example #7
    def handle(self, *args, **options):
        docs = RECAPDocument.objects.exclude(filepath_local='')

        if options['skip_ocr']:
            # Focus on the items that we don't know if they need OCR.
            docs = docs.filter(ocr_status=None)
        else:
            # We're doing OCR. Only work with those items that require it.
            docs = docs.filter(ocr_status=RECAPDocument.OCR_NEEDED)

        count = docs.count()
        print("There are %s documents to process." % count)

        if options.get('order') is not None:
            if options['order'] == 'small-first':
                docs = docs.order_by('page_count')
            elif options['order'] == 'big-first':
                docs = docs.order_by('-page_count')

        subtasks = []
        completed = 0
        for pk in docs.values_list('pk', flat=True):
            # Send the items off for processing.
            last_item = (count == completed + 1)
            subtasks.append(extract_recap_pdf.subtask(
                (pk, options['skip_ocr']),
                priority=5,
                queue=options['queue']
            ))

            # Every queue_length items, send the subtasks to Celery.
            if (len(subtasks) >= options['queue_length']) or last_item:
                msg = ("Sent %s subtasks to celery. We have sent %s "
                       "items so far." % (len(subtasks), completed + 1))
                logger.info(msg)
                print(msg)
                job = TaskSet(tasks=subtasks)
                job.apply_async().join()
                subtasks = []

            completed += 1
Example #8
def _task_set_wrapper(inner_task, iterable):
    """

    :param inner_task:
    :param iterable:
    :return:
    """
    if not iterable:
        return

    tasks = (inner_task.si(chunk) for chunk in chunks(iter(iterable), 10))
    return TaskSet(tasks).apply_async()
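
The chunks() helper used here (and again in Example #14) is not shown in the snippet. A minimal sketch of one plausible implementation, assuming it simply groups an iterator into lists of at most n items:

# Hypothetical chunks() helper assumed by _task_set_wrapper above; the real
# project (or celery.utils) may define it differently.
from itertools import islice


def chunks(iterator, n):
    """Yield successive lists of at most n items from `iterator`."""
    while True:
        piece = list(islice(iterator, n))
        if not piece:
            break
        yield piece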
Example #9
    def test_run(self):
        prev, current_app.backend = current_app.backend, Mock()
        current_app.backend.cleanup = Mock()
        current_app.backend.cleanup.__name__ = 'cleanup'
        try:
            Chord = current_app.tasks['celery.chord']

            body = dict()
            Chord(TaskSet(add.subtask((i, i)) for i in xrange(5)), body)
            Chord([add.subtask((i, i)) for i in xrange(5)], body)
            self.assertEqual(current_app.backend.on_chord_apply.call_count, 2)
        finally:
            current_app.backend = prev
Example #10
def make_download_tasks(data, line_count, start_line):
    """For every item in the CSV, send it to Celery for processing"""
    previous_casenum = None
    subtasks = []
    completed = 0
    for index, item in data.iterrows():
        if completed < start_line - 1:
            # Skip ahead if start_line is provided.
            completed += 1
            continue

        if item['casenum'] != previous_casenum:
            # New case, get the docket before getting the pdf
            logger.info("New docket found with casenum: %s" % item['casenum'])
            previous_casenum = item['casenum']
            filename = get_docket_filename(item['court'], item['casenum'])
            url = get_docketxml_url(item['court'], item['casenum'])
            subtasks.append(download_recap_item.subtask((url, filename)))

        # Get the document
        filename = get_document_filename(item['court'], item['casenum'],
                                         item['docnum'], item['subdocnum'])
        url = get_pdf_url(item['court'], item['casenum'], filename)
        subtasks.append(download_recap_item.subtask((url, filename)))

        # Every n items or on the last item, send the subtasks to Celery.
        last_item = (line_count == completed + 1)
        if (len(subtasks) >= 1000) or last_item:
            msg = ("Sent %s subtasks to celery. We have processed %s "
                   "rows so far." % (len(subtasks), completed + 1))
            logger.info(msg)
            print(msg)
            job = TaskSet(tasks=subtasks)
            job.apply_async().join()
            subtasks = []

        completed += 1
Example #11
def make_download_tasks(data, line_count, start_line):
    """For every item in the CSV, send it to Celery for processing"""
    previous_casenum = None
    subtasks = []
    completed = 0
    for index, item in data.iterrows():
        if completed < start_line - 1:
            # Skip ahead if start_line is provided.
            completed += 1
            continue

        last_item = (line_count == completed + 1)
        if item['casenum'] != previous_casenum:
            # New case, get the docket before getting the pdf
            logger.info("New docket found with casenum: %s" % item['casenum'])
            previous_casenum = item['casenum']
            filename = get_docket_filename(item['court'], item['casenum'])
            url = get_docketxml_url(item['court'], item['casenum'])
            subtasks.append(download_recap_item.subtask((url, filename)))

        # Get the document
        filename = get_document_filename(item['court'], item['casenum'],
                                         item['docnum'], item['subdocnum'])
        url = get_pdf_url(item['court'], item['casenum'], filename)
        subtasks.append(download_recap_item.subtask((url, filename)))

        # Every n items send the subtasks to Celery.
        if (len(subtasks) >= 1000) or last_item:
            msg = ("Sent %s subtasks to celery. We have processed %s "
                   "rows so far." % (len(subtasks), completed + 1))
            logger.info(msg)
            print(msg)
            job = TaskSet(tasks=subtasks)
            job.apply_async().join()
            subtasks = []

        completed += 1
Example #12
    def test_named_taskset(self):
        prefix = 'test_named_taskset-'
        ts = TaskSet([return_True_task.subtask([1])])
        res = ts.apply(taskset_id=prefix + uuid())
        self.assertTrue(res.taskset_id.startswith(prefix))
Example #13
    def test_function_taskset(self):
        with eager_tasks(self.app):
            subtasks = [return_True_task.s(i) for i in range(1, 6)]
            ts = TaskSet(subtasks)
            res = ts.apply_async()
            self.assertListEqual(res.join(), [True, True, True, True, True])
Example #14
def process_lots_of_items(ids_to_process):
    return TaskSet(
        process_chunk.subtask((chunk, ))
        for chunk in chunks(iter(ids_to_process), 25)).apply_async()
Example #15
    def test_function_taskset(self):
        subtasks = [return_True_task.s(i) for i in range(1, 6)]
        ts = TaskSet(subtasks)
        res = ts.apply_async()
        self.assertListEqual(res.join(), [True, True, True, True, True])
Example #16
    def run(self, image_pk, destination_image_format, image_filepath, **kwargs):
        logger = self.get_logger(**kwargs)
        task_id = kwargs.get("task_id", "")
        logger.info("Starting request %s. image_pk: %s, destination_image_format: %s, image_filepath: %s" %
                (task_id,
                 image_pk,
                 destination_image_format,
                 image_filepath))
        try:
            # -----------------------------------------------------------------
            #   Resize the image into large, medium, and small images.
            # -----------------------------------------------------------------
            logger.info("Starting resize tasks...")
            resize_tasks = TaskSet(tasks=[
                ResizeImageTask.subtask((image_filepath, destination_image_format, Image.image_xlarge_dimensions)),
                ResizeImageTask.subtask((image_filepath, destination_image_format, Image.image_large_dimensions)),
                ResizeImageTask.subtask((image_filepath, destination_image_format, Image.image_medium_dimensions)),
                ResizeImageTask.subtask((image_filepath, destination_image_format, Image.image_small_dimensions)),
            ])
            resize_tasks_results = resize_tasks.apply()
            if not resize_tasks_results.successful():
                logger.error("Error during resize tasks, bail out.")
                for result in resize_tasks_results:
                    if result.exception:
                        raise result.exception
            resize_tasks_results = [unicode(result) for result in resize_tasks_results]
            logger.info("Resize tasks done:\n%s." % (pprint.pformat(resize_tasks_results), ))
            # -----------------------------------------------------------------

            # -----------------------------------------------------------------
            #   Save each of the resized images to S3.
            # -----------------------------------------------------------------
            logger.info("Starting save tasks...")
            save_tasks = TaskSet(tasks=[
                SaveImageDataTask.subtask((resize_tasks_results[0], image_pk, "image_xlarge")),
                SaveImageDataTask.subtask((resize_tasks_results[1], image_pk, "image_large")),
                SaveImageDataTask.subtask((resize_tasks_results[2], image_pk, "image_medium")),
                SaveImageDataTask.subtask((resize_tasks_results[3], image_pk, "image_small")),
            ])
            save_tasks_results = save_tasks.apply()
            if not save_tasks_results.successful():
                logger.error("Error during save tasks, bail out.")
                for result in save_tasks_results:
                    if result.exception:
                        raise result.exception
            #save_tasks_results = [result.get() for result in save_tasks_results]
            logger.info("Save tasks done\n%s" % (pprint.pformat(save_tasks_results), ))
            # -----------------------------------------------------------------

            # -----------------------------------------------------------------
            #   Mark the Image object as being uploaded.
            # -----------------------------------------------------------------
            image = Image.objects.get(pk=image_pk)
            image.is_uploaded = True
            image.save()

            # Clean up the original temporary file and the other three new
            # temporary files created for the resized images.
            for filepath in resize_tasks_results + [image_filepath]:
                logger.info("Deleting file %s" % filepath)
                os.remove(filepath)
            # -----------------------------------------------------------------
        except Exception as exc:
            logger.exception("HandleImageUploadTask_%s: unhandled exception." % (task_id, ))
            self.retry(exc=exc,
                       args=(image_pk, destination_image_format, image_filepath),
                       kwargs=kwargs)
Example #17
def A():
    return TaskSet(B.subtask((c, )) for c in "ABCDEFGH").apply_async()
Example #18
    def test_named_taskset(self):
        prefix = 'test_named_taskset-'
        ts = TaskSet([return_True_task.subtask([1])])
        res = ts.apply(taskset_id=prefix + uuid())
        self.assertTrue(res.taskset_id.startswith(prefix))