Example #1
def hotspotsRange_ts(start_time, stop_time, location, **kwargs):
    ''' Run over a range of timesteps at 5-minute intervals in between '''
    start = datetime.strptime(start_time, '%Y%m%d.%H%M%S')
    stop = datetime.strptime(stop_time, '%Y%m%d.%H%M%S')
    kwargs.update({'task_id': hotspotsRange_ts.request.id})
    job = TaskSet(tasks=[
        cybercomq.gis.hotspotpysal.hotspots.subtask(
            args=(ts, location), kwargs=kwargs,
            queue="gis", track_started=True)
        for ts in date_range(start, stop)
    ])
    job.apply_async()
    return '%s' % hotspotsRange_ts.request.id
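This snippet calls a date_range helper that is not shown; judging from the docstring, it yields timesteps between start and stop at 5-minute intervals. A minimal sketch of such a helper (hypothetical, not the project's actual implementation) might be:

from datetime import timedelta

def date_range(start, stop, step=timedelta(minutes=5)):
    # Hypothetical helper: yield datetimes from start up to (but not including)
    # stop, advancing by a fixed step (5 minutes by default).
    current = start
    while current < stop:
        yield current
        current += step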
Example #2
    def test_counter_taskset(self):
        increment_counter.count = 0
        ts = TaskSet(tasks=[
            increment_counter.s(),
            increment_counter.s(increment_by=2),
            increment_counter.s(increment_by=3),
            increment_counter.s(increment_by=4),
            increment_counter.s(increment_by=5),
            increment_counter.s(increment_by=6),
            increment_counter.s(increment_by=7),
            increment_counter.s(increment_by=8),
            increment_counter.s(increment_by=9),
        ])
        self.assertEqual(ts.total, 9)

        consumer = increment_counter.get_consumer()
        consumer.purge()
        consumer.close()
        taskset_res = ts.apply_async()
        subtasks = taskset_res.subtasks
        taskset_id = taskset_res.taskset_id
        consumer = increment_counter.get_consumer()
        for subtask in subtasks:
            m = consumer.queues[0].get().payload
            self.assertDictContainsSubset(
                {
                    'taskset': taskset_id,
                    'task': increment_counter.name,
                    'id': subtask.id
                }, m)
            increment_counter(
                increment_by=m.get('kwargs', {}).get('increment_by'))
        self.assertEqual(increment_counter.count, sum(range(1, 10)))
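The test above assumes an increment_counter task that accepts an increment_by keyword argument and accumulates a count attribute on itself; that fixture is not part of this listing. A rough sketch of such a task, assuming a Celery app named app, could look like:

from celery import Celery

app = Celery('tests', broker='memory://')

@app.task
def increment_counter(increment_by=1):
    # Keep a running total on the task object itself so the test can read it back.
    increment_counter.count = getattr(increment_counter, 'count', 0) + increment_by
    return increment_counter.count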
Example #3
    def test_counter_taskset(self):
        increment_counter.count = 0
        ts = TaskSet(tasks=[
            increment_counter.s(),
            increment_counter.s(increment_by=2),
            increment_counter.s(increment_by=3),
            increment_counter.s(increment_by=4),
            increment_counter.s(increment_by=5),
            increment_counter.s(increment_by=6),
            increment_counter.s(increment_by=7),
            increment_counter.s(increment_by=8),
            increment_counter.s(increment_by=9),
        ])
        self.assertEqual(ts.total, 9)

        consumer = increment_counter.get_consumer()
        consumer.purge()
        consumer.close()
        taskset_res = ts.apply_async()
        subtasks = taskset_res.subtasks
        taskset_id = taskset_res.taskset_id
        consumer = increment_counter.get_consumer()
        for subtask in subtasks:
            m = consumer.queues[0].get().payload
            self.assertDictContainsSubset({'taskset': taskset_id,
                                           'task': increment_counter.name,
                                           'id': subtask.id}, m)
            increment_counter(
                    increment_by=m.get('kwargs', {}).get('increment_by'))
        self.assertEqual(increment_counter.count, sum(xrange(1, 10)))
Example #4
    def run(self, xlsx_file, record=None, *args, **kwargs):
        self.record = record

        self.update_state(state="INITIALIZING", meta=self.progress)

        csv_metadata = self.convert_excel_to_csv(xlsx_file)
        for csv_filename, headers, types in csv_metadata:
            self.load_csv_into_db(csv_filename, headers, types)

        clear_db()

        self.translate_data()

        for meta in csv_metadata:
            self.drop_csv_table(meta[0])

        cursor = connection.cursor()
        cursor.execute(
            """
            SELECT DISTINCT postcode FROM advisers_location"""
        )
        postcodes = cursor.fetchall()

        self.total = len(postcodes)

        def chunks(n=1000):
            for i in xrange(0, len(postcodes), n):
                yield postcodes[i : i + n]

        self.update_count()

        tasks = []
        for chunk in chunks():
            t = GeocoderTask().subtask(args=(chunk,))
            tasks.append(t)
        ts = TaskSet(tasks=tasks)
        res = ts.apply_async()

        task_counts = {}
        task_errors = {}

        def update_task_process(task_id, result):
            task_counts[task_id] = result.get("count")
            task_errors[task_id] = result.get("errors")

        while res.completed_count() < len(tasks):
            for r in res:
                if r.result:
                    update_task_process(r.task_id, r.result)

            count = sum(task_counts.values())
            errors = list(itertools.chain(*task_errors.values()))
            self.update_count(count, errors)
            time.sleep(1)

        cache.clear()
Example #5
    def handle(self, *args, **options):
        docs = RECAPDocument.objects.exclude(filepath_local='')

        if options['skip_ocr']:
            # Focus on the items that we don't know if they need OCR.
            docs = docs.filter(ocr_status=None)
        else:
            # We're doing OCR. Only work with those items that require it.
            docs = docs.filter(ocr_status=RECAPDocument.OCR_NEEDED)

        count = docs.count()
        print("There are %s documents to process." % count)

        if options.get('order') is not None:
            if options['order'] == 'small-first':
                docs = docs.order_by('page_count')
            elif options['order'] == 'big-first':
                docs = docs.order_by('-page_count')

        subtasks = []
        completed = 0
        for pk in docs.values_list('pk', flat=True):
            # Send the items off for processing.
            last_item = (count == completed + 1)
            subtasks.append(extract_recap_pdf.subtask(
                (pk, options['skip_ocr']),
                priority=5,
                queue=options['queue']
            ))

            # Every enqueue_length items, send the subtasks to Celery.
            if (len(subtasks) >= options['queue_length']) or last_item:
                msg = ("Sent %s subtasks to celery. We have sent %s "
                       "items so far." % (len(subtasks), completed + 1))
                logger.info(msg)
                print(msg)
                job = TaskSet(tasks=subtasks)
                job.apply_async().join()
                subtasks = []

            completed += 1
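Examples #4 and #5 follow the same batching pattern: collect subtask signatures, wrap them in a TaskSet, dispatch with apply_async(), and then either join() or poll completed_count(). A stripped-down sketch of that pattern, using a hypothetical process_item task (standing in for GeocoderTask / extract_recap_pdf) and the pre-4.0 Celery TaskSet, is:

from celery import Celery
# TaskSet is only available in Celery < 4.0 (later replaced by group);
# the exact import path varies by version, e.g. celery.task.sets in older releases.
from celery.task.sets import TaskSet

app = Celery('batch', broker='memory://')

@app.task
def process_item(item):
    # Hypothetical stand-in for the real per-item task.
    return item

def dispatch_in_batches(items, batch_size=1000):
    subtasks = []
    for item in items:
        subtasks.append(process_item.subtask((item,)))
        if len(subtasks) >= batch_size:
            # Flush a full batch and wait for it to finish before continuing.
            TaskSet(tasks=subtasks).apply_async().join()
            subtasks = []
    if subtasks:
        # Flush any remainder smaller than a full batch.
        TaskSet(tasks=subtasks).apply_async().join()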
Example #6
def encode_video(cls, video_pk):
    '''
    Task to encode a ``Video`` into one or more ``VideoFile`` objects.
    '''
    try:
        video_obj = cls.objects.get(pk=video_pk)
    except cls.DoesNotExist:
        # video was removed
        return

    filecls = video_obj.videofile_set.model

    files = list(video_obj.videofile_set.all())

    if video_obj.is_encoded and all(vfile.is_encoded for vfile in files):
        # video has been processed
        return

    video = video_obj.video

    # the source video must meet the size requirements
    # to be encoded at a specific resolution
    if len(files) == 0:
        for setting in VideoSetting.objects.all():
            if video.width >= setting.width or video.height >= setting.height:
                vfile = filecls.objects.create(original=video_obj,
                                               format=setting.format,
                                               width=setting.width,
                                               height=setting.height)
                files.append(vfile)
                log.info('%r can be encoded to %r' % (video, setting))

    tasks = []
    for vfile in files:
        if vfile.is_encoded:
            continue
        if vfile.width > 600 or vfile.height > 300:
            encode_video_file.delay(filecls, vfile.pk)
        else:
            task = encode_video_file_quick.subtask(args=(filecls, vfile.pk))
            tasks.append(task)

    # create a taskset of the quick encodings
    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    result.save()

    # start the publish_video callback when the taskset completes
    callback = publish_video.subtask(args=(cls, video_pk))
    # max_retries has to be set, otherwise the default is used;
    # check every 60 seconds whether the taskset has completed
    join_taskset.delay(result.taskset_id, callback,
                       interval=60, max_retries=300, propagate=False)
Example #7
def make_download_tasks(data, line_count, start_line):
    """For every item in the CSV, send it to Celery for processing"""
    previous_casenum = None
    subtasks = []
    completed = 0
    for index, item in data.iterrows():
        if completed < start_line - 1:
            # Skip ahead if start_line is provided.
            completed += 1
            continue

        if item['casenum'] != previous_casenum:
            # New case, get the docket before getting the pdf
            logger.info("New docket found with casenum: %s" % item['casenum'])
            previous_casenum = item['casenum']
            filename = get_docket_filename(item['court'], item['casenum'])
            url = get_docketxml_url(item['court'], item['casenum'])
            subtasks.append(download_recap_item.subtask((url, filename)))

        # Get the document
        filename = get_document_filename(item['court'], item['casenum'],
                                         item['docnum'], item['subdocnum'])
        url = get_pdf_url(item['court'], item['casenum'], filename)
        subtasks.append(download_recap_item.subtask((url, filename)))

        # Every n items or on the last item, send the subtasks to Celery.
        last_item = (line_count == completed + 1)
        if (len(subtasks) >= 1000) or last_item:
            msg = ("Sent %s subtasks to celery. We have processed %s "
                   "rows so far." % (len(subtasks), completed + 1))
            logger.info(msg)
            print(msg)
            job = TaskSet(tasks=subtasks)
            job.apply_async().join()
            subtasks = []

        completed += 1
Example #8
def make_download_tasks(data, line_count, start_line):
    """For every item in the CSV, send it to Celery for processing"""
    previous_casenum = None
    subtasks = []
    completed = 0
    for index, item in data.iterrows():
        if completed < start_line - 1:
            # Skip ahead if start_line is provided.
            completed += 1
            continue

        last_item = (line_count == completed + 1)
        if item['casenum'] != previous_casenum:
            # New case, get the docket before getting the pdf
            logger.info("New docket found with casenum: %s" % item['casenum'])
            previous_casenum = item['casenum']
            filename = get_docket_filename(item['court'], item['casenum'])
            url = get_docketxml_url(item['court'], item['casenum'])
            subtasks.append(download_recap_item.subtask((url, filename)))

        # Get the document
        filename = get_document_filename(item['court'], item['casenum'],
                                         item['docnum'], item['subdocnum'])
        url = get_pdf_url(item['court'], item['casenum'], filename)
        subtasks.append(download_recap_item.subtask((url, filename)))

        # Every n items send the subtasks to Celery.
        if (len(subtasks) >= 1000) or last_item:
            msg = ("Sent %s subtasks to celery. We have processed %s "
                   "rows so far." % (len(subtasks), completed + 1))
            logger.info(msg)
            print(msg)
            job = TaskSet(tasks=subtasks)
            job.apply_async().join()
            subtasks = []

        completed += 1
Example #9
    def test_function_taskset(self):
        subtasks = [return_True_task.s(i) for i in range(1, 6)]
        ts = TaskSet(subtasks)
        res = ts.apply_async()
        self.assertListEqual(res.join(), [True, True, True, True, True])
Example #10
    def test_function_taskset(self):
        with eager_tasks(self.app):
            subtasks = [return_True_task.s(i) for i in range(1, 6)]
            ts = TaskSet(subtasks)
            res = ts.apply_async()
            self.assertListEqual(res.join(), [True, True, True, True, True])
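The last two tests depend on a return_True_task fixture that is not shown and on eager_tasks, presumably a test helper that runs tasks eagerly for the duration of the with block. A minimal hypothetical version of the task itself:

from celery import Celery

app = Celery('tests', broker='memory://')

@app.task
def return_True_task(i):
    # Ignore the argument and always return True, as the assertions expect.
    return True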