def _initiate_subbasin_mapshed_job_chain(mapshed_input, job_id):
    errback = save_job_error.s(job_id)

    area_of_interest, wkaoi = _parse_input(mapshed_input)

    if not wkaoi:
        raise ValidationError('You must provide the `wkaoi` key: ' +
                              'a HUC id is currently required for ' +
                              'subbasin modeling.')
    [layer_code, shape_id] = wkaoi.split('__')
    if layer_code not in ['huc8', 'huc10']:
        raise ValidationError('Only HUC-08s and HUC-10s are valid for ' +
                              'subbasin modeling.')

    huc12s = split_into_huc12s(layer_code, shape_id)
    if not huc12s:
        raise EmptyResultSet('No subbasins found')

    job_chain = (multi_subbasin(area_of_interest, huc12s) |
                 collect_subbasin.s(huc12s) |
                 tasks.subbasin_results_to_dict.s() |
                 save_job_result.s(job_id, mapshed_input))

    return job_chain.apply_async(link_error=errback)

def start_celery_job(task_list, job_input, user=None):
    """
    Given a list of Celery tasks and their input, starts a Celery async job
    with those tasks, adds save_job_result and save_job_error handlers, and
    returns the job's id, which is used to query status and retrieve results
    via get_job

    :param task_list: A list of Celery tasks to execute. Is made into a chain
    :param job_input: Input to the first task, used in recording started jobs
    :param user: The user requesting the job. Optional.
    :return: A Response containing the job id, marked as 'started'
    """
    created = now()
    job = Job.objects.create(created_at=created, result='', error='',
                             traceback='', user=user, status='started')

    success = save_job_result.s(job.id, job_input)
    error = save_job_error.s(job.id)

    task_list.append(success)
    task_chain = chain(task_list).apply_async(link_error=error)

    job.uuid = task_chain.id
    job.save()

    return Response({
        'job': task_chain.id,
        'status': 'started',
    })

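# A minimal usage sketch of start_celery_job, assuming it sits alongside this
# module's imports. The view function below and its use of run_analyze are
# hypothetical illustrations, not part of the original code.
from rest_framework import decorators


@decorators.api_view(['POST'])
def example_start_analyze(request):
    # The request body is assumed to carry the area of interest GeoJSON.
    area_of_interest = request.data.get('area_of_interest')
    task_list = [tasks.run_analyze.s(area_of_interest)]

    # start_celery_job appends the save_job_result handler, starts the chain
    # with a save_job_error errback, and returns a Response with the job id.
    return start_celery_job(task_list, area_of_interest, request.user)
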
def _initiate_subbasin_mapshed_job_chain(mapshed_input, job_id):
    errback = save_job_error.s(job_id)

    area_of_interest, wkaoi = _parse_input(mapshed_input)

    if not wkaoi:
        raise ValidationError('You must provide the `wkaoi` key: ' +
                              'a HUC id is currently required for ' +
                              'subbasin modeling.')
    [layer_code, shape_id] = wkaoi.split('__')
    if layer_code not in ['huc8', 'huc10']:
        raise ValidationError('Only HUC-08s and HUC-10s are valid for ' +
                              'subbasin modeling.')

    huc12s = split_into_huc12s(layer_code, shape_id)
    if not huc12s:
        raise EmptyResultSet('No subbasins found')

    # Build one chain per HUC-12: run its geoprocessing group, then collect
    # its data. The per-HUC-12 chains run as a group before aggregation.
    huc12_job_chains = []
    for (huc12_id, huc12, huc12_aoi) in huc12s:
        huc12_wkaoi = 'huc12__{id}'.format(id=huc12_id)
        huc12_job_chains.append(
            chain((group(geoprocessing_chains(huc12_aoi, huc12_wkaoi,
                                              errback)) |
                   collect_data.s(huc12_aoi, huc12).set(link_error=errback))))

    return chain(group(huc12_job_chains) |
                 tasks.subbasin_results_to_dict.s() |
                 save_job_result.s(job_id, mapshed_input)).apply_async()

def _initiate_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    chain = (tasks.run_gwlfe.s(model_input, inputmod_hash) |
             save_job_result.s(job_id, model_input))
    errback = save_job_error.s(job_id)

    return chain.apply_async(link_error=errback)

def _initiate_rwd_job_chain(location, snapping, job_id, testing=False):
    exchange = MAGIC_EXCHANGE
    routing_key = choose_worker()

    return chain(tasks.start_rwd_job.s(location, snapping)
                 .set(exchange=exchange, routing_key=routing_key),
                 save_job_result.s(job_id, location)) \
        .apply_async(link_error=save_job_error.s(job_id))

def _initiate_mapshed_job_chain(input, job_id):
    exchange = MAGIC_EXCHANGE
    routing_key = choose_worker()

    return chain(tasks.start_mapshed_job.s(input)
                 .set(exchange=exchange, routing_key=routing_key),
                 save_job_result.s(job_id, input)) \
        .apply_async(link_error=save_job_error.s(job_id))

def _initiate_mapshed_job_chain(mapshed_input, job_id):
    errback = save_job_error.s(job_id)

    area_of_interest, wkaoi = _parse_input(mapshed_input)

    job_chain = (group(geoprocessing_chains(area_of_interest, wkaoi,
                                            errback)) |
                 collect_data.s(area_of_interest).set(link_error=errback) |
                 save_job_result.s(job_id, mapshed_input))

    return chain(job_chain).apply_async()

def _initiate_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    chain = (tasks.run_gwlfe.s(model_input, inputmod_hash).set(
                 exchange=MAGIC_EXCHANGE, routing_key=choose_worker()) |
             save_job_result.s(job_id, model_input).set(
                 exchange=MAGIC_EXCHANGE, routing_key=choose_worker()))

    errback = save_job_error.s(job_id).set(exchange=MAGIC_EXCHANGE,
                                           routing_key=choose_worker())

    return chain.apply_async(link_error=errback)

def _initiate_mapshed_job_chain(mapshed_input, job_id):
    errback = save_job_error.s(job_id)

    area_of_interest, wkaoi = _parse_input(mapshed_input)

    job_chain = (multi_mapshed(area_of_interest, wkaoi) |
                 convert_data.s(wkaoi) |
                 collect_data.s(area_of_interest) |
                 save_job_result.s(job_id, mapshed_input))

    return chain(job_chain).apply_async(link_error=errback)

def _initiate_analyze_job_chain(area_of_interest, job_id, testing=False):
    exchange = MAGIC_EXCHANGE
    routing_key = choose_worker()

    return chain(tasks.start_histogram_job.s(area_of_interest)
                 .set(exchange=exchange, routing_key=routing_key),
                 tasks.get_histogram_job_results.s()
                 .set(exchange=exchange, routing_key=routing_key),
                 tasks.histogram_to_survey.s(),
                 save_job_result.s(job_id, area_of_interest)) \
        .apply_async(link_error=save_job_error.s(job_id))

def _initiate_rwd_job_chain(location, snapping, simplify, data_source,
                            job_id, testing=False):
    errback = save_job_error.s(job_id)

    return chain(tasks.start_rwd_job.s(location, snapping, simplify,
                                       data_source),
                 save_job_result.s(job_id, location)) \
        .apply_async(link_error=errback)

def _initiate_subbasin_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    huc12_jobs = []
    errback = save_job_error.s(job_id)

    for (id, gms) in model_input.iteritems():
        huc12_jobs.append(
            tasks.run_gwlfe.s(gms, inputmod_hash, id).set(link_error=errback))

    return chain(group(huc12_jobs) |
                 tasks.subbasin_results_to_dict.s() |
                 tasks.run_srat.s() |
                 save_job_result.s(job_id, model_input)).apply_async()

def _initiate_mapshed_job_chain(mapshed_input, job_id):
    workers = get_living_workers()
    get_worker = lambda: random.choice(workers)

    # Each step of the chain is routed through MAGIC_EXCHANGE to a randomly
    # chosen living worker.
    errback = save_job_error.s(job_id).set(exchange=MAGIC_EXCHANGE,
                                           routing_key=get_worker())

    area_of_interest, wkaoi = parse_input(mapshed_input)

    job_chain = (
        group(geoprocessing_chains(area_of_interest, wkaoi, MAGIC_EXCHANGE,
                                   errback, choose_worker)) |
        combine.s().set(exchange=MAGIC_EXCHANGE, routing_key=get_worker()) |
        collect_data.s(area_of_interest).set(link_error=errback,
                                             exchange=MAGIC_EXCHANGE,
                                             routing_key=get_worker()) |
        save_job_result.s(job_id, mapshed_input).set(
            exchange=MAGIC_EXCHANGE, routing_key=get_worker()))

    return chain(job_chain).apply_async(link_error=errback)

def start_celery_job(task_list, job_input, user=None,
                     exchange=MAGIC_EXCHANGE, routing_key=None):
    """
    Given a list of Celery tasks and their input, starts a Celery async job
    with those tasks, adds save_job_result and save_job_error handlers, and
    returns the job's id, which is used to query status and retrieve results
    via get_job

    :param task_list: A list of Celery tasks to execute. Is made into a chain
    :param job_input: Input to the first task, used in recording started jobs
    :param user: The user requesting the job. Optional.
    :param exchange: Allows restricting jobs to specific exchange. Optional.
    :param routing_key: Allows restricting jobs to specific workers. Optional.
    :return: A Response containing the job id, marked as 'started'
    """
    created = now()
    job = Job.objects.create(created_at=created, result='', error='',
                             traceback='', user=user, status='started',
                             model_input=job_input)

    routing_key = routing_key if routing_key else choose_worker()

    success = save_job_result.s(job.id, job_input).set(
        exchange=exchange, routing_key=routing_key)
    error = save_job_error.s(job.id).set(exchange=exchange,
                                         routing_key=routing_key)

    task_list.append(success)
    task_chain = chain(task_list).apply_async(link_error=error)

    job.uuid = task_chain.id
    job.save()

    return Response({
        'job': task_chain.id,
        'status': 'started',
    })

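# A minimal usage sketch, assuming a caller that wants to pin an entire job to
# one worker. choose_worker, MAGIC_EXCHANGE, tasks.start_mapshed_job, and
# start_celery_job appear elsewhere in this code; the view function itself is
# a hypothetical illustration.
def example_start_mapshed(request):
    mapshed_input = request.data
    worker = choose_worker()

    task_list = [
        tasks.start_mapshed_job.s(mapshed_input)
             .set(exchange=MAGIC_EXCHANGE, routing_key=worker),
    ]

    # Passing the same exchange and routing_key means the save_job_result and
    # save_job_error handlers are routed to the same worker as the task above.
    return start_celery_job(task_list, mapshed_input, request.user,
                            exchange=MAGIC_EXCHANGE, routing_key=worker)
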
def _initiate_subbasin_gwlfe_job_chain(model_input, mapshed_job_uuid,
                                       modifications, inputmod_hash,
                                       job_id, chunk_size=8):
    errback = save_job_error.s(job_id)

    # Split the sub-basin ids into a list of lists. (We'll refer to
    # each inner list as a "chunk")
    watershed_ids = list(model_input.keys())
    watershed_id_chunks = [watershed_ids[x:x + chunk_size]
                           for x in range(0, len(watershed_ids), chunk_size)]

    stream_lengths = sum_subbasin_stream_lengths(model_input)

    # Create a celery group where each task in the group runs gwlfe
    # synchronously on a chunk of subbasin ids.
    # This is to keep the number of tasks in the group low. Celery will
    # not return the aggregate chain's job_id (which we need for the job
    # submission response) until all tasks have been submitted.
    # If we don't chunk, a shape that has 60+ subbasins could take >60sec
    # to generate a response (and thus timeout) because we'll be waiting to
    # submit one task for each subbasin.
    gwlfe_chunked_group = group(iter([
        tasks.run_subbasin_gwlfe_chunks.s(mapshed_job_uuid,
                                          modifications,
                                          stream_lengths,
                                          inputmod_hash,
                                          watershed_id_chunk)
             .set(link_error=errback)
        for watershed_id_chunk in watershed_id_chunks]))

    post_process = \
        tasks.subbasin_results_to_dict.s().set(link_error=errback) | \
        tasks.run_srat.s(mapshed_job_uuid).set(link_error=errback) | \
        save_job_result.s(job_id, mapshed_job_uuid)

    return (gwlfe_chunked_group | post_process).apply_async()

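# A small illustration of the chunking above, using made-up watershed ids:
# with chunk_size=8, 18 sub-basins yield chunks of sizes 8, 8, and 2, so the
# group submits 3 tasks instead of 18.
watershed_ids = ['wid_{}'.format(i) for i in range(18)]
chunk_size = 8
chunks = [watershed_ids[x:x + chunk_size]
          for x in range(0, len(watershed_ids), chunk_size)]
assert [len(c) for c in chunks] == [8, 8, 2]
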
def _initiate_analyze_job_chain(area_of_interest, job_id):
    return chain(tasks.run_analyze.s(area_of_interest),
                 save_job_result.s(job_id, area_of_interest)) \
        .apply_async(link_error=save_job_error.s(job_id))

def _initiate_tr55_job_chain(model_input, job_id):
    job_chain = _construct_tr55_job_chain(model_input, job_id)
    errback = save_job_error.s(job_id).set(exchange=MAGIC_EXCHANGE,
                                           routing_key=choose_worker())

    return chain(job_chain).apply_async(link_error=errback)

def _initiate_tr55_job_chain(model_input, job_id):
    return chain(tasks.make_gt_service_call_task.s(model_input),
                 tasks.run_tr55.s(model_input),
                 save_job_result.s(job_id, model_input)) \
        .apply_async(link_error=save_job_error.s(job_id))

def _initiate_tr55_job_chain(model_input, job_id):
    job_chain = _construct_tr55_job_chain(model_input, job_id)

    return chain(job_chain).apply_async(link_error=save_job_error.s(job_id))