def _initiate_subbasin_mapshed_job_chain(mapshed_input, job_id):
    """Start a subbasin MapShed job: one geoprocessing chain per HUC-12.

    Requires a `wkaoi` of layer huc8 or huc10; the shape is split into
    HUC-12s and each gets its own group-of-chains + collect step.

    :raises ValidationError: if `wkaoi` is missing or not a HUC-08/HUC-10
    :raises EmptyResultSet: if the shape contains no HUC-12s
    """
    errback = save_job_error.s(job_id)
    area_of_interest, wkaoi = _parse_input(mapshed_input)

    if not wkaoi:
        raise ValidationError('You must provide the `wkaoi` key: '
                              'a HUC id is currently required for '
                              'subbasin modeling.')

    layer_code, shape_id = wkaoi.split('__')
    if layer_code not in ('huc8', 'huc10'):
        raise ValidationError('Only HUC-08s and HUC-10s are valid for '
                              'subbasin modeling.')

    huc12s = split_into_huc12s(layer_code, shape_id)
    if not huc12s:
        raise EmptyResultSet('No subbasins found')

    def huc12_workflow(huc12_id, huc12, huc12_aoi):
        # Geoprocessing group followed by a collection step for one HUC-12.
        huc12_wkaoi = 'huc12__{id}'.format(id=huc12_id)
        geop = group(geoprocessing_chains(huc12_aoi, huc12_wkaoi, errback))
        return chain(geop |
                     collect_data.s(huc12_aoi, huc12).set(link_error=errback))

    huc12_job_chains = [huc12_workflow(*huc12) for huc12 in huc12s]

    workflow = (group(huc12_job_chains) |
                tasks.subbasin_results_to_dict.s() |
                save_job_result.s(job_id, mapshed_input))
    return chain(workflow).apply_async()
def _initiate_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    """Run GWLF-E on the given input and save the result under job_id.

    Fix: the local was previously named `chain`, shadowing the Celery
    `chain` callable this module uses elsewhere; renamed to `job_chain`.
    """
    job_chain = (tasks.run_gwlfe.s(model_input, inputmod_hash) |
                 save_job_result.s(job_id, model_input))
    errback = save_job_error.s(job_id)

    return job_chain.apply_async(link_error=errback)
def start_celery_job(task_list, job_input, user=None):
    """
    Given a list of Celery tasks and its input, starts a Celery async job with
    those tasks, adds save_job_result and save_job_error handlers, and returns
    the job's id which is used to query status and retrieve results via get_job

    :param task_list: A list of Celery tasks to execute. Is made into a chain
    :param job_input: Input to the first task, used in recording started jobs
    :param user: The user requesting the job. Optional.
    :return: A Response containing the job id, marked as 'started'
    """
    created = now()
    # Record the job row up front so its id can be bound into the
    # success/error callbacks before the chain is submitted.
    job = Job.objects.create(created_at=created, result='', error='',
                             traceback='', user=user, status='started')

    success = save_job_result.s(job.id, job_input)
    error = save_job_error.s(job.id)

    task_list.append(success)
    task_chain = chain(task_list).apply_async(link_error=error)

    # Persist the Celery task id so get_job can look the job up later.
    job.uuid = task_chain.id
    job.save()

    return Response({
        'job': task_chain.id,
        'status': 'started',
    })
def _initiate_subbasin_mapshed_job_chain(mapshed_input, job_id):
    """Kick off subbasin MapShed processing via multi_subbasin.

    Validates that the `wkaoi` is a HUC-08 or HUC-10, splits it into
    HUC-12s, and submits the combined workflow asynchronously.

    :raises ValidationError: if `wkaoi` is missing or not a HUC-08/HUC-10
    :raises EmptyResultSet: if no HUC-12s are found
    """
    errback = save_job_error.s(job_id)
    area_of_interest, wkaoi = _parse_input(mapshed_input)

    if not wkaoi:
        raise ValidationError('You must provide the `wkaoi` key: '
                              'a HUC id is currently required for '
                              'subbasin modeling.')

    layer_code, shape_id = wkaoi.split('__')
    if layer_code not in ('huc8', 'huc10'):
        raise ValidationError('Only HUC-08s and HUC-10s are valid for '
                              'subbasin modeling.')

    huc12s = split_into_huc12s(layer_code, shape_id)
    if not huc12s:
        raise EmptyResultSet('No subbasins found')

    workflow = (multi_subbasin(area_of_interest, huc12s) |
                collect_subbasin.s(huc12s) |
                tasks.subbasin_results_to_dict.s() |
                save_job_result.s(job_id, mapshed_input))

    return workflow.apply_async(link_error=errback)
def _initiate_mapshed_job_chain(input, job_id):
    """Submit a MapShed job to a chosen worker and save its result.

    NOTE(review): the `input` parameter shadows the builtin; kept as-is
    for interface compatibility with existing callers.
    """
    worker = choose_worker()
    start = tasks.start_mapshed_job.s(input).set(exchange=MAGIC_EXCHANGE,
                                                 routing_key=worker)
    finish = save_job_result.s(job_id, input)

    return chain(start, finish).apply_async(
        link_error=save_job_error.s(job_id))
def _initiate_rwd_job_chain(location, snapping, job_id, testing=False):
    """Submit an RWD (watershed delineation) job to a chosen worker.

    `testing` is accepted for interface compatibility but unused here.
    """
    worker = choose_worker()
    start = tasks.start_rwd_job.s(location, snapping).set(
        exchange=MAGIC_EXCHANGE, routing_key=worker)
    finish = save_job_result.s(job_id, location)

    return chain(start, finish).apply_async(
        link_error=save_job_error.s(job_id))
def _initiate_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    """Run GWLF-E on specific workers and save the result under job_id.

    Each signature is pinned to MAGIC_EXCHANGE with a freshly chosen
    routing key. Fix: the local was previously named `chain`, shadowing
    the Celery `chain` callable this module uses elsewhere; renamed.
    """
    job_chain = (tasks.run_gwlfe.s(model_input, inputmod_hash).set(
                     exchange=MAGIC_EXCHANGE, routing_key=choose_worker()) |
                 save_job_result.s(job_id, model_input).set(
                     exchange=MAGIC_EXCHANGE, routing_key=choose_worker()))
    errback = save_job_error.s(job_id).set(exchange=MAGIC_EXCHANGE,
                                           routing_key=choose_worker())

    return job_chain.apply_async(link_error=errback)
def _initiate_mapshed_job_chain(mapshed_input, job_id):
    """Run the MapShed geoprocessing group, collect, and save the result."""
    errback = save_job_error.s(job_id)
    area_of_interest, wkaoi = _parse_input(mapshed_input)

    geop_group = group(geoprocessing_chains(area_of_interest, wkaoi, errback))
    workflow = (geop_group |
                collect_data.s(area_of_interest).set(link_error=errback) |
                save_job_result.s(job_id, mapshed_input))

    return chain(workflow).apply_async()
def _construct_tr55_job_chain(model_input, job_id):
    """Build the list of Celery task signatures for a TR-55 model run.

    If all censuses are cached and current, runs the model directly;
    otherwise prepends histogram geoprocessing steps on a chosen worker.
    Fix: `job_chain.insert(len(job_chain), x)` was an obfuscated
    `job_chain.append(x)`.

    :param model_input: dict with area_of_interest / censuses / pieces keys
    :param job_id: id used by save_job_result at the end of the chain
    :return: list of task signatures (caller makes it into a chain)
    """
    exchange = MAGIC_EXCHANGE
    routing_key = choose_worker()
    job_chain = []

    aoi = model_input.get('area_of_interest')
    aoi_census = model_input.get('aoi_census')
    modification_censuses = model_input.get('modification_censuses')
    # Non-overlapping polygons derived from the modifications
    pieces = model_input.get('modification_pieces', [])
    # The hash of the current modifications
    current_hash = model_input.get('modification_hash')

    # The hash of the modifications whose censuses we already have
    census_hash = None
    # The list of already-computed censuses of the modifications
    modification_census_items = []
    if modification_censuses:
        census_hash = modification_censuses.get('modification_hash')
        modification_census_items = modification_censuses.get('censuses')

    if (aoi_census and ((modification_census_items and
                         census_hash == current_hash) or not pieces)):
        # Everything needed is cached and current: skip geoprocessing.
        censuses = [aoi_census] + modification_census_items
        job_chain.append(tasks.run_tr55.s(censuses, model_input))
    else:
        job_chain.append(tasks.get_histogram_job_results.s().set(
            exchange=exchange, routing_key=routing_key))
        job_chain.append(tasks.histograms_to_censuses.s().set(
            exchange=exchange, routing_key=routing_key))

        if aoi_census and pieces:
            # AoI census cached; only the modification pieces need geop.
            polygons = [m['shape']['geometry'] for m in pieces]
            job_chain.insert(
                0, tasks.start_histograms_job.s(polygons).set(
                    exchange=exchange, routing_key=routing_key))
            job_chain.append(
                tasks.run_tr55.s(model_input, cached_aoi_census=aoi_census))
        else:
            polygons = [aoi] + [m['shape']['geometry'] for m in pieces]
            job_chain.insert(
                0, tasks.start_histograms_job.s(polygons).set(
                    exchange=exchange, routing_key=routing_key))
            job_chain.append(tasks.run_tr55.s(model_input))

    job_chain.append(save_job_result.s(job_id, model_input))

    return job_chain
def _initiate_mapshed_job_chain(mapshed_input, job_id):
    """Run MapShed geoprocessing, conversion, and collection as one chain."""
    errback = save_job_error.s(job_id)
    area_of_interest, wkaoi = _parse_input(mapshed_input)

    steps = (multi_mapshed(area_of_interest, wkaoi) |
             convert_data.s(wkaoi) |
             collect_data.s(area_of_interest) |
             save_job_result.s(job_id, mapshed_input))

    return chain(steps).apply_async(link_error=errback)
def _initiate_analyze_job_chain(area_of_interest, job_id, testing=False):
    """Run the analyze histogram pipeline on a chosen worker.

    `testing` is accepted for interface compatibility but unused here.
    """
    worker = choose_worker()
    geop_opts = {'exchange': MAGIC_EXCHANGE, 'routing_key': worker}

    steps = [
        tasks.start_histogram_job.s(area_of_interest).set(**geop_opts),
        tasks.get_histogram_job_results.s().set(**geop_opts),
        tasks.histogram_to_survey.s(),
        save_job_result.s(job_id, area_of_interest),
    ]

    return chain(*steps).apply_async(link_error=save_job_error.s(job_id))
def _initiate_rwd_job_chain(location, snapping, simplify, data_source,
                            job_id, testing=False):
    """Submit an RWD job and save its result under job_id.

    `testing` is accepted for interface compatibility but unused here.
    """
    start = tasks.start_rwd_job.s(location, snapping, simplify, data_source)
    finish = save_job_result.s(job_id, location)

    return chain(start, finish).apply_async(
        link_error=save_job_error.s(job_id))
def _initiate_subbasin_gwlfe_job_chain(model_input, inputmod_hash, job_id):
    """Run GWLF-E per HUC-12 in parallel, then aggregate and run SRAT.

    Fixes: `.iteritems()` is Python-2-only — replaced with `.items()`,
    which is equivalent here; the loop variable `id` shadowed the builtin
    and was renamed to `huc12_id`.

    :param model_input: mapping of HUC-12 id -> gms data for that subbasin
    """
    errback = save_job_error.s(job_id)
    huc12_jobs = [
        tasks.run_gwlfe.s(gms, inputmod_hash, huc12_id).set(
            link_error=errback)
        for huc12_id, gms in model_input.items()
    ]

    return chain(group(huc12_jobs) |
                 tasks.subbasin_results_to_dict.s() |
                 tasks.run_srat.s() |
                 save_job_result.s(job_id, model_input)).apply_async()
def _construct_tr55_job_chain(model_input, job_id):
    """Build the list of Celery task signatures for a TR-55 model run.

    When the AoI census and modification censuses are cached and current,
    the model runs directly with no geoprocessing; otherwise an
    'nlcd_soil' geoprocessing step is prepended and its output converted
    before running the model.

    :param model_input: dict carrying the AoI, censuses, pieces, and hashes
    :param job_id: id used by save_job_result at the end of the chain
    :return: list of task signatures (caller makes it into a chain)
    """
    job_chain = []

    aoi_json_str, wkaoi = _parse_input(model_input)
    aoi = json.loads(aoi_json_str)
    aoi_census = model_input.get('aoi_census')
    modification_censuses = model_input.get('modification_censuses')
    # Non-overlapping polygons derived from the modifications
    pieces = model_input.get('modification_pieces', [])
    # The hash of the current modifications
    current_hash = model_input.get('modification_hash')

    # The hash of the modifications whose censuses we already have
    census_hash = None
    # The list of already-computed censuses of the modifications
    modification_census_items = []
    if modification_censuses:
        census_hash = modification_censuses.get('modification_hash')
        modification_census_items = modification_censuses.get('censuses')

    if (aoi_census and ((modification_census_items and
                         census_hash == current_hash) or not pieces)):
        # Everything cached and current: run the model directly.
        censuses = [aoi_census] + modification_census_items
        job_chain.append(tasks.run_tr55.s(censuses, aoi, model_input))
    else:
        job_chain.append(tasks.nlcd_soil.s())

        if aoi_census and pieces:
            # AoI census cached; geoprocess only the modification pieces.
            polygons = [m['shape']['geometry'] for m in pieces]
            geop_input = {'polygon': [json.dumps(p) for p in polygons]}

            job_chain.insert(0, geoprocessing.run.s('nlcd_soil', geop_input))
            job_chain.append(
                tasks.run_tr55.s(aoi, model_input,
                                 cached_aoi_census=aoi_census))
        else:
            # Geoprocess the AoI itself plus any pieces.
            polygons = [aoi] + [m['shape']['geometry'] for m in pieces]
            geop_input = {'polygon': [json.dumps(p) for p in polygons]}

            # Use WKAoI only if there are no pieces to modify the AoI
            wkaoi = wkaoi if not pieces else None

            job_chain.insert(
                0, geoprocessing.run.s('nlcd_soil', geop_input, wkaoi))
            job_chain.append(tasks.run_tr55.s(aoi, model_input))

    job_chain.append(save_job_result.s(job_id, model_input))

    return job_chain
def _construct_tr55_job_chain(model_input, job_id):
    """Build the list of Celery task signatures for a TR-55 model run.

    Uses cached censuses when they are present and their modification
    hash matches; otherwise prepends histogram geoprocessing steps bound
    to a chosen worker. Fix: `job_chain.insert(len(job_chain), x)` was
    an obfuscated `job_chain.append(x)`.

    :param model_input: dict with area_of_interest / censuses / pieces keys
    :param job_id: id used by save_job_result at the end of the chain
    :return: list of task signatures (caller makes it into a chain)
    """
    exchange = MAGIC_EXCHANGE
    routing_key = choose_worker()
    job_chain = []

    aoi = model_input.get('area_of_interest')
    aoi_census = model_input.get('aoi_census')
    modification_censuses = model_input.get('modification_censuses')
    # Non-overlapping polygons derived from the modifications
    pieces = model_input.get('modification_pieces', [])
    # The hash of the current modifications
    current_hash = model_input.get('modification_hash')

    # The hash of the modifications whose censuses we already have
    census_hash = None
    # The list of already-computed censuses of the modifications
    modification_census_items = []
    if modification_censuses:
        census_hash = modification_censuses.get('modification_hash')
        modification_census_items = modification_censuses.get('censuses')

    if (aoi_census and ((modification_census_items and
                         census_hash == current_hash) or not pieces)):
        # Everything needed is cached and current: skip geoprocessing.
        censuses = [aoi_census] + modification_census_items
        job_chain.append(tasks.run_tr55.s(censuses, model_input))
    else:
        job_chain.append(tasks.get_histogram_job_results.s()
                         .set(exchange=exchange, routing_key=routing_key))
        job_chain.append(tasks.histograms_to_censuses.s()
                         .set(exchange=exchange, routing_key=routing_key))

        if aoi_census and pieces:
            # AoI census cached; only the modification pieces need geop.
            polygons = [m['shape']['geometry'] for m in pieces]
            job_chain.insert(0, tasks.start_histograms_job.s(polygons)
                             .set(exchange=exchange,
                                  routing_key=routing_key))
            job_chain.append(tasks.run_tr55.s(model_input,
                                              cached_aoi_census=aoi_census))
        else:
            polygons = [aoi] + [m['shape']['geometry'] for m in pieces]
            job_chain.insert(0, tasks.start_histograms_job.s(polygons)
                             .set(exchange=exchange,
                                  routing_key=routing_key))
            job_chain.append(tasks.run_tr55.s(model_input))

    job_chain.append(save_job_result.s(job_id, model_input))

    return job_chain
def _construct_tr55_job_chain(model_input, job_id):
    """Build the TR-55 task list, reusing the cached census when its
    modification hash matches the current one.

    :return: list of task signatures (caller makes it into a chain)
    """
    current_hash = model_input['modification_hash']
    census = model_input.get('census')
    census_hash = None
    if census is not None:
        census_hash = census.get('modification_hash')

    if census is None or current_hash != census_hash:
        # Stale or missing census: recompute it before running the model.
        job_chain = [tasks.prepare_census.s(model_input),
                     tasks.run_tr55.s(model_input)]
    else:
        job_chain = [tasks.run_tr55.s(census, model_input)]

    job_chain.append(save_job_result.s(job_id, model_input))
    return job_chain
def _initiate_mapshed_job_chain(mapshed_input, job_id):
    """Run the MapShed pipeline, routing each step to a random living worker.

    Fix: the worker picker was a named lambda (`get_worker = lambda: ...`),
    which PEP 8 (E731) discourages; replaced with a `def`.
    """
    workers = get_living_workers()

    def get_worker():
        # Pick a (possibly different) living worker for each signature.
        return random.choice(workers)

    errback = save_job_error.s(job_id).set(exchange=MAGIC_EXCHANGE,
                                           routing_key=get_worker())

    area_of_interest, wkaoi = parse_input(mapshed_input)

    job_chain = (
        group(geoprocessing_chains(area_of_interest, wkaoi, MAGIC_EXCHANGE,
                                   errback, choose_worker)) |
        combine.s().set(exchange=MAGIC_EXCHANGE, routing_key=get_worker()) |
        collect_data.s(area_of_interest).set(link_error=errback,
                                             exchange=MAGIC_EXCHANGE,
                                             routing_key=get_worker()) |
        save_job_result.s(job_id, mapshed_input).set(
            exchange=MAGIC_EXCHANGE, routing_key=get_worker()))

    return chain(job_chain).apply_async(link_error=errback)
def start_celery_job(task_list, job_input, user=None, exchange=MAGIC_EXCHANGE,
                     routing_key=None):
    """
    Given a list of Celery tasks and its input, starts a Celery async job with
    those tasks, adds save_job_result and save_job_error handlers, and returns
    the job's id which is used to query status and retrieve results via get_job

    :param task_list: A list of Celery tasks to execute. Is made into a chain
    :param job_input: Input to the first task, used in recording started jobs
    :param user: The user requesting the job. Optional.
    :param exchange: Allows restricting jobs to specific exchange. Optional.
    :param routing_key: Allows restricting jobs to specific workers. Optional.
    :return: A Response containing the job id, marked as 'started'
    """
    created = now()
    # Record the job row up front so its id can be bound into the
    # success/error callbacks before the chain is submitted.
    job = Job.objects.create(created_at=created, result='', error='',
                             traceback='', user=user, status='started',
                             model_input=job_input)

    # Fall back to a chosen worker when no routing key was given.
    routing_key = routing_key if routing_key else choose_worker()
    success = save_job_result.s(job.id, job_input).set(
        exchange=exchange, routing_key=routing_key)
    error = save_job_error.s(job.id).set(exchange=exchange,
                                         routing_key=routing_key)

    task_list.append(success)
    task_chain = chain(task_list).apply_async(link_error=error)

    # Persist the Celery task id so get_job can look the job up later.
    job.uuid = task_chain.id
    job.save()

    return Response({
        'job': task_chain.id,
        'status': 'started',
    })
def _initiate_subbasin_gwlfe_job_chain(model_input, mapshed_job_uuid,
                                       modifications, inputmod_hash, job_id,
                                       chunk_size=8):
    """Run subbasin GWLF-E in chunks, then aggregate, run SRAT, and save.

    Fix: the group's list comprehension was wrapped in a redundant
    `iter(...)`; a generator expression is passed to `group` directly.

    :param model_input: mapping of sub-basin (watershed) id -> inputs
    :param chunk_size: how many sub-basin ids each group task handles
    """
    errback = save_job_error.s(job_id)

    # Split the sub-basin ids into a list of lists. (We'll refer to
    # each inner list as a "chunk")
    watershed_ids = list(model_input.keys())
    watershed_id_chunks = [watershed_ids[x:x + chunk_size]
                           for x in range(0, len(watershed_ids), chunk_size)]

    stream_lengths = sum_subbasin_stream_lengths(model_input)

    # Create a celery group where each task in the group
    # runs gwlfe synchronously on a chunk of subbasin ids.
    # This is to keep the number of tasks in the group low. Celery will
    # not return the aggregate chain's job_id (which we need for the job
    # submission response) until all tasks have been submitted.
    # If we don't chunk, a shape that has 60+ subbasins could take >60sec
    # to generate a response (and thus timeout) because we'll be waiting to
    # submit one task for each subbasin.
    gwlfe_chunked_group = group(
        tasks.run_subbasin_gwlfe_chunks.s(mapshed_job_uuid,
                                          modifications,
                                          stream_lengths,
                                          inputmod_hash,
                                          watershed_id_chunk)
        .set(link_error=errback)
        for watershed_id_chunk in watershed_id_chunks)

    post_process = \
        tasks.subbasin_results_to_dict.s().set(link_error=errback) | \
        tasks.run_srat.s(mapshed_job_uuid).set(link_error=errback) | \
        save_job_result.s(job_id, mapshed_job_uuid)

    return (gwlfe_chunked_group | post_process).apply_async()
def _initiate_analyze_job_chain(area_of_interest, job_id):
    """Run analyze on the area of interest and save the result."""
    errback = save_job_error.s(job_id)
    workflow = (tasks.run_analyze.s(area_of_interest) |
                save_job_result.s(job_id, area_of_interest))
    return workflow.apply_async(link_error=errback)
def _initiate_tr55_job_chain(model_input, job_id):
    """Run the GT service call, then TR-55, then save the result."""
    workflow = (tasks.make_gt_service_call_task.s(model_input) |
                tasks.run_tr55.s(model_input) |
                save_job_result.s(job_id, model_input))
    return workflow.apply_async(link_error=save_job_error.s(job_id))