def submit_sling_job2(id_hash, project, spyddder_extract_version,
                      multi_acquisition_localizer_version, acq_list, priority):
    """Map function for spyddder-man extract job."""
    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL
    logger.info("\njob_submit_url : %s" % job_submit_url)

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer_multi:{}".format(
        multi_acquisition_localizer_version)
    disk_usage = "100GB"

    # set job queue based on project
    job_queue = "%s-job_worker-large" % project

    rule = {
        "rule_name": "standard-product-sling",
        "queue": job_queue,
        "priority": priority,
        "kwargs": '{}'
    }

    sling_job_name = "standard_product-%s-%s" % (job_type, id_hash)

    params = [{
        "name": "asf_ngap_download_queue",
        "from": "value",
        "value": "factotum-job_worker-asf_throttled"
    }, {
        "name": "esa_download_queue",
        "from": "value",
        "value": "factotum-job_worker-scihub_throttled"
    }, {
        "name": "spyddder_extract_version",
        "from": "value",
        "value": spyddder_extract_version
    }, {
        "name": "products",
        "from": "value",
        "value": acq_list
    }]

    logger.info("PARAMS : %s" % params)
    logger.info("RULE : %s" % rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({}, rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": job_type
                                      },
                                      job_name=sling_job_name)

    logger.info("\nSubmitted sling job with id %s" % mozart_job_id)
    return mozart_job_id

def submit_job(job_name, job_spec, params, queue, priority, dedup):
    '''submits job through hysds wiring'''
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": int(priority),
        "kwargs": '{}'
    }
    hysdsio = {
        "id": "internal-temporary-wiring",
        "params": params,
        "job-specification": job_spec
    }
    submit_mozart_job({}, rule, hysdsio=hysdsio, job_name=job_name,
                      enable_dedup=dedup)

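# Illustrative usage sketch (an assumption, not taken from this repo): the
# wrapper above expects `params` as a list of {"name", "from", "value"} dicts,
# the same shape the other submitters in this module build. The job spec,
# queue name, and values below are hypothetical placeholders.
def _example_submit_job_usage():
    example_params = [{
        "name": "starttime",
        "from": "value",
        "value": "2019-01-01T00:00:00"
    }, {
        "name": "endtime",
        "from": "value",
        "value": "2019-01-02T00:00:00"
    }]
    submit_job(job_name="example_ingest-20190101T000000",
               job_spec="job-example_ingest:master",
               params=example_params,
               queue="factotum-job_worker-small",
               priority=5,
               dedup=True)
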
def submit_job(id, url, ds_es_url, tag, dataset_version):
    """Submit job for orbit dataset generation."""
    job_spec = "job-s1_orbit_ingest:%s" % tag
    job_name = "%s-%s" % (job_spec, id)
    job_name = job_name.lstrip('job-')

    # Setup input arguments here
    rule = {
        "rule_name": "s1_orbit_ingest",
        "queue": "factotum-job_worker-large",
        "priority": 0,
        "kwargs": '{}'
    }
    params = [{
        "name": "version_opt",
        "from": "value",
        "value": "--dataset_version",
    }, {
        "name": "version",
        "from": "value",
        "value": dataset_version,
    }, {
        "name": "orbit_url",
        "from": "value",
        "value": url,
    }, {
        "name": "orbit_file",
        "from": "value",
        "value": os.path.basename(url),
    }, {
        "name": "es_dataset_url",
        "from": "value",
        "value": ds_es_url,
    }]

    print("submitting orbit ingest job for %s" % id)
    submit_mozart_job({}, rule,
                      hysdsio={
                          "id": "internal-temporary-wiring",
                          "params": params,
                          "job-specification": job_spec
                      },
                      job_name=job_name)

def submit_job(start_time, end_time):
    tag = "master"
    ds_es_url = "http://128.149.127.152:9200/grq_v2.0_acquisition-s1-iw_slc/acquisition-S1-IW_SLC"
    job_spec = "job-acquisition_ingest-scihub:{}".format(tag)  # job-acquisition_ingest-scihub:dev-malarout
    job_name = "%s-%s-%s" % (job_spec,
                             start_time.replace("-", "").replace(":", ""),
                             end_time.replace("-", "").replace(":", ""))

    # Setup input arguments here
    rule = {
        "rule_name": "acquistion_ingest-scihub",
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": 0,
        "kwargs": '{}'
    }
    params = [{
        "name": "es_dataset_url",
        "from": "value",
        "value": ds_es_url,
    }, {
        "name": "ds_cfg",
        "from": "value",
        "value": "datasets.json"
    }, {
        "name": "starttime",
        "from": "value",
        "value": start_time
    }, {
        "name": "endtime",
        "from": "value",
        "value": end_time
    }, {
        "name": "ingest_flag",
        "from": "value",
        "value": "--ingest"
    }]

    id = submit_mozart_job({}, rule,
                           hysdsio={
                               "id": "internal-temporary-wiring",
                               "params": params,
                               "job-specification": job_spec
                           },
                           job_name=job_name)
    print("Submitted job for window {} to {}, JOB ID: {}".format(
        start_time, end_time, id))

def submit_ipf_scraper(acq, tag, endpoint):
    params = [{
        "name": "acq_id",
        "from": "value",
        "value": acq.get("id")
    }, {
        "name": "acq_met",
        "from": "value",
        "value": acq.get("metadata")
    }, {
        "name": "index",
        "from": "value",
        "value": "grq_v2.0_acquisition-s1-iw_slc"
    }, {
        "name": "dataset_type",
        "from": "value",
        "value": "acquisition-S1-IW_SLC"
    }, {
        "name": "endpoint",
        "from": "value",
        "value": endpoint
    }, {
        "name": "ds_cfg",
        "from": "value",
        "value": "datasets.json"
    }]

    rule = {
        "rule_name": "ipf_scraper_{}".format(endpoint),
        "queue": job_queues.get(endpoint),
        "priority": '5',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job({}, rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": "{}:{}".format(job_types.get(endpoint), tag)
                                      },
                                      job_name='%s-%s-%s' % (job_types.get(endpoint), acq.get("id"), tag))
    print("For {} , IPF scraper Job ID: {}".format(acq.get("id"), mozart_job_id))

def submit_scrubber_job(params):
    rule = {
        "rule_name": "bos_sarcat_scrubber",
        "queue": CRAWLER_QUEUE,
        "priority": '8',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job({}, rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": "job-scrub_outdated_bos_acqs:master"
                                      },
                                      job_name='job_%s-%s' % ('scrub_outdated_bos_acqs', "master"),
                                      enable_dedup=False)
    LOGGER.info("Job ID: " + mozart_job_id)
    print("Job ID: " + mozart_job_id)
    return

def submit_aoi_ipf(aoi):
    params = [{
        "name": "AOI_name",
        "from": "value",
        "value": aoi.get("id")
    }, {
        "name": "spatial_extent",
        "from": "value",
        "value": aoi.get("location")
    }, {
        "name": "start_time",
        "from": "value",
        "value": aoi.get("starttime")
    }, {
        "name": "end_time",
        "from": "value",
        "value": aoi.get("endtime")
    }]

    rule = {
        "rule_name": "{}_ipf_scraper".format(aoi.get("id")),
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": '4',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job(
        {}, rule,
        hysdsio={
            "id": "internal-temporary-wiring",
            "params": params,
            "job-specification": "job-AOI_based_ipf_submitter:master"
        },
        job_name='job-%s-%s-%s' % ("aoi_ipf_submitter", aoi.get("id"), "master"),
        enable_dedup=False)
    print("For {} , AOI IPF Submitter Job ID: {}".format(
        aoi.get("_id"), mozart_job_id))

def submit_global_ipf(spatial_extent, start_time, end_time, release):
    params = [{
        "name": "AOI_name",
        "from": "value",
        "value": "Global"
    }, {
        "name": "spatial_extent",
        "from": "value",
        "value": spatial_extent
    }, {
        "name": "start_time",
        "from": "value",
        "value": start_time
    }, {
        "name": "end_time",
        "from": "value",
        "value": end_time
    }]

    rule = {
        "rule_name": "{}_ipf_scraper".format("global"),
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": '5',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job(
        {}, rule,
        hysdsio={
            "id": "internal-temporary-wiring",
            "params": params,
            "job-specification": "job-AOI_based_ipf_submitter:{}".format(release)
        },
        job_name='job-%s-%s-%s' % ("ipf_submitter", "global", release),
        enable_dedup=False)
    print("For {} , IPF Submitter Job ID: {}".format("Global", mozart_job_id))

def submit_job(start, end, job_queue):
    tag = 'master'
    job_spec = 'job-bos_ingest:%s' % tag
    start_time_tag = start.replace('-', '').replace(':', '')
    end_time_tag = end.replace('-', '').replace(':', '')
    job_name = '%s-%s-%s' % (job_spec, start_time_tag, end_time_tag)

    # Setup input arguments here
    rule = {
        'rule_name': 'bos_sarcat_scraper',
        'queue': job_queue,
        'priority': '7',
        'kwargs': '{}'
    }
    params = [{
        'name': 'bos_ingest_time',
        'from': 'value',
        'value': ''
    }, {
        'name': 'from_time',
        'from': 'value',
        'value': start
    }, {
        'name': 'end_time',
        'from': 'value',
        'value': end
    }]
    hysds_io = {
        'id': 'internal-temporary-wiring',
        'params': params,
        'job-specification': job_spec
    }

    job_id = submit_mozart_job({}, rule, hysdsio=hysds_io, job_name=job_name)
    print('Submitted job for window {} to {}, JOB ID: {}'.format(
        start, end, job_id))

def submit_sling(ctx_file):
    """Submit sling for S1 SLC from acquisition."""

    # get context
    with open(ctx_file) as f:
        ctx = json.load(f)
    logger.info("ctx: {}".format(json.dumps(ctx, indent=2)))

    # get ES url
    es_url = app.conf.GRQ_ES_URL
    index = "grq"

    # get ids
    ids = ctx['ids']

    # build query
    query = {
        "query": {
            "ids": {
                "type": "acquisition-S1-IW_SLC",
                "values": ids,
            }
        },
        "partial_fields": {
            "partial": {
                "exclude": ["city", "context", "metadata.context"],
            }
        }
    }

    # query
    r = requests.post(
        "%s/grq_*_acquisition-s1-iw_slc/_search?search_type=scan&scroll=60&size=100" % es_url,
        data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    scroll_id = scan_result['_scroll_id']
    matches = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % es_url, data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        matches.extend(
            [i['fields']['partial'][0] for i in res['hits']['hits']])
    #logger.info("matches: {}".format([m['_id'] for m in matches]))
    logger.info("matches: {}".format(len(matches)))
    #logger.info("matches[-1]: {}".format(json.dumps(matches[-1], indent=2)))

    #required params for job submission
    qtype = "scihub"
    job_type = "job:spyddder-sling_%s" % qtype
    oauth_url = None
    queue = "factotum-job_worker-%s_throttled" % qtype  # job submission queue

    # loop over acquisitions and submit sling jobs
    for res in matches:
        id = res['id']

        # filter non acquisitions
        if res.get('dataset', None) != "acquisition-S1-IW_SLC":
            logger.info("Skipping invalid acquisition dataset: {}".format(id))
            continue

        # get metadata
        md = res['metadata']

        # build payload items for job submission
        archive_fname = md['archive_filename']
        title, ext = archive_fname.split('.')
        start_dt = get_date(res['starttime'])
        yr = start_dt.year
        mo = start_dt.month
        dy = start_dt.day
        logger.info("starttime: {}".format(start_dt))
        md5 = hashlib.md5("{}\n".format(archive_fname)).hexdigest()
        repo_url = "{}/{}/{}/{}/{}/{}".format(ctx['repo_url'], md5[0:8],
                                              md5[8:16], md5[16:24],
                                              md5[24:32], archive_fname)
        logger.info("repo_url: {}".format(repo_url))
        prod_met = {}
        prod_met['source'] = qtype
        prod_met['dataset_type'] = title[0:3]
        prod_met['spatial_extent'] = {
            'type': 'polygon',
            'aoi': None,
            'coordinates': res['location']['coordinates'],
        }
        prod_met['tag'] = []

        #set sling job spec release/branch
        #job_spec = "job-sling:release-20170619"
        job_spec = "job-sling:{}".format(ctx['sling_release'])
        rtime = datetime.utcnow()
        job_name = "%s-%s-%s-%s" % (job_spec, queue, archive_fname,
                                    rtime.strftime("%d_%b_%Y_%H:%M:%S"))
        job_name = job_name.lstrip('job-')

        #Setup input arguments here
        rule = {
            "rule_name": job_spec,
            "queue": queue,
            "priority": ctx.get('job_priority', 0),
            "kwargs": '{}'
        }
        params = [{
            "name": "download_url",
            "from": "value",
            "value": md['download_url'],
        }, {
            "name": "repo_url",
            "from": "value",
            "value": repo_url,
        }, {
            "name": "prod_name",
            "from": "value",
            "value": title,
        }, {
            "name": "file_type",
            "from": "value",
            "value": ext,
        }, {
            "name": "prod_date",
            "from": "value",
            "value": "{}".format("%04d-%02d-%02d" % (yr, mo, dy)),
        }, {
            "name": "prod_met",
            "from": "value",
            "value": prod_met,
        }, {
            "name": "options",
            "from": "value",
            "value": "--force_extract"
        }]

        logger.info("rule: {}".format(json.dumps(rule, indent=2)))
        logger.info("params: {}".format(json.dumps(params, indent=2)))
        submit_mozart_job({}, rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name)

def iterate(component, rule):
    """
    Iterator used to iterate across a query result and submit jobs for every hit
    @param component - "mozart" or "tosca" where this submission came from
    @param rule - rule containing information for running jobs, note - NOT A USER RULE
    """
    ids = []

    # Accumulator variables
    error_count = 0
    errors = []

    es_index, ignore1 = get_component_config(component)  # Read config from "origin"

    # Read in JSON formatted args and setup passthrough
    if 'query' in rule.get('query', {}):
        queryobj = rule["query"]
    else:
        queryobj = {"query": rule["query"]}
        rule['query'] = {"query": rule['query']}
    logger.info("Elasticsearch queryobj: %s" % json.dumps(queryobj))

    # Get hysds_ios wiring
    hysds_io_index = HYSDS_IOS_MOZART if component in ('mozart', 'figaro') else HYSDS_IOS_GRQ
    hysdsio = mozart_es.get_by_id(index=hysds_io_index, id=rule["job_type"])
    hysdsio = hysdsio['_source']

    # Is this a single submission
    passthru = rule.get('passthru_query', False)
    single = hysdsio.get(
        "submission_type",
        "individual" if passthru is True else "iteration") == "individual"
    logger.info("single submission type: %s" % single)

    # Do we need the results
    run_query = False if single else True
    if not run_query:  # check if we need the results anyway
        run_query = any(
            (i["from"].startswith('dataset_jpath') for i in hysdsio["params"]))
    logger.info("run_query: %s" % run_query)

    # Run the query to get the products; for efficiency, run query only if we need the results
    results = [{"_id": "Transient Faux-Results"}]
    if run_query:
        if component == "mozart" or component == "figaro":
            results = mozart_es.query(index=es_index, body=queryobj)
        else:
            results = grq_es.query(index=es_index, body=queryobj)

    # What to iterate for submission
    submission_iterable = [{
        "_id": "Global Single Submission"
    }] if single else results

    # Iterator loop
    for item in submission_iterable:
        try:
            # For single submissions, submit all results as one
            product = results if single else item
            logger.info("Submitting mozart job for product: %s" % product)

            # set clean descriptive job name
            job_type = rule['job_type']
            if job_type.startswith('hysds-io-'):
                job_type = job_type.replace('hysds-io-', '', 1)
            if isinstance(product, dict):
                job_name = "%s-%s" % (job_type, product.get('_id', 'unknown'))
            else:
                job_name = "%s-single_submission" % job_type

            # get enable_dedup flag: rule > hysdsio
            if rule.get("enable_dedup") is None:
                rule['enable_dedup'] = hysdsio.get("enable_dedup", True)

            task_id = submit_mozart_job(product, rule, hysdsio, job_name=job_name)
            ids.append(task_id)
        except Exception as e:
            error_count = error_count + 1
            if not str(e) in errors:
                errors.append(str(e))
            logger.warning("Failed to submit jobs: {0}:{1}".format(type(e), str(e)))
            logger.warning(traceback.format_exc())

    if error_count > 0:
        logger.error("Failed to submit: {0} of {1} jobs. {2}".format(
            error_count, len(list(results)), " ".join(errors)))
        raise Exception("Job Submitter Job failed to submit all actions")

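# Illustrative sketch (an assumption, inferred from the fields iterate() reads
# above and from the rule dicts built elsewhere in this file): the `rule`
# argument carries the usual trigger-rule fields plus job_type, query,
# passthru_query, and enable_dedup. The job_type id and query below are
# hypothetical placeholders, not values from this repo.
_example_iterate_rule = {
    "rule_name": "example_trigger_rule",
    "queue": "factotum-job_worker-small",
    "priority": 5,
    "kwargs": '{}',
    "job_type": "hysds-io-example_job:master",  # hysds_ios doc id looked up in ES
    "query": {"query": {"match_all": {}}},      # ES query selecting products to iterate over
    "passthru_query": False,                    # True forces a single "individual" submission
    "enable_dedup": True                        # when None, falls back to the hysdsio setting
}
# iterate("tosca", _example_iterate_rule)
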
"name": "version_opt", "from": "value", "value": "--dataset_version", }, { "name": "version", "from": "value", "value": dataset_version, }, { "name": "tag_opt", "from": "value", "value": "--tag", }, { "name": "tag", "from": "value", "value": tag, }, { "name": "es_dataset_url", "from": "value", "value": ds_es_url, }] print("submitting %s crawler job" % qc_type) submit_mozart_job({}, rule, hysdsio={ "id": "internal-temporary-wiring", "params": params, "job-specification": job_spec }, job_name=job_name, enable_dedup=False)
"value": acq_data["metadata"]["archive_filename"] }, { "name": "prod_met", "from": "value", "value": acq_data["metadata"] } ] logger.info("PARAMS : %s" %params) logger.info("RULE : %s"%rule) logger.info(job_type) logger.info(sling_job_name) mozart_job_id = submit_mozart_job({}, rule,hysdsio={"id": "internal-temporary-wiring", "params": params, "job-specification": job_type}, job_name=sling_job_name) logger.info("\nSubmitted sling job with id %s for %s" %(acq_data["metadata"]["identifier"], mozart_job_id)) return mozart_job_id def check_ES_status(doc_id): """ There is a latency in the update of ES job status after celery signals job completion. To handle that case, we much poll ES (after sciflo returns status after blocking) until the job status is correctly reflected. :param doc_id: ID of the Job ES doc :return: True if the ES has updated job status within 5 minutes otherwise raise a run time error """ es_url = app.conf['JOBS_ES_URL']
def submit_qquery_job(region, query_endpoint, dns_list, qquery_rtag, sling_rtag, pds_queue=None):
    # set query priority
    priority = 0
    if "priority" in region["metadata"].keys():
        priority = int(region["metadata"]["priority"])

    # determine qquery job submission branch
    job_header = 'job-qquery-opds' if pds_queue else 'job-qquery'
    job_spec = job_header + ":" + qquery_rtag

    # determine the repo to query from the types_map in the aoi
    for qtype in region["metadata"]["query"].keys():  # list of endpoints to query
        if qtype != query_endpoint:
            continue
        p = priority
        if priority == 0 and "priority" in region["metadata"]["query"][qtype].keys():
            p = int(region["metadata"]["query"][qtype]["priority"])
        rtime = datetime.datetime.utcnow()
        job_name = "%s-%s-%s-%s" % (job_spec, qtype, region["id"],
                                    rtime.strftime("%d_%b_%Y_%H:%M:%S"))
        job_name = job_name.lstrip('job-')

        # Setup input arguments here
        rule = {
            "rule_name": "qquery",
            "queue": "factotum-job_worker-%s_throttled" % qtype,  # job submission queue
            "priority": p,
            "kwargs": '{}'
        }
        params = [{
            "name": "aoi",
            "from": "value",
            "value": "{}".format(region["id"]),
        }, {
            "name": "endpoint",
            "from": "value",
            "value": "{}".format(qtype),
        }, {
            "name": "dns_list",
            "from": "value",
            "value": "{}".format(dns_list),
        }, {
            "name": "sling_version",
            "from": "value",
            "value": "{}".format(sling_rtag),
        }]
        if pds_queue:
            queue = {
                "name": "pds_queue",
                "from": "value",
                "value": "{}".format(pds_queue),
            }
            params.append(queue)

        # for each aoi and endpoint, submit a query job
        print("{0: <60}: {1}".format(
            "Submitting %s query job for %s over aoi" % (job_header, qtype),
            region["id"]))
        submit_mozart_job({}, rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name,
                          enable_dedup=False)

def submit_sling_job(project, spyddder_extract_version, acquisition_localizer_versions, acq_data, priority):
    """Map function for spyddder-man extract job."""
    acquisition_localizer_version = "standard-product"
    spyddder_extract_version = "standard-product"
    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer:{}".format(acquisition_localizer_versions)
    disk_usage = "300GB"

    #logger.info(acq_data)
    #acq_id = acq_data['acq_id']

    # set job queue based on project
    #job_queue = "%s-job_worker-large" % project
    job_queue = "factotum-job_worker-small"

    rule = {
        "rule_name": "standard-product-sling",
        "queue": job_queue,
        "priority": '5',
        "kwargs": '{}'
    }

    sling_job_name = "standard_product-%s-%s" % (job_type, acq_data["metadata"]["identifier"])

    params = [{
        "name": "workflow",
        "from": "value",
        "value": "acquisition_localizer.sf.xml"
    }, {
        "name": "project",
        "from": "value",
        "value": project
    }, {
        "name": "spyddder_extract_version",
        "from": "value",
        "value": spyddder_extract_version
    }, {
        "name": "dataset_type",
        "from": "value",
        "value": acq_data["dataset_type"]
    }, {
        "name": "dataset",
        "from": "value",
        "value": acq_data["dataset"]
    }, {
        "name": "identifier",
        "from": "value",
        "value": acq_data["metadata"]["identifier"]
    }, {
        "name": "download_url",
        "from": "value",
        "value": acq_data["metadata"]["download_url"]
    }, {
        "name": "archive_filename",
        "from": "value",
        "value": acq_data["metadata"]["archive_filename"]
    }, {
        "name": "prod_met",
        "from": "value",
        "value": acq_data["metadata"]
    }]

    logger.info("PARAMS : %s" % params)
    logger.info("RULE : %s" % rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({}, rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": job_type
                                      },
                                      job_name=sling_job_name)

    logger.info("\nSubmitted sling job with id %s for %s" %
                (mozart_job_id, acq_data["metadata"]["identifier"]))
    return mozart_job_id

def submit_sling_job(self, aoi, query_params, qtype, queue_grp, title, link, rtag=None, pds_queue=None):
    #Query for all products, and return a list of (Title,URL)
    yr, mo, dy = self.getDataDateFromTitle(title)  #date
    filename = title + "." + self.getFileType()

    if not pds_queue:
        # build payload items for job submission
        tags = query_params["tag"]
        md5 = hashlib.md5("{0}.{1}\n".format(title, self.getFileType())).hexdigest()
        cfg = config()  # load settings.json
        repo_url = "%s/%s/%s/%s/%s/%s.%s" % (
            cfg["repository-base"], md5[0:8], md5[8:16], md5[16:24],
            md5[24:32], title, self.getFileType())

        location = {}
        location['type'] = 'polygon'
        location['aoi'] = aoi['id']
        location['coordinates'] = aoi['location']['coordinates']

        prod_met = {}
        prod_met['source'] = qtype
        prod_met['dataset_type'] = title[0:3]
        prod_met['spatial_extent'] = location
        prod_met['tag'] = tags

        queue = "factotum-job_worker-%s_throttled" % (qtype + str(queue_grp))  # job submission queue
        job_header = 'job-sling:'
        dedup_key = DEDUP_KEY
        params = [{
            "name": "download_url",
            "from": "value",
            "value": link,
        }, {
            "name": "repo_url",
            "from": "value",
            "value": repo_url,
        }, {
            "name": "prod_name",
            "from": "value",
            "value": title,
        }, {
            "name": "file_type",
            "from": "value",
            "value": self.getFileType(),
        }, {
            "name": "prod_date",
            "from": "value",
            "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
        }, {
            "name": "prod_met",
            "from": "value",
            "value": prod_met,
        }, {
            "name": "options",
            "from": "value",
            "value": "--force_extract"
        }]
    else:
        # queue = "opds-%s-job_worker-small" % (qtype)
        queue = pds_queue  # job submission queue, no queue group for autoscalers
        job_header = 'job-sling-extract-opds:'
        dedup_key = DEDUP_KEY_PDS
        params = [{
            "name": "download_url",
            "from": "value",
            "value": link,
        }, {
            "name": "prod_name",
            "from": "value",
            "value": "%s-pds" % title,
        }, {
            "name": "file",
            "from": "value",
            "value": filename,
        }, {
            "name": "prod_date",
            "from": "value",
            "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
        }]

    #set sling job spec release/branch
    if rtag is None:
        try:
            with open('_context.json') as json_data:
                context = json.load(json_data)
            job_spec = job_header + context['job_specification']['job-version']
        except:
            print('Failed on loading context.json')
    else:
        job_spec = job_header + rtag

    rtime = datetime.datetime.utcnow()
    job_name = "%s-%s-%s-%s-%s" % (job_spec, queue, title,
                                   rtime.strftime("%d_%b_%Y_%H:%M:%S"), aoi['id'])
    job_name = job_name.lstrip('job-')
    priority = query_params["priority"]

    #Setup input arguments here
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": priority,
        "kwargs": '{}'
    }

    #check for dedup, if clear, submit job
    if not self.deduplicate(filename, dedup_key):
        submit_mozart_job({}, rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name)
    else:
        location = " to OpenDataset" if pds_queue else "to own bucket"
        reason = "in OpenDataset" if pds_queue else "in OpenDataset or own bucket"
        print("Will not submit sling job {0} to {1}, already processed {2}".format(
            title, location, reason))

def submit_sling_job(self, aoi, query_params, qtype, queue_grp, title, link, rtag=None):
    #Query for all products, and return a list of (Title,URL)
    cfg = config()  #load settings.json
    priority = query_params["priority"]
    products = query_params["products"]
    tags = query_params["tag"]

    #build payload items for job submission
    yr, mo, dy = self.getDataDateFromTitle(title)  #date
    md5 = hashlib.md5("{0}.{1}\n".format(title, self.getFileType())).hexdigest()
    repo_url = "%s/%s/%s/%s/%s/%s.%s" % (cfg["repository-base"], md5[0:8],
                                         md5[8:16], md5[16:24], md5[24:32],
                                         title, self.getFileType())

    location = {}
    location['type'] = 'polygon'
    location['aoi'] = aoi['id']
    location['coordinates'] = aoi['location']['coordinates']

    prod_met = {}
    prod_met['source'] = qtype
    prod_met['dataset_type'] = title[0:3]
    prod_met['spatial_extent'] = location
    prod_met['tag'] = tags

    #required params for job submission
    if hasattr(self, 'getOauthUrl'):
        #sling via oauth
        oauth_url = self.getOauthUrl()
        job_type = "job:spyddder-sling-oauth_%s" % qtype
        job_name = "spyddder-sling-oauth_%s-%s-%s.%s" % (qtype, aoi['id'],
                                                         title, self.getFileType())
    else:
        #normal sling
        job_type = "job:spyddder-sling_%s" % qtype
        job_name = "spyddder-sling_%s-%s-%s.%s" % (qtype, aoi['id'], title,
                                                   self.getFileType())
        oauth_url = None
    queue = "factotum-job_worker-%s_throttled" % (qtype + str(queue_grp))  # job submission queue

    #set sling job spec release/branch
    if rtag is None:
        try:
            with open('_context.json') as json_data:
                context = json.load(json_data)
            job_spec = 'job-sling:' + context['job_specification']['job-version']
        except:
            print('Failed on loading context.json')
    else:
        job_spec = 'job-sling:' + rtag

    rtime = datetime.datetime.utcnow()
    job_name = "%s-%s-%s-%s-%s" % (job_spec, queue, title,
                                   rtime.strftime("%d_%b_%Y_%H:%M:%S"), aoi['id'])
    job_name = job_name.lstrip('job-')

    #Setup input arguments here
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": priority,
        "kwargs": '{}'
    }
    params = [{
        "name": "download_url",
        "from": "value",
        "value": link,
    }, {
        "name": "repo_url",
        "from": "value",
        "value": repo_url,
    }, {
        "name": "prod_name",
        "from": "value",
        "value": title,
    }, {
        "name": "file_type",
        "from": "value",
        "value": self.getFileType(),
    }, {
        "name": "prod_date",
        "from": "value",
        "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
    }, {
        "name": "prod_met",
        "from": "value",
        "value": prod_met,
    }, {
        "name": "options",
        "from": "value",
        "value": "--force_extract"
    }]

    #check for dedup, if clear, submit job
    if not self.deduplicate(title + "." + self.getFileType()):
        submit_mozart_job({}, rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name)
    else:
        print("Will not submit sling job for {0}, already processed".format(title))

"name": "ds_cfg", "from": "value", "value": "datasets.json" }, { "name": "starttime", "from": "value", "value": starttime }, { "name": "endtime", "from": "value", "value": endtime }, { "name": "create_flag", "from": "value", "value": "--create" }] print("submitting scraper job for %s" % qtype) submit_mozart_job({}, rule, hysdsio={ "id": "internal-temporary-wiring", "params": params, "job-specification": job_spec }, job_name=job_name, soft_time_limit=604800, time_limit=605100) mis_date += timedelta(days=1)
def submit_sling_job(spyddder_extract_version, acquisition_localizer_version,
                     esa_download_queue, asf_ngap_download_queue, acq_data, priority):
    """Map function for spyddder-man extract job."""
    #acquisition_localizer_version = "master"
    #spyddder_extract_version = "develop"
    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer_single:{}".format(acquisition_localizer_version)
    logger.info("\nSubmitting job of type : %s" % job_type)
    disk_usage = "10GB"

    #logger.info(acq_data)
    #acq_id = acq_data['acq_id']

    # set job queue
    job_queue = "system-job_worker-small"

    rule = {
        "rule_name": "acquisition_localizer_multi-sling",
        "queue": job_queue,
        "priority": priority,
        "kwargs": '{}'
    }

    sling_job_name = "sling-%s-%s" % (job_type, acq_data["metadata"]["identifier"])

    params = [{
        "name": "workflow",
        "from": "value",
        "value": "acquisition_localizer.sf.xml"
    }, {
        "name": "asf_ngap_download_queue",
        "from": "value",
        "value": asf_ngap_download_queue
    }, {
        "name": "esa_download_queue",
        "from": "value",
        "value": esa_download_queue
    }, {
        "name": "spyddder_extract_version",
        "from": "value",
        "value": spyddder_extract_version
    }, {
        "name": "dataset_type",
        "from": "value",
        "value": acq_data["dataset_type"]
    }, {
        "name": "dataset",
        "from": "value",
        "value": acq_data["dataset"]
    }, {
        "name": "identifier",
        "from": "value",
        "value": acq_data["metadata"]["identifier"]
    }, {
        "name": "download_url",
        "from": "value",
        "value": acq_data["metadata"]["download_url"]
    }, {
        "name": "archive_filename",
        "from": "value",
        "value": acq_data["metadata"]["archive_filename"]
    }, {
        "name": "prod_met",
        "from": "value",
        "value": acq_data["metadata"]
    }]

    logger.info("PARAMS : %s" % params)
    logger.info("RULE : %s" % rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({}, rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": job_type
                                      },
                                      job_name=sling_job_name)

    logger.info("\nSubmitted sling job with id %s for %s" %
                (mozart_job_id, acq_data["metadata"]["identifier"]))
    return mozart_job_id

def submit_sling(ctx_file):
    """Submit sling for S1 SLC from acquisition."""

    # get context
    with open(ctx_file) as f:
        ctx = json.load(f)
    logger.info("ctx: {}".format(json.dumps(ctx, indent=2)))

    # filter non acquisitions
    if ctx.get('source_dataset', None) != "acquisition-S1-IW_SLC":
        raise RuntimeError("Skipping invalid acquisition dataset.")

    # build payload items for job submission
    qtype = "scihub"
    archive_fname = ctx['archive_filename']
    title, ext = archive_fname.split('.')
    start_dt = get_date(ctx['starttime'])
    yr = start_dt.year
    mo = start_dt.month
    dy = start_dt.day
    logger.info("starttime: {}".format(start_dt))
    md5 = hashlib.md5("{}\n".format(archive_fname)).hexdigest()
    repo_url = "{}/{}/{}/{}/{}/{}".format(ctx['repo_url'], md5[0:8],
                                          md5[8:16], md5[16:24], md5[24:32],
                                          archive_fname)
    logger.info("repo_url: {}".format(repo_url))

    prod_met = {}
    prod_met['source'] = qtype
    prod_met['dataset_type'] = title[0:3]
    prod_met['spatial_extent'] = {
        'type': 'polygon',
        'aoi': None,
        'coordinates': ctx['prod_met']['location']['coordinates'],
    }
    prod_met['tag'] = []

    #required params for job submission
    job_type = "job:spyddder-sling_%s" % qtype
    oauth_url = None
    queue = "factotum-job_worker-%s_throttled" % qtype  # job submission queue

    #set sling job spec release/branch
    #job_spec = "job-sling:release-20170619"
    job_spec = "job-sling:{}".format(ctx['sling_release'])
    rtime = datetime.utcnow()
    job_name = "%s-%s-%s-%s" % (job_spec, queue, archive_fname,
                                rtime.strftime("%d_%b_%Y_%H:%M:%S"))
    job_name = job_name.lstrip('job-')

    #Setup input arguments here
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": ctx.get('job_priority', 0),
        "kwargs": '{}'
    }
    params = [{
        "name": "download_url",
        "from": "value",
        "value": ctx['download_url'],
    }, {
        "name": "repo_url",
        "from": "value",
        "value": repo_url,
    }, {
        "name": "prod_name",
        "from": "value",
        "value": title,
    }, {
        "name": "file_type",
        "from": "value",
        "value": ext,
    }, {
        "name": "prod_date",
        "from": "value",
        "value": "{}".format("%04d-%02d-%02d" % (yr, mo, dy)),
    }, {
        "name": "prod_met",
        "from": "value",
        "value": prod_met,
    }, {
        "name": "options",
        "from": "value",
        "value": "--force_extract"
    }]

    logger.info("rule: {}".format(json.dumps(rule, indent=2)))
    logger.info("params: {}".format(json.dumps(params, indent=2)))
    submit_mozart_job({}, rule,
                      hysdsio={
                          "id": "internal-temporary-wiring",
                          "params": params,
                          "job-specification": job_spec
                      },
                      job_name=job_name)