Example No. 1
def submit_sling_job2(id_hash, project, spyddder_extract_version,
                      multi_acquisition_localizer_version, acq_list, priority):
    """Map function for spyddder-man extract job."""

    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL
    logger.info("\njob_submit_url : %s" % job_submit_url)

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer_multi:{}".format(
        multi_acquisition_localizer_version)
    disk_usage = "100GB"

    # set job queue based on project
    job_queue = "%s-job_worker-large" % project
    rule = {
        "rule_name": "standard-product-sling",
        "queue": job_queue,
        "priority": priority,
        "kwargs": '{}'
    }

    sling_job_name = "standard_product-%s-%s" % (job_type, id_hash)

    params = [{
        "name": "asf_ngap_download_queue",
        "from": "value",
        "value": "factotum-job_worker-asf_throttled"
    }, {
        "name": "esa_download_queue",
        "from": "value",
        "value": "factotum-job_worker-scihub_throttled"
    }, {
        "name": "spyddder_extract_version",
        "from": "value",
        "value": spyddder_extract_version
    }, {
        "name": "products",
        "from": "value",
        "value": acq_list
    }]

    logger.info("PARAMS : %s" % params)
    logger.info("RULE : %s" % rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({},
                                      rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": job_type
                                      },
                                      job_name=sling_job_name)
    logger.info("\nSubmitted sling job with id %s" % mozart_job_id)

    return mozart_job_id
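
For reference, a caller might invoke this helper roughly as follows; the id hash, project name, release tags, and acquisition IDs below are illustrative placeholders, not values from the original source.

# Hypothetical invocation of submit_sling_job2 (all argument values are placeholders)
acq_list = ["acquisition-S1A_IW_SLC__1SDV_20200101T000000",
            "acquisition-S1A_IW_SLC__1SDV_20200101T000025"]
job_id = submit_sling_job2(id_hash="abc123def456",
                           project="grfn",
                           spyddder_extract_version="standard-product",
                           multi_acquisition_localizer_version="standard-product",
                           acq_list=acq_list,
                           priority=5)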
Example No. 2
def submit_job(job_name, job_spec, params, queue, priority, dedup):
    '''submits job through hysds wiring'''
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": int(priority),
        "kwargs": '{}'
    }
    hysdsio = {
        "id": "internal-temporary-wiring",
        "params": params,
        "job-specification": job_spec
    }
    submit_mozart_job({},
                      rule,
                      hysdsio=hysdsio,
                      job_name=job_name,
                      enable_dedup=dedup)
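
A minimal usage sketch for this wrapper, assuming the job spec and queue below exist in the target cluster (both names are taken from other examples on this page and may differ in practice):

params = [{
    "name": "starttime",
    "from": "value",
    "value": "2020-01-01T00:00:00"
}, {
    "name": "endtime",
    "from": "value",
    "value": "2020-01-02T00:00:00"
}]
submit_job(job_name="acquisition_ingest-scihub-20200101",
           job_spec="job-acquisition_ingest-scihub:master",
           params=params,
           queue="factotum-job_worker-apihub_scraper_throttled",
           priority=0,
           dedup=True)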
Example No. 3
def submit_job(id, url, ds_es_url, tag, dataset_version):
    """Submit job for orbit dataset generation."""

    job_spec = "job-s1_orbit_ingest:%s" % tag
    job_name = "%s-%s" % (job_spec, id)
    job_name = job_name.lstrip('job-')

    #Setup input arguments here
    rule = {
        "rule_name": "s1_orbit_ingest",
        "queue": "factotum-job_worker-large",
        "priority": 0,
        "kwargs": '{}'
    }
    params = [{
        "name": "version_opt",
        "from": "value",
        "value": "--dataset_version",
    }, {
        "name": "version",
        "from": "value",
        "value": dataset_version,
    }, {
        "name": "orbit_url",
        "from": "value",
        "value": url,
    }, {
        "name": "orbit_file",
        "from": "value",
        "value": os.path.basename(url),
    }, {
        "name": "es_dataset_url",
        "from": "value",
        "value": ds_es_url,
    }]
    print("submitting orbit ingest job for %s" % id)
    submit_mozart_job({},
                      rule,
                      hysdsio={
                          "id": "internal-temporary-wiring",
                          "params": params,
                          "job-specification": job_spec
                      },
                      job_name=job_name)
Example No. 4
def submit_job(start_time, end_time):

    tag = "master"
    ds_es_url = "http://128.149.127.152:9200/grq_v2.0_acquisition-s1-iw_slc/acquisition-S1-IW_SLC"
    job_spec = "job-acquisition_ingest-scihub:{}".format(
        tag)  #job-acquisition_ingest-scihub:dev-malarout
    job_name = "%s-%s-%s" % (job_spec, start_time.replace("-", "").replace(
        ":", ""), end_time.replace("-", "").replace(":", ""))

    # Setup input arguments here
    rule = {
        "rule_name": "acquistion_ingest-scihub",
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": 0,
        "kwargs": '{}'
    }

    params = [{
        "name": "es_dataset_url",
        "from": "value",
        "value": ds_es_url,
    }, {
        "name": "ds_cfg",
        "from": "value",
        "value": "datasets.json"
    }, {
        "name": "starttime",
        "from": "value",
        "value": start_time
    }, {
        "name": "endtime",
        "from": "value",
        "value": end_time
    }, {
        "name": "ingest_flag",
        "from": "value",
        "value": "--ingest"
    }]

    id = submit_mozart_job({},
                           rule,
                           hysdsio={
                               "id": "internal-temporary-wiring",
                               "params": params,
                               "job-specification": job_spec
                           },
                           job_name=job_name)

    print("Submitted job for window {} to {}, JOB ID: {}".format(
        start_time, end_time, id))
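
If this submitter is driven over a series of time windows, the calling code might look roughly like the sketch below; the window size and date range are illustrative assumptions.

from datetime import datetime, timedelta

start = datetime(2020, 1, 1)
while start < datetime(2020, 1, 8):
    end = start + timedelta(days=1)
    # the submitter expects ISO-formatted strings; dashes and colons are
    # stripped when the job name is built
    submit_job(start.strftime("%Y-%m-%dT%H:%M:%S"),
               end.strftime("%Y-%m-%dT%H:%M:%S"))
    start = end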
Example No. 5
def submit_ipf_scraper(acq, tag, endpoint):
    params = [
        {
            "name": "acq_id",
            "from": "value",
            "value": acq.get("id")
        },
        {
            "name": "acq_met",
            "from": "value",
            "value": acq.get("metadata")
        },
        {
            "name": "index",
            "from": "value",
            "value": "grq_v2.0_acquisition-s1-iw_slc"
        },
        {
            "name": "dataset_type",
            "from": "value",
            "value": "acquisition-S1-IW_SLC"
        },
        {
            "name": "endpoint",
            "from": "value",
            "value": endpoint
        },
        {
            "name": "ds_cfg",
            "from": "value",
            "value": "datasets.json"
        }
    ]

    rule = {
        "rule_name": "ipf_scraper_{}".format(endpoint),
        "queue": job_queues.get(endpoint),
        "priority": '5',
        "kwargs": '{}'
    }

    # based on the

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job({}, rule, hysdsio={"id": "internal-temporary-wiring", "params": params,
                                                         "job-specification": "{}:{}".format(job_types.get(endpoint),tag)},
                                      job_name='%s-%s-%s' % (job_types.get(endpoint), acq.get("id"), tag))
    print("For {} , IPF scrapper Job ID: {}".format(acq.get("id"), mozart_job_id))
Example No. 6
def submit_scrubber_job(params):

    rule = {
        "rule_name": "bos_sarcat_scrubber",
        "queue": CRAWLER_QUEUE,
        "priority": '8',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job({}, rule, hysdsio={"id": "internal-temporary-wiring", "params": params,
                                                         "job-specification": "job-scrub_outdated_bos_acqs:master"},
                                      job_name='job_%s-%s' % ('scrub_outdated_bos_acqs', "master"),
                                      enable_dedup=False)

    LOGGER.info("Job ID: " + mozart_job_id)
    print("Job ID: " + mozart_job_id)
    return
Example No. 7
def submit_aoi_ipf(aoi):
    params = [{
        "name": "AOI_name",
        "from": "value",
        "value": aoi.get("id")
    }, {
        "name": "spatial_extent",
        "from": "value",
        "value": aoi.get("location")
    }, {
        "name": "start_time",
        "from": "value",
        "value": aoi.get("starttime")
    }, {
        "name": "end_time",
        "from": "value",
        "value": aoi.get("endtime")
    }]

    rule = {
        "rule_name": "{}_ipf_scraper".format(aoi.get("id")),
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": '4',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job(
        {},
        rule,
        hysdsio={
            "id": "internal-temporary-wiring",
            "params": params,
            "job-specification": "job-AOI_based_ipf_submitter:master"
        },
        job_name='job-%s-%s-%s' %
        ("aoi_ipf_submitter", aoi.get("id"), "master"),
        enable_dedup=False)
    print("For {} , AOI IPF Submitter Job ID: {}".format(
        aoi.get("_id"), mozart_job_id))
Example No. 8
def submit_global_ipf(spatial_extent, start_time, end_time, release):
    params = [{
        "name": "AOI_name",
        "from": "value",
        "value": "Global"
    }, {
        "name": "spatial_extent",
        "from": "value",
        "value": spatial_extent
    }, {
        "name": "start_time",
        "from": "value",
        "value": start_time
    }, {
        "name": "end_time",
        "from": "value",
        "value": end_time
    }]

    rule = {
        "rule_name": "{}_ipf_scraper".format("global"),
        "queue": "factotum-job_worker-apihub_scraper_throttled",
        "priority": '5',
        "kwargs": '{}'
    }

    print('submitting jobs with params:')
    print(json.dumps(params, sort_keys=True, indent=4, separators=(',', ': ')))
    mozart_job_id = submit_mozart_job(
        {},
        rule,
        hysdsio={
            "id": "internal-temporary-wiring",
            "params": params,
            "job-specification":
            "job-AOI_based_ipf_submitter:{}".format(release)
        },
        job_name='job-%s-%s-%s' % ("ipf_submitter", "global", release),
        enable_dedup=False)
    print("For {} , IPF Submitter Job ID: {}".format("Global", mozart_job_id))
Example No. 9
def submit_job(start, end, job_queue):
    tag = 'master'
    job_spec = 'job-bos_ingest:%s' % tag

    start_time_tag = start.replace('-', '').replace(':', '')
    end_time_tag = end.replace('-', '').replace(':', '')
    job_name = '%s-%s-%s' % (job_spec, start_time_tag, end_time_tag)

    # Setup input arguments here
    rule = {
        'rule_name': 'bos_sarcat_scraper',
        'queue': job_queue,
        'priority': '7',
        'kwargs': '{}'
    }

    params = [{
        'name': 'bos_ingest_time',
        'from': 'value',
        'value': ''
    }, {
        'name': 'from_time',
        'from': 'value',
        'value': start
    }, {
        'name': 'end_time',
        'from': 'value',
        'value': end
    }]

    hysds_io = {
        'id': 'internal-temporary-wiring',
        'params': params,
        'job-specification': job_spec
    }
    job_id = submit_mozart_job({}, rule, hysdsio=hysds_io, job_name=job_name)

    print('Submitted job for window {} to {}, JOB ID: {}'.format(
        start, end, job_id))
Example No. 10
def submit_sling(ctx_file):
    """Submit sling for S1 SLC from acquisition."""

    # get context
    with open(ctx_file) as f:
        ctx = json.load(f)
    logger.info("ctx: {}".format(json.dumps(ctx, indent=2)))

    # get ES url
    es_url = app.conf.GRQ_ES_URL
    index = "grq"

    # get ids
    ids = ctx['ids']

    # build query
    query = {
        "query": {
            "ids": {
                "type": "acquisition-S1-IW_SLC",
                "values": ids,
            }
        },
        "partial_fields": {
            "partial": {
                "exclude": ["city", "context", "metadata.context"],
            }
        }
    }

    # query
    r = requests.post(
        "%s/grq_*_acquisition-s1-iw_slc/_search?search_type=scan&scroll=60&size=100"
        % es_url,
        data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    scroll_id = scan_result['_scroll_id']
    matches = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % es_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0: break
        matches.extend(
            [i['fields']['partial'][0] for i in res['hits']['hits']])
    #logger.info("matches: {}".format([m['_id'] for m in matches]))
    logger.info("matches: {}".format(len(matches)))
    #logger.info("matches[-1]: {}".format(json.dumps(matches[-1], indent=2)))

    #required params for job submission
    qtype = "scihub"
    job_type = "job:spyddder-sling_%s" % qtype
    oauth_url = None
    queue = "factotum-job_worker-%s_throttled" % qtype  # job submission queue

    # loop over acquisitions and submit sling jobs
    for res in matches:
        id = res['id']

        # skip non-acquisition datasets
        if res.get('dataset', None) != "acquisition-S1-IW_SLC":
            logger.info("Skipping invalid acquisition dataset: {}".format(id))
            continue

        # get metadata
        md = res['metadata']

        # build payload items for job submission
        archive_fname = md['archive_filename']
        title, ext = archive_fname.split('.')
        start_dt = get_date(res['starttime'])
        yr = start_dt.year
        mo = start_dt.month
        dy = start_dt.day
        logger.info("starttime: {}".format(start_dt))
        md5 = hashlib.md5("{}\n".format(archive_fname)).hexdigest()
        repo_url = "{}/{}/{}/{}/{}/{}".format(ctx['repo_url'], md5[0:8],
                                              md5[8:16], md5[16:24],
                                              md5[24:32], archive_fname)
        logger.info("repo_url: {}".format(repo_url))
        prod_met = {}
        prod_met['source'] = qtype
        prod_met['dataset_type'] = title[0:3]
        prod_met['spatial_extent'] = {
            'type': 'polygon',
            'aoi': None,
            'coordinates': res['location']['coordinates'],
        }
        prod_met['tag'] = []

        #set sling job spec release/branch
        #job_spec = "job-sling:release-20170619"
        job_spec = "job-sling:{}".format(ctx['sling_release'])
        rtime = datetime.utcnow()
        job_name = "%s-%s-%s-%s" % (job_spec, queue, archive_fname,
                                    rtime.strftime("%d_%b_%Y_%H:%M:%S"))
        job_name = job_name.lstrip('job-')

        #Setup input arguments here
        rule = {
            "rule_name": job_spec,
            "queue": queue,
            "priority": ctx.get('job_priority', 0),
            "kwargs": '{}'
        }
        params = [{
            "name": "download_url",
            "from": "value",
            "value": md['download_url'],
        }, {
            "name": "repo_url",
            "from": "value",
            "value": repo_url,
        }, {
            "name": "prod_name",
            "from": "value",
            "value": title,
        }, {
            "name": "file_type",
            "from": "value",
            "value": ext,
        }, {
            "name": "prod_date",
            "from": "value",
            "value": "{}".format("%04d-%02d-%02d" % (yr, mo, dy)),
        }, {
            "name": "prod_met",
            "from": "value",
            "value": prod_met,
        }, {
            "name": "options",
            "from": "value",
            "value": "--force_extract"
        }]

        logger.info("rule: {}".format(json.dumps(rule, indent=2)))
        logger.info("params: {}".format(json.dumps(params, indent=2)))

        submit_mozart_job({},
                          rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name)
Example No. 11
def iterate(component, rule):
    """
    Iterator used to iterate across a query result and submit jobs for every hit
    @param component - "mozart" or "tosca" where this submission came from
    @param rule - rule containing information for running jobs, note - NOT A USER RULE
    """
    ids = []  # Accumulators variables
    error_count = 0
    errors = []

    es_index, ignore1 = get_component_config(
        component)  # Read config from "origin"

    # Read in JSON formatted args and setup passthrough
    if 'query' in rule.get('query', {}):
        queryobj = rule["query"]
    else:
        queryobj = {"query": rule["query"]}
        rule['query'] = {"query": rule['query']}
    logger.info("Elasticsearch queryobj: %s" % json.dumps(queryobj))

    # Get hysds_ios wiring
    hysds_io_index = HYSDS_IOS_MOZART if component in (
        'mozart', 'figaro') else HYSDS_IOS_GRQ
    hysdsio = mozart_es.get_by_id(index=hysds_io_index, id=rule["job_type"])
    hysdsio = hysdsio['_source']

    # Is this a single submission
    passthru = rule.get('passthru_query', False)
    single = hysdsio.get(
        "submission_type",
        "individual" if passthru is True else "iteration") == "individual"
    logger.info("single submission type: %s" % single)

    # Do we need the results
    run_query = False if single else True
    if not run_query:  # check if we need the results anyway
        run_query = any(
            (i["from"].startswith('dataset_jpath') for i in hysdsio["params"]))
    logger.info("run_query: %s" % run_query)

    # Run the query to get the products; for efficiency, run query only if we need the results
    results = [{"_id": "Transient Faux-Results"}]
    if run_query:
        if component == "mozart" or component == "figaro":
            results = mozart_es.query(index=es_index, body=queryobj)
        else:
            results = grq_es.query(index=es_index, body=queryobj)

    # What to iterate for submission
    submission_iterable = [{
        "_id": "Global Single Submission"
    }] if single else results

    # Iterator loop
    for item in submission_iterable:
        try:
            # For single submissions, submit all results as one
            product = results if single else item
            logger.info("Submitting mozart job for product: %s" % product)

            # set clean descriptive job name
            job_type = rule['job_type']
            if job_type.startswith('hysds-io-'):
                job_type = job_type.replace('hysds-io-', '', 1)
            if isinstance(product, dict):
                job_name = "%s-%s" % (job_type, product.get('_id', 'unknown'))
            else:
                job_name = "%s-single_submission" % job_type

            # get enable_dedup flag: rule > hysdsio
            if rule.get("enable_dedup") is None:
                rule['enable_dedup'] = hysdsio.get("enable_dedup", True)

            task_id = submit_mozart_job(product,
                                        rule,
                                        hysdsio,
                                        job_name=job_name)
            ids.append(task_id)

        except Exception as e:
            error_count = error_count + 1
            if not str(e) in errors:
                errors.append(str(e))
            logger.warning("Failed to submit jobs: {0}:{1}".format(
                type(e), str(e)))
            logger.warning(traceback.format_exc())

    if error_count > 0:
        logger.error("Failed to submit: {0} of {1} jobs. {2}".format(
            error_count, len(list(results)), " ".join(errors)))
        raise Exception("Job Submitter Job failed to submit all actions")
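
The rule passed to iterate is a trigger-rule document rather than a user rule; based on the fields read above, a minimal sketch of its shape might look like this (all values are placeholders, and only job_type, query, passthru_query, and enable_dedup are actually read by iterate itself):

rule = {
    "rule_name": "urgent_response_notify",         # placeholder trigger-rule name
    "job_type": "hysds-io-notify-by-email:v1.0",   # hysds_ios _id to look up (placeholder)
    "query": {"match_all": {}},                    # wrapped into {"query": ...} above if needed
    "passthru_query": False,                       # True forces a single "individual" submission
    "enable_dedup": None,                          # None falls back to the hysds_ios default
    "queue": "factotum-job_worker-small",
    "priority": 5,
    "kwargs": '{}'
}
iterate("tosca", rule)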
Example No. 12
        "name": "version_opt",
        "from": "value",
        "value": "--dataset_version",
    }, {
        "name": "version",
        "from": "value",
        "value": dataset_version,
    }, {
        "name": "tag_opt",
        "from": "value",
        "value": "--tag",
    }, {
        "name": "tag",
        "from": "value",
        "value": tag,
    }, {
        "name": "es_dataset_url",
        "from": "value",
        "value": ds_es_url,
    }]
    print("submitting %s crawler job" % qc_type)
    submit_mozart_job({},
                      rule,
                      hysdsio={
                          "id": "internal-temporary-wiring",
                          "params": params,
                          "job-specification": job_spec
                      },
                      job_name=job_name,
                      enable_dedup=False)
Example No. 13
            "value": acq_data["metadata"]["archive_filename"]
        },
        {
            "name": "prod_met",
            "from": "value",
            "value": acq_data["metadata"]
        }
    ]
    

    logger.info("PARAMS : %s" %params)
    logger.info("RULE : %s"%rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({}, rule,hysdsio={"id": "internal-temporary-wiring", "params": params, "job-specification": job_type}, job_name=sling_job_name)
    logger.info("\nSubmitted sling job with id %s for  %s" %(acq_data["metadata"]["identifier"], mozart_job_id))

    return mozart_job_id

def check_ES_status(doc_id):
    """
    There is a latency in the update of ES job status after
    celery signals job completion.
    To handle that case, we must poll ES (after sciflo returns its status after blocking)
    until the job status is correctly reflected.
    :param doc_id: ID of the job's ES doc
    :return: True if ES has updated the job status within 5 minutes,
             otherwise a runtime error is raised
    """
    es_url = app.conf['JOBS_ES_URL']
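
The snippet above is truncated in the source; a minimal polling loop consistent with the docstring might look like the sketch below. It assumes time and requests are imported at module level, and the index path and terminal status values are assumptions rather than details taken from the original.

def check_ES_status(doc_id):
    es_url = app.conf['JOBS_ES_URL']
    deadline = time.time() + 300  # poll for up to 5 minutes
    while time.time() < deadline:
        # index/type path and status values below are illustrative assumptions
        r = requests.get("%s/job_status-current/job/%s" % (es_url, doc_id))
        if r.status_code == 200:
            status = r.json().get("_source", {}).get("status")
            if status in ("job-completed", "job-failed", "job-deduped"):
                return True
        time.sleep(10)
    raise RuntimeError("ES job status for %s not updated within 5 minutes" % doc_id)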
Example No. 14
def submit_qquery_job(region,
                      query_endpoint,
                      dns_list,
                      qquery_rtag,
                      sling_rtag,
                      pds_queue=None):

    # set query priority
    priority = 0
    if "priority" in region["metadata"].keys():
        priority = int(region["metadata"]["priority"])

    # determine qquery job submission branch
    job_header = 'job-qquery-opds' if pds_queue else 'job-qquery'
    job_spec = job_header + ":" + qquery_rtag

    # determine the repo to query from the types_map in the aoi
    for qtype in region["metadata"]["query"].keys(
    ):  # list of endpoints to query
        if qtype != query_endpoint:
            continue
        p = priority
        if priority == 0 and "priority" in region["metadata"]["query"][
                qtype].keys():
            p = int(region["metadata"]["query"][qtype]["priority"])

        rtime = datetime.datetime.utcnow()
        job_name = "%s-%s-%s-%s" % (job_spec, qtype, region["id"],
                                    rtime.strftime("%d_%b_%Y_%H:%M:%S"))
        job_name = job_name.lstrip('job-')
        # Setup input arguments here
        rule = {
            "rule_name": "qquery",
            "queue":
            "factotum-job_worker-%s_throttled" % qtype,  # job submission queue
            "priority": p,
            "kwargs": '{}'
        }
        params = [{
            "name": "aoi",
            "from": "value",
            "value": "{}".format(region["id"]),
        }, {
            "name": "endpoint",
            "from": "value",
            "value": "{}".format(qtype),
        }, {
            "name": "dns_list",
            "from": "value",
            "value": "{}".format(dns_list),
        }, {
            "name": "sling_version",
            "from": "value",
            "value": "{}".format(sling_rtag),
        }]
        if pds_queue:
            queue = {
                "name": "pds_queue",
                "from": "value",
                "value": "{}".format(pds_queue),
            }
            params.append(queue)

        # for each aoi and endpoint, submit a query job
        print("{0: <60}:  {1}".format(
            "Submitting %s query job for %s over aoi" % (job_header, qtype),
            region["id"]))
        submit_mozart_job({},
                          rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name,
                          enable_dedup=False)
Example No. 15
def submit_sling_job(project, spyddder_extract_version, acquisition_localizer_versions, acq_data, priority):

    """Map function for spyddder-man extract job."""

    acquisition_localizer_version = "standard-product"
    spyddder_extract_version = "standard-product"
    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer:{}".format(acquisition_localizer_versions)

     # set job type and disk space reqs
    disk_usage = "300GB"
    #logger.info(acq_data)
    #acq_id = acq_data['acq_id']

    # set job queue based on project
    #job_queue = "%s-job_worker-large" % project
    job_queue = "factotum-job_worker-small" 
    rule = {
        "rule_name": "standard-product-sling",
        "queue": job_queue,
        "priority": '5',
        "kwargs":'{}'
    }

    sling_job_name = "standard_product-%s-%s" %(job_type, acq_data["metadata"]["identifier"])


    params = [
	{
            "name": "workflow",
            "from": "value",
            "value": "acquisition_localizer.sf.xml"
        },
        {
            "name": "project",
            "from": "value",
            "value": project
        },
        {
            "name": "spyddder_extract_version",
            "from": "value",
            "value": spyddder_extract_version
        },
        {
            "name": "dataset_type",
            "from": "value",
            "value": acq_data["dataset_type"]
        },
        {
            "name": "dataset",
            "from": "value",
            "value": acq_data["dataset"]
        },
        {
            "name": "identifier",
            "from": "value",
            "value": acq_data["metadata"]["identifier"]
        },
        {
            "name": "download_url",
            "from": "value",
            "value": acq_data["metadata"]["download_url"]
        },
        {
            "name": "archive_filename",
            "from": "value",
            "value": acq_data["metadata"]["archive_filename"]
        },
        {
            "name": "prod_met",
            "from": "value",
            "value": acq_data["metadata"]
        }
    ]
    

    logger.info("PARAMS : %s" %params)
    logger.info("RULE : %s"%rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({}, rule,hysdsio={"id": "internal-temporary-wiring", "params": params, "job-specification": job_type}, job_name=sling_job_name)
    logger.info("\nSubmitted sling job with id %s for  %s" %(acq_data["metadata"]["identifier"], mozart_job_id))

    return mozart_job_id
Example No. 16
    def submit_sling_job(self,
                         aoi,
                         query_params,
                         qtype,
                         queue_grp,
                         title,
                         link,
                         rtag=None,
                         pds_queue=None):
        #Query for all products, and return a list of (Title,URL)
        yr, mo, dy = self.getDataDateFromTitle(title)  #date
        filename = title + "." + self.getFileType()

        if not pds_queue:
            # build payload items for job submission
            tags = query_params["tag"]
            md5 = hashlib.md5("{0}.{1}\n".format(
                title, self.getFileType())).hexdigest()
            cfg = config()  # load settings.json
            repo_url = "%s/%s/%s/%s/%s/%s.%s" % (
                cfg["repository-base"], md5[0:8], md5[8:16], md5[16:24],
                md5[24:32], title, self.getFileType())
            location = {}
            location['type'] = 'polygon'
            location['aoi'] = aoi['id']
            location['coordinates'] = aoi['location']['coordinates']
            prod_met = {}
            prod_met['source'] = qtype
            prod_met['dataset_type'] = title[0:3]
            prod_met['spatial_extent'] = location
            prod_met['tag'] = tags
            queue = "factotum-job_worker-%s_throttled" % (
                qtype + str(queue_grp))  # job submission queue
            job_header = 'job-sling:'
            dedup_key = DEDUP_KEY
            params = [{
                "name": "download_url",
                "from": "value",
                "value": link,
            }, {
                "name": "repo_url",
                "from": "value",
                "value": repo_url,
            }, {
                "name": "prod_name",
                "from": "value",
                "value": title,
            }, {
                "name": "file_type",
                "from": "value",
                "value": self.getFileType(),
            }, {
                "name": "prod_date",
                "from": "value",
                "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
            }, {
                "name": "prod_met",
                "from": "value",
                "value": prod_met,
            }, {
                "name": "options",
                "from": "value",
                "value": "--force_extract"
            }]
        else:
            # queue = "opds-%s-job_worker-small" % (qtype)
            queue = pds_queue  # job submission queue, no queue group for autoscalers
            job_header = 'job-sling-extract-opds:'
            dedup_key = DEDUP_KEY_PDS
            params = [{
                "name": "download_url",
                "from": "value",
                "value": link,
            }, {
                "name": "prod_name",
                "from": "value",
                "value": "%s-pds" % title,
            }, {
                "name": "file",
                "from": "value",
                "value": filename,
            }, {
                "name": "prod_date",
                "from": "value",
                "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
            }]

        #set sling job spec release/branch
        if rtag is None:
            try:
                with open('_context.json') as json_data:
                    context = json.load(json_data)
                job_spec = job_header + context['job_specification'][
                    'job-version']
            except:
                print('Failed on loading context.json')
        else:
            job_spec = job_header + rtag

        rtime = datetime.datetime.utcnow()
        job_name = "%s-%s-%s-%s-%s" % (job_spec, queue, title,
                                       rtime.strftime("%d_%b_%Y_%H:%M:%S"),
                                       aoi['id'])
        job_name = job_name.lstrip('job-')
        priority = query_params["priority"]

        #Setup input arguments here
        rule = {
            "rule_name": job_spec,
            "queue": queue,
            "priority": priority,
            "kwargs": '{}'
        }

        #check for dedup, if clear, submit job
        if not self.deduplicate(filename, dedup_key):
            submit_mozart_job({},
                              rule,
                              hysdsio={
                                  "id": "internal-temporary-wiring",
                                  "params": params,
                                  "job-specification": job_spec
                              },
                              job_name=job_name)
        else:
            location = " to OpenDataset" if pds_queue else "to own bucket"
            reason = "in OpenDataset" if pds_queue else "in OpenDataset or own bucket"
            print(
                "Will not submit sling job {0} to {1}, already processed {2}".
                format(title, location, reason))
Example No. 17
    def submit_sling_job(self,
                         aoi,
                         query_params,
                         qtype,
                         queue_grp,
                         title,
                         link,
                         rtag=None):
        #Query for all products, and return a list of (Title,URL)
        cfg = config()  #load settings.json
        priority = query_params["priority"]
        products = query_params["products"]
        tags = query_params["tag"]

        #build payload items for job submission
        yr, mo, dy = self.getDataDateFromTitle(title)  #date
        md5 = hashlib.md5("{0}.{1}\n".format(title,
                                             self.getFileType())).hexdigest()
        repo_url = "%s/%s/%s/%s/%s/%s.%s" % (cfg["repository-base"], md5[0:8],
                                             md5[8:16], md5[16:24], md5[24:32],
                                             title, self.getFileType())
        location = {}
        location['type'] = 'polygon'
        location['aoi'] = aoi['id']
        location['coordinates'] = aoi['location']['coordinates']
        prod_met = {}
        prod_met['source'] = qtype
        prod_met['dataset_type'] = title[0:3]
        prod_met['spatial_extent'] = location
        prod_met['tag'] = tags

        #required params for job submission
        if hasattr(self, 'getOauthUrl'):
            #sling via oauth
            oauth_url = self.getOauthUrl()
            job_type = "job:spyddder-sling-oauth_%s" % qtype
            job_name = "spyddder-sling-oauth_%s-%s-%s.%s" % (
                qtype, aoi['id'], title, self.getFileType())
        else:
            #normal sling
            job_type = "job:spyddder-sling_%s" % qtype
            job_name = "spyddder-sling_%s-%s-%s.%s" % (qtype, aoi['id'], title,
                                                       self.getFileType())
            oauth_url = None
        queue = "factotum-job_worker-%s_throttled" % (qtype + str(queue_grp)
                                                      )  # job submission queue

        #set sling job spec release/branch
        if rtag is None:
            try:
                with open('_context.json') as json_data:
                    context = json.load(json_data)
                job_spec = 'job-sling:' + context['job_specification'][
                    'job-version']
            except:
                print('Failed on loading context.json')
        else:
            job_spec = 'job-sling:' + rtag

        rtime = datetime.datetime.utcnow()
        job_name = "%s-%s-%s-%s-%s" % (job_spec, queue, title,
                                       rtime.strftime("%d_%b_%Y_%H:%M:%S"),
                                       aoi['id'])
        job_name = job_name.lstrip('job-')

        #Setup input arguments here
        rule = {
            "rule_name": job_spec,
            "queue": queue,
            "priority": priority,
            "kwargs": '{}'
        }
        params = [{
            "name": "download_url",
            "from": "value",
            "value": link,
        }, {
            "name": "repo_url",
            "from": "value",
            "value": repo_url,
        }, {
            "name": "prod_name",
            "from": "value",
            "value": title,
        }, {
            "name": "file_type",
            "from": "value",
            "value": self.getFileType(),
        }, {
            "name": "prod_date",
            "from": "value",
            "value": "{}".format("%s-%s-%s" % (yr, mo, dy)),
        }, {
            "name": "prod_met",
            "from": "value",
            "value": prod_met,
        }, {
            "name": "options",
            "from": "value",
            "value": "--force_extract"
        }]
        #check for dedup, if clear, submit job
        if not self.deduplicate(title + "." + self.getFileType()):
            submit_mozart_job({},
                              rule,
                              hysdsio={
                                  "id": "internal-temporary-wiring",
                                  "params": params,
                                  "job-specification": job_spec
                              },
                              job_name=job_name)
        else:
            print(
                "Will not submit sling job for {0}, already processed".format(
                    title))
Example No. 18
            "name": "ds_cfg",
            "from": "value",
            "value": "datasets.json"
        }, {
            "name": "starttime",
            "from": "value",
            "value": starttime
        }, {
            "name": "endtime",
            "from": "value",
            "value": endtime
        }, {
            "name": "create_flag",
            "from": "value",
            "value": "--create"
        }]

        print("submitting scraper job for %s" % qtype)
        submit_mozart_job({},
                          rule,
                          hysdsio={
                              "id": "internal-temporary-wiring",
                              "params": params,
                              "job-specification": job_spec
                          },
                          job_name=job_name,
                          soft_time_limit=604800,
                          time_limit=605100)

        mis_date += timedelta(days=1)
Example No. 19
def submit_sling_job(spyddder_extract_version, acquisition_localizer_version,
                     esa_download_queue, asf_ngap_download_queue, acq_data,
                     priority):
    """Map function for spyddder-man extract job."""

    #acquisition_localizer_version = "master"
    #spyddder_extract_version = "develop"
    job_submit_url = '%s/mozart/api/v0.1/job/submit' % MOZART_URL

    # set job type and disk space reqs
    job_type = "job-acquisition_localizer_single:{}".format(
        acquisition_localizer_version)
    logger.info("\nSubmitting job of type : %s" % job_type)
    # set job type and disk space reqs
    disk_usage = "10GB"
    #logger.info(acq_data)
    #acq_id = acq_data['acq_id']

    # set job queue
    job_queue = "system-job_worker-small"
    rule = {
        "rule_name": "acquisition_localizer_multi-sling",
        "queue": job_queue,
        "priority": priority,
        "kwargs": '{}'
    }

    sling_job_name = "sling-%s-%s" % (job_type,
                                      acq_data["metadata"]["identifier"])

    params = [{
        "name": "workflow",
        "from": "value",
        "value": "acquisition_localizer.sf.xml"
    }, {
        "name": "asf_ngap_download_queue",
        "from": "value",
        "value": asf_ngap_download_queue
    }, {
        "name": "esa_download_queue",
        "from": "value",
        "value": esa_download_queue
    }, {
        "name": "spyddder_extract_version",
        "from": "value",
        "value": spyddder_extract_version
    }, {
        "name": "dataset_type",
        "from": "value",
        "value": acq_data["dataset_type"]
    }, {
        "name": "dataset",
        "from": "value",
        "value": acq_data["dataset"]
    }, {
        "name": "identifier",
        "from": "value",
        "value": acq_data["metadata"]["identifier"]
    }, {
        "name": "download_url",
        "from": "value",
        "value": acq_data["metadata"]["download_url"]
    }, {
        "name": "archive_filename",
        "from": "value",
        "value": acq_data["metadata"]["archive_filename"]
    }, {
        "name": "prod_met",
        "from": "value",
        "value": acq_data["metadata"]
    }]

    logger.info("PARAMS : %s" % params)
    logger.info("RULE : %s" % rule)
    logger.info(job_type)
    logger.info(sling_job_name)

    mozart_job_id = submit_mozart_job({},
                                      rule,
                                      hysdsio={
                                          "id": "internal-temporary-wiring",
                                          "params": params,
                                          "job-specification": job_type
                                      },
                                      job_name=sling_job_name)
    logger.info("\nSubmitted sling job with id %s for  %s" %
                (acq_data["metadata"]["identifier"], mozart_job_id))

    return mozart_job_id
Example No. 20
def submit_sling(ctx_file):
    """Submit sling for S1 SLC from acquisition."""

    # get context
    with open(ctx_file) as f:
        ctx = json.load(f)
    logger.info("ctx: {}".format(json.dumps(ctx, indent=2)))

    # filter non acquisitions
    if ctx.get('source_dataset', None) != "acquisition-S1-IW_SLC":
        raise RuntimeError("Skipping invalid acquisition dataset.")

    # build payload items for job submission
    qtype = "scihub"
    archive_fname = ctx['archive_filename']
    title, ext = archive_fname.split('.')
    start_dt = get_date(ctx['starttime'])
    yr = start_dt.year
    mo = start_dt.month
    dy = start_dt.day
    logger.info("starttime: {}".format(start_dt))
    md5 = hashlib.md5("{}\n".format(archive_fname)).hexdigest()
    repo_url = "{}/{}/{}/{}/{}/{}".format(ctx['repo_url'], md5[0:8], md5[8:16],
                                          md5[16:24], md5[24:32],
                                          archive_fname)
    logger.info("repo_url: {}".format(repo_url))
    prod_met = {}
    prod_met['source'] = qtype
    prod_met['dataset_type'] = title[0:3]
    prod_met['spatial_extent'] = {
        'type': 'polygon',
        'aoi': None,
        'coordinates': ctx['prod_met']['location']['coordinates'],
    }
    prod_met['tag'] = []

    #required params for job submission
    job_type = "job:spyddder-sling_%s" % qtype
    oauth_url = None
    queue = "factotum-job_worker-%s_throttled" % qtype  # job submission queue

    #set sling job spec release/branch
    #job_spec = "job-sling:release-20170619"
    job_spec = "job-sling:{}".format(ctx['sling_release'])
    rtime = datetime.utcnow()
    job_name = "%s-%s-%s-%s" % (job_spec, queue, archive_fname,
                                rtime.strftime("%d_%b_%Y_%H:%M:%S"))
    job_name = job_name.lstrip('job-')

    #Setup input arguments here
    rule = {
        "rule_name": job_spec,
        "queue": queue,
        "priority": ctx.get('job_priority', 0),
        "kwargs": '{}'
    }
    params = [{
        "name": "download_url",
        "from": "value",
        "value": ctx['download_url'],
    }, {
        "name": "repo_url",
        "from": "value",
        "value": repo_url,
    }, {
        "name": "prod_name",
        "from": "value",
        "value": title,
    }, {
        "name": "file_type",
        "from": "value",
        "value": ext,
    }, {
        "name": "prod_date",
        "from": "value",
        "value": "{}".format("%04d-%02d-%02d" % (yr, mo, dy)),
    }, {
        "name": "prod_met",
        "from": "value",
        "value": prod_met,
    }, {
        "name": "options",
        "from": "value",
        "value": "--force_extract"
    }]

    logger.info("rule: {}".format(json.dumps(rule, indent=2)))
    logger.info("params: {}".format(json.dumps(params, indent=2)))

    submit_mozart_job({},
                      rule,
                      hysdsio={
                          "id": "internal-temporary-wiring",
                          "params": params,
                          "job-specification": job_spec
                      },
                      job_name=job_name)
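
For reference, the _context.json read by submit_sling in this example needs at least the fields referenced above; a sketch with illustrative values (the filename, URLs, and coordinates are placeholders):

ctx = {
    "source_dataset": "acquisition-S1-IW_SLC",
    "archive_filename": "S1A_IW_SLC__1SDV_20200101T000000_20200101T000027_030600_0381DE_A1B2.zip",
    "starttime": "2020-01-01T00:00:00",
    "download_url": "https://scihub.copernicus.eu/apihub/odata/v1/Products('uuid')/$value",
    "repo_url": "s3://example-bucket/products",
    "prod_met": {
        "location": {
            "coordinates": [[[1.0, 2.0], [1.0, 3.0], [2.0, 3.0], [1.0, 2.0]]]
        }
    },
    "sling_release": "release-20170619",
    "job_priority": 0
}

Written to disk as _context.json, this would be consumed via submit_sling("_context.json").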