コード例 #1
0
ファイル: log_utils.py プロジェクト: fgreg/hysds
def log_publish_prov_es(prov_es_info, prov_es_file, prod_path, pub_urls,
                        prod_metrics, objectid):
    """Record the publish step of a product as a PROV-ES JSON file.

    A fresh PROV-ES document is built that links the local product (input
    granule) to its first published URL (output product) through a
    "publish_dataset" process step run by a software agent identified by
    this host and process id. Attributes already recorded for the input
    entity in ``prov_es_info`` are merged into the serialized result, which
    is then written to ``prov_es_file``.

    Args:
        prov_es_info: existing PROV-ES info; its 'prefix' entry supplies
            the namespaces and its optional 'entity' entry supplies
            previously recorded attributes of the input entity.
        prov_es_file: path of the JSON file to write.
        prod_path: local path of the product; appended to this host's
            FQDN to form the input "file://" URL.
        pub_urls: published URLs; only the first one is used.
        prod_metrics: mapping providing 'time_start' and 'time_end'.
        objectid: label given to the output product entity.
    """

    def hysds_id(seed):
        # every id in this document is "hysds:" + get_uuid(<seed>)
        return "hysds:%s" % get_uuid(seed)

    prov_doc = ProvEsDocument(namespaces=prov_es_info['prefix'])

    # all records are added without a bundle
    no_bundle = None

    # input entity: the product as it sits on this host
    host = socket.getfqdn()
    local_url = "file://%s%s" % (host, prod_path)
    src_id = hysds_id(local_url)
    prov_doc.granule(src_id, None, [local_url], [], None, None, None,
                     label=os.path.basename(local_url),
                     bundle=no_bundle)

    # output entity: the first published location
    published_url = pub_urls[0]
    dst_id = hysds_id(published_url)
    prov_doc.product(dst_id, None, [published_url], [], None, None, None,
                     label=objectid,
                     bundle=no_bundle)

    # software record tied to the product_publishing algorithm
    version = hysds.__version__
    sw_id = "eos:HySDS-%s" % version
    prov_doc.software(sw_id, ["eos:product_publishing"], version,
                      label="%s v%s" % (hysds.__description__, version),
                      location=hysds.__url__,
                      bundle=no_bundle)

    # software agent: label combines host, pid and the publish start time
    worker_pid = os.getpid()
    started = prod_metrics['time_start']
    agent_label = "hysds:publish_dataset/%s/%d/%s" % (host, worker_pid,
                                                      started)
    agent_id = hysds_id(agent_label)
    prov_doc.softwareAgent(agent_id, str(worker_pid), host,
                           role="invoked",
                           label=agent_label,
                           bundle=no_bundle)

    # process step connecting input to output
    step_name = "publish_dataset-%s" % os.path.basename(prod_path)
    prov_doc.processStep(hysds_id(step_name),
                         started,
                         prod_metrics['time_end'], [sw_id],
                         agent_id,
                         None, [src_id], [dst_id],
                         label=step_name,
                         bundle=no_bundle,
                         prov_type="hysds:publish_dataset")

    # work on the plain-JSON form from here on
    serialized = json.loads(prov_doc.serialize())
    entities = serialized['entity']

    # overlay whatever was previously recorded for the input entity
    recorded = prov_es_info.get('entity', {}).get(src_id, {})
    entities[src_id].update(recorded)

    # copy the same attributes to the output entity, except the ones that
    # describe the entity itself (location, label, type)
    own_attrs = ('prov:location', 'prov:label', 'prov:type')
    for key in recorded:
        if key not in own_attrs:
            entities[dst_id][key] = recorded[key]

    # write prov
    with open(prov_es_file, 'w') as out:
        json.dump(serialized, out, indent=2)
コード例 #2
0
ファイル: log_utils.py プロジェクト: fgreg/hysds
def log_prov_es(job, prov_es_info, prov_es_file):
    """Merge worker-only provenance into a PROV-ES document and write it.

    A temporary PROV-ES document is created to generate the records only
    the worker can produce (software agent with PID and execute node, and
    a process step carrying the command start/end times). Those records
    are merged into ``prov_es_info`` in place, and the result is dumped as
    JSON to ``prov_es_file``.

    Where the merge happens:
      * ``prov_es_info`` has a 'bundle' key holding exactly one bundle:
        the records are merged into that bundle.
      * ``prov_es_info`` has a 'bundle' key holding zero or several
        bundles: nothing is merged; the info is written unchanged.
      * no 'bundle' key: the records are merged at the top level.

    Args:
        job: job mapping; reads 'job_id', 'type', optional 'username' and
            'job_info' ('execute_node', 'pid', 'cmd_start', 'cmd_end',
            and, when a single activity is being updated, 'job_url').
        prov_es_info: PROV-ES JSON structure to update (mutated in place).
        prov_es_file: path of the JSON file to write.
    """

    # create PROV-ES doc to generate attributes that only verdi know
    ps_id = "hysds:%s" % get_uuid(job['job_id'])
    bundle_id = "hysds:%s" % get_uuid('bundle-%s' % job['job_id'])
    doc = ProvEsDocument()

    # records are added to the temp document without a bundle
    bndl = None

    # create sofware agent
    sa_label = "hysds:pge_wrapper/%s/%d/%s" % (job['job_info']['execute_node'],
                                               job['job_info']['pid'],
                                               datetime.utcnow().isoformat())
    sa_id = "hysds:%s" % get_uuid(sa_label)
    doc.softwareAgent(sa_id,
                      str(job['job_info']['pid']),
                      job['job_info']['execute_node'],
                      role=job.get('username', None),
                      label=sa_label,
                      bundle=bndl)

    # create processStep
    doc.processStep(ps_id,
                    job['job_info']['cmd_start'],
                    job['job_info']['cmd_end'], [],
                    sa_id,
                    None, [], [],
                    bundle=bndl,
                    prov_type="hysds:%s" % job['type'])

    # get json
    pd = json.loads(doc.serialize())

    # update software agent and process step
    if 'bundle' in prov_es_info:
        if len(prov_es_info['bundle']) == 1:
            bundle_id_orig = list(prov_es_info['bundle'].keys())[0]
            # NOTE(review): the temp document is built with bundle=None, so
            # its serialization presumably has no 'bundle' section (the
            # top-level branch below reads the same records from pd
            # directly) - confirm against ProvEsDocument.serialize(). Use
            # the bundled records when present, otherwise fall back to the
            # top-level ones instead of raising KeyError.
            worker = pd.get('bundle', {}).get(bundle_id, pd)
            _merge_worker_prov(prov_es_info['bundle'][bundle_id_orig],
                               worker, ps_id, job)
    else:
        _merge_worker_prov(prov_es_info, pd, ps_id, job)

    # write prov
    with open(prov_es_file, 'w') as f:
        json.dump(prov_es_info, f, indent=2)


def _merge_worker_prov(target, worker, ps_id, job):
    """Merge worker-generated agent/association/activity records into target.

    ``target`` is either a whole PROV-ES structure or one of its bundles;
    ``worker`` is the matching level of the worker's temporary document.
    ``target`` is mutated in place.
    """
    # update software agent
    target.setdefault('agent', {}).update(worker['agent'])

    # update wasAssociatedWith
    associations = target.setdefault('wasAssociatedWith', {})
    associations.update(worker['wasAssociatedWith'])

    # nothing recorded yet: adopt the worker's process step as-is
    if 'activity' not in target:
        target['activity'] = worker['activity']
        return

    activities = target['activity']

    # zero or several activities: there is no single step to enrich, so
    # the worker's process step is simply added alongside them
    if len(activities) != 1:
        activities.update(worker['activity'])
        return

    # exactly one activity: enrich it with the worker's timing and job info
    ps_id_orig = list(activities.keys())[0]
    activity = activities[ps_id_orig]
    worker_activity = worker['activity'][ps_id]
    activity['prov:startTime'] = worker_activity['prov:startTime']
    activity['prov:endTime'] = worker_activity['prov:endTime']
    activity['hysds:job_id'] = job['job_id']
    activity['hysds:job_type'] = job['type']
    activity['hysds:job_url'] = job['job_info']['job_url']
    activity['hysds:mozart_url'] = app.conf.MOZART_URL
    # keep a type that was already recorded; only fill it in when missing
    if 'prov:type' not in activity:
        activity['prov:type'] = worker_activity['prov:type']

    # the worker's associations point at its temporary process step id;
    # redirect them to the activity that was actually kept
    for association in associations.values():
        if association['prov:activity'] == ps_id:
            association['prov:activity'] = ps_id_orig
コード例 #3
0
ファイル: test_doc.py プロジェクト: pymonger/prov_es
def test_ProvEsDocument():
    """Smoke-test building and serializing a fully populated ProvEsDocument.

    One document is filled with input datasets, platforms, instruments, a
    sensor, software, an ATBD document, an algorithm, an output dataset, a
    software agent, a runtime context and a process step, then serialized
    and printed. There are no assertions: the test passes when none of the
    calls raises.
    """

    # create doc
    doc = ProvEsDocument()

    # input dataset
    dataset_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    doi = "10.5067/ARIAMH/INSAR/Scene"
    downloadURL = "https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875"
    instrument = "eos:INSAR2-SAR"
    level = "L0"
    doc.dataset(dataset_id, doi, [downloadURL], [instrument], None, level)

    # input DEM
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_doi = None
    dem_downloadURL = "https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip"
    dem_level = "L0"
    doc.dataset(dem_id, dem_doi, [dem_downloadURL], [], None, dem_level)

    # platform
    platform = "eos:INSAR2"
    doc.platform(platform, [instrument])

    # second instrument/platform from same org
    instrument2 = "eos:INSAR4-SAR"
    platform2 = "eos:INSAR4"
    doc.platform(platform2, [instrument2])

    # instrument
    sensor = "eos:SAR"
    gov_org = "eos:ASI"
    doc.instrument(instrument, platform, [sensor], [gov_org])
    doc.sensor(sensor, instrument)
    doc.instrument(instrument2, platform2, [sensor], [gov_org])
    doc.sensor(sensor, instrument2)

    # software
    software = "eos:ISCE"
    algorithm = "eos:interferogram_creation"
    doc.software(software, [algorithm])

    # document
    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    doc.document(atbd_id, atbd_doi, [atbd_url])

    # algorithm
    doc.algorithm(algorithm, [software], [atbd_id])

    # output dataset
    out_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    out_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    # NOTE(review): out_accessURL is never passed to doc.dataset() below;
    # confirm whether it was meant to be supplied or can be dropped.
    out_accessURL = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":""interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    out_downloadURL = "https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648"
    out_level = "L1"
    doc.dataset(out_id, out_doi, [out_downloadURL], [instrument], None, out_level)

    # software agent
    sa_id = "hysds:ariamh-worker-32.domain.com/12353"
    pid = "12353"
    worker_node = "ariamh-worker-32.domain.com"
    doc.softwareAgent(sa_id, pid, worker_node)

    # runtime context
    rt_ctx_id = "hysds:runtime_context"
    doc.runtimeContext(rt_ctx_id, [downloadURL])

    # process step: consumes the input dataset and DEM, produces the output
    proc_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    start_time = datetime.utcnow()
    end_time = start_time + timedelta(seconds=12233)
    doc.processStep(
        proc_id,
        start_time.isoformat() + "Z",
        end_time.isoformat() + "Z",
        [software],
        sa_id,
        rt_ctx_id,
        [dataset_id, dem_id],
        [out_id],
        wasAssociatedWithRole="softwareAgent",
    )

    # call form of print: the bare `print expr` statement is a syntax error
    # on Python 3, while this form prints the same text on Python 2 as well
    print(doc.serialize(indent=2))
コード例 #4
0
ファイル: log_utils.py プロジェクト: hysds/hysds
def log_prov_es(job, prov_es_info, prov_es_file):
    """Merge worker-only provenance into a PROV-ES document and write it.

    A temporary PROV-ES document is created to generate the records only
    the worker can produce (software agent with PID and execute node, and
    a process step carrying the command start/end times). Those records
    are merged into ``prov_es_info`` in place, and the result is dumped as
    JSON to ``prov_es_file``.

    Where the merge happens:
      * ``prov_es_info`` has a "bundle" key holding exactly one bundle:
        the records are merged into that bundle.
      * ``prov_es_info`` has a "bundle" key holding zero or several
        bundles: nothing is merged; the info is written unchanged.
      * no "bundle" key: the records are merged at the top level.

    Args:
        job: job mapping; reads "job_id", "type", optional "username" and
            "job_info" ("execute_node", "pid", "cmd_start", "cmd_end",
            and, when a single activity is being updated, "job_url").
        prov_es_info: PROV-ES JSON structure to update (mutated in place).
        prov_es_file: path of the JSON file to write.
    """

    # create PROV-ES doc to generate attributes that only verdi know
    ps_id = "hysds:%s" % get_uuid(job["job_id"])
    bundle_id = "hysds:%s" % get_uuid("bundle-%s" % job["job_id"])
    doc = ProvEsDocument()

    # records are added to the temp document without a bundle
    bndl = None

    # create sofware agent
    sa_label = "hysds:pge_wrapper/%s/%d/%s" % (
        job["job_info"]["execute_node"],
        job["job_info"]["pid"],
        datetime.utcnow().isoformat(),
    )
    sa_id = "hysds:%s" % get_uuid(sa_label)
    doc.softwareAgent(
        sa_id,
        str(job["job_info"]["pid"]),
        job["job_info"]["execute_node"],
        role=job.get("username", None),
        label=sa_label,
        bundle=bndl,
    )

    # create processStep
    doc.processStep(
        ps_id,
        job["job_info"]["cmd_start"],
        job["job_info"]["cmd_end"],
        [],
        sa_id,
        None,
        [],
        [],
        bundle=bndl,
        prov_type="hysds:%s" % job["type"],
    )

    # get json
    pd = json.loads(doc.serialize())

    # update software agent and process step
    if "bundle" in prov_es_info:
        if len(prov_es_info["bundle"]) == 1:
            bundle_id_orig = list(prov_es_info["bundle"].keys())[0]
            # NOTE(review): the temp document is built with bundle=None, so
            # its serialization presumably has no "bundle" section (the
            # top-level branch below reads the same records from pd
            # directly) - confirm against ProvEsDocument.serialize(). Use
            # the bundled records when present, otherwise fall back to the
            # top-level ones instead of raising KeyError.
            worker = pd.get("bundle", {}).get(bundle_id, pd)
            _merge_worker_prov(
                prov_es_info["bundle"][bundle_id_orig], worker, ps_id, job
            )
    else:
        _merge_worker_prov(prov_es_info, pd, ps_id, job)

    # write prov
    with open(prov_es_file, "w") as f:
        json.dump(prov_es_info, f, indent=2)


def _merge_worker_prov(target, worker, ps_id, job):
    """Merge worker-generated agent/association/activity records into target.

    ``target`` is either a whole PROV-ES structure or one of its bundles;
    ``worker`` is the matching level of the worker's temporary document.
    ``target`` is mutated in place.
    """
    # update software agent
    target.setdefault("agent", {}).update(worker["agent"])

    # update wasAssociatedWith
    associations = target.setdefault("wasAssociatedWith", {})
    associations.update(worker["wasAssociatedWith"])

    # nothing recorded yet: adopt the worker's process step as-is
    if "activity" not in target:
        target["activity"] = worker["activity"]
        return

    activities = target["activity"]

    # zero or several activities: there is no single step to enrich, so
    # the worker's process step is simply added alongside them
    if len(activities) != 1:
        activities.update(worker["activity"])
        return

    # exactly one activity: enrich it with the worker's timing and job info
    ps_id_orig = list(activities.keys())[0]
    activity = activities[ps_id_orig]
    worker_activity = worker["activity"][ps_id]
    activity["prov:startTime"] = worker_activity["prov:startTime"]
    activity["prov:endTime"] = worker_activity["prov:endTime"]
    activity["hysds:job_id"] = job["job_id"]
    activity["hysds:job_type"] = job["type"]
    activity["hysds:job_url"] = job["job_info"]["job_url"]
    activity["hysds:mozart_url"] = app.conf.MOZART_URL
    # keep a type that was already recorded; only fill it in when missing
    if "prov:type" not in activity:
        activity["prov:type"] = worker_activity["prov:type"]

    # the worker's associations point at its temporary process step id;
    # redirect them to the activity that was actually kept
    for association in associations.values():
        if association["prov:activity"] == ps_id:
            association["prov:activity"] = ps_id_orig
コード例 #5
0
ファイル: test_doc.py プロジェクト: hysds/prov_es
def test_ProvEsDocument():
    """Smoke-test building and serializing a fully populated ProvEsDocument.

    One document is filled with input datasets, platforms, instruments, a
    sensor, software, an ATBD document, an algorithm, an output dataset, a
    software agent, a runtime context and a process step, then serialized
    and printed. There are no assertions: the test passes when none of the
    calls raises.
    """

    # create doc
    doc = ProvEsDocument()

    # input dataset
    dataset_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    doi = "10.5067/ARIAMH/INSAR/Scene"
    downloadURL = 'https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875'
    instrument = "eos:INSAR2-SAR"
    level = "L0"
    doc.dataset(dataset_id, doi, [downloadURL], [instrument], None, level)

    # input DEM
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_doi = None
    dem_downloadURL = 'https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip'
    dem_level = "L0"
    doc.dataset(dem_id, dem_doi, [dem_downloadURL], [], None, dem_level)

    # platform
    platform = "eos:INSAR2"
    doc.platform(platform, [instrument])

    # second instrument/platform from same org
    instrument2 = "eos:INSAR4-SAR"
    platform2 = "eos:INSAR4"
    doc.platform(platform2, [instrument2])

    # instrument
    sensor = "eos:SAR"
    gov_org = "eos:ASI"
    doc.instrument(instrument, platform, [sensor], [gov_org])
    doc.sensor(sensor, instrument)
    doc.instrument(instrument2, platform2, [sensor], [gov_org])
    doc.sensor(sensor, instrument2)

    # software
    software = "eos:ISCE"
    algorithm = "eos:interferogram_creation"
    doc.software(software, [algorithm])

    # document
    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    doc.document(atbd_id, atbd_doi, [atbd_url])

    # algorithm
    doc.algorithm(algorithm, [software], [atbd_id])

    # output dataset
    out_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    out_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    # NOTE(review): out_accessURL is never passed to doc.dataset() below;
    # confirm whether it was meant to be supplied or can be dropped.
    out_accessURL = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":"\"interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906\"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    out_downloadURL = 'https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648'
    out_level = "L1"
    doc.dataset(out_id, out_doi, [out_downloadURL], [instrument], None,
                out_level)

    # software agent
    sa_id = "hysds:ariamh-worker-32.domain.com/12353"
    pid = "12353"
    worker_node = "ariamh-worker-32.domain.com"
    doc.softwareAgent(sa_id, pid, worker_node)

    # runtime context
    rt_ctx_id = "hysds:runtime_context"
    doc.runtimeContext(rt_ctx_id, [downloadURL])

    # process step: consumes the input dataset and DEM, produces the output
    proc_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    start_time = datetime.utcnow()
    end_time = start_time + timedelta(seconds=12233)
    doc.processStep(proc_id,
                    start_time.isoformat() + 'Z',
                    end_time.isoformat() + 'Z', [software],
                    sa_id,
                    rt_ctx_id, [dataset_id, dem_id], [out_id],
                    wasAssociatedWithRole="softwareAgent")

    print(doc.serialize(indent=2))