def log_publish_prov_es(prov_es_info, prov_es_file, prod_path, pub_urls,
                        prod_metrics, objectid):
    """Write a PROV-ES JSON document describing the dataset publish step.

    A fresh PROV-ES document is built with the local product as the input
    entity and the first published URL as the output entity, linked by a
    ``hysds:publish_dataset`` process step. Attributes already recorded for
    the input entity in ``prov_es_info`` are carried over, and the result is
    dumped as JSON to ``prov_es_file``.

    Args:
        prov_es_info: existing PROV-ES JSON (dict); its ``'prefix'`` entry is
            passed as the document namespaces and its optional ``'entity'``
            entry is consulted for prior input-entity attributes.
        prov_es_file: path of the JSON file to write.
        prod_path: local path of the product being published.
        pub_urls: published URLs; only the first one is used.
        prod_metrics: dict providing ``'time_start'`` and ``'time_end'``.
        objectid: label given to the output entity.
    """
    prov_doc = ProvEsDocument(namespaces=prov_es_info['prefix'])

    # no bundle is created; every record below is added with bundle=None
    bundle = None

    # input entity: the product as it sits on this host
    host = socket.getfqdn()
    local_url = "file://%s%s" % (host, prod_path)
    input_id = "hysds:%s" % get_uuid(local_url)
    prov_doc.granule(
        input_id, None, [local_url], [], None, None, None,
        label=os.path.basename(local_url), bundle=bundle,
    )

    # output entity: the first published location
    first_pub_url = pub_urls[0]
    output_id = "hysds:%s" % get_uuid(first_pub_url)
    prov_doc.product(
        output_id, None, [first_pub_url], [], None, None, None,
        label=objectid, bundle=bundle,
    )

    # software record for the HySDS release doing the publishing
    version = hysds.__version__
    title = "%s v%s" % (hysds.__description__, version)
    software_id = "eos:HySDS-%s" % version
    prov_doc.software(
        software_id, ["eos:product_publishing"], version,
        label=title, location=hysds.__url__, bundle=bundle,
    )

    # software agent: this process on this host
    pid = os.getpid()
    agent_label = "hysds:publish_dataset/%s/%d/%s" % (
        host, pid, prod_metrics['time_start'])
    agent_id = "hysds:%s" % get_uuid(agent_label)
    prov_doc.softwareAgent(
        agent_id, str(pid), host,
        role="invoked", label=agent_label, bundle=bundle,
    )

    # process step tying input, output, software and agent together
    step_label = "publish_dataset-%s" % os.path.basename(prod_path)
    prov_doc.processStep(
        "hysds:%s" % get_uuid(step_label),
        prod_metrics['time_start'],
        prod_metrics['time_end'],
        [software_id], agent_id, None, [input_id], [output_id],
        label=step_label, bundle=bundle,
        prov_type="hysds:publish_dataset",
    )

    serialized = json.loads(prov_doc.serialize())

    # overlay whatever was previously recorded for the input entity
    prior_attrs = prov_es_info.get('entity', {}).get(input_id, {})
    serialized['entity'][input_id].update(prior_attrs)

    # the output entity inherits those attributes too, except the ones that
    # identify it (location, label, type)
    output_specific = ('prov:location', 'prov:label', 'prov:type')
    for key in prior_attrs:
        if key not in output_specific:
            serialized['entity'][output_id][key] = prior_attrs[key]

    with open(prov_es_file, 'w') as out:
        json.dump(serialized, out, indent=2)
def _merge_worker_prov(target, source, ps_id, job):
    """Merge worker-generated PROV-ES records from ``source`` into ``target``.

    Both arguments are dicts holding PROV-JSON sections (``'agent'``,
    ``'wasAssociatedWith'``, ``'activity'``); ``target`` is modified in place.

    Args:
        target: section of the job's PROV-ES JSON to update (either the
            top-level document or a single bundle).
        source: matching section of the temporary worker document.
        ps_id: id of the process step inside ``source``.
        job: job dict; ``'job_id'``, ``'type'`` and ``job_info['job_url']``
            are copied onto the existing activity.
    """
    target.setdefault('agent', {}).update(source['agent'])
    target.setdefault('wasAssociatedWith', {}).update(
        source['wasAssociatedWith'])

    if 'activity' not in target:
        target['activity'] = source['activity']
        return

    activities = target['activity']
    if len(activities) != 1:
        # zero or several activities: no single one to update in place, so
        # the worker's process step is added alongside them
        activities.update(source['activity'])
        return

    # exactly one pre-existing activity: keep its id, refresh its timing and
    # job metadata from the worker's process step
    ps_id_orig = next(iter(activities))
    existing = activities[ps_id_orig]
    worker_step = source['activity'][ps_id]
    existing['prov:startTime'] = worker_step['prov:startTime']
    existing['prov:endTime'] = worker_step['prov:endTime']
    existing['hysds:job_id'] = job['job_id']
    existing['hysds:job_type'] = job['type']
    existing['hysds:job_url'] = job['job_info']['job_url']
    existing['hysds:mozart_url'] = app.conf.MOZART_URL
    if 'prov:type' not in existing:
        existing['prov:type'] = worker_step['prov:type']

    # associations merged in above still reference the worker's step id;
    # point them at the activity that was kept
    for assoc in target['wasAssociatedWith'].values():
        if assoc['prov:activity'] == ps_id:
            assoc['prov:activity'] = ps_id_orig


def log_prov_es(job, prov_es_info, prov_es_file):
    """Log PROV-ES document.

    Creates a temporary PROV-ES document to populate attributes that only
    the worker has access to (e.g. PID), merges them into ``prov_es_info``
    in place and writes the result to ``prov_es_file`` as JSON.

    Args:
        job: job dict. Reads ``'job_id'``, ``'type'``, the optional
            ``'username'`` and, under ``'job_info'``, ``'execute_node'``,
            ``'pid'``, ``'cmd_start'``, ``'cmd_end'`` and ``'job_url'``.
        prov_es_info: PROV-ES JSON (dict) produced for the job; mutated.
        prov_es_file: path of the JSON file to write.

    Raises:
        KeyError: if a required key is missing from ``job`` or from the
            serialized temporary document.
    """
    ps_id = "hysds:%s" % get_uuid(job['job_id'])
    bundle_id = "hysds:%s" % get_uuid('bundle-%s' % job['job_id'])
    doc = ProvEsDocument()

    # the temporary document is built without a bundle
    bundle = None

    job_info = job['job_info']

    # software agent for the worker process that ran the job
    sa_label = "hysds:pge_wrapper/%s/%d/%s" % (
        job_info['execute_node'], job_info['pid'],
        datetime.utcnow().isoformat())
    sa_id = "hysds:%s" % get_uuid(sa_label)
    doc.softwareAgent(sa_id, str(job_info['pid']), job_info['execute_node'],
                      role=job.get('username', None), label=sa_label,
                      bundle=bundle)

    # process step carrying the command's start and end times
    doc.processStep(ps_id, job_info['cmd_start'], job_info['cmd_end'], [],
                    sa_id, None, [], [], bundle=bundle,
                    prov_type="hysds:%s" % job['type'])

    worker_doc = json.loads(doc.serialize())

    if 'bundle' in prov_es_info:
        # NOTE: when prov_es_info holds zero or several bundles nothing is
        # merged and the document is written back unchanged
        if len(prov_es_info['bundle']) == 1:
            bundle_id_orig = next(iter(prov_es_info['bundle']))
            # Use the worker document's own bundle when it has one; otherwise
            # fall back to its top-level records. Because no bundle is passed
            # above, the records are expected at the top level, where an
            # unconditional worker_doc['bundle'][bundle_id] lookup would raise
            # KeyError. NOTE(review): confirm against
            # ProvEsDocument.serialize() output.
            source = worker_doc.get('bundle', {}).get(bundle_id, worker_doc)
            _merge_worker_prov(prov_es_info['bundle'][bundle_id_orig],
                               source, ps_id, job)
    else:
        _merge_worker_prov(prov_es_info, worker_doc, ps_id, job)

    with open(prov_es_file, 'w') as f:
        json.dump(prov_es_info, f, indent=2)
def test_ProvEsDocument():
    """Smoke-test ProvEsDocument by building and printing a sample document.

    Adds two input datasets, two platform/instrument/sensor sets, software,
    an algorithm with its document, an output dataset, a software agent, a
    runtime context and a process step, then prints the serialized result.
    There are no assertions: the test passes if no call raises.
    """

    # create doc
    doc = ProvEsDocument()

    # input dataset
    scene_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    doi = "10.5067/ARIAMH/INSAR/Scene"
    downloadURL = "https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875"
    instrument = "eos:INSAR2-SAR"
    level = "L0"
    doc.dataset(scene_id, doi, [downloadURL], [instrument], None, level)

    # input DEM
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_doi = None
    dem_downloadURL = "https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip"
    dem_level = "L0"
    doc.dataset(dem_id, dem_doi, [dem_downloadURL], [], None, dem_level)

    # platform
    platform = "eos:INSAR2"
    doc.platform(platform, [instrument])

    # second instrument/platform from same org
    instrument2 = "eos:INSAR4-SAR"
    platform2 = "eos:INSAR4"
    doc.platform(platform2, [instrument2])

    # instrument
    sensor = "eos:SAR"
    gov_org = "eos:ASI"
    doc.instrument(instrument, platform, [sensor], [gov_org])
    doc.sensor(sensor, instrument)
    doc.instrument(instrument2, platform2, [sensor], [gov_org])
    doc.sensor(sensor, instrument2)

    # software
    software = "eos:ISCE"
    algorithm = "eos:interferogram_creation"
    doc.software(software, [algorithm])

    # document
    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    doc.document(atbd_id, atbd_doi, [atbd_url])

    # algorithm
    doc.algorithm(algorithm, [software], [atbd_id])

    # output dataset
    out_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    out_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    # NOTE(review): the access URL is defined but never passed to
    # doc.dataset() below; kept as sample data
    out_accessURL = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":""interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    out_downloadURL = "https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648"
    out_level = "L1"
    doc.dataset(out_id, out_doi, [out_downloadURL], [instrument], None,
                out_level)

    # software agent
    sa_id = "hysds:ariamh-worker-32.domain.com/12353"
    pid = "12353"
    worker_node = "ariamh-worker-32.domain.com"
    doc.softwareAgent(sa_id, pid, worker_node)

    # runtime context
    rt_ctx_id = "hysds:runtime_context"
    doc.runtimeContext(rt_ctx_id, [downloadURL])

    # process step
    proc_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    start_time = datetime.utcnow()
    end_time = start_time + timedelta(seconds=12233)
    doc.processStep(
        proc_id,
        start_time.isoformat() + "Z",
        end_time.isoformat() + "Z",
        [software],
        sa_id,
        rt_ctx_id,
        [scene_id, dem_id],
        [out_id],
        wasAssociatedWithRole="softwareAgent",
    )

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement is a SyntaxError on Python 3
    print(doc.serialize(indent=2))
def _merge_worker_prov(target, source, ps_id, job):
    """Merge worker-generated PROV-ES records from ``source`` into ``target``.

    Both arguments are dicts holding PROV-JSON sections (``"agent"``,
    ``"wasAssociatedWith"``, ``"activity"``); ``target`` is modified in place.

    Args:
        target: section of the job's PROV-ES JSON to update (either the
            top-level document or a single bundle).
        source: matching section of the temporary worker document.
        ps_id: id of the process step inside ``source``.
        job: job dict; ``"job_id"``, ``"type"`` and ``job_info["job_url"]``
            are copied onto the existing activity.
    """
    target.setdefault("agent", {}).update(source["agent"])
    target.setdefault("wasAssociatedWith", {}).update(
        source["wasAssociatedWith"])

    if "activity" not in target:
        target["activity"] = source["activity"]
        return

    activities = target["activity"]
    if len(activities) != 1:
        # zero or several activities: no single one to update in place, so
        # the worker's process step is added alongside them
        activities.update(source["activity"])
        return

    # exactly one pre-existing activity: keep its id, refresh its timing and
    # job metadata from the worker's process step
    ps_id_orig = next(iter(activities))
    existing = activities[ps_id_orig]
    worker_step = source["activity"][ps_id]
    existing["prov:startTime"] = worker_step["prov:startTime"]
    existing["prov:endTime"] = worker_step["prov:endTime"]
    existing["hysds:job_id"] = job["job_id"]
    existing["hysds:job_type"] = job["type"]
    existing["hysds:job_url"] = job["job_info"]["job_url"]
    existing["hysds:mozart_url"] = app.conf.MOZART_URL
    if "prov:type" not in existing:
        existing["prov:type"] = worker_step["prov:type"]

    # associations merged in above still reference the worker's step id;
    # point them at the activity that was kept
    for assoc in target["wasAssociatedWith"].values():
        if assoc["prov:activity"] == ps_id:
            assoc["prov:activity"] = ps_id_orig


def log_prov_es(job, prov_es_info, prov_es_file):
    """Log PROV-ES document.

    Creates a temporary PROV-ES document to populate attributes that only
    the worker has access to (e.g. PID), merges them into ``prov_es_info``
    in place and writes the result to ``prov_es_file`` as JSON.

    Args:
        job: job dict. Reads ``"job_id"``, ``"type"``, the optional
            ``"username"`` and, under ``"job_info"``, ``"execute_node"``,
            ``"pid"``, ``"cmd_start"``, ``"cmd_end"`` and ``"job_url"``.
        prov_es_info: PROV-ES JSON (dict) produced for the job; mutated.
        prov_es_file: path of the JSON file to write.

    Raises:
        KeyError: if a required key is missing from ``job`` or from the
            serialized temporary document.
    """
    ps_id = "hysds:%s" % get_uuid(job["job_id"])
    bundle_id = "hysds:%s" % get_uuid("bundle-%s" % job["job_id"])
    doc = ProvEsDocument()

    # the temporary document is built without a bundle
    bundle = None

    job_info = job["job_info"]

    # software agent for the worker process that ran the job
    sa_label = "hysds:pge_wrapper/%s/%d/%s" % (
        job_info["execute_node"],
        job_info["pid"],
        datetime.utcnow().isoformat(),
    )
    sa_id = "hysds:%s" % get_uuid(sa_label)
    doc.softwareAgent(
        sa_id,
        str(job_info["pid"]),
        job_info["execute_node"],
        role=job.get("username", None),
        label=sa_label,
        bundle=bundle,
    )

    # process step carrying the command's start and end times
    doc.processStep(
        ps_id,
        job_info["cmd_start"],
        job_info["cmd_end"],
        [],
        sa_id,
        None,
        [],
        [],
        bundle=bundle,
        prov_type="hysds:%s" % job["type"],
    )

    worker_doc = json.loads(doc.serialize())

    if "bundle" in prov_es_info:
        # NOTE: when prov_es_info holds zero or several bundles nothing is
        # merged and the document is written back unchanged
        if len(prov_es_info["bundle"]) == 1:
            bundle_id_orig = next(iter(prov_es_info["bundle"]))
            # Use the worker document's own bundle when it has one; otherwise
            # fall back to its top-level records. Because no bundle is passed
            # above, the records are expected at the top level, where an
            # unconditional worker_doc["bundle"][bundle_id] lookup would raise
            # KeyError. NOTE(review): confirm against
            # ProvEsDocument.serialize() output.
            source = worker_doc.get("bundle", {}).get(bundle_id, worker_doc)
            _merge_worker_prov(
                prov_es_info["bundle"][bundle_id_orig], source, ps_id, job)
    else:
        _merge_worker_prov(prov_es_info, worker_doc, ps_id, job)

    with open(prov_es_file, "w") as f:
        json.dump(prov_es_info, f, indent=2)
def test_ProvEsDocument():
    """Build a sample interferogram provenance document and print it.

    Exercises the ProvEsDocument builder methods for datasets, platforms,
    instruments, sensors, software, documents, algorithms, software agents,
    runtime contexts and process steps. Nothing is asserted; the serialized
    document is printed for inspection.
    """
    prov = ProvEsDocument()

    # --- inputs: raw SAR scene and DEM tile ---------------------------------
    sar_instrument = "eos:INSAR2-SAR"
    scene_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    scene_url = 'https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875'
    prov.dataset(
        scene_id, "10.5067/ARIAMH/INSAR/Scene", [scene_url],
        [sar_instrument], None, "L0",
    )

    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_url = 'https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip'
    prov.dataset(dem_id, None, [dem_url], [], None, "L0")

    # --- platforms, instruments and the sensor they share -------------------
    sar_platform = "eos:INSAR2"
    prov.platform(sar_platform, [sar_instrument])

    other_instrument = "eos:INSAR4-SAR"
    other_platform = "eos:INSAR4"
    prov.platform(other_platform, [other_instrument])

    sar_sensor = "eos:SAR"
    agency = "eos:ASI"
    prov.instrument(sar_instrument, sar_platform, [sar_sensor], [agency])
    prov.sensor(sar_sensor, sar_instrument)
    prov.instrument(other_instrument, other_platform, [sar_sensor], [agency])
    prov.sensor(sar_sensor, other_instrument)

    # --- software, its ATBD document and the algorithm ----------------------
    isce = "eos:ISCE"
    ifg_algorithm = "eos:interferogram_creation"
    prov.software(isce, [ifg_algorithm])

    atbd = "eos:interferogram_creation_atbd"
    prov.document(
        atbd, "10.5067/SOME/FAKE/ATBD_DOI",
        ["http://aria.domain.com/docs/ATBD.pdf"],
    )
    prov.algorithm(ifg_algorithm, [isce], [atbd])

    # --- output interferogram -----------------------------------------------
    ifg_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    # sample search URL for the product; not passed to any call below
    ifg_access_url = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":"\"interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906\"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    ifg_url = 'https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648'
    prov.dataset(
        ifg_id, "10.5067/ARIAMH/INSAR/Interferogram", [ifg_url],
        [sar_instrument], None, "L1",
    )

    # --- execution context --------------------------------------------------
    agent_id = "hysds:ariamh-worker-32.domain.com/12353"
    prov.softwareAgent(agent_id, "12353", "ariamh-worker-32.domain.com")

    runtime_ctx = "hysds:runtime_context"
    prov.runtimeContext(runtime_ctx, [scene_url])

    # --- process step linking inputs to the output --------------------------
    step_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    began = datetime.utcnow()
    finished = began + timedelta(seconds=12233)
    began_iso = began.isoformat() + 'Z'
    finished_iso = finished.isoformat() + 'Z'
    prov.processStep(
        step_id, began_iso, finished_iso, [isce], agent_id, runtime_ctx,
        [scene_id, dem_id], [ifg_id],
        wasAssociatedWithRole="softwareAgent",
    )

    print(prov.serialize(indent=2))