def create_prov_es_json(id, project, master_orbit_file, slave_orbit_file,
                        aria_dem_xml, aria_dem_file, work_dir, prov_file):
    """Write a PROV-ES JSON document describing one interferogram job.

    The document records ``sentinel.ini``, both orbit files, the DEM XML and
    the DEM file as inputs (each path is appended to the work directory URL),
    together with the ``master_zip_url`` / ``slave_zip_url`` entries of the
    product context that match ``PLATFORM_RE``.  The product directory
    ``<work_dir>/<id>`` is recorded as the single output.

    Args:
        id: product ID; names the product directory, its context file
            (``<id>.context.json``) and the output granule label.
        project: project name used for the runtime context.
        master_orbit_file: master orbit file path, appended to the work URL.
        slave_orbit_file: slave orbit file path, appended to the work URL.
        aria_dem_xml: DEM XML path, appended to the work URL.
        aria_dem_file: DEM file path, appended to the work URL.
        work_dir: job work directory; converted to an absolute path.
        prov_file: path of the JSON file to write.
    """
    # absolute locations of the work directory and of the product inside it
    work_dir = os.path.abspath(work_dir)
    prod_dir = os.path.join(work_dir, id)

    # load the product context
    with open(os.path.join(prod_dir, "%s.context.json" % id)) as ctx_fh:
        context = json.load(ctx_fh)

    # a single placeholder timestamp is used for both start and end time so
    # that prov:used and prov:generated are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is used, records go on the document itself
    doc = ProvEsDocument()
    bundle = None

    # bookkeeping of what has been registered so far
    input_ids = {}
    seen_platforms = set()
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, id)

    def register_input_file(rel_path):
        """Add a file under the work URL to the doc and mark it as an input."""
        file_url = "%s/%s" % (work_url, rel_path)
        entity = doc.file("hysds:%s" % get_uuid(file_url), [file_url],
                          label=os.path.basename(rel_path))
        input_ids[entity.identifier] = True

    # sentinel.ini file
    ini_url = "%s/sentinel.ini" % work_url
    ini_entity = doc.file("hysds:%s" % get_uuid(ini_url), [ini_url],
                          label="sentinel.ini")
    input_ids[ini_entity.identifier] = True

    # orbit files
    register_input_file(master_orbit_file)
    register_input_file(slave_orbit_file)

    # S1A input products and the platform/instrument/sensor behind them
    level = "L0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ESA"
    doc.governingOrganization(gov_org, label="European Space Agency",
                              bundle=bundle)
    zip_urls = (context.get('master_zip_url', ''),
                context.get('slave_zip_url', ''))
    for url in zip_urls:
        match = PLATFORM_RE.search(url)
        if not match:
            # URL does not name a known platform; nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        instrument = "eos:%s-SAR" % pf
        product = doc.product("hysds:%s" % get_uuid(url), None, [url],
                              [instrument], None, level, None,
                              label=os.path.basename(url), bundle=bundle)
        input_ids[product.identifier] = True

        # each platform and instrument is declared only once
        if platform not in seen_platforms:
            doc.platform(platform, [instrument],
                         label="Sentinel1A Satellite", bundle=bundle)
            seen_platforms.add(platform)
        if instrument not in instrument_ids:
            doc.instrument(instrument, platform, [sensor], [gov_org],
                           label="%s-SAR" % pf, bundle=bundle)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bundle)
            instrument_ids[instrument] = True

    # DEM xml, DEM file and the SRTM provenance behind them
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_instrument = "eos:SRTM"
    srtm_sensor = "eos:radar"
    srtm_gov_org = "eos:JPL"
    doc.governingOrganization(srtm_gov_org, label="Jet Propulsion Laboratory",
                              bundle=bundle)
    register_input_file(aria_dem_xml)
    register_input_file(aria_dem_file)
    doc.platform(srtm_platform, [srtm_instrument], label="USS Endeavour",
                 bundle=bundle)
    doc.instrument(srtm_instrument, srtm_platform, [srtm_sensor],
                   [srtm_gov_org],
                   label="Shuttle Radar Topography Mission (SRTM)",
                   bundle=bundle)
    doc.sensor(srtm_sensor, srtm_instrument, label="radar", bundle=bundle)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    software_version = "2.0.0_201604"
    software = "eos:ISCE-%s" % software_version
    doc.software(
        software, ["eos:interferogram_generation"], software_version,
        label="InSAR SCE (InSAR Scientific Computing Environment) v%s"
              % software_version,
        location="https://winsar.unavco.org/isce.html", bundle=bundle)

    # output collection and granule
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:S1A-interferograms-%s" % int_version
    int_collection_label = ("ISCE generated S1A interferograms %s"
                            % int_version)
    doc.collection(
        int_collection, None, "S1A-interferograms-%s" % int_version,
        int_collection_label,
        ["https://aria-dst-dav.jpl.nasa.gov/products/s1a_ifg/%s"
         % int_version],
        instrument_ids.keys(), int_level, int_version,
        label=int_collection_label, bundle=bundle)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url), None, [prod_url],
                            instrument_ids.keys(), int_collection, int_level,
                            int_version, label=id, bundle=bundle)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-sentinel_ifg-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bundle)

    # process step tying the inputs to the output
    doc.processStep("hysds:%s" % get_uuid(job_id), fake_time, fake_time,
                    [software], None, rt_ctx_id, input_ids.keys(),
                    [output_ds.identifier], label=job_id, bundle=bundle,
                    prov_type="hysds:create_interferogram")

    # write the serialized document, re-indented with sorted keys
    with open(prov_file, 'w') as out_fh:
        json.dump(json.loads(doc.serialize()), out_fh, indent=2,
                  sort_keys=True)
def log_publish_prov_es(prov_es_info, prov_es_file, prod_path, pub_urls,
                        prod_metrics, objectid):
    """Log the dataset publish step in a PROV-ES document and write it out.

    A new document is built with the local product (``prod_path`` on this
    host) as input and the first published URL as output.  Attributes of the
    input entity already present in ``prov_es_info`` are merged into the new
    input entity and, except for location/label/type, copied onto the output
    entity as well.

    Args:
        prov_es_info: existing PROV-ES JSON as a dict; ``'prefix'`` supplies
            the namespaces and ``'entity'`` (optional) the prior entities.
        prov_es_file: path of the JSON file to write.
        prod_path: local path of the product being published.
        pub_urls: published URLs; only the first one is recorded.
        prod_metrics: dict providing ``'time_start'`` and ``'time_end'``.
        objectid: label given to the output entity.
    """
    # create PROV-ES doc; no bundle is used, records go on the document itself
    doc = ProvEsDocument(namespaces=prov_es_info['prefix'])
    bundle = None

    # input entity: the product as it sits on this host
    execute_node = socket.getfqdn()
    prod_url = "file://%s%s" % (execute_node, prod_path)
    input_id = "hysds:%s" % get_uuid(prod_url)
    doc.granule(input_id, None, [prod_url], [], None, None, None,
                label=os.path.basename(prod_url), bundle=bundle)

    # output entity: the first published location
    published_url = pub_urls[0]
    output_id = "hysds:%s" % get_uuid(published_url)
    doc.product(output_id, None, [published_url], [], None, None, None,
                label=objectid, bundle=bundle)

    # software and algorithm
    software_version = hysds.__version__
    software = "eos:HySDS-%s" % software_version
    doc.software(software, ["eos:product_publishing"], software_version,
                 label="%s v%s" % (hysds.__description__, software_version),
                 location=hysds.__url__, bundle=bundle)

    # software agent, identified by host, pid and start time
    pid = os.getpid()
    start_time = prod_metrics['time_start']
    sa_label = "hysds:publish_dataset/%s/%d/%s" % (execute_node, pid,
                                                   start_time)
    sa_id = "hysds:%s" % get_uuid(sa_label)
    doc.softwareAgent(sa_id, str(pid), execute_node, role="invoked",
                      label=sa_label, bundle=bundle)

    # process step linking input to output
    job_id = "publish_dataset-%s" % os.path.basename(prod_path)
    doc.processStep("hysds:%s" % get_uuid(job_id), start_time,
                    prod_metrics['time_end'], [software], sa_id, None,
                    [input_id], [output_id], label=job_id, bundle=bundle,
                    prov_type="hysds:publish_dataset")

    # work on the JSON form from here on
    prov_doc = json.loads(doc.serialize())

    # merge what was previously known about the input entity
    orig_ent = prov_es_info.get('entity', {}).get(input_id, {})
    prov_doc['entity'][input_id].update(orig_ent)

    # carry the same attributes over to the output entity, except the ones
    # that are specific to each entity
    own_attrs = ('prov:location', 'prov:label', 'prov:type')
    carried = {attr: value for attr, value in orig_ent.items()
               if attr not in own_attrs}
    if carried:
        prov_doc['entity'][output_id].update(carried)

    # write prov
    with open(prov_es_file, 'w') as out_fh:
        json.dump(prov_doc, out_fh, indent=2)
def create_prov_es_json(id, netsel_file, jobdesc_file, project, aria_dem_xml,
                        aria_dem_file, prod_dir, work_dir, prov_file):
    """Write a PROV-ES JSON document describing one interferogram job.

    The document records the network selector file, the job description
    file, the DEM XML and the DEM file as inputs (each path is appended to
    the work directory URL), together with every URL returned by
    ``get_netsel_urls(netsel_file)`` that matches ``PLATFORM_RE``.  The
    product directory is recorded as the single output.

    Args:
        id: label given to the output granule.
        netsel_file: network selector file path, appended to the work URL.
        jobdesc_file: job description file path, appended to the work URL.
        project: project name used for the runtime context.
        aria_dem_xml: DEM XML path, appended to the work URL.
        aria_dem_file: DEM file path, appended to the work URL.
        prod_dir: product directory, appended to the work URL.
        work_dir: job work directory used to build the work URL.
        prov_file: path of the JSON file to write.
    """
    # a single placeholder timestamp is used for both start and end time so
    # that prov:used and prov:generated are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is used, records go on the document itself
    doc = ProvEsDocument()
    bundle = None

    # bookkeeping of what has been registered so far
    input_ids = {}
    seen_platforms = set()
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, prod_dir)

    def register_input_file(rel_path):
        """Add a file under the work URL to the doc and mark it as an input."""
        file_url = "%s/%s" % (work_url, rel_path)
        entity = doc.file("hysds:%s" % get_uuid(file_url), [file_url],
                          label=os.path.basename(rel_path))
        input_ids[entity.identifier] = True

    # network selector and job description files
    register_input_file(netsel_file)
    register_input_file(jobdesc_file)

    # CSK input products and the platform/instrument/sensor behind them
    level = "L0"
    version = "v1.0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ASI"
    doc.governingOrganization(gov_org, label="Agenzia Spaziale Italiana",
                              bundle=bundle)
    for url in get_netsel_urls(netsel_file):
        match = PLATFORM_RE.search(url)
        if not match:
            # URL does not name a known platform; nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        # the last character of the platform name is used in the title
        platform_title = "COSMO-SkyMed Satellite %s" % pf[-1]
        instrument = "eos:%s-SAR" % pf
        product = doc.product("hysds:%s" % get_uuid(url), None, [url],
                              [instrument], None, level, version,
                              label=os.path.basename(url), bundle=bundle)
        input_ids[product.identifier] = True

        # each platform and instrument is declared only once
        if platform not in seen_platforms:
            doc.platform(platform, [instrument], label=platform_title,
                         bundle=bundle)
            seen_platforms.add(platform)
        if instrument not in instrument_ids:
            doc.instrument(instrument, platform, [sensor], [gov_org],
                           label="%s-SAR" % pf, bundle=bundle)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bundle)
            instrument_ids[instrument] = True

    # DEM xml, DEM file and the SRTM provenance behind them
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_instrument = "eos:SRTM"
    srtm_sensor = "eos:radar"
    srtm_gov_org = "eos:JPL"
    doc.governingOrganization(srtm_gov_org, label="Jet Propulsion Laboratory",
                              bundle=bundle)
    register_input_file(aria_dem_xml)
    register_input_file(aria_dem_file)
    doc.platform(srtm_platform, [srtm_instrument], label="USS Endeavour",
                 bundle=bundle)
    doc.instrument(srtm_instrument, srtm_platform, [srtm_sensor],
                   [srtm_gov_org],
                   label="Shuttle Radar Topography Mission (SRTM)",
                   bundle=bundle)
    doc.sensor(srtm_sensor, srtm_instrument, label="radar", bundle=bundle)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    software_version = "2.0.0_201604"
    software = "eos:ISCE-%s" % software_version
    doc.software(
        software, ["eos:interferogram_generation"], software_version,
        label="InSAR SCE (InSAR Scientific Computing Environment) v%s"
              % software_version,
        location="https://winsar.unavco.org/isce.html", bundle=bundle)

    # output collection and granule
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:CSK-interferograms-%s" % int_version
    int_collection_label = ("ISCE generated CSK interferograms %s"
                            % int_version)
    doc.collection(
        int_collection, None, "CSK-interferograms-%s" % int_version,
        int_collection_label,
        ["https://aria-dav.jpl.nasa.gov/repository/products/interferogram/%s"
         % int_version],
        instrument_ids.keys(), int_level, int_version,
        label=int_collection_label, bundle=bundle)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url), None, [prod_url],
                            instrument_ids.keys(), int_collection, int_level,
                            int_version, label=id, bundle=bundle)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-ariamh-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bundle)

    # process step tying the inputs to the output
    doc.processStep("hysds:%s" % get_uuid(job_id), fake_time, fake_time,
                    [software], None, rt_ctx_id, input_ids.keys(),
                    [output_ds.identifier], label=job_id, bundle=bundle,
                    prov_type="hysds:create_interferogram")

    # write the serialized document, re-indented with sorted keys
    with open(prov_file, 'w') as out_fh:
        json.dump(json.loads(doc.serialize()), out_fh, indent=2,
                  sort_keys=True)