def test_dot(self):
    """Smoke-test ``prov_to_dot`` on a document that contains two bundles.

    The rendered graph cannot be checked programmatically, so this test only
    verifies that the conversion completes without raising.
    """
    document = ProvDocument()

    # First bundle: one usage relation plus its entity (with a role) and activity.
    first = ProvBundle(identifier=EX_NS['bundle1'])
    first.usage(
        activity=EX_NS['a1'],
        entity=EX_NS['e1'],
        identifier=EX_NS['use1'],
    )
    first.entity(
        identifier=EX_NS['e1'],
        other_attributes={PROV_ROLE: "sausage"},
    )
    first.activity(identifier=EX_NS['a1'])

    # An activity that lives directly in the document, outside any bundle.
    document.activity(EX_NS['a2'])

    # Second bundle: same shape as the first, without extra attributes.
    second = ProvBundle(identifier=EX_NS['bundle2'])
    second.usage(
        activity=EX_NS['aa1'],
        entity=EX_NS['ee1'],
        identifier=EX_NS['use2'],
    )
    second.entity(identifier=EX_NS['ee1'])
    second.activity(identifier=EX_NS['aa1'])

    for bundle in (first, second):
        document.add_bundle(bundle)

    prov_to_dot(document)
def persist_document(document, name, format, extension, hide_elem_attr,
                     hide_rel_attr, dir):
    """Write *document* to the file ``name + extension`` in the given *format*.

    Serializer formats go through ``document.serialize``; graphical formats go
    through ``provo_dot.prov_to_dot(...).write``. For any other format a
    message is printed and the process exits with status 1.

    :param document: provenance document to export.
    :param name: output file name without extension.
    :param format: requested export format.
    :param extension: suffix appended to *name* to build the file name.
    :param hide_elem_attr: when true, element attributes are not drawn.
    :param hide_rel_attr: when true, relation attributes are not drawn.
    :param dir: graph direction passed to ``prov_to_dot``.
    """
    print_msg(" Persisting collected provenance to local storage")
    filename = "{}{}".format(name, extension)

    # Maps the user-facing RDF format names to the rdf_format keyword value.
    rdf_flavours = {"turtle": "turtle", "rdfxml": "xml", "trig": "trig"}
    text_formats = ("json", "rdf", "provn", "turtle", "rdfxml", "trig", "xml")
    graph_formats = ("dot", "jpeg", "png", "svg", "pdf")

    # Guard clause: nothing can handle this format, so report and exit.
    if format not in text_formats and format not in graph_formats:
        print_msg(
            " Could not find suitable exporting module for {{name=\"{}\", format=\"{}\", extension=\"{}\"}}. "
            "Try different input parameters.".format(name, format, extension))
        sys.exit(1)

    if format in text_formats:
        print_msg(" Employing serializer to export to {}".format(format))
        with open(filename, 'w') as out:
            if format in rdf_flavours:
                document.serialize(destination=out, format="rdf",
                                   rdf_format=rdf_flavours[format])
            else:
                document.serialize(destination=out, format=format)
    else:
        print_msg(" Employing dot writer to export to {}".format(format))
        graph = provo_dot.prov_to_dot(
            document,
            show_element_attributes=not hide_elem_attr,
            direction=dir,
            show_relation_attributes=not hide_rel_attr)
        graph.write(filename, format=format)

    print_msg("Export to file \"{}\" done.".format(filename), force=True)
def main(auth_json_path, full_provenance=False):
    """Run every dataset setup step and optionally render the provenance plan.

    :param auth_json_path: path of the JSON file holding the data-portal token.
    :param full_provenance: when true, ``plan_json`` is reset to an empty JSON
        object before the steps run and rendered to ``prov_svg`` afterwards.
    """
    with open(auth_json_path, 'r') as auth_file:
        auth_json = json.load(auth_file)

    api_token = auth_json['services']['cityofbostondataportal']['token']
    # NOTE: the credentials are hard-coded here; the lookups in auth_json
    # that used to supply them are disabled.
    username = '******'
    mongo_pass = '******'

    database_helper = database_helpers.DatabaseHelper(username=username,
                                                      password=mongo_pass)
    bdp_api = bdp_query.BDPQuery(api_token=api_token)

    if full_provenance:
        # Start from an empty plan document.
        with open(plan_json, 'w') as plan_file:
            json.dump({}, plan_file)

    # Steps that download data need both the database and the portal API.
    for step in (setup_crime_incidents,
                 setup_property_assessment,
                 setup_boston_public_schools,
                 setup_hospital_locations):
        step(database_helper, bdp_api, full_provenance=full_provenance)

    # Remaining steps only take the database helper.
    for step in (setup_crime_centroids,
                 setup_hospital_distances,
                 setup_crime_knn,
                 setup_home_value_model,
                 setup_hospital_scatter,
                 setup_school_distances,
                 setup_school_scatter):
        step(database_helper, full_provenance=full_provenance)

    if full_provenance:
        with open(plan_json, 'r') as plan_file:
            prov_doc = ProvDocument.deserialize(plan_file)
        prov_to_dot(prov_doc).write_svg(prov_svg)
def save_DotGraph(self, filename, format=None):
    """Render ``self.graph`` with Graphviz and write it to *filename*.

    :param filename: path of the file to write.
    :param format: output format passed to ``dot.write``. ``None`` (the
        default) and the string ``"None"`` both select PDF.
    """
    dot = prov_to_dot(self.graph)
    # TODO: add logic to find nodes with a dct:hasPart relation and add those
    # edges to the graph; prov_to_dot ignores them.

    # The original test compared against the *string* "None", so the default
    # argument None was forwarded to dot.write() instead of falling back to
    # PDF. Accept both spellings for backward compatibility.
    if format is None or format == "None":
        format = "pdf"
    dot.write(filename, format=format)
def main(auth_json_path, full_provenance=False):
    """Run every dataset setup step and optionally render the provenance plan.

    :param auth_json_path: path of the JSON file holding the data-portal
        token, username and password.
    :param full_provenance: when true, ``plan_json`` is reset to an empty JSON
        object before the steps run and rendered to ``prov_svg`` afterwards.
    """
    with open(auth_json_path, 'r') as auth_file:
        auth_json = json.load(auth_file)

    portal = auth_json['services']['cityofbostondataportal']
    api_token = portal['token']
    database_helper = database_helpers.DatabaseHelper(
        username=portal['username'],
        password=portal['password'],
    )
    bdp_api = bdp_query.BDPQuery(api_token=api_token)

    if full_provenance:
        # Start from an empty plan document.
        with open(plan_json, 'w') as plan_file:
            plan_file.write(json.dumps({}))

    # Steps that download data need both the database and the portal API.
    download_steps = [
        setup_crime_incidents,
        setup_property_assessment,
        setup_boston_public_schools,
        setup_hospital_locations,
    ]
    # Remaining steps only take the database helper.
    derived_steps = [
        setup_crime_centroids,
        setup_hospital_distances,
        setup_crime_knn,
        setup_home_value_model,
        setup_hospital_scatter,
        setup_school_distances,
        setup_school_scatter,
    ]
    for run_step in download_steps:
        run_step(database_helper, bdp_api, full_provenance=full_provenance)
    for run_step in derived_steps:
        run_step(database_helper, full_provenance=full_provenance)

    if not full_provenance:
        return

    with open(plan_json, 'r') as plan_file:
        prov_doc = ProvDocument.deserialize(plan_file)
    dot = prov_to_dot(prov_doc)
    dot.write_svg(prov_svg)
def reportGlobalGraph(self):
    """Write the global provenance graph as ``pGlobal.png`` and ``pGlobal.provn``.

    Both files are written under ``self.PATH``. When that path does not
    exist, a message is printed and nothing is written.
    """
    if not os.path.exists(self.PATH):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("path not found: {p}".format(p=self.PATH))
        return

    base = self.PATH + self.SEP + "pGlobal"
    prov_to_dot(pGlobal).write_png(base + ".png")
    # Context manager closes (and flushes) the file; the original left the
    # handle open.
    with open(base + ".provn", "w") as provn_file:
        provn_file.write(pGlobal.get_provn())
def prov2dot(prov_doc):
    """
    Convert a ProvDocument to its dot graphical representation.

    :param prov_doc: the document to convert.
    :return: whatever ``prov_to_dot`` returns for *prov_doc*.
    """
    return prov_to_dot(prov_doc)
def do_tests(self, prov_doc, msg=None):
    """Render *prov_doc* to UTF-8 SVG and check the output is large enough.

    The graphical content itself cannot be inspected, so the only check is
    that the SVG is longer than ``self.MIN_SVG_SIZE``. *msg* is accepted but
    not used.
    """
    graph = prov_to_dot(prov_doc)
    rendered = graph.create(format="svg", encoding="utf-8")
    too_small = (
        "The size of the generated SVG content should be greater than %d bytes"
        % self.MIN_SVG_SIZE
    )
    self.assertGreater(len(rendered), self.MIN_SVG_SIZE, too_small)
def do_tests(self, prov_doc, msg=None):
    """Render *prov_doc* to SVG and check the output is large enough.

    This is a deliberately naive check: the drawing cannot be verified, only
    that its length exceeds ``self.MIN_SVG_SIZE``. *msg* is accepted but not
    used.
    """
    svg = prov_to_dot(prov_doc).create(format="svg")
    minimum = self.MIN_SVG_SIZE
    self.assertGreater(
        len(svg),
        minimum,
        "The size of the generated SVG content should be greater than %d bytes"
        % self.MIN_SVG_SIZE,
    )
def generate_provenance_graph(n_clicks, id):
    """Render the provenance graph for *id* and return it as a PNG data URI.

    The PNG is also written to ``article-prov.png`` in the current working
    directory.

    :param n_clicks: accepted but not used by this function.
    :param id: identifier passed to ``SynapseProvenanceDocument``.
    :return: a ``data:image/png;base64,...`` string.
    """
    p = synapseutils.provenance.SynapseProvenanceDocument(id, annotations=[])
    dot = prov_to_dot(p.prov_doc)
    image_filename = 'article-prov.png'
    dot.write_png(image_filename)

    # Close the file deterministically instead of leaking the handle.
    with open(image_filename, 'rb') as image_file:
        raw_png = image_file.read()

    # b64encode returns bytes on Python 3; without decoding, format() would
    # embed the repr "b'...'" in the URI and produce an invalid image source.
    encoded_image = base64.b64encode(raw_png).decode('ascii')
    return 'data:image/png;base64,{}'.format(encoded_image)
def save_graph(self, nameFile):
    """Write the provenance of the last operation as a PNG graph.

    The image goes to ``<results_path>/<nameFile>.png``; the results
    directory is created first if it does not exist.
    """
    out_dir = self.results_path
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    graph = prov_to_dot(self.current_provDoc)
    target = os.path.join(self.results_path, nameFile) + '.png'
    graph.write_png(target)
def __init__(
    self,
    project,
    add_attributes=False,
    add_users=True,
    _add_project_namespaces=True,
    _iter_samples=True,
    _iter_project=True,
):
    """
    Build the W3C-PROV document for a project.

    :param Project project: instance of bioprov.src.Project.
    :param bool add_attributes: whether to add object attributes.
    :param bool add_users: whether to add users and environments.
    :param bool _add_project_namespaces: whether to run
        ``_add_project_namespaces()`` during construction.
    :param bool _iter_samples: whether to run ``_iter_samples()`` during
        construction.
    :param bool _iter_project: whether to run ``_iter_project()`` during
        construction.
    """
    # Reject anything that is not a Project before building any state.
    assert isinstance(project, Project), Warnings()["incorrect_type"](project, Project)

    self.ProvDocument = ProvDocument()
    self.project = project
    self.project.document = self.ProvDocument

    # Snapshots taken from the still-empty document.
    self._dot = prov_to_dot(self.ProvDocument)
    self._provn = self.ProvDocument.get_provn()

    # Registries filled in by the _iter_* / _add_* steps below.
    self._entities = {}
    self._activities = {}
    self._agents = {}
    self._user_bundles = {}
    self._provstore_document = None

    # Don't add attributes if you plan on exporting to graphic format.
    self.add_attributes = add_attributes

    # Must be set before the namespaces step runs.
    self._create_envs_and_users = bool(add_users)

    # Default actions to create the document, in a fixed order.
    if _add_project_namespaces:
        self._add_project_namespaces()
    if self._create_envs_and_users:
        self._iter_envs_and_users()
    if _iter_project:
        self._iter_project()
    if _iter_samples:
        self._iter_samples()
def reportROProvlets(self):
    """Write every research object's provlet as ``<id>.png`` and ``<id>.provn``.

    Files are written under ``self.PATH`` for each value in ``repoSim``. When
    that path does not exist, a message is printed and nothing is written.
    """
    if not os.path.exists(self.PATH):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("path not found: {p}".format(p=self.PATH))
        return

    for RO in repoSim.values():
        base = self.PATH + self.SEP + RO.id
        prov_to_dot(RO.provlet).write_png(base + ".png")
        # Context manager closes each file; the original leaked one open
        # handle per research object.
        with open(base + ".provn", "w") as provn_file:
            provn_file.write(RO.provlet.get_provn())
def save_provenance(self):
    """Export provenance as XML and, for small trees, as an SVG plot.

    Output names are derived from ``self.filename`` with its extension
    replaced by ``_provenance.xml`` / ``_provenance.svg``.
    """
    self._include_provenance()
    stem, _ = os.path.splitext(self.filename)
    filename = stem + '_provenance'
    self.provenance.serialize(filename + '.xml', format='xml')

    # Only plot provenance if there are not too many records.
    if len(self.provenance.records) <= 100:
        prov_to_dot(self.provenance).write_svg(filename + '.svg')
    else:
        logger.debug("Not plotting large provenance tree of %s",
                     self.filename)
def viz_turtle(source=None, content=None, img_file=None, **kwargs):
    """Parse a Turtle-encoded provenance document and write it as a PNG.

    :param source: forwarded to ``ProvDocument.deserialize`` as ``source``.
    :param content: forwarded to ``ProvDocument.deserialize`` as ``content``.
    :param img_file: destination passed to ``write_png``.
    :param kwargs: accepted but not used.
    """
    prov_doc = ProvDocument.deserialize(
        source=source,
        content=content,
        format='rdf',
        rdf_format='turtle',
    )
    # TODO: expose the show-attributes switches as optional arguments.
    render_options = {
        'use_labels': True,
        'show_element_attributes': False,
        'show_relation_attributes': False,
    }
    prov_to_dot(prov_doc, **render_options).write_png(img_file)
def write_prov(self):
    """Write every visit's provenance document as a PNG graph and a JSON file.

    Output goes to ``<output_fp>/png/visit<N>.png`` and
    ``<output_fp>/json/visit<N>.json`` for each ``(visit, document)`` pair in
    ``self.documents``.
    """
    json_fp = path.join(self.output_fp, "json")
    png_fp = path.join(self.output_fp, "png")

    # Create each directory independently. The original only created the
    # sub-directories when the root itself was missing, so an existing root
    # without them made the writes below fail.
    for directory in (self.output_fp, json_fp, png_fp):
        if not path.isdir(directory):
            os.makedirs(directory)

    for visit, document in self.documents.items():
        print("writing visit%d" % visit)
        dot = prov_to_dot(document)
        dot.write_png(path.join(png_fp, 'visit%d.png' % visit))
        document.serialize(path.join(json_fp, 'visit%d.json' % visit),
                           indent=4)
def save_all_graph(self, nameFile):
    """Merge every saved provenance JSON file and write one PNG graph.

    All ``.json`` files in ``self.results_path`` are deserialized and merged
    into a single document, which is rendered to
    ``<results_path>/<nameFile>.png``. The directory is created first if it
    does not exist.
    """
    if not os.path.exists(self.results_path):
        os.makedirs(self.results_path)

    directory = self.results_path
    merged = prov.ProvDocument()
    for entry in os.listdir(directory):
        if not entry.endswith('.json'):
            continue
        loaded = prov.ProvDocument.deserialize(os.path.join(directory, entry))
        merged.update(loaded)

    target = os.path.join(self.results_path, nameFile) + '.png'
    prov_to_dot(merged).write_png(target)
def provdoc2svg(provdoc, filename):
    """Render *provdoc* as SVG and write the bytes to *filename*.

    If Graphviz invocation fails, the problem is printed and an empty file
    is written instead.

    :param provdoc: provenance document to render.
    :param filename: path of the SVG file to write (opened in binary mode).
    """
    from prov.dot import prov_to_dot
    from pydotplus.graphviz import InvocationException

    try:
        dot = prov_to_dot(
            provdoc,
            use_labels=True,
            show_element_attributes=True,
            show_relation_attributes=True,
        )
        svg_content = dot.create(format="svg")
    except InvocationException as e:
        # Must be bytes: the file below is opened in "wb" mode, and writing
        # the original str "" there raised TypeError on the failure path.
        svg_content = b""
        print(f"problem while creating svg content: {repr(e)}")

    with open(filename, "wb") as f:
        f.write(svg_content)
def __call__(self, context, file_name=None):
    """
    Save provenance information.

    The provenance in ``context.prov`` is serialized to XML at the resolved
    file path and rendered as a PNG next to it (same base name). When a
    bucket key is available, both files are handed to
    ``misc.upload_to_s3_if_applicable``.

    :param context: object exposing the provenance document as ``prov``.
    :param file_name: requested output path, resolved through
        ``misc.create_temp_file_path_for_s3``.
    """
    file_name, bucket_name, bucket_key = \
        misc.create_temp_file_path_for_s3(file_name)
    basename, ext = os.path.splitext(file_name)
    png_name = basename + '.png'

    dot = prov_to_dot(context.prov)
    dot.write_png(png_name)
    context.prov.serialize(file_name, format='xml')

    if bucket_key is not None:
        misc.upload_to_s3_if_applicable(file_name, bucket_name, bucket_key)
        # Strip the extension by explicit length. The original slice
        # bucket_key[:-len(ext)] collapses to bucket_key[:0] (an empty key)
        # when the file name has no extension.
        png_key = bucket_key[:len(bucket_key) - len(ext)] + '.png'
        misc.upload_to_s3_if_applicable(png_name, bucket_name, png_key)
def transform_to_prov(context_model):
    """Build a PROV document for a process chain and render it as a PNG.

    Each key of ``context_model["process_details"]`` becomes an activity plus
    an output entity. Every step uses, and its output is derived from, the
    previous step's output (the input data for the first step). The graph is
    written to ``output-prov.png``.

    :param context_model: mapping with a ``"process_details"`` entry whose
        values carry ``["timing"]["start"]`` and ``["timing"]["end"]``.
    :return: the populated ``ProvDocument``.
    """
    from prov.model import ProvDocument
    from prov.dot import prov_to_dot

    doc = ProvDocument()
    namespaces = (
        ('is', 'http://www.provbook.org/nownews/is/#'),
        ('void', 'http://vocab.deri.ie/void#'),
        ('nowpeople', 'http://www.provbook.org/nownews/people/'),
    )
    for prefix, uri in namespaces:
        doc.add_namespace(prefix, uri)

    source_entity = doc.entity("void:Inputdata")
    backend_agent = doc.agent("nowpeople:EODC")
    user_agent = doc.agent("nowpeople:OpenEO-User")
    doc.wasAttributedTo(source_entity, backend_agent)

    process_details = context_model["process_details"]
    upstream = source_entity
    for step in process_details:
        output_entity = doc.entity("void:" + step + "_output")
        activity = doc.activity('is:' + step)
        doc.used(activity, upstream)
        doc.wasDerivedFrom(output_entity, upstream)
        doc.wasGeneratedBy(output_entity, activity,
                           time=process_details[step]["timing"]["end"])
        doc.wasStartedBy(activity, user_agent,
                         time=process_details[step]["timing"]["start"])
        # The next step consumes what this one produced.
        upstream = output_entity

    prov_to_dot(doc).write_png('output-prov.png')
    return doc
def write_png(self):
    """Render ``self.doc`` to ``provenance.png`` in ``self.output_dir``.

    :return: the path object of the written file.
    """
    outfile = self.output_dir / "provenance.png"
    prov_to_dot(self.doc).write_png(outfile.as_posix())
    return outfile
# Remaining `used` relations (earlier ones precede this fragment).
# Each call passes two qualified 'narciprov:' names to document.used().
document.used('narciprov:TimeSeriesExtract', 'narciprov:CellFromRegionOfInterest')
document.used('narciprov:RegionClassification', 'narciprov:RegionOfInterest')
document.used('narciprov:VisualStimulation', 'narciprov:VisualStimulusPresentation')

#
# wasGeneratedBy
#
document.wasGeneratedBy('narciprov:Craniotomy', 'narciprov:CraniotomyProcedure')
document.wasGeneratedBy('narciprov:ImageSequence', 'narciprov:TwoPhotonImaging')
document.wasGeneratedBy('narciprov:CellFromRegionOfInterest', 'narciprov:RegionClassification')
document.wasGeneratedBy('narciprov:FluorescenceActivity', 'narciprov:TimeSeriesExtract')

#
# wasInformedBy
#
document.wasInformedBy('narciprov:VisualStimulation', 'narciprov:TwoPhotonImaging')

#
# wasDerivedFrom
#
document.wasDerivedFrom('narciprov:RegionOfInterest', 'narciprov:ImageSequence')

#
# Visualize the graph
#
from prov.dot import prov_to_dot
dot = prov_to_dot(document)
# Writes the rendered graph to narciprov.svg in the current working directory.
dot.write_svg("narciprov.svg")  # graphviz should be installed
def visualize_prov(prov_doc):
    """Render *prov_doc* to ``tmp1.png`` and ``tmp1.pdf`` and return the PNG.

    :return: an ``Image`` built from ``tmp1.png``.
    """
    graph = prov_to_dot(prov_doc)
    png_path = 'tmp1.png'
    graph.write_png(png_path)
    graph.write_pdf('tmp1.pdf')
    return Image(png_path)
def serialize(graph, fmt="json"):
    """Return the prov graph rendered as a string in the *fmt* format.

    ``"dot"`` is rendered through ``prov_to_dot``; every other format is
    delegated to ``graph.serialize(format=fmt)``.
    """
    if fmt != "dot":
        return str(graph.serialize(format=fmt))
    return str(prov_to_dot(graph))
def write_png(self):
    """Render ``self.doc`` to ``provenance.png`` inside ``self.output_dir``.

    :return: the path of the written file.
    """
    outfile = os.path.join(self.output_dir, "provenance.png")
    prov_to_dot(self.doc).write_png(outfile)
    return outfile
def get_blank_prov_document():
    """Return a new ``ProvDocument`` created with ``all_namespaces``."""
    return ProvDocument(namespaces=all_namespaces)


if __name__ == "__main__":
    import argparse

    # Command line: a single positional integer, the admission to process.
    parser = argparse.ArgumentParser(
        description="Process and generate provenance for a MIMIC patient admission"
    )
    parser.add_argument(
        "admission_id", type=int, help="The ID of admission to process"
    )
    args = parser.parse_args()

    prov_doc1 = get_blank_prov_document()
    admission = Admission(prov_doc1, args.admission_id)
    admission.process()

    # Default serialization goes to <admission_id>.json under output_path.
    filepath = output_path / f"{args.admission_id}.json"
    with filepath.open("w") as f:
        prov_doc1.serialize(f)

    # PROV-N text: echoed to stdout and stored next to the first file.
    provn_content = prov_doc1.get_provn()
    print(provn_content)
    with filepath.with_suffix(".provn").open("w") as f:
        f.write(provn_content)

    # Graphical rendering as PDF, same base name.
    dot = prov_to_dot(prov_doc1)
    dot.write_pdf(filepath.with_suffix(".pdf"))

    db.close_session()
def reportROTrajectories(self):
    """Write one ``DT_<id>.png`` graph per research object in ``repoSim``.

    Each graph is rendered from the result of ``self.computeDT`` and written
    to the current working directory.
    """
    for research_object in repoSim.values():
        trajectory = self.computeDT(research_object)
        target = "DT_" + research_object.id + ".png"
        prov_to_dot(trajectory).write_png(target)
}) release_tag_model.entity("Release_Asset", { "uri": "", "format": "", "filepath": "" }) release_tag_model.hadMember("Release_Asset", "Release") release_tag_model.hadMember("Release_Evidence", "Release") release_tag_model.hadMember("Tag", "Release") release_tag_model.hadMember("Commit", "Tag") release_tag_model.wasAssociatedWith("Commit_Event", "User") release_tag_model.wasAssociatedWith("Release_Event", "User") release_tag_model.wasAssociatedWith("Tag_Event", "User") release_tag_model.wasAttributedTo("Release", "User") release_tag_model.wasAttributedTo("Tag", "User") release_tag_model.wasAttributedTo("Commit", "User") release_tag_model.wasGeneratedBy("Release", "Release_Event") release_tag_model.wasGeneratedBy("Tag", "Tag_Event") release_tag_model.wasGeneratedBy("Commit", "Commit_Event") for title, doc in [("git_commit_model_add", add), ("git_commit_model_mod", mod), ("git_commit_model_del", rem), ("gitlab_commit_model", com), ("gitlab_issue_model", iss), ("gitlab_merge_request_model", mr), ("gitlab_release_tag_model", release_tag_model)]: prov_to_dot(doc, show_nary=False, use_labels=False, direction="BT").write_pdf(f"pdfs/{title}.pdf") prov_to_dot(doc, show_nary=False, use_labels=False, direction="BT").write_svg(f"svgs/{title}.svg")
logpage_ident = get_logpage(str(logpage), prov_doc) return plate_ident # Create a new provenance document d1 = ProvDocument() declare_namespaces(d1) # get V468Cyg # get_plate # process = get_process('2180', d1) try: # scan = get_entity('2462','scan', d1) id = '2180' prov_type = 'lightcurve' # plate_name = get_entity(id,prov_type, d1) # process_name = get_process('9804',d1) # logpage_name = get_logpage('10085',d1) # source_id = get_source('40000001', d1) # plate_ident = get_plate_prov(id, d1) id = get_lightcurve('614-089373', d1) except TypeError: print('the job is still executing...') print(d1.get_provn()) filename = 'prov_' + prov_type + id d1.serialize(filename + '.xml', format='xml') dot = prov_to_dot(d1) dot.write_png(filename + '.png')
# Locate this script and the folder it lives in; output goes to "dot" and
# "xml" sub-folders next to it.
filename = os.path.abspath(inspect.getfile(inspect.currentframe()))
folder = os.path.dirname(filename)
dot_folder = os.path.join(folder, "dot")
xml_folder = os.path.join(folder, "xml")

# Create the output folders on first use.
if not os.path.exists(dot_folder):
    os.makedirs(dot_folder)
if not os.path.exists(xml_folder):
    os.makedirs(xml_folder)

# Process every other Python file in the same folder.
for prov_file in glob.glob(os.path.join(folder, "*.py")):
    # Skip this script itself.
    if prov_file == filename:
        continue
    with open(prov_file, "rt") as fh:
        content = fh.read()
    # Base name without extension; also trims any leading/trailing
    # extension-separator characters.
    name = os.path.splitext(os.path.basename(prov_file))[0].strip(
        os.path.extsep)
    dot_filename = os.path.join(dot_folder, name + os.path.extsep + "dot")
    xml_filename = os.path.join(xml_folder, name + os.path.extsep + "xml")
    # Fresh document per file, pre-registered with the NS_SEIS namespace.
    pr = prov.model.ProvDocument()
    pr.add_namespace(*NS_SEIS)
    # NOTE(review): exec runs the file's source in this scope; presumably
    # each file populates `pr` -- confirm. Only safe because the files are
    # local and trusted; never point this at untrusted input.
    exec(content)
    dot.prov_to_dot(pr, use_labels=True).write_dot(dot_filename)
    pr.serialize(xml_filename, format="xml")
def do_tests(self, prov_doc, msg=None): dot = prov_to_dot(prov_doc) svg_content = dot.create(format="svg")
def save_DotGraph(self, filename, format=None):
    """Render ``self.graph`` with Graphviz, adding ``isPartOf`` edges.

    ``prov_to_dot`` does not draw the Project/Session/Acquisition hierarchy,
    so edges are added by hand: each session node is linked to its project
    node, and each acquisition node to its session node. Nodes are located by
    searching the DOT nodes' ``URL`` attribute for the object's URI.

    :param filename: path of the file to write.
    :param format: output format passed to ``dot.write``. ``None`` (the
        default) and the string ``"None"`` both select PDF.
    """
    dot = prov_to_dot(self.graph)

    # Attributes applied to every isPartOf edge added below.
    style = {
        'label': 'isPartOf',
        'fontsize': '10.0',
        'color': 'darkgreen',
        'fontcolor': 'darkgreen'
    }

    def matching_nodes(uri):
        """Return the keys of DOT nodes whose URL attribute contains *uri*."""
        found = []
        for key, entries in dot.obj_dict['nodes'].items():
            attributes = entries[0]['attributes']
            if 'URL' in attributes and uri in str(attributes['URL']):
                found.append(key)
        return found

    # Query self.graph for Project uuids; a temporary RDFLib graph makes the
    # query easier.
    rdf_graph = Graph()
    rdf_graph = rdf_graph.parse(
        source=StringIO(
            self.graph.serialize(None, format='rdf', rdf_format='ttl')),
        format='turtle')

    # SPARQL query to get project UUIDs.
    query = ''' PREFIX nidm:<http://purl.org/nidash/nidm#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT distinct ?uuid Where { { ?uuid rdf:type nidm:Project } } '''
    qres = rdf_graph.query(query)

    for row in qres:
        print("project uuid = %s" % row)
        project_uuid = str(row[0])

        # First DOT node whose URL mentions the project; None if absent.
        project_matches = matching_nodes(project_uuid)
        project_node = project_matches[0] if project_matches else None

        # For each Session in self.sessions, find its node(s) in the DOT graph.
        for session in self.sessions:
            print(session)
            for session_node in matching_nodes(session.identifier.uri):
                # Only link to the project when its node was actually found;
                # an Edge with a None endpoint cannot be rendered.
                if project_node is not None:
                    dot.add_edge(Edge(session_node, project_node, **style))

                # Link every acquisition of this session to the session node.
                for acquisition in session.get_acquisitions():
                    for acquisition_node in matching_nodes(
                            acquisition.identifier.uri):
                        dot.add_edge(
                            Edge(acquisition_node, session_node, **style))

    # TODO: add logic to find nodes with a dct:hasPart relation and add those
    # edges to the graph; prov_to_dot ignores them.

    # The original test compared against the *string* "None", so the default
    # argument None was forwarded to dot.write() instead of falling back to
    # PDF. Accept both spellings for backward compatibility.
    if format is None or format == "None":
        format = "pdf"
    dot.write(filename, format=format)
def dot(self):
    """Regenerate the dot graph from ``self.ProvDocument``, cache and return it."""
    rendered = prov_to_dot(self.ProvDocument)
    self._dot = rendered
    return self._dot
from provneo4j.api import Api
import provneo4j.tests.examples as examples
# prov_to_dot is used at the bottom of this script but was never imported,
# which raised NameError once the document had been stored.
from prov.dot import prov_to_dot
from prov.model import ProvDocument, Namespace, Literal, PROV, Identifier
import datetime
import os

provneo4j_api = Api(base_url="http://localhost:7474/db/data", username="******", password="******")


def primer():
    """Load the serialized PROV document stored next to this script.

    Reads the first line of ``output.json`` (located in this script's
    directory) and deserializes it.

    :return: the deserialized ``ProvDocument``.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    with open(str(script_path) + "/output.json") as json_file:
        # NOTE(review): only the first line is read, so the JSON is presumably
        # stored on a single line -- confirm against the writer.
        line = json_file.readline()
    return ProvDocument.deserialize(content=line)


prov_document = primer()

# Store the document through the provneo4j API under the given name.
provneo4j_api.document.create(prov_document, name="Primer Example")

# Render the same document to PDF.
dot = prov_to_dot(prov_document)
# dot.write_png('article-prov.png')
dot.write_pdf('article-prov.pdf')