Example #1
0
def get_provdoc(format, infile):
    """Deserialize a ProvDocument from *infile* in the given format.

    :param format: either "json" or "xml"
    :param infile: source passed to ProvDocument.deserialize
    :return: a ProvDocument, or None when the format is unsupported
    """
    if format == "json":
        return ProvDocument.deserialize(infile)
    elif format == "xml":
        return ProvDocument.deserialize(infile, format='xml')
    else:
        # Fixed: original used a Py2 print statement (a syntax error under
        # Python 3) and dropped the closing parenthesis in the message.
        print("Error: unsupported format (xml and json are supported)")
Example #2
0
def provRead(source, format=None):
    """Deserialize a ProvDocument from *source*.

    When *format* is given, use that serializer and raise TypeError on
    failure.  Otherwise try every registered serializer in turn, rewinding
    *source* before each attempt (lazy format detection).

    :param source: a seekable file-like object to read from
    :param format: optional serializer name (case-insensitive)
    :return: the deserialized ProvDocument
    :raises TypeError: when the source cannot be deserialized
    """
    from prov.model import ProvDocument
    from prov.serializers import Registry

    Registry.load_serializers()
    serializers = Registry.serializers.keys()

    if format:
        try:
            ret = ProvDocument.deserialize(source=source,
                                           format=format.lower())
            return ret
        except Exception as e:
            log.error(e)
            raise TypeError(e)

    for format in serializers:
        source.seek(0)
        try:
            return ProvDocument.deserialize(source=source, format=format)
        except Exception:
            # Failures are expected while probing formats; narrowed from a
            # bare except so KeyboardInterrupt/SystemExit are not swallowed.
            pass
    else:
        raise TypeError("Could not read from the source. To get a proper "
                        "error message, specify the format with the 'format' "
                        "parameter.")
Example #3
0
def read(source, format=None):
    """
    Convenience function returning a ProvDocument instance.

    It does a lazy format detection by simply using try/except for all known
    formats. The deserializers should fail fairly early when data of the
    wrong type is passed to them thus the try/except is likely cheap. One
    could of course also do some more advanced format auto-detection but I am
    not sure that is necessary.

    The downside is that no proper error messages will be produced, use the
    format parameter to get the actual traceback.
    """
    # Lazy imports to not globber the namespace.
    from prov.model import ProvDocument

    from prov.serializers import Registry
    Registry.load_serializers()
    serializers = Registry.serializers.keys()

    if format:
        return ProvDocument.deserialize(source=source, format=format.lower())

    for format in serializers:
        try:
            return ProvDocument.deserialize(source=source, format=format)
        except Exception:
            # Narrowed from a bare except: probing failures are expected,
            # but KeyboardInterrupt/SystemExit must not be swallowed.
            pass
    else:
        raise TypeError("Could not read from the source. To get a proper "
                        "error message, specify the format with the 'format' "
                        "parameter.")
Example #4
0
def get_provdoc(format, infile):
    """Deserialize a ProvDocument from *infile* in the given format.

    :param format: either "json" or "xml"
    :param infile: source passed to ProvDocument.deserialize
    :return: a ProvDocument, or None when the format is unsupported
    """
    if format == "json":
        return ProvDocument.deserialize(infile)
    elif format == "xml":
        return ProvDocument.deserialize(infile, format='xml')
    else:
        # Fixed: original used a Py2 print statement (a syntax error under
        # Python 3) and dropped the closing parenthesis in the message.
        print("Error: unsupported format (xml and json are supported)")
Example #5
0
    def test_loading_all_json(self):
        """Re-load each JSON file that failed during setUp and round-trip it.

        Reloading the failures one at a time with logging makes it easy to
        see exactly which fixture file breaks and why.
        """
        for filename in self.fails:
            filepath = self.json_path + filename
            with open(filepath) as json_file:
                logger.info("Loading %s...", filepath)
                original_doc = ProvDocument.deserialize(json_file)
                serialized = original_doc.serialize(indent=4)
                reloaded_doc = ProvDocument.deserialize(content=serialized)
                self.assertEqual(
                    original_doc, reloaded_doc,
                    'Round-trip JSON encoding/decoding failed:  %s.' % filename)
Example #6
0
 def setUp(self):
     """Round-trip every .json fixture once, recording failures.

     Files that cannot be loaded, or that do not survive a
     serialize/deserialize round trip, are collected in self.fails so the
     debugging tests can re-examine them individually.
     """
     self.json_path = os.path.dirname(os.path.abspath(__file__)) + '/json/'
     filenames = os.listdir(self.json_path)
     self.fails = []
     for filename in filenames:
         if filename.endswith('.json'):
             with open(self.json_path + filename) as json_file:
                 try:
                     g1 = ProvDocument.deserialize(json_file)
                     json_str = g1.serialize(indent=4)
                     g2 = ProvDocument.deserialize(content=json_str)
                     self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % filename)
                 except Exception:
                     # Narrowed from a bare except so that test-runner
                     # signals (e.g. KeyboardInterrupt) are not swallowed.
                     self.fails.append(filename)
Example #7
0
    def testLoadAllJSON(self):
        """Re-load the JSON fixtures recorded as failures and round-trip them
        individually with DEBUG logging, so the offending file is visible."""
        # self.assertFalse(fails, 'Failed to load/round-trip %d JSON files (%s)' % (len(fails), ', '.join(fails)))
        logging.basicConfig(level=logging.DEBUG)

        # Code for debugging the failed tests
        for filename in self.fails:
            # Reload the failed files
            filepath = self.json_path + filename
#             os.rename(json_path + filename, json_path + filename + '-fail')
            with open(filepath) as json_file:
                logger.info("Loading %s...", filepath)
                g1 = ProvDocument.deserialize(json_file)
                # Serialize and deserialize again; equality proves the
                # JSON round trip is lossless for this document.
                json_str = g1.serialize(indent=4)
                g2 = ProvDocument.deserialize(content=json_str)
                self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % filename)
def main(auth_json_path, full_provenance=False):
    """Load API credentials, then run every dataset-setup step in sequence.

    When *full_provenance* is set, an empty provenance plan file is written
    first, and after all steps have run the accumulated plan is rendered to
    an SVG graph.

    :param auth_json_path: path to the JSON file holding service credentials
    :param full_provenance: also record and render a provenance plan
    """
    with open(auth_json_path, 'r') as f:
        auth_json = json.load(f)
        api_token = auth_json['services']['cityofbostondataportal']['token']
        # NOTE(review): username/password are masked here ('******'); the
        # commented-out expressions show where they originally came from.
        username = '******'#auth_json['services']['cityofbostondataportal']['username']
        mongo_pass = '******' #auth_json['services']['cityofbostondataportal']['username']

    database_helper = database_helpers.DatabaseHelper(username=username, password=mongo_pass)
    bdp_api = bdp_query.BDPQuery(api_token=api_token)

    if full_provenance:
        # Start from an empty provenance plan; plan_json is presumably a
        # module-level path constant -- TODO confirm.
        with open(plan_json, 'w') as f:
            f.write(json.dumps({}))

    # Each setup step appends to the shared database/provenance state, so
    # the order below is preserved as written.
    setup_crime_incidents(database_helper, bdp_api, full_provenance=full_provenance)
    setup_property_assessment(database_helper, bdp_api, full_provenance=full_provenance)
    setup_boston_public_schools(database_helper, bdp_api, full_provenance=full_provenance)
    setup_hospital_locations(database_helper, bdp_api, full_provenance=full_provenance)
    setup_crime_centroids(database_helper, full_provenance=full_provenance)
    setup_hospital_distances(database_helper, full_provenance=full_provenance)
    setup_crime_knn(database_helper, full_provenance=full_provenance)
    setup_home_value_model(database_helper, full_provenance=full_provenance)
    setup_hospital_scatter(database_helper, full_provenance=full_provenance)
    setup_school_distances(database_helper, full_provenance=full_provenance)
    setup_school_scatter(database_helper, full_provenance=full_provenance)

    if full_provenance:
        # Render the accumulated provenance plan to SVG.
        with open(plan_json, 'r') as f:
            prov_doc = ProvDocument.deserialize(f)
            dot = prov_to_dot(prov_doc)
            dot.write_svg(prov_svg)
def extract_pg_data(filepath: Path):
    """Extract Pokemon-game statistics from a PROV document file.

    Scans entities, activities and invalidation records to count collected
    balls, captured and disposed pokemons, and their average strengths.

    :param filepath: path to a serialized PROV document
    :return: tuple (n_balls_collected, n_pokemons_captured,
             n_pokemons_disposed, strength_captured_avg,
             strength_disposed_avg); the averages are -1 when no pokemon
             was captured/disposed
    """
    prov_doc = ProvDocument.deserialize(filepath)

    n_balls_collected = 0

    pokemons_strength = dict()
    pokemons_captured = []
    pokemons_disposed = []
    strength_captured_avg = -1
    strength_disposed_avg = -1

    for record in prov_doc.get_records(ProvElement):
        if isinstance(record, ProvEntity):
            ent_id = str(record.identifier)
            if "pokemons" in ent_id:
                strength_values = record.get_attribute(PGO_strength)  # type: set
                strength = (
                    next(iter(strength_values)) if strength_values else 0
                )  # type: int
                # assumes entity ids end with a 2-char version suffix like
                # ".0"; stripping it yields the pokemon id -- TODO confirm
                pokemon_id = ent_id[:-2]
                if ent_id.endswith(".0"):
                    # ".0" appears to mark the initial (capture) version
                    pokemons_captured.append(pokemon_id)
                    if strength and (pokemon_id not in pokemons_strength):
                        pokemons_strength[pokemon_id] = strength
        elif isinstance(record, ProvActivity):
            act_id = str(record.identifier)
            if "collectballs" in act_id:
                n_balls_collected += 1

    for record in prov_doc.get_records(ProvInvalidation):
        # the invalidated entity is the first argument of the record
        ent_id = str(record.args[0])
        pokemon_id = ent_id[:-2]
        pokemons_disposed.append(pokemon_id)

    n_pokemons_captured = len(pokemons_captured)
    n_pokemons_disposed = len(pokemons_disposed)
    if pokemons_captured:
        # average only over pokemons whose strength is known
        strength_captured_avg = np.mean(
            [
                pokemons_strength[pokemon_id]
                for pokemon_id in pokemons_captured
                if pokemon_id in pokemons_strength
            ]
        )
    if pokemons_disposed:
        strength_disposed_avg = np.mean(
            [
                pokemons_strength[pokemon_id]
                for pokemon_id in pokemons_disposed
                if pokemon_id in pokemons_strength
            ]
        )

    return (
        n_balls_collected,
        n_pokemons_captured,
        n_pokemons_disposed,
        strength_captured_avg,
        strength_disposed_avg,
    )
Example #10
0
def primer():
    """Load the first line of output.json (next to this script) as a ProvDocument."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    with open(script_dir + "/output.json") as json_file:
        first_line = json_file.readline()
    # deserialize is reachable as a classmethod; no throwaway instance needed
    return ProvDocument.deserialize(content=first_line)
def main(auth_json_path, full_provenance=False):
    """Load API credentials, then run every dataset-setup step in sequence.

    When *full_provenance* is set, an empty provenance plan file is written
    first, and after all steps have run the accumulated plan is rendered to
    an SVG graph.

    :param auth_json_path: path to the JSON file holding service credentials
    :param full_provenance: also record and render a provenance plan
    """
    with open(auth_json_path, 'r') as f:
        auth_json = json.load(f)
        api_token = auth_json['services']['cityofbostondataportal']['token']
        username = auth_json['services']['cityofbostondataportal']['username']
        mongo_pass = auth_json['services']['cityofbostondataportal']['password']

    database_helper = database_helpers.DatabaseHelper(username=username, password=mongo_pass)
    bdp_api = bdp_query.BDPQuery(api_token=api_token)

    if full_provenance:
        # Start from an empty provenance plan; plan_json is presumably a
        # module-level path constant -- TODO confirm.
        with open(plan_json, 'w') as f:
            f.write(json.dumps({}))

    # Each setup step appends to the shared database/provenance state, so
    # the order below is preserved as written.
    setup_crime_incidents(database_helper, bdp_api, full_provenance=full_provenance)
    setup_property_assessment(database_helper, bdp_api, full_provenance=full_provenance)
    setup_boston_public_schools(database_helper, bdp_api, full_provenance=full_provenance)
    setup_hospital_locations(database_helper, bdp_api, full_provenance=full_provenance)
    setup_crime_centroids(database_helper, full_provenance=full_provenance)
    setup_hospital_distances(database_helper, full_provenance=full_provenance)
    setup_crime_knn(database_helper, full_provenance=full_provenance)
    setup_home_value_model(database_helper, full_provenance=full_provenance)
    setup_hospital_scatter(database_helper, full_provenance=full_provenance)
    setup_school_distances(database_helper, full_provenance=full_provenance)
    setup_school_scatter(database_helper, full_provenance=full_provenance)

    if full_provenance:
        # Render the accumulated provenance plan to SVG.
        with open(plan_json, 'r') as f:
            prov_doc = ProvDocument.deserialize(f)
            dot = prov_to_dot(prov_doc)
            dot.write_svg(prov_svg)
Example #12
0
def primer():
    """Load the first line of output.json (next to this script) as a ProvDocument.

    :return: the deserialized ProvDocument
    """
    a = ProvDocument()
    script_path = os.path.dirname(os.path.abspath(__file__))  #
    with open(str(script_path) + "/output.json") as json_file:
        # The document is expected on a single line of the file.
        line = json_file.readline()
        a = a.deserialize(content=line)
    return a
Example #13
0
def count_flatprovenancetypes_for_graphs(
    dataset_path: Path,
    graph_filenames: Collection[str],
    level: int,
    including_primitives_types: bool,
) -> Tuple[List[Dict[int, Dict[str, int]]], List[List[float]]]:
    """Count flat provenance types for each graph at every level up to *level*.

    :param dataset_path: directory containing the serialized PROV graphs
    :param graph_filenames: file names (relative to *dataset_path*) to process
    :param level: highest type level to compute (inclusive)
    :param including_primitives_types: forwarded to
        calculate_flat_provenance_types
    :return: (results, timings) — per graph, a mapping level -> type counts,
        and the per-level computation durations in seconds
    """
    logger.debug(
        "Calculating flat provenance types up to level %s (with application types: %s) for %d graphs...",
        level,
        including_primitives_types,
        len(graph_filenames),
    )
    results = []  # type: List[Dict[int, Dict[str, int]]]
    timings = []  # type: List[List[float]]
    for graph_filename in graph_filenames:
        filepath = dataset_path / graph_filename
        prov_doc = ProvDocument.deserialize(filepath)
        durations = []  # type: List[float]
        features = dict()  # type: Dict[int, Dict[str, int]]
        for h in range(level + 1):
            timer = Timer(verbose=False)
            with timer:
                fp_types = calculate_flat_provenance_types(
                    prov_doc, h, including_primitives_types
                )
            # counting only the last level
            features[h] = count_fp_types(fp_types[h].values())
            durations.append(timer.interval)
        results.append(features)
        timings.append(durations)
    return results, timings
    def __init__(self, database_helper, full_provenance=False):
        """
        Initializes the provenance for the mjclawar_rarshad project

        Parameters
        ----------
        database_helper: DatabaseHelper
        full_provenance: bool

        Returns
        -------
        """
        assert isinstance(database_helper, DatabaseHelper)

        self.database_helper = database_helper
        # Resume from the stored plan when full provenance is requested,
        # otherwise start with a fresh document.
        if full_provenance:
            self.prov_doc = ProvDocument.deserialize(dir_info.plan_json)
        else:
            self.prov_doc = ProvDocument()
        # Register every project namespace on the document.
        for namespace in (mcras.BDP_NAMESPACE, mcras.ALG_NAMESPACE,
                          mcras.DAT_NAMESPACE, mcras.LOG_NAMESPACE,
                          mcras.ONT_NAMESPACE):
            self.prov_doc.add_namespace(namespace.name, namespace.link)
Example #15
0
    def test_get_document_as_json(self):
        """Store the primer example, read it back as JSON, and compare."""
        example = examples.primer_example()
        document_id = self.provapi.create_document_from_prov(example)

        prov_str = self.provapi.get_document_as_json(document_id)
        # The API must hand back a non-empty JSON string...
        self.assertIsNotNone(prov_str)
        self.assertIsInstance(prov_str, str)
        # ...that decodes to a document equal to what was stored.
        round_tripped = ProvDocument.deserialize(content=prov_str,
                                                 format="json")
        self.assertEqual(round_tripped, example)
def from_xml(xml_str=None):
    """Convert an XML string into a ProvDocument.

    :param xml_str: the XML string to parse
    :type xml_str: str
    :return: the parsed Prov document
    :rtype: ProvDocument
    :raises NoDocumentException: if *xml_str* is None
    """
    if xml_str is not None:
        return ProvDocument.deserialize(source=xml_str, format='xml')
    raise NoDocumentException()
Example #17
0
def viz_turtle(source=None, content=None, img_file=None, **kwargs):
    """Render a PROV document given as RDF/Turtle to a PNG image.

    :param source: file-like or path input for the Turtle data
    :param content: alternatively, the Turtle data as a string
    :param img_file: destination path for the PNG output
    """
    document = ProvDocument.deserialize(source=source, content=content,
                                        format='rdf', rdf_format='turtle')

    # TODO : show attributes has optional arg
    graph = prov_to_dot(document,
                        use_labels=True,
                        show_element_attributes=False,
                        show_relation_attributes=False)
    graph.write_png(img_file)
Example #18
0
def form_string(content):
    """
    Take a string or BufferedReader as argument and transform it into a ProvDocument.

    :param content: a ProvDocument, bytes, or BufferedReader
    :return: ProvDocument
    :raises ParseException: if the content type cannot be recognised
    """
    if isinstance(content, ProvDocument):
        return content
    elif isinstance(content, BufferedReader):
        # read() yields the same bytes as concatenating readlines(), without
        # the quadratic reduce(+) the original used.
        content = content.read()

    if type(content) is six.binary_type:
        # Sniff the serialization format from the first few bytes.
        content_str = content[0:15].decode()
        if "{" in content_str:
            return ProvDocument.deserialize(content=content, format='json')
        if '<?xml' in content_str:
            return ProvDocument.deserialize(content=content, format='xml')
        elif 'document' in content_str:
            return ProvDocument.deserialize(content=content, format='provn')

    raise ParseException("Unsupported input type {}".format(type(content)))
Example #19
0
 def prov(self, format='json', filename=None):
     """Fetch this resource's provenance in the requested representation.

     :param format: 'json', 'sources', 'prov-n', 'prov', or 'png'
     :param filename: destination file, used only for the 'png' format
     """
     if self.prov_url is None:
         raise APIException('no provenance information found')
     response = self.adama.utils.request(self.prov_url, format=format)
     # Dispatch on the requested representation with guard-style returns.
     if format in ('json', 'sources'):
         return response.json()
     if format == 'prov-n':
         return response.text
     if format == 'prov':
         return ProvDocument.deserialize(
             content=json.dumps(response.json()))
     if format == 'png':
         return png(response.content, filename)
Example #20
0
    def get_bundle(self, document_id, bundle_id, prov_format=ProvDocument):
        """Fetch a bundle of a stored document.

        :param document_id: id of the containing document
        :param bundle_id: id of the bundle to fetch
        :param prov_format: ProvDocument (decode the response) or a file
            extension string (return the raw body)
        """
        extension = 'json' if prov_format == ProvDocument else prov_format

        r = self._request('get', "/documents/%i/bundles/%i.%s" % (document_id, bundle_id, extension),
                          headers=self.headers)

        if prov_format == ProvDocument:
            return ProvDocument.deserialize(content=r.content)
        return r.content
def from_json(json=None):
    """Convert a JSON string into a ProvDocument.

    :param json: the JSON string to parse
    :type json: str
    :return: the parsed Prov document
    :rtype: prov.model.ProvDocument
    :raises NoDocumentException: if *json* is None
    """
    if json is not None:
        return ProvDocument.deserialize(source=json, format='json')
    raise NoDocumentException()
def from_provn(provn_str=None):
    """Convert a PROV-N string into a ProvDocument.

    :param provn_str: the PROV-N string to parse
    :type provn_str: str
    :return: the parsed Prov document
    :rtype: ProvDocument
    :raises NoDocumentException: if *provn_str* is None
    """
    if provn_str is not None:
        return ProvDocument.deserialize(source=provn_str, format='provn')
    raise NoDocumentException()
 def prov(self, format='json', filename=None):
     """Fetch this resource's provenance in the requested representation.

     :param format: 'json', 'sources', 'prov-n', 'prov', or 'png'
     :param filename: destination file, used only for the 'png' format
     """
     if self.prov_url is None:
         raise APIException('no provenance information found')
     response = self.adama.utils.request(self.prov_url, format=format)
     if format in ('json', 'sources'):
         return response.json()
     elif format == 'prov-n':
         return response.text
     elif format == 'prov':
         # Rebuild a ProvDocument from the JSON payload.
         return ProvDocument.deserialize(
             content=json.dumps(response.json()))
     elif format == 'png':
         return png(response.content, filename)
Example #24
0
    def get_document_prov(self, document_id, prov_format=ProvDocument):
        """Fetch a stored document's provenance.

        :param document_id: id of the document to fetch
        :param prov_format: ProvDocument (decode the response) or a file
            extension string (return the raw body)
        """
        extension = 'json' if prov_format == ProvDocument else prov_format

        r = self._request('get',
                          "/documents/%i.%s" % (document_id, extension),
                          headers=self.headers)

        if prov_format == ProvDocument:
            return ProvDocument.deserialize(content=r.content)
        return r.content
Example #25
0
 def testAllExamples(self):
     """Round-trip every example graph through PROV-JSON and check equality."""
     num_graphs = len(examples.tests)
     logger.info('PROV-JSON round-trip testing %d example provenance graphs', num_graphs)
     counter = 0
     for name, graph in examples.tests:
         counter += 1
         logger.info('%d. Testing the %s example', counter, name)
         g1 = graph()
         logger.debug('Original graph in PROV-N\n%s', g1.get_provn())
         # json_str = g1.get_provjson(indent=4)
         json_str = g1.serialize(indent=4)
         logger.debug('Original graph in PROV-JSON\n%s', json_str)
         g2 = ProvDocument.deserialize(content=json_str)
         logger.debug('Graph decoded from PROV-JSON\n%s', g2.get_provn())
         # Equality of the original and the reloaded graph proves the JSON
         # round trip is lossless for this example.
         self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % name)
Example #26
0
    def test_get_document_as_json(self):
        """
        Save the primer example, fetch it back as a JSON string, and check
        that the decoded document equals the original.
        :return:
        """
        self.clear_database()
        example = examples.primer_example()
        document_id = self.provapi.save_document_from_prov(example)

        prov_str = self.provapi.get_document_as_json(document_id)
        self.assertIsNotNone(prov_str)
        self.assertIsInstance(prov_str, str)
        # Decode the returned JSON and compare against what was stored.
        prov_document_reverse = ProvDocument.deserialize(content=prov_str,
                                                         format="json")
        self.assertEqual(prov_document_reverse, example)
def build_grakel_graphs(graphs: pd.DataFrame, dataset_path: Path):
    """Ensure *graphs* has a "grakel_graphs" column, building it if absent.

    Expects a "graph_file" column naming PROV files under *dataset_path*.
    Returns the (possibly updated) DataFrame.
    """
    if "grakel_graphs" in graphs.columns:
        # column already present -- nothing to do
        return graphs  # unchanged

    def _to_grakel(graph_filename):
        # load the PROV document and convert it, via networkx, to grakel form
        prov_doc = ProvDocument.deserialize(dataset_path / graph_filename)
        prov_graph = prov_to_graph(prov_doc)  # type: nx.MultiDiGraph
        return graph_from_prov_networkx_graph(prov_graph)

    graphs["grakel_graphs"] = [_to_grakel(name) for name in graphs.graph_file]
    return graphs
Example #28
0
    def get_document(self, doc_id, format=None, flattened=False, view=None):
        """Returns a ProvBundle object of the document with the ID provided or raises ApiNotFoundError"""

        extension = 'json' if format is None else format
        view = "/views/%s" % view if view in ['data', 'process', 'responsibility'] else ""
        url = "documents/%d%s%s.%s" % (doc_id, "/flattened" if flattened else "", view, extension)
        response = self.request(url, raw=True)

        if format is None:
            # No explicit format requested: decode into a ProvDocument
            return ProvDocument.deserialize(content=response)
        # A specific serialization was requested: hand back the raw response
        return response
Example #29
0
 def test_unifying(self):
     """Exercise flattened()/unified() on every unification fixture and check
     that unification reduces the number of records."""
     # This is a very trivial test just to exercise the unified() function
     # TODO: Create a proper unification test
     json_path = os.path.dirname(os.path.abspath(__file__)) + '/unification/'
     filenames = os.listdir(json_path)
     for filename in filenames:
         if not filename.endswith('.json'):
             continue
         filepath = json_path + filename
         with open(filepath) as json_file:
             logger.info('Testing unifying: %s', filename)
             logger.debug("Loading %s...", filepath)
             document = ProvDocument.deserialize(json_file)
             flattened = document.flattened()
             unified = flattened.unified()
             # Unification merges duplicate records, so the unified document
             # must be strictly smaller for these fixtures.
             self.assertLess(len(unified.get_records()), len(flattened.get_records()))
Example #30
0
    def test_decoding_unicode_value(self):
        """A non-ASCII character in a PROV-JSON label survives decoding."""
        unicode_char = u'\u2019'
        json_content = u'''{
    "prefix": {
        "ex": "http://www.example.org"
    },
    "entity": {
        "ex:unicode_char": {
            "prov:label": "%s"
        }
    }
}''' % unicode_char

        prov_doc = ProvDocument.deserialize(content=json_content, format='json')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
Example #31
0
    def test_decoding_unicode_value(self):
        """A non-ASCII character in an RDF/Turtle label survives decoding."""
        unicode_char = u'\u2019'
        rdf_content = u'''
@prefix ex: <http://www.example.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    ex:unicode_char a prov:Entity ;
        rdfs:label "%s"^^xsd:string .
''' % unicode_char
        prov_doc = ProvDocument.deserialize(content=rdf_content,
                                            format='rdf', rdf_format='turtle')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
Example #32
0
    def assertRoundTripEquivalence(self, prov_doc, msg=None):
        """Serialize *prov_doc* in self.FORMAT, reload it, and assert equality.

        Subclasses set FORMAT; when it is None this is a no-op dummy test.
        """
        if self.FORMAT is None:
            # This is a dummy test, just return
            return

        with io.BytesIO() as stream:
            prov_doc.serialize(destination=stream, format=self.FORMAT, indent=4)
            # Rewind before deserializing what was just written.
            stream.seek(0, 0)

            prov_doc_new = ProvDocument.deserialize(source=stream, format=self.FORMAT)
            # Rewind again so the serialized content can be embedded in the
            # failure message below.
            stream.seek(0, 0)
            # Assume UTF-8 encoding which is forced by the particular
            # PROV XML implementation and should also work for the PROV
            # JSON implementation.
            msg_extra = "'%s' serialization content:\n%s" % (self.FORMAT, stream.read().decode("utf-8"))
            msg = "\n".join((msg, msg_extra)) if msg else msg_extra
            self.assertEqual(prov_doc, prov_doc_new, msg)
    def test_decoding_unicode_value(self):
        """A non-ASCII character in a PROV-JSON label survives decoding."""
        unicode_char = u'\u2019'
        json_content = u'''{
    "prefix": {
        "ex": "http://www.example.org"
    },
    "entity": {
        "ex:unicode_char": {
            "prov:label": "%s"
        }
    }
}''' % unicode_char

        prov_doc = ProvDocument.deserialize(content=json_content,
                                            format='json')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
Example #34
0
    def test_decoding_unicode_value(self):
        """A non-ASCII character in an RDF/Turtle label survives decoding."""
        unicode_char = u'\u2019'
        rdf_content = u'''
@prefix ex: <http://www.example.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    ex:unicode_char a prov:Entity ;
        rdfs:label "%s"^^xsd:string .
''' % unicode_char
        prov_doc = ProvDocument.deserialize(content=rdf_content,
                                            format='rdf', rdf_format='turtle')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
    def assertRoundTripEquivalence(self, prov_doc, msg=None):
        """Serialize *prov_doc* in self.FORMAT, reload it, and assert equality.

        Subclasses set FORMAT; when it is None this is a no-op dummy test.
        """
        if self.FORMAT is None:
            # This is a dummy test, just return
            return

        with io.BytesIO() as stream:
            prov_doc.serialize(destination=stream, format=self.FORMAT, indent=4)
            # Rewind before deserializing what was just written.
            stream.seek(0, 0)

            prov_doc_new = ProvDocument.deserialize(source=stream,
                                                    format=self.FORMAT)
            # Rewind again so the serialized content can be embedded in the
            # failure message below.
            stream.seek(0, 0)
            # Assume UTF-8 encoding which is forced by the particular
            # PROV XML implementation and should also work for the PROV
            # JSON implementation.
            msg_extra = u"'%s' serialization content:\n%s" % (
                self.FORMAT, stream.read().decode("utf-8"))
            msg = u'\n'.join((msg, msg_extra)) if msg else msg_extra
            self.assertEqual(prov_doc, prov_doc_new, msg)
    def test_decoding_unicode_value(self):
        """A non-ASCII character in a PROV-JSON label survives decoding."""
        unicode_char = "\u2019"
        json_content = (
            """{
    "prefix": {
        "ex": "http://www.example.org"
    },
    "entity": {
        "ex:unicode_char": {
            "prov:label": "%s"
        }
    }
}"""
            % unicode_char
        )

        prov_doc = ProvDocument.deserialize(content=json_content, format="json")
        e1 = prov_doc.get_record("ex:unicode_char")[0]
        self.assertIn(unicode_char, e1.get_attribute("prov:label"))
    def test_decoding_unicode_value(self):
        """A non-ASCII character in an RDF/Turtle label survives decoding."""
        unicode_char = "\u2019"
        rdf_content = (
            """
@prefix ex: <http://www.example.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    ex:unicode_char a prov:Entity ;
        rdfs:label "%s"^^xsd:string .
"""
            % unicode_char
        )
        prov_doc = ProvDocument.deserialize(
            content=rdf_content, format="rdf", rdf_format="turtle"
        )
        e1 = prov_doc.get_record("ex:unicode_char")[0]
        self.assertIn(unicode_char, e1.get_attribute("prov:label"))
def count_flatprovenancetypes_for_graphs(
    dataset_path: Path,
    graph_filenames: Collection[str],
    level: int,
    including_primitives_types: bool,
    counting_wdf_as_two: bool = False,
    ignored_types: FrozenSet[str] = ϕ,
) -> Tuple[List[Dict[int, Dict[FlatProvenanceType, int]]], List[List[float]]]:
    """Count flat provenance types for each graph at every level up to *level*.

    :param dataset_path: directory containing the serialized PROV graphs
    :param graph_filenames: file names (relative to *dataset_path*) to process
    :param level: highest type level to compute (inclusive)
    :param including_primitives_types: forwarded to
        calculate_flat_provenance_types
    :param counting_wdf_as_two: count wasDerivedFrom as a 2-length edge
    :param ignored_types: type names to exclude; ϕ is presumably an empty
        frozenset constant defined elsewhere -- TODO confirm
    :return: (results, timings) — per graph, a mapping level -> type counts,
        and the per-level computation durations in seconds
    """
    logger.debug(
        "Producing linear provenance types up to level %s "
        "(with application types: %s, counting derivations as 2-length edges: %s) "
        "for %d graphs...",
        level,
        including_primitives_types,
        counting_wdf_as_two,
        len(graph_filenames),
    )
    results = []  # type: List[Dict[int, Dict[FlatProvenanceType, int]]]
    timings = []  # type: List[List[float]]
    for graph_filename in graph_filenames:
        filepath = dataset_path / graph_filename
        prov_doc = ProvDocument.deserialize(filepath)
        durations = []  # type: List[float]
        features = dict()  # type: Dict[int, Dict[FlatProvenanceType, int]]
        for h in range(level + 1):
            timer = Timer(verbose=False)
            with timer:
                fp_types = calculate_flat_provenance_types(
                    prov_doc,
                    h,
                    including_primitives_types,
                    counting_wdf_as_two,
                    ignored_types=ignored_types,
                )
            # counting only the last level
            features[h] = Counter(fp_types[h].values())
            durations.append(timer.interval)
        results.append(features)
        timings.append(durations)
    return results, timings
Example #39
0
def calculate_provenance_features_for_file(filepath: Path) -> list:
    """Compute provenance network metrics for a single PROV file.

    :param filepath: path to a serialized PROV document
    :return: the version-5 metric values (minus the last four entries)
        followed by per-relation-type counts and the computation time
    :raises Exception: re-raises any deserialization or metric failure
        after logging the offending file
    """
    # Calculate Provenance Network Metrics (22) and number of edge types
    try:
        # load the file
        prov_doc = ProvDocument.deserialize(filepath)
    except Exception:
        logger.error("Cannot deserialize %s", filepath)
        # Bare raise preserves the original exception and traceback intact
        # (the original `raise e` re-raised via the bound name).
        raise
    try:
        timer = Timer(verbose=False)
        with timer:
            # counting the record types
            rec_type_counts = count_record_types(prov_doc)
            prov_rel_cols = [
                rec_type_counts.get(rec_type, 0)
                for rec_type in PROV_RELATION_NAMES
            ]
            mv5 = version5(prov_doc, flat=True)  # calculate

        return mv5[:-4] + prov_rel_cols + [timer.interval]
    except Exception:
        logger.error("Cannot calculate metrics for %s", filepath)
        raise
    def __init__(self, database_helper, full_provenance=False):
        """
        Initializes the provenance for the mjclawar_rarshad project.

        Resumes from the stored provenance plan when full_provenance is set,
        otherwise starts with a fresh ProvDocument, then registers all
        project namespaces on the document.

        Parameters
        ----------
        database_helper: DatabaseHelper
        full_provenance: bool

        Returns
        -------
        """
        assert isinstance(database_helper, DatabaseHelper)

        self.database_helper = database_helper
        if full_provenance:
            # Continue from the previously saved provenance plan.
            self.prov_doc = ProvDocument.deserialize(dir_info.plan_json)
        else:
            self.prov_doc = ProvDocument()
        self.prov_doc.add_namespace(mcras.BDP_NAMESPACE.name, mcras.BDP_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ALG_NAMESPACE.name, mcras.ALG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.DAT_NAMESPACE.name, mcras.DAT_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.LOG_NAMESPACE.name, mcras.LOG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ONT_NAMESPACE.name, mcras.ONT_NAMESPACE.link)
Example #41
0
 def assertPROVJSONRoundTripEquivalence(self, prov_doc, msg=None):
     """Serialize *prov_doc* to PROV-JSON, reload it, and assert equality."""
     serialized = prov_doc.serialize(indent=4)
     reloaded = ProvDocument.deserialize(content=serialized)
     self.assertEqual(prov_doc, reloaded, msg)