# Example 1
    def test_evaluate_museum(self):
        """
        Here we have a class node with no data nodes, a list of ontologies, class instance link.
        Not all columns from file get mapped.
        :return:
        """
        dataset = self._datasets.upload(self._museum_file)

        # Upload every ontology found in the museum OWL directory.
        ontologies = [
            self._ontologies.upload(os.path.join(self._museum_owl_dir, name))
            for name in os.listdir(self._museum_owl_dir)
        ]

        new_json = dataset.bind_ssd(self._museum_ssd, ontologies,
                                    KARMA_DEFAULT_NS)

        ssd = SSD(dataset, ontologies).update(new_json, self._datasets,
                                              self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 6)
        # class instance, data property and object property links together
        self.assertEqual(len(ssd.links), 14)
        self.assertEqual(len(ssd.data_nodes), 10)
        self.assertEqual(len(ssd.mappings), 10)

        # Comparing an SSD against itself must yield perfect scores.
        res = self._ssds.compare(ssd, ssd, False, False)
        print(res)

        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
# Example 2
    def test_evaluate_places_mix(self):
        """
        Tests evaluation for places_mix
        :return:
        """
        dataset = self._datasets.upload(
            os.path.join(self._data_path, "places_mix.csv"))
        on = self._ontologies.upload(self._test_owl)

        assert (issubclass(type(dataset), DataSet))

        new_json = dataset.bind_ssd(
            os.path.join(self._ssd_path, "places_mix.ssd"),
            [on], str(on._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)
        pprint(ssd.json)

        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 5)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 1)  # these are only object properties

        # An SSD compared against itself must score perfectly.
        res = self._ssds.compare(ssd, ssd, False, True)
        print(res)

        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
# Example 3
    def test_s27(self):
        """
        Uploads the full museum benchmark, then checks that the s27 dataset's
        SSD survives an upload/parse round trip on the server.
        :return:
        """
        ontologies = self._add_owls()
        self.assertEqual(len(self._ontologies.items), 11)
        self._add_datasets(ontologies)
        self.assertEqual(len(self._ssds.items), 29)
        self.assertEqual(len(self._datasets.items), 29)

        # first grab the JSON object from the dataset...
        # Fail fast with a clear message if it is missing, instead of letting
        # bind_ssd blow up on the original empty-string sentinel.
        s27_ds = next((ds for ds in self._datasets.items
                       if "s27-s-the-huntington.json" in ds.filename), None)
        self.assertIsNotNone(s27_ds,
                             "s27-s-the-huntington dataset not found on server")

        # secondly grab the reference SSD
        s27_ssd = os.path.join(self._benchmark_path, "ssd",
                               "s27-s-the-huntington.json.ssd")

        # bind the dataset and ssd together...
        new_json = s27_ds.bind_ssd(s27_ssd, ontologies, KARMA_DEFAULT_NS)

        # create a new ssd and update with the reference JSON
        empty_ssd = SSD(s27_ds, ontologies)
        ssd = empty_ssd.update(new_json, self._datasets, self._ontologies)

        # upload to the server and ensure it is parsed correctly...
        f = self._ssds.upload(ssd)
        self.assertEqual(set(ssd.mappings), set(f.mappings))
# Example 4
    def test_ssd_evaluate_tricky_cities(self):
        """
        Here the ssd has two class nodes of the same type.
        We use the evaluate method of ssd and not from the server.
        :return:
        """
        self._datasets.upload(self._cities_file)
        on = self._ontologies.upload(self._test_owl)

        dataset = self._datasets.items[0]
        assert (issubclass(type(dataset), DataSet))

        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._tricky_cities_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        # two class nodes of the same type, joined by one object property
        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)
        self.assertEqual(len(ssd.object_links), 1)

        # client-side evaluation against itself must be perfect
        res = ssd.evaluate(ssd)
        print(res)

        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
    def test_evaluate_country_names(self):
        """
        Tests evaluation for country_names
        :return:
        """
        dataset = self._datasets.upload(
            os.path.join(self._data_path, "country_names.csv"))
        on = self._ontologies.upload(self._test_owl)

        assert (issubclass(type(dataset), DataSet))

        new_json = dataset.bind_ssd(
            os.path.join(self._ssd_path, "country_names.ssd"),
            [on], str(on._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        # local copy: one class node with four mapped data properties
        self.assertEqual(len(ssd.class_nodes), 1)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 0)  # these are only object properties

        # the uploaded copy must parse back to the same structure
        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 1)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 0)  # these are only object properties
# Example 6
    def test_evaluate_business(self):
        """
        Tests evaluation for business
        :return:
        """
        dataset = self._datasets.upload(self._business_file)
        on = self._ontologies.upload(self._test_owl)

        assert (issubclass(type(dataset), DataSet))

        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._business_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        # comparing an SSD against itself must yield perfect scores
        res = self._ssds.compare(ssd, ssd)
        print(res)

        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
    def test_show_multi_levels(self):
        """Visual check of show() on an SSD with many nesting levels."""
        data_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "resources", "museum_benchmark", "data_copy",
                                 "s07-s-13.json.csv")
        dataset = self._datasets.upload(data_path)

        print("--------------------1")

        # upload every ontology in the museum OWL directory
        ontologies = [
            self._ontologies.upload(os.path.join(self._museum_owl_dir, name))
            for name in os.listdir(self._museum_owl_dir)
        ]

        print("--------------------2")

        self.assertEqual(len(ontologies), 11)

        ssd_path = os.path.join(self._ssd_path, "s07_many_levels.ssd")
        new_json = dataset.bind_ssd(ssd_path, ontologies, KARMA_DEFAULT_NS)

        ssd = SSD(dataset, ontologies).update(new_json, self._datasets,
                                              self._ontologies)

        print("HERE!!!!!!!!!!!!!!!!!!!")
        ssd.show()
        print(ssd)

        # deliberate failure so the rendered output is always inspected
        self.fail()
    def test_evaluate_paintings(self):
        """
        Here we have a class node with no data nodes
        :return:
        """
        self._datasets.upload(self._paintings_file)
        on = self._ontologies.upload(self._objects_owl)

        dataset = self._datasets.items[0]
        assert (issubclass(type(dataset), DataSet))

        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._paintings_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        # local copy: 3 class nodes, 2 data + 2 object links, 2 mapped columns
        self.assertEqual(len(ssd.class_nodes), 3)
        self.assertEqual(len(ssd.links), 4)
        self.assertEqual(len(ssd.data_links), 2)
        self.assertEqual(len(ssd.object_links), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)

        # the server round trip must preserve the structure
        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 3)
        self.assertEqual(len(uploaded.links), 4)
        self.assertEqual(len(uploaded.data_links), 2)
        self.assertEqual(len(uploaded.object_links), 2)
        self.assertEqual(len(uploaded.data_nodes), 2)
        self.assertEqual(len(uploaded.mappings), 2)
    def test_tricky_cities(self):
        """
        Here the ssd has two class nodes of the same type
        :return:
        """
        self._datasets.upload(self._cities_file)
        on = self._ontologies.upload(self._test_owl)
        dataset = self._datasets.items[0]

        assert (issubclass(type(dataset), DataSet))

        ontology = self._ontologies.items[0]
        new_json = dataset.bind_ssd(self._tricky_cities_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        # two class nodes of the same type joined by one object property
        self.assertEqual(len(ssd.class_nodes), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)
        self.assertEqual(len(ssd.object_links), 1)

        # the uploaded copy must keep the same structure
        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(self._ssds.items), 1)
        self.assertEqual(len(uploaded.class_nodes), 2)
        self.assertEqual(len(uploaded.data_nodes), 2)
        self.assertEqual(len(uploaded.mappings), 2)
        self.assertEqual(len(uploaded.object_links), 1)
    def test_evaluate_museum(self):
        """
        Here we have a class node with no data nodes, a list of ontologies, class instance link.
        Not all columns from file get mapped.
        :return:
        """
        dataset = self._datasets.upload(self._museum_file)

        # upload every ontology in the museum OWL directory
        ontologies = [
            self._ontologies.upload(os.path.join(self._museum_owl_dir, name))
            for name in os.listdir(self._museum_owl_dir)
        ]

        assert (issubclass(type(dataset), DataSet))

        self.assertEqual(len(ontologies), 11)

        new_json = dataset.bind_ssd(self._museum_ssd, ontologies,
                                    KARMA_DEFAULT_NS)

        ssd = SSD(dataset, ontologies).update(new_json, self._datasets,
                                              self._ontologies)

        # local copy of the semantic model
        self.assertEqual(len(ssd.class_nodes), 6)
        self.assertEqual(len(ssd.links), 14)        # class instance, data property, object property
        self.assertEqual(len(ssd.data_links), 9)    # class instance, data property
        self.assertEqual(len(ssd.object_links), 5)  # object property
        self.assertEqual(len(ssd.data_nodes), 10)
        self.assertEqual(len(ssd.mappings), 10)

        # the uploaded copy must parse back identically
        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(uploaded.class_nodes), 6)
        self.assertEqual(len(uploaded.links), 14)        # class instance, data property, object property
        self.assertEqual(len(uploaded.data_links), 9)    # class instance, data property
        self.assertEqual(len(uploaded.object_links), 5)  # object property
        self.assertEqual(len(uploaded.data_nodes), 10)
        self.assertEqual(len(uploaded.mappings), 10)
# Example 11
    def test_evaluate_paintings(self):
        """
        Here we have a class node with no data nodes
        :return:
        """
        self._datasets.upload(self._paintings_file)
        on = self._ontologies.upload(self._objects_owl)

        dataset = self._datasets.items[0]
        assert (issubclass(type(dataset), DataSet))

        ontology = self._ontologies.items[0]

        new_json = dataset.bind_ssd(self._paintings_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        self.assertEqual(len(ssd.class_nodes), 3)
        self.assertEqual(len(ssd.links), 4)
        self.assertEqual(len(ssd.data_links), 2)
        self.assertEqual(len(ssd.object_links), 2)
        self.assertEqual(len(ssd.data_nodes), 2)
        self.assertEqual(len(ssd.mappings), 2)

        # comparing an SSD against itself must yield perfect scores
        res = self._ssds.compare(ssd, ssd, True, True)
        print(res)

        self.assertEqual(res['precision'], 1)
        self.assertEqual(res['recall'], 1)
        self.assertEqual(res['jaccard'], 1)
# Example 12
 def items(self):
     """Maintains a list of SSD objects"""
     # Rebuild each stored SSD from its server-side blob.
     return tuple(
         SSD().update(self._api.item(key),
                      self._dataset_endpoint,
                      self._ontology_endpoint)
         for key in self._api.keys())
# Example 13
    def _build_simple(self):
        """Upload the test dataset/ontology and return a fresh, empty SSD."""
        dataset = self._datasets.upload(self._test_file)
        ontology = self._ontologies.upload(self._test_owl)

        ssd = SSD(dataset=dataset, ontology=ontology)

        # a brand-new SSD has no semantic model yet, only the source columns
        self.assertEqual(len(ssd.data_nodes), 0)
        self.assertEqual(len(ssd.links), 0)
        self.assertEqual(len(ssd.columns), 4)
        return ssd
# Example 14
def init_ssd(target):
    """Populate *target* with a canned dataset, ontology and empty SSD fixture."""
    # The same path and timestamp literals recur throughout the fixture.
    csv_path = "/Users/li151/Dev/serene/./storage/datasets/2035625835/businessinfo.csv"
    stamp = "2017-03-16T15:29:03.388"

    target.dataset_json = {
        "dateCreated": stamp,
        "dateModified": stamp,
        "description": "",
        "filename": "businessInfo.csv",
        "id": 2035625835,
        "path": csv_path,
        "typeMap": {},
        "columns": [
            {
                "datasetID": 2035625835,
                "id": 1246005714,
                "index": 0,
                "logicalType": "string",
                "name": "company",
                "path": csv_path,
                "sample": ["Data61"],
                "size": 59,
            },
            {
                "datasetID": 2035625835,
                "id": 281689915,
                "index": 1,
                "logicalType": "string",
                "name": "ceo",
                "path": csv_path,
                "sample": ["Garv Mcowen"],
                "size": 59,
            },
        ],
    }

    target.dataset = DataSet(target.dataset_json)
    target.ontology = Ontology().update({
        "name": __file__,
        "id": 123,
        "description": "test ontology",
        "dateCreated": stamp,
        "dateModified": stamp,
    })

    target.ssd = SSD(target.dataset, target.ontology, "test ssd")
    target.ssd_json = target.ssd.json
# Example 15
    def test_evaluate_country_names_zero(self):
        """
        Tests evaluation for country_names
        If we ignore everything, then it will all 0
        :return:
        """
        dataset = self._datasets.upload(
            os.path.join(self._data_path, "country_names.csv"))
        on = self._ontologies.upload(self._test_owl)

        new_json = dataset.bind_ssd(
            os.path.join(self._ssd_path, "country_names.ssd"),
            [on], str(on._prefixes['']))

        ssd = SSD(dataset, on).update(new_json, self._datasets,
                                      self._ontologies)

        # with both ignore flags set there is nothing left to match,
        # so all scores collapse to zero
        res = self._ssds.compare(ssd, ssd, True, True)
        print(res)

        self.assertEqual(res['precision'], 0)
        self.assertEqual(res['recall'], 0)
        self.assertEqual(res['jaccard'], 0)
    def test_business(self):
        """
        Tests business info
        :return:
        """
        dataset = self._datasets.upload(self._business_file)
        ontology = self._ontologies.upload(self._test_owl)

        assert (issubclass(type(dataset), DataSet))

        new_json = dataset.bind_ssd(self._business_ssd, [ontology],
                                    str(ontology._prefixes['']))

        ssd = SSD(dataset, [ontology]).update(new_json, self._datasets,
                                              self._ontologies)
        pprint(ssd.json)

        # local copy of the semantic model
        self.assertEqual(len(ssd.class_nodes), 4)
        self.assertEqual(len(ssd.data_nodes), 4)
        self.assertEqual(len(ssd.mappings), 4)
        self.assertEqual(len(ssd.links), 7)         # class and data links
        self.assertEqual(len(ssd.data_links), 4)    # these are only data properties
        self.assertEqual(len(ssd.object_links), 3)  # these are only object properties

        # the uploaded copy must parse back identically
        uploaded = self._ssds.upload(ssd)
        self.assertTrue(uploaded.stored)
        self.assertEqual(len(self._ssds.items), 1)
        self.assertEqual(len(uploaded.class_nodes), 4)
        self.assertEqual(len(uploaded.data_nodes), 4)
        self.assertEqual(len(uploaded.mappings), 4)
        self.assertEqual(len(uploaded.links), 7)         # class and data links
        self.assertEqual(len(uploaded.data_links), 4)    # these are only data properties
        self.assertEqual(len(uploaded.object_links), 3)  # these are only object properties
# Example 17
    def _add_datasets(self, ontologies):
        """Upload every museum dataset together with its bound SSD."""
        # we need to unzip first the data
        archive = os.path.join(self._museum_data, "data.tar.gz")
        if os.path.exists(archive):
            with tarfile.open(archive) as f:
                f.extractall(path=self._museum_data)

        # add datasets with their ssds
        for name in os.listdir(self._museum_data):
            if name.endswith(".gz"):
                # skip the archive itself
                continue
            ds_path = os.path.join(self._museum_data, name)
            ssd_path = os.path.join(self._museum_ssd,
                                    os.path.splitext(name)[0] + ".ssd")

            _logger.debug("Adding dataset: {}".format(ds_path))
            dataset = self._datasets.upload(ds_path, description="museum_benchmark")

            _logger.debug("Adding ssd: {}".format(ssd_path))
            bound = dataset.bind_ssd(ssd_path, ontologies, KARMA_DEFAULT_NS)
            ssd = SSD(dataset, ontologies).update(bound, self._datasets,
                                                  self._ontologies)
            self._ssds.upload(ssd)
            # we remove the csv dataset once it is on the server
            os.remove(ds_path)