Example #1
0
    def test_map_multi_instance_links(self):
        """
        Tests mapping and linking when one class occurs as several instances

        Four columns are mapped onto the ``name`` property of four separate
        ``City`` instances (indices 0 to 3), which are then joined in a row
        by three ``nearby`` links.

        :return:
        """
        def city(index):
            # A new node is built on every call, as if written out inline.
            return ClassNode("City", index)

        ssd = self._build_simple()

        (ssd
         .map(Column("company"), DataNode(city(0), "name"))
         .map(Column("ceo"), DataNode(city(1), "name"))
         .map(Column("city"), DataNode(city(2), "name"))
         .map(Column("state"), DataNode(city(3), "name"))
         .link(city(0), "nearby", city(1))
         .link(city(1), "nearby", city(2))
         .link(city(2), "nearby", city(3)))

        # Checked in this order; the first mismatch fails the test.
        expected_sizes = (
            ("class_nodes", 4),
            ("data_nodes", 4),
            ("data_links", 4),
            ("object_links", 3),
        )
        for attribute, size in expected_sizes:
            self.assertEqual(len(getattr(ssd, attribute)), size)
Example #2
0
    def test_duplicate_data_node_block(self):
        """
        Tests the map function can only map a data node once

        ``Person.name`` is mapped to the ``ceo`` column first; mapping it a
        second time, to the ``city`` column, is the call expected to raise.

        :return:
        """
        simple = self._build_simple()

        # The valid mappings run outside the assertRaises context, so an
        # unexpected failure in them is reported as an error instead of
        # silently satisfying the assertion below.
        mapped = (simple
                  .map(Column("company"), "Organization.name")
                  .map(Column("ceo"), "Person.name"))

        # Only the duplicate use of Person.name may raise.
        with self.assertRaises(Exception):
            mapped.map(Column("city"), "Person.name")
 def test_eq(self):
     """
     Checks that two Columns updated from the same json compare equal,
     even though they were created with different names
     :return:
     """
     updated = self.column.update(self.column_json, self.sample_ds)
     twin = Column("testee2").update(self.column_json, self.sample_ds)

     self.assertEqual(updated, twin)
Example #4
0
    def test_map_full(self):
        """
        Tests the map function for SSD mapping when every column is mapped

        Each of the four columns is mapped to the ``name`` property of its
        own class; no links are added.

        :return:
        """
        def name_of(class_name):
            # Builds the "name" data node of a new class node.
            return DataNode(ClassNode(class_name), "name")

        ssd = self._build_simple()

        (ssd
         .map(Column("company"), name_of("Organization"))
         .map(Column("ceo"), name_of("Person"))
         .map(Column("city"), name_of("City"))
         .map(Column("state"), name_of("State")))

        # Checked in this order; the first mismatch fails the test.
        expected_sizes = (
            ("class_nodes", 4),
            ("data_nodes", 4),
            ("data_links", 4),
            ("object_links", 0),
        )
        for attribute, size in expected_sizes:
            self.assertEqual(len(getattr(ssd, attribute)), size)
    def test_add(self):
        """
        Tests that the add method functions correctly

        Adding an SSD and then an ontology must each grow the matching
        collection by one and leave ``stored`` False; adding a plain string
        must raise ValueError.

        :return:
        """
        octopus = self._octopus()

        self.assertEqual(octopus.stored, True)

        # upload a test dataset...
        datasets = self._serene.datasets
        csv_path = os.path.join(
            "tests", "resources", "data", 'postalCodeLookup.csv')
        postal_code_ds = datasets.upload(csv_path)

        # build a test ssd on top of the uploaded dataset
        postal_code_ssd = (
            self._serene.SSD(postal_code_ds, self.ontology, name='postal-code')
            .map(Column("zipcode"), "Place.postalCode")
            .map(Column("city"), "City.name")
            .map(Column("state"), "State.name"))

        # check there is a new ssd available
        ssd_count = len(octopus.ssds)
        octopus.add(postal_code_ssd)
        self.assertEqual(ssd_count + 1, len(octopus.ssds))
        self.assertEqual(octopus.stored, False)

        # create a test ontology
        extra_ontology = self._ontology()

        # check that there is a new ontology available...
        ontology_count = len(octopus.ontologies)
        octopus.add(extra_ontology)
        self.assertEqual(ontology_count + 1, len(octopus.ontologies))
        self.assertEqual(octopus.stored, False)

        # anything else is rejected
        with self.assertRaises(ValueError):
            octopus.add("junk")
Example #6
0
    def test_map_simple(self):
        """
        Tests the map function for SSD mapping with a single mapped column

        :return:
        """
        ssd = self._build_simple()

        person_name = DataNode(ClassNode("Person"), "name")
        ssd.map(Column("ceo"), person_name)

        # Checked in this order; the first mismatch fails the test.
        expected_sizes = (
            ("class_nodes", 1),
            ("data_nodes", 1),
            ("data_links", 1),
            ("object_links", 0),
        )
        for attribute, size in expected_sizes:
            self.assertEqual(len(getattr(ssd, attribute)), size)
Example #7
0
 def __init__(self, json):
     """
     Builds a DataSet object from the json of a response

     :param json: mapping read for the keys 'id', 'columns', 'filename',
         'path', 'typeMap', 'description', 'dateCreated' and 'dateModified'
     """
     self.id = json['id']

     # Every column starts with a placeholder name and is then filled in
     # from its json entry. The attribute is bound only once the whole
     # list has been built.
     columns = []
     for column_json in json['columns']:
         columns.append(Column('--').update(column_json, self))
     self.columns = columns

     self.filename = json['filename']
     self.path = json['path']
     self.type_map = json['typeMap']
     self.description = json['description']
     self.date_created = convert_datetime(json['dateCreated'])
     self.date_modified = convert_datetime(json['dateModified'])

     # One DataFrame column per dataset column, keyed by column name.
     samples = {}
     for column in self.columns:
         samples[column.name] = column.sample
     self.sample = pd.DataFrame(samples)

     self._stored = True
Example #8
0
    def test_remove_column(self):
        """
        Tests the removal function when removing data nodes and columns

        Four columns are mapped, then the ``Person.name`` data node and the
        ``city`` column are removed, which should leave two of each element.

        :return:
        """
        ssd = self._build_simple()

        (ssd
         .map("company", "Organization.name")
         .map("ceo", "Person.name")
         .map("city", "City.name")
         .map("state", "State.name")
         .remove(DataNode(ClassNode("Person"), "name"))
         .remove(Column("city")))

        # Checked in this order; the first mismatch fails the test.
        expected_sizes = (
            ("class_nodes", 2),
            ("data_nodes", 2),
            ("data_links", 2),
            ("object_links", 0),
        )
        for attribute, size in expected_sizes:
            self.assertEqual(len(getattr(ssd, attribute)), size)
    def _ssds(self, datasets, ontology):
        """
        Builds the five example SSDs and uploads each of them

        :param datasets: indexable collection; items 0 to 4 back the
            business-info, employee-addr, cities, employees and postal-code
            SSDs respectively
        :param ontology: ontology handed to every SSD
        :return: list of the five SSDs, in the order given above
        """
        def name_of(class_name):
            # Builds the "name" data node of a new class node.
            return DataNode(ClassNode(class_name), "name")

        business_info_ssd = (
            self._serene.SSD(datasets[0], ontology, name='business-info')
            .map(Column("company"), name_of("Organization"))
            .map(Column("ceo"), name_of("Person"))
            .map(Column("city"), name_of("City"))
            .map(Column("state"), name_of("State"))
            .link("Organization", "operatesIn", "City")
            .link("Organization", "ceo", "Person")
            .link("City", "state", "State"))

        employee_address_ssd = (
            self._serene.SSD(datasets[1], ontology, name='employee-addr')
            .map("name", "Person.name")
            .map("address", "Place.name")
            .map("postcode", "Place.postalCode")
            .link("Person", "livesIn", "Place"))

        get_cities_ssd = (
            self._serene.SSD(datasets[2], ontology, name='cities')
            .map(Column("city"), name_of("City"))
            .map(Column("state"), name_of("State"))
            .link("City", "state", "State"))

        get_employees_ssd = (
            self._serene.SSD(datasets[3], ontology, name='employees')
            .map(Column("employer"), name_of("Organization"))
            .map(Column("employee"), name_of("Person"))
            .link("Person", "worksFor", "Organization"))

        postal_code_ssd = (
            self._serene.SSD(datasets[4], ontology, name='postal-code')
            .map(Column("zipcode"), "Place.postalCode")
            .map(Column("city"), "City.name")
            .map(Column("state"), "State.name")
            .link("City", "state", "State")
            .link("City", "isPartOf", "Place"))

        ssds = [
            business_info_ssd,
            employee_address_ssd,
            get_cities_ssd,
            get_employees_ssd,
            postal_code_ssd,
        ]

        # Uploading starts only after all five have been built.
        for example in ssds:
            self._serene.ssds.upload(example)

        return ssds
 def setUp(self):
     """
     Assigns a new Column named "testee" to self.column
     """
     self.column = Column("testee")
class TestColumn(unittest.TestCase):
    """
    Test suite for the Column class
    """
    def __init__(self, methodName='runTest'):
        """
        Prepares the fixtures used by the Column tests

        :param methodName: name of the test method, handed on to
            unittest.TestCase
        """
        super().__init__(methodName)

        # Values repeated throughout the fixtures below.
        dataset_id = 2035625835
        csv_path = 'tests/resources/data/businessinfo.csv'
        timestamp = '2017-03-16T15:29:03.388'

        company_column = {
            'datasetID': dataset_id,
            'id': 1234567890,
            'index': 0,
            'logicalType': 'string',
            'name': 'company',
            'path': csv_path,
            'sample': ['Data61'],
            'size': 59,
        }
        ceo_column = {
            'datasetID': dataset_id,
            'id': 123456789,
            'index': 1,
            'logicalType': 'string',
            'name': 'ceo',
            'path': csv_path,
            'sample': ['Garv Mcowen'],
            'size': 59,
        }

        self.sample_ds = DataSet({
            'dateCreated': timestamp,
            'dateModified': timestamp,
            'description': '',
            'filename': 'businessInfo.csv',
            'id': dataset_id,
            'path': csv_path,
            'typeMap': {},
            'columns': [company_column, ceo_column],
        })

        # Json that the tests feed into Column.update.
        self.column_json = {
            'datasetID': dataset_id,
            'id': 1246005714,
            'index': 0,
            'logicalType': 'string',
            'name': 'company',
            'path': csv_path,
            'sample': ['Data61'],
            'size': 59,
        }
        self.column = None

    def setUp(self):
        """Gives every test its own Column named "testee" """
        self.column = Column("testee")

    def test_update(self):
        """
        Tests that update copies the json values onto the column
        :return:
        """
        self.column.update(self.column_json, self.sample_ds)

        # (column attribute, json key) pairs; only filename/path differ.
        attribute_to_key = (
            ("index", "index"),
            ("filename", "path"),
            ("name", "name"),
            ("id", "id"),
            ("size", "size"),
            ("datasetID", "datasetID"),
            ("sample", "sample"),
            ("logicalType", "logicalType"),
        )
        for attribute, key in attribute_to_key:
            self.assertEqual(getattr(self.column, attribute),
                             self.column_json[key])

    def test_repr(self):
        """Tests that repr renders the column as expected"""
        rendered = repr(self.column)
        self.assertEqual(rendered, "Column(testee)")

    def test_eq(self):
        """
        Checks that two Columns updated from the same json compare equal,
        even though they were created with different names
        :return:
        """
        updated = self.column.update(self.column_json, self.sample_ds)
        twin = Column("testee2").update(self.column_json, self.sample_ds)

        self.assertEqual(updated, twin)

    def test_hash(self):
        """
        Tests that the hash of an updated Column equals the hash of the
        (name, id, datasetID) tuple taken from the json
        :return:
        """
        actual = hash(self.column.update(self.column_json, self.sample_ds))
        expected = hash((self.column_json["name"],
                         self.column_json["id"],
                         self.column_json["datasetID"]))

        self.assertEqual(actual, expected)