Esempio n. 1
0
def get_setup_entities(entity_names, data_files):
    """Create one loaded ``Entity`` per entity name.

    Args:
        entity_names: Names of the entities to create.
        data_files: Either a directory path (str), in which case the file
            names are resolved through ``_auto_find_file_names`` and joined
            onto that directory, or a list of file paths paired with
            ``entity_names`` by position.

    Returns:
        dict: Maps each entity name to its ``Entity`` after
        ``load_data_build_indices`` was called with the absolute path of
        the matching data file.

    Raises:
        TypeError: If ``data_files`` is a str that is not a directory, or
            is neither a str nor a list.
        ValueError: If the number of entity names and data files differ.
    """
    invalid_data_files_msg = (
        "Invalid data_files given. data_files should be a directory path to files"
        " or list() of file paths themselves")

    if isinstance(data_files, str):
        # Guard clause: a str is only accepted when it names a directory.
        if not os.path.isdir(data_files):
            raise TypeError(invalid_data_files_msg)
        data_dir = data_files
        found_names = _auto_find_file_names(entity_names, data_dir)
        data_files = [os.path.join(data_dir, found) for found in found_names]
    elif not isinstance(data_files, list):
        raise TypeError(invalid_data_files_msg)

    if len(entity_names) != len(data_files):
        raise ValueError(
            "Entity names and data files should be of same length")

    loaded = {}
    for entity_name, path in zip(entity_names, data_files):
        entity = Entity(entity_name)
        entity.load_data_build_indices(os.path.abspath(path))
        loaded[entity_name] = entity
    return loaded
 def test_duplicate_primary_key(self):
     """Loading data with a repeated primary key raises DuplicatePrimaryKeyError.
     """
     test_in_data = [{"_id": 1}, {"_id": 1}]
     entity = Entity("user")
     with pytest.raises(DuplicatePrimaryKeyError) as error:
         entity.load_data_build_indices(test_in_data)
     # Checked outside the ``with`` block: a statement placed after the
     # raising call inside the block never executes.
     assert "Duplicate primary key value: " in str(error.value)
    def test_entity_invalid_file(self):
        """Loading from a path that does not exist raises FileNotFoundError.

        One Entity is reused for every path; each error message must contain
        "[Errno 2] No such file or directory:".
        """
        entity = Entity("user")
        missing_paths = ["a", "nofile.txt", "{}", "set", "True", "None"]
        expected_fragment = "[Errno 2] No such file or directory:"

        for missing_path in missing_paths:
            with pytest.raises(FileNotFoundError) as caught:
                entity.load_data_build_indices(missing_path)
            assert expected_fragment in str(caught.value)
    def test_build_index_missing_primary_key(self):
        """Data points without the primary key raise PrimaryKeyNotFoundError.
        """
        datasets_without_pkey = ([{}], [{"url": "https://test.com"}])

        for dataset in datasets_without_pkey:
            # Fresh entity per dataset so cases cannot influence each other.
            ticket = Entity("ticket")
            with pytest.raises(PrimaryKeyNotFoundError):
                ticket.load_data_build_indices(dataset)
    def test_build_pkey_index_unhashable(self):
        """Unhashable primary key values (dict, set, list) must not raise TypeError.

        The expected indices key each data point under the string form of
        its primary key value.
        """
        # Each case keeps its input and expected indices together, so a case
        # can never be silently dropped by zipping lists of unequal length.
        cases = [
            (
                [{"_id": {1: 1}}],
                {"_id": {"{1: 1}": {"_id": {1: 1}}}},
            ),
            (
                [{"_id": {1}}],
                {"_id": {"{1}": {"_id": {1}}}},
            ),
            (
                [{"_id": [1]}],
                {"_id": {"[1]": {"_id": [1]}}},
            ),
        ]

        for inp, out in cases:
            entity = Entity("ticket")
            entity.load_data_build_indices(inp)
            assert entity._indices == out
    def test_build_load_invalid_data_type(self):
        """Unsupported input types raise TypeError with a fixed message.

        Valid data = [], [{"primary_key": }], 'path/to/file'. The inputs
        tried here (int, set, tuple, bool, None, an Entity) must be rejected.
        """
        expected_message = (
            "Data to load should be one of file path as str(), data point as dict() or data as list of data point()"
        )
        rejected_inputs = [1, {1}, (), True, None, Entity("user")]

        for bad_input in rejected_inputs:
            ticket = Entity("ticket")
            with pytest.raises(TypeError) as caught:
                ticket.load_data_build_indices(bad_input)
            assert expected_message == str(caught.value)
def get_entity_with_data_indices(entity_name):
    """Instantiate an Entity and load its test data file.

    The data file is looked up in the ``test_data`` directory next to this
    module, under the name ``test_data_import_<entity_name>s.json``.

    Args:
        entity_name (str): One of user, organization, ticket

    Returns:
        Entity(): entity object of name entity_name, after
        ``load_data_build_indices`` was called with the test data file path
    """
    # Built with os.path.join rather than a hard-coded "/" so the path uses
    # the platform's separator.
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "test_data")
    data_file_name = os.path.join(
        test_data_dir, f"test_data_import_{entity_name}s.json")
    entity = Entity(entity_name)
    entity.load_data_build_indices(data_file_name)
    return entity
    def test_entity_invalid_json_structure(self, tmpdir):
        """Files holding malformed JSON make the load raise ValueError.
        """
        malformed_payloads = (
            "{", "[}]", '{"_id":1 "2":2}', "", " ", "[", "nothing"
        )
        tmp_file_name = f"{tmpdir}/invalid_json.json"

        for payload in malformed_payloads:
            # The same path is handed to write_to_file for every payload.
            write_to_file(payload, tmp_file_name)
            user = Entity("user")
            with pytest.raises(ValueError):
                user.load_data_build_indices(tmp_file_name)
    def test_custom_primary_key(self, tmpdir):
        """An Entity given a custom primary key name reports and indexes by it.
        """
        custom_key = "cid"
        data_path = f"{tmpdir}/custom_prim_key.json"

        write_to_file('[{"cid": 1}]', data_path)
        user = Entity("user", custom_key)
        user.load_data_build_indices(data_path)

        assert custom_key == user.primary_key
        assert {"cid": {"1": {"cid": 1}}} == user._indices
Esempio n. 10
0
    def test_build_index_tags(self):
        """List-valued fields are indexed per element.

        Per the expected data below, each tag maps to the list of primary
        keys carrying it, and an empty list is indexed under "".
        """
        cases = (
            (
                [{"_id": 1, "tags": ["tag1", "tag2"]}],
                {
                    "_id": {"1": {"_id": 1, "tags": ["tag1", "tag2"]}},
                    "tags": {"tag1": [1], "tag2": [1]},
                },
            ),
            (
                [{"_id": 1, "tags": []}],
                {
                    "_id": {"1": {"_id": 1, "tags": []}},
                    "tags": {"": [1]},
                },
            ),
        )

        for data, expected_indices in cases:
            ticket = Entity("ticket")
            ticket.load_data_build_indices(data)
            assert expected_indices == ticket._indices
Esempio n. 11
0
    def test_entity_valid_data_in_file(self, tmpdir):
        """Valid JSON file contents produce the expected indices.

        Empty data [] should result in an empty primary key index. {} is not
        covered here as it doesn't have the primary key in it.
        """
        # Maps the text written to the file to the indices expected after load.
        expected_by_content = {
            "[]": {"_id": {}},
            '{"_id": 1}': {"_id": {"1": {"_id": 1}}},
            '[{"_id": 1}]': {"_id": {"1": {"_id": 1}}},
            '[{"_id": 1, "d": 2}]': {
                "_id": {"1": {"_id": 1, "d": 2}},
                "d": {2: [1]},
            },
        }
        tmp_file_name = f"{tmpdir}/invalid_json.json"

        for content, expected_indices in expected_by_content.items():
            write_to_file(content, tmp_file_name)
            user = Entity("user")
            user.load_data_build_indices(tmp_file_name)
            assert expected_indices == user._indices
Esempio n. 12
0
    def test_entity_valid_data_no_file(self, tmpdir):
        """Valid in-memory data (list or single dict) produces the expected indices.

        Empty data [] should result in an empty primary key index. {} is not
        covered here as it doesn't have the primary key in it.
        """
        cases = (
            ([], {"_id": {}}),
            ({"_id": 1}, {"_id": {"1": {"_id": 1}}}),
            ([{"_id": 1}], {"_id": {"1": {"_id": 1}}}),
            (
                [{"_id": 1, "d": 2}],
                {
                    "_id": {"1": {"_id": 1, "d": 2}},
                    "d": {2: [1]},
                },
            ),
        )

        for data, expected_indices in cases:
            user = Entity("user")
            user.load_data_build_indices(data)
            assert expected_indices == user._indices
Esempio n. 13
0
    def test_build_index_unhashable(self):
        """Unhashable values (set, dict) in a non-primary-key field raise TypeError.
        """
        datasets = (
            [{"_id": 1, "unhash": set()}],
            [{"_id": 1, "tags": {}}],
        )

        for dataset in datasets:
            ticket = Entity("ticket")
            with pytest.raises(TypeError) as caught:
                ticket.load_data_build_indices(dataset)
            assert "Unhashable value" in str(caught.value)
Esempio n. 14
0
    def test_entity_missing_mandatory_key(self, tmpdir):
        """Missing '_id' in ANY data point of a file raises PrimaryKeyNotFoundError.
        """
        file_contents = [
            "{}",
            "[{}]",
            json.dumps({"url": "https://test.com"}),
            # Only the second data point lacks the key.
            json.dumps([{"_id": 1}, {"url": "https://test.com"}]),
        ]
        tmp_file_name = f"{tmpdir}/missing_id.json"

        for content in file_contents:
            write_to_file(content, tmp_file_name)
            user = Entity("user")
            with pytest.raises(PrimaryKeyNotFoundError) as caught:
                user.load_data_build_indices(tmp_file_name)
            assert "Cannot find _id in the data point:" in str(caught.value)
Esempio n. 15
0
    def test_entity_struct(self):
        """A freshly created Entity has the expected attributes.

        Checks that it has
                a primary key of "_id"
                at least an (empty) index on the primary key
                no data
                _build_indices
                load_data_build_indices
                search
        """
        user = Entity("user")

        assert user.primary_key == "_id"
        assert user._indices == {"_id": {}}
        assert user._data == []
        for attribute_name in ("_build_indices", "load_data_build_indices",
                               "search"):
            assert hasattr(user, attribute_name)
Esempio n. 16
0
    def test_build_index_valid_data(self):
        """Valid data should produce the expected _indices.

        If the data is
            - [] it should result in the vanilla (empty primary key) index
        """
        ticket_id = "436bf9b0-1147-4c0a-8439-6f79833bff5b"
        ticket_url = "http://initech.zendesk.com/api/v2/tickets/436bf9b0-1147-4c0a-8439-6f79833bff5b.json"
        ticket_external_id = "9210cdc9-4bee-485f-a078-35396cd74063"

        cases = (
            ([], {"_id": {}}),
            (
                [{"_id": 1, "name": "surya"}],
                {
                    "_id": {"1": {"_id": 1, "name": "surya"}},
                    "name": {"surya": [1]},
                },
            ),
            (
                # Two data points sharing a field value end up in one bucket.
                [{"_id": 1, "name": "surya"}, {"_id": 2, "name": "surya"}],
                {
                    "_id": {
                        "1": {"_id": 1, "name": "surya"},
                        "2": {"_id": 2, "name": "surya"},
                    },
                    "name": {"surya": [1, 2]},
                },
            ),
            (
                [{
                    "_id": ticket_id,
                    "url": ticket_url,
                    "external_id": ticket_external_id,
                }],
                {
                    "_id": {
                        ticket_id: {
                            "_id": ticket_id,
                            "url": ticket_url,
                            "external_id": ticket_external_id,
                        },
                    },
                    "url": {ticket_url: [ticket_id]},
                    "external_id": {ticket_external_id: [ticket_id]},
                },
            ),
        )

        for data, expected_indices in cases:
            ticket = Entity("ticket")
            ticket.load_data_build_indices(data)
            assert expected_indices == ticket._indices
Esempio n. 17
0
    def test_build_index_blank_values(self):
        """Corner cases: empty strings, spaces and empty lists as field values.
        """
        cases = (
            (
                [{"_id": ""}],
                {"_id": {"": {"_id": ""}}},
            ),
            (
                [{"_id": " "}],
                {"_id": {" ": {"_id": " "}}},
            ),
            (
                [{"_id": 1, "tags": []}],
                {
                    "_id": {"1": {"_id": 1, "tags": []}},
                    "tags": {"": [1]},
                },
            ),
            (
                [{"_id": "", "name": "surya"}],
                {
                    "_id": {"": {"_id": "", "name": "surya"}},
                    "name": {"surya": [""]},
                },
            ),
        )

        for data, expected_indices in cases:
            organization = Entity("organization")
            organization.load_data_build_indices(data)
            assert expected_indices == organization._indices
Esempio n. 18
0
def get_entity_from_formatted_data(entity_name, data):
    """Create an Entity named ``entity_name`` and load ``data`` into it.

    Args:
        entity_name: Name passed to the ``Entity`` constructor.
        data: Passed unchanged to ``Entity.load_data_build_indices``.

    Returns:
        The Entity after ``load_data_build_indices(data)`` has been called.
    """
    loaded_entity = Entity(entity_name)
    loaded_entity.load_data_build_indices(data)
    return loaded_entity