# Example #1
def get_setup_entities(entity_names, data_files):
    """Create one Entity per name and load its data file.

    Args:
        entity_names (list): Names of the entities to create.
        data_files (str | list): Either a directory path, in which case the
            file names are resolved with ``_auto_find_file_names``, or a list
            of file paths, one per entity name and in the same order.

    Returns:
        dict: Mapping of entity name to its Entity object.

    Raises:
        TypeError: If data_files is a string that is not an existing
            directory, or is neither a string nor a list.
        ValueError: If entity_names and data_files differ in length.
    """
    invalid_data_files_msg = (
        "Invalid data_files given. data_files should be a directory path to files"
        " or list() of file paths themselves")

    if isinstance(data_files, str):
        # A string is only accepted when it names an existing directory.
        if not os.path.isdir(data_files):
            raise TypeError(invalid_data_files_msg)
        data_dir = data_files
        data_files = [
            os.path.join(data_dir, file_name)
            for file_name in _auto_find_file_names(entity_names, data_dir)
        ]
    elif not isinstance(data_files, list):
        raise TypeError(invalid_data_files_msg)

    if len(entity_names) != len(data_files):
        raise ValueError(
            "Entity names and data files should be of same length")

    entities = {}
    for name, data_file in zip(entity_names, data_files):
        ent = Entity(name)
        # NOTE(review): the block as received never used data_file; the load
        # call is assumed to have been dropped -- confirm that
        # load_data_build_indices accepts a file path here.
        ent.load_data_build_indices(data_file)
        entities[name] = ent

    return entities
 def test_duplicate_primary_key(self):
     """Loading rows that share a primary key value raises DuplicatePrimaryKeyError."""
     test_in_data = [{"_id": 1}, {"_id": 1}]
     entity = Entity("user")

     # NOTE(review): the statement under test was missing from this block as
     # received; load_data_build_indices is assumed to be the loader and the
     # two rows are assumed to be loaded together -- confirm.
     with pytest.raises(DuplicatePrimaryKeyError) as error:
         entity.load_data_build_indices(test_in_data)

     # The captured exception is only available once the context manager has
     # exited, so the message check must sit outside the with block.
     assert "Duplicate primary key value: " in str(error.value)
    def test_entity_invalid_file(self):
        """Loading from a path that does not exist raises FileNotFoundError."""
        entity = Entity("user")
        invalid_files = ["a", "nofile.txt", "{}", "set", "True", "None"]

        for invalid_file_name in invalid_files:
            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(FileNotFoundError) as error:
                entity.load_data_build_indices(invalid_file_name)
            assert "[Errno 2] No such file or directory:" in str(error.value)
    def test_build_index_missing_primary_key(self):
        """Data points without the primary key raise PrimaryKeyNotFoundError."""
        no_pkey_data = [[{}], [{"url": "https://test.com"}]]

        for no_pkey in no_pkey_data:
            entity = Entity("ticket")
            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(PrimaryKeyNotFoundError):
                entity.load_data_build_indices(no_pkey)
    def test_build_pkey_index_unhashable(self):
        """Unhashable primary key values do not raise TypeError.

        The expected indices below key the primary-key index on the string
        form of the value ("{1: 1}", "{1}", "[1]").
        """
        test_in_data = [
            {"_id": {1: 1}},
            {"_id": {1}},
            {"_id": [1]},
        ]

        test_out_data = [
            {"_id": {"{1: 1}": {"_id": {1: 1}}}},
            {"_id": {"{1}": {"_id": {1}}}},
            {"_id": {"[1]": {"_id": [1]}}},
        ]

        for inp, out in zip(test_in_data, test_out_data):
            entity = Entity("ticket")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(inp)
            assert entity._indices == out
    def test_build_load_invalid_data_type(self):
        """Loading data of an unsupported type raises TypeError.

        Per the asserted message, valid inputs are a file path (str), a data
        point (dict) or a list of data points; anything else is rejected.
        """
        invalid_input_data = [1, {1}, (), True, None, Entity("user")]

        for invalid_data_point in invalid_input_data:
            entity = Entity("ticket")
            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(TypeError) as error:
                entity.load_data_build_indices(invalid_data_point)
            assert (
                "Data to load should be one of file path as str(), data point as dict() or data as list of data point()"
                == str(error.value))
def get_entity_with_data_indices(entity_name):
    """Instantiate an Entity, load its test data file and build its indices.

    The data file is looked up next to this module, at
    ``test_data/test_data_import_<entity_name>s.json``.

    Args:
        entity_name (str): One of user, organization, ticket

    Returns:
        Entity(): entity object of name entity_name, with test data loaded
            and indices built
    """
    data_file_name = f"{os.path.dirname(os.path.abspath(__file__))}/test_data/test_data_import_{entity_name}s.json"
    entity = Entity(entity_name)
    # NOTE(review): the load call was missing from this block as received;
    # load_data_build_indices is assumed from the documented contract above
    # -- confirm.
    entity.load_data_build_indices(data_file_name)
    return entity
    def test_entity_invalid_json_structure(self, tmpdir):
        """Malformed JSON in an entity file raises a ValueError on load.

        json.JSONDecodeError is a subclass of ValueError, so a JSON decode
        failure satisfies this check.
        """
        for invalid_json in [
                "{", "[}]", '{"_id":1 "2":2}', "", " ", "[", "nothing"
        ]:
            tmp_file_name = f"{tmpdir}/invalid_json.json"
            write_to_file(invalid_json, tmp_file_name)

            entity = Entity("user")

            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(ValueError):
                entity.load_data_build_indices(tmp_file_name)
    def test_custom_primary_key(self, tmpdir):
        """An Entity created with a custom primary key indexes its data on that key."""
        tmp_file_name = f"{tmpdir}/custom_prim_key.json"
        test_data = '[{"cid": 1}]'
        test_primary_key = "cid"

        expected_index = {"cid": {"1": {"cid": 1}}}

        write_to_file(test_data, tmp_file_name)
        entity = Entity("user", test_primary_key)
        # NOTE(review): the load call was missing from this block as received
        # (the expected index is non-empty, so data must be loaded);
        # load_data_build_indices is assumed -- confirm.
        entity.load_data_build_indices(tmp_file_name)

        assert test_primary_key == entity.primary_key
        assert expected_index == entity._indices
    def test_build_index_tags(self):
        """List-valued fields are flattened: each list item gets its own index entry.

        An empty list is expected to be indexed under the empty string.
        """
        test_in_data = [
            {"_id": 1, "tags": ["tag1", "tag2"]},
            {"_id": 1, "tags": []},
        ]
        test_out_data = [
            {
                "_id": {"1": {"_id": 1, "tags": ["tag1", "tag2"]}},
                "tags": {"tag1": [1], "tag2": [1]},
            },
            {
                "_id": {"1": {"_id": 1, "tags": []}},
                "tags": {"": [1]},
            },
        ]

        for inp, out in zip(test_in_data, test_out_data):
            entity = Entity("ticket")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(inp)
            assert out == entity._indices
    def test_entity_valid_data_in_file(self, tmpdir):
        """Valid JSON file contents produce the expected indices.

        Empty data ``[]`` results in an empty primary-key index. ``{}`` is not
        valid input because it lacks the primary key, so it is not listed here.
        """
        # Maps the raw file contents to the indices expected after loading.
        test_io = {
            "[]": {
                "_id": {},
            },
            '{"_id": 1}': {
                "_id": {"1": {"_id": 1}},
            },
            '[{"_id": 1}]': {
                "_id": {"1": {"_id": 1}},
            },
            '[{"_id": 1, "d": 2}]': {
                "_id": {"1": {"_id": 1, "d": 2}},
                "d": {2: [1]},
            },
        }

        for in_data, expected_indices in test_io.items():
            tmp_file_name = f"{tmpdir}/invalid_json.json"
            write_to_file(in_data, tmp_file_name)

            entity = Entity("user")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(tmp_file_name)

            assert expected_indices == entity._indices
    def test_entity_valid_data_no_file(self, tmpdir):
        """Valid in-memory data produces the expected indices.

        Empty data ``[]`` results in an empty primary-key index. ``{}`` is not
        valid input because it lacks the primary key, so it is not listed here.
        """
        test_in_data = [[], {"_id": 1}, [{"_id": 1}], [{"_id": 1, "d": 2}]]
        test_out_data = [
            {"_id": {}},
            {"_id": {"1": {"_id": 1}}},
            {"_id": {"1": {"_id": 1}}},
            {
                "_id": {"1": {"_id": 1, "d": 2}},
                "d": {2: [1]},
            },
        ]

        for inp, out in zip(test_in_data, test_out_data):
            entity = Entity("user")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(inp)

            assert out == entity._indices
    def test_build_index_unhashable(self):
        """Unhashable values in non-primary-key fields raise TypeError."""
        test_in_data = [
            {"_id": 1, "unhash": set()},
            {"_id": 1, "tags": {}},
        ]

        for inp in test_in_data:
            entity = Entity("ticket")
            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(TypeError) as error:
                entity.load_data_build_indices(inp)
            assert "Unhashable value" in str(error.value)
    def test_entity_missing_mandatory_key(self, tmpdir):
        """A file with ANY data point missing '_id' raises PrimaryKeyNotFoundError."""
        for empty_data in [
                json.dumps({"url": "https://test.com"}),
                json.dumps([{
                    "_id": 1
                }, {
                    "url": "https://test.com"
                }])
        ]:
            tmp_file_name = f"{tmpdir}/missing_id.json"
            write_to_file(empty_data, tmp_file_name)

            entity = Entity("user")

            # NOTE(review): the call under test was missing from this block as
            # received; load_data_build_indices is assumed to be the loader
            # -- confirm.
            with pytest.raises(PrimaryKeyNotFoundError) as error:
                entity.load_data_build_indices(tmp_file_name)
            assert "Cannot find _id in the data point:" in str(error.value)
    def test_entity_struct(self):
        """A freshly created Entity has the expected default structure.

        Checks that the Entity instantiates with:
            - "_id" as its primary key
            - at least an (empty) index on the primary key
            - no data loaded
            - the _build_indices, load_data_build_indices and search attributes
        """
        entity = Entity("user")

        assert entity.primary_key == "_id"
        assert entity._indices == {"_id": {}}
        assert entity._data == []
        assert hasattr(entity, "_build_indices")
        assert hasattr(entity, "load_data_build_indices")
        assert hasattr(entity, "search")
    def test_build_index_valid_data(self):
        """Valid data produces the expected _indices.

        If the data is ``[]`` the result is the vanilla index: an empty index
        on the primary key only.
        """
        test_ticket_in_data = [
            [],
            [{
                "_id": 1,
                "name": "surya"
            }],
            [{
                "_id": 1,
                "name": "surya"
            }, {
                "_id": 2,
                "name": "surya"
            }],
            # NOTE(review): this case also carried a "url" field whose value
            # could not be recovered from the block as received; it is left
            # out of both the input and the expected output -- restore it
            # from the original fixture.
            [{
                "_id": "436bf9b0-1147-4c0a-8439-6f79833bff5b",
                "external_id": "9210cdc9-4bee-485f-a078-35396cd74063",
            }],
        ]
        test_ticket_out_data = [
            {
                "_id": {},
            },
            {
                "_id": {
                    "1": {"_id": 1, "name": "surya"},
                },
                "name": {"surya": [1]},
            },
            {
                "_id": {
                    "1": {"_id": 1, "name": "surya"},
                    "2": {"_id": 2, "name": "surya"},
                },
                "name": {"surya": [1, 2]},
            },
            {
                "_id": {
                    "436bf9b0-1147-4c0a-8439-6f79833bff5b": {
                        "_id": "436bf9b0-1147-4c0a-8439-6f79833bff5b",
                        "external_id": "9210cdc9-4bee-485f-a078-35396cd74063",
                    },
                },
                # Presumably value -> [primary keys], as in the "name"
                # indices above -- confirm.
                "external_id": {
                    "9210cdc9-4bee-485f-a078-35396cd74063":
                        ["436bf9b0-1147-4c0a-8439-6f79833bff5b"],
                },
            },
        ]

        for inp, out in zip(test_ticket_in_data, test_ticket_out_data):
            entity = Entity("ticket")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(inp)

            assert out == entity._indices
    def test_build_index_blank_values(self):
        """Corner cases: empty strings, spaces and empty lists as field values."""
        test_in_data = [
            {"_id": ""},
            {"_id": " "},
            {"_id": 1, "tags": []},
            {"_id": "", "name": "surya"},
        ]
        test_out_data = [
            {
                "_id": {"": {"_id": ""}},
            },
            {
                "_id": {" ": {"_id": " "}},
            },
            {
                "_id": {"1": {"_id": 1, "tags": []}},
                "tags": {"": [1]},
            },
            {
                "_id": {"": {"_id": "", "name": "surya"}},
                "name": {"surya": [""]},
            },
        ]

        for inp, out in zip(test_in_data, test_out_data):
            entity = Entity("organization")
            # NOTE(review): the load call was missing from this block as
            # received; load_data_build_indices is assumed -- confirm.
            entity.load_data_build_indices(inp)
            assert out == entity._indices
def get_entity_from_formatted_data(entity_name, data):
    """Instantiate an Entity and load the given in-memory data into it.

    Args:
        entity_name (str): Name passed to the Entity constructor.
        data: Data to load; presumably a data point (dict) or a list of data
            points -- verify against callers.

    Returns:
        Entity(): the entity object of name entity_name.
    """
    entity = Entity(entity_name)
    # NOTE(review): the block as received never used ``data``; the load call
    # is assumed to have been dropped -- confirm that load_data_build_indices
    # is the intended loader.
    entity.load_data_build_indices(data)
    return entity