예제 #1
0
 def tearDown(self):
     """
     Remove the test root directory after a test

     If the plain removal fails for any reason other than the directory being absent,
     the "run1" directory is deleted through a FileSystemClient first and the removal is retried.
     :return: nothing
     """
     try:
         shutil.rmtree(self.root_dir)
     except OSError as error:
         if error.errno == 2:
             # errno 2: the directory doesn't exist, so there is nothing left to clean up
             return
         # presumably "run1" was left with permissions that block a plain rmtree - TODO confirm
         cleanup_config = ConfigMother.test_configuration_with_values(file_root_path=self.root_dir)
         FileSystemClient(cleanup_config).delete_directory("run1")
         shutil.rmtree(self.root_dir)
예제 #2
0
 def setUp(self):
     """
     Create a fresh test root directory containing one small file and a client rooted at it

     If the root directory is left over from an earlier run it is torn down and recreated.
     :return: nothing
     """
     self.root_dir = "/tmp/majic_test"  # same as in delete_dir_test.sh
     try:
         os.mkdir(self.root_dir)
     except OSError as ex:
         if ex.errno != 17:
             # not the error handled below, re-raise with the original traceback
             raise
         # errno 17 is handled as "left over directory": clear it and start again
         self.tearDown()
         os.mkdir(self.root_dir)
     self.filename = os.path.join(self.root_dir, "test_file")
     # the context manager closes the handle even if the write fails
     with open(self.filename, "w") as f:
         f.write("testing")
     config = ConfigMother.test_configuration_with_values(file_root_path=self.root_dir)
     self.file_system_client = FileSystemClient(config)
예제 #3
0
 def __init__(self, config, file_system_client=None):
     """
     Set up the comparer with its configuration and file system client
     :param config: configuration for file system comparer
     :param file_system_client: the file system client; one is built from config when omitted
     :return: nothing
     """
     self._config = config
     self.data_path = config.get(CONFIG_DATA_PATH, CONFIG_DATA_SECTION)
     # only construct a client when the caller did not supply one
     self._file_system_client = (
         FileSystemClient(config) if file_system_client is None else file_system_client)
     # the result attributes start out unset
     self.new_directories = None
     self.deleted_directories = None
     self.changed_directories = None
     self.existing_non_deleted_directories = None
    def __init__(self, config, apache_client=None, file_system_client=None):
        """
        Set up the synchroniser with its configuration and clients
        :param config: configuration accessor
        :param apache_client: client to contact apache; built from config when omitted
        :param file_system_client: client to contact the file system; built from config when omitted
        :return: nothing
        """
        self._config = config
        # only construct the clients the caller did not supply
        self._apache_client = ApacheClient(config) if apache_client is None else apache_client
        self._file_system_client = (
            FileSystemClient(config) if file_system_client is None else file_system_client)

        # both settings are whitespace separated lists
        extension_setting = config.get(CONFIG_EXTENSIONS_TO_COPY, CONFIG_DATA_SECTION)
        self.extensions_to_copy = extension_setting.split()
        ignore_setting = config.get(CONFIG_CONTENT_TO_IGNORE_REGEX, CONFIG_DATA_SECTION)
        self._regexes = [re.compile(pattern) for pattern in ignore_setting.split()]
class DirectorySynchroniser(object):
    """
    Synchronise directories and their content between the apache file server and the local filesystem
    It only copies new files, it does not check file contents or delete old files

    Only files with extensions in the configuration file are copied
    """

    def __init__(self, config, apache_client=None, file_system_client=None):
        """
        Constructor
        :param config: configuration accessor
        :param apache_client: client to contact apache; built from config when omitted
        :param file_system_client: client to contact the file system; built from config when omitted
        :return: nothing
        """
        self._config = config
        if apache_client is None:
            self._apache_client = ApacheClient(self._config)
        else:
            self._apache_client = apache_client

        if file_system_client is None:
            self._file_system_client = FileSystemClient(self._config)
        else:
            self._file_system_client = file_system_client

        # both settings are whitespace separated lists
        self.extensions_to_copy = self._config.get(CONFIG_EXTENSIONS_TO_COPY, CONFIG_DATA_SECTION).split()
        regexes = self._config.get(CONFIG_CONTENT_TO_IGNORE_REGEX, CONFIG_DATA_SECTION).split()
        self._regexes = [re.compile(expression) for expression in regexes]

    def _copy_directory_content_item(self, directory_content, relative_directory_path):
        """
        Copy an item in a directory
        :param directory_content: the name of content to copy; a trailing '/' marks a sub directory
        :param relative_directory_path: the relative path of the directory
        :return: count of copied items
        :raises Exception: whatever the download or close raised, after the partial file is removed
        """
        relative_file_path = os.path.join(relative_directory_path, directory_content)

        # check that content doesn't match some ignored content
        for regex in self._regexes:
            if regex.search(relative_file_path):
                return 0

        # sub directories are copied recursively, without the trailing separator
        if relative_file_path.endswith('/'):
            return self._copy_directory(relative_file_path[:-1])

        ext = os.path.splitext(relative_file_path)[1]
        if ext not in self.extensions_to_copy:
            return 0

        new_file = self._file_system_client.create_file(relative_file_path)
        if new_file is None:
            # file already exists
            return 0

        try:
            self._apache_client.download_file(relative_file_path, new_file)
            self._file_system_client.close_file(new_file)
            return 1
        except Exception:
            # don't leave a partially written file behind, then re-raise with the original traceback
            self._file_system_client.close_and_delete_file(new_file)
            raise

    def _copy_directory(self, new_directory_path):
        """
        Copy a directory and all its sub directories
        Client errors are logged and the affected item or directory is skipped
        :param new_directory_path: relative path of the directory to copy
        :return: number of files and directories copied
        """
        # NOTE(review): the handlers below read ex.message - confirm the client errors expose it
        copied_count = 0
        try:
            if self._file_system_client.create_dir(new_directory_path):
                copied_count += 1

            directory_contents = self._apache_client.get_contents(new_directory_path)
            for directory_content in directory_contents:
                try:
                    copied_count += self._copy_directory_content_item(directory_content, new_directory_path)
                except FileSystemClientError as ex:
                    # log the error and skip content
                    log.error("Error opening the file {} in directory {}. {}"
                              .format(directory_content, new_directory_path, ex.message))
                except ApacheClientError as ex:
                    # log the error and skip content
                    log.error("Error downloading file from apache for file {} in directory {}. {}"
                              .format(directory_content, new_directory_path, ex.message))

        except ApacheClientError as ex:
            # log the error and skip directory
            log.error("Error contacting apache for directory contents of {}. {}".format(new_directory_path, ex.message))
        except FileSystemClientError as ex:
            # log the error and skip directory
            log.error("Error creating a directory {}. {}".format(new_directory_path, ex.message))

        return copied_count

    def copy_new(self, new_directories):
        """
        copy new directories and their content from apache
        :param new_directories: the new directory file properties to copy
        :return: number of files and directories copied
        """
        copied_count = 0
        for new_directory in new_directories:
            log.debug("Copying New Directory: {}".format(new_directory.file_path))
            new_directory_path = new_directory.file_path
            current_count = self._copy_directory(new_directory_path)
            # permissions are only set when something was actually copied
            if current_count > 0:
                self._file_system_client.set_permissions(new_directory)
            copied_count += current_count

        return copied_count

    def update_permissions(self, changed_directories):
        """
        Update the permissions of changed directories
        :param changed_directories: the directories that have changed
        :return: count of directories changed
        """
        update_count = 0
        for changed_directory in changed_directories:
            log.debug("Updating Permissions on directory: {}".format(changed_directory.file_path))
            try:
                self._file_system_client.set_permissions(changed_directory)
                update_count += 1
            except FileSystemClientError as ex:
                # log the error and skip directory
                log.error("Error when setting permissions on directory {}. {}".format(changed_directory, ex.message))
        return update_count

    def delete(self, deleted_directories):
        """
        Delete directories which are no longer present
        :param deleted_directories: the directories to delete
        :return: count of deleted directories
        """
        delete_count = 0
        for deleted_directory in deleted_directories:
            log.debug("Deleting directory: {}".format(deleted_directory))
            try:
                self._file_system_client.delete_directory(deleted_directory)
                delete_count += 1
            except FileSystemClientError as ex:
                # log the error and skip directory
                log.error("Error when deleting directory {}. {}".format(deleted_directory, ex.message))
        return delete_count

    def synchronise_all(self, directory_types):
        """
        Synchronise the directory structure based on directory type
        :param directory_types: an object holding new, changed and deleted directories
        :return: count of new, updated and deleted files and directories
        """
        # existing directories are passed through copy_new too, so files added to them are picked up
        new_count = self.copy_new(directory_types.new_directories) + \
            self.copy_new(directory_types.existing_non_deleted_directories)
        return (
            new_count,
            self.update_permissions(directory_types.changed_directories),
            self.delete(directory_types.deleted_directories))
예제 #6
0
class TestFileSystemClient(unittest.TestCase):
    """
    Tests for FileSystemClient run against a scratch directory on the real file system
    """

    def setUp(self):
        """
        Create a fresh test root directory containing one small file and a client rooted at it
        """
        self.root_dir = "/tmp/majic_test"  # same as in delete_dir_test.sh
        try:
            os.mkdir(self.root_dir)
        except OSError as ex:
            if ex.errno != 17:
                # not the error handled below, re-raise with the original traceback
                raise
            else:
                # errno 17 is handled as "left over directory": clear it and start again
                self.tearDown()
                os.mkdir(self.root_dir)
        self.filename = os.path.join(self.root_dir, "test_file")
        # the context manager closes the handle even if the write fails
        with open(self.filename, "w") as f:
            f.write("testing")
        config = ConfigMother.test_configuration_with_values(file_root_path=self.root_dir)
        self.file_system_client = FileSystemClient(config)

    def tearDown(self):
        """
        Remove the test root directory, falling back to the client to delete "run1" first
        """
        try:
            shutil.rmtree(self.root_dir)
        except OSError as e:
            if e.errno != 2:  # file doesn't exist
                # presumably "run1" was left with permissions that block a plain rmtree - TODO confirm
                config = ConfigMother.test_configuration_with_values(file_root_path=self.root_dir)
                file_system_client = FileSystemClient(config)
                file_system_client.delete_directory("run1")
                shutil.rmtree(self.root_dir)

    def test_GIVEN_file_WHEN_from_path_THEN_properties_set(self):
        props = self.file_system_client.get_file_properties(self.filename)

        assert_that(props.file_path, is_(self.filename), "file path")
        assert_that(props.owner, is_(getpass.getuser()), "owner")

    def test_GIVEN_file_is_group_read_WHEN_from_path_THEN_properties_is_published_butnot_public(self):
        os.chmod(self.filename, S_IRUSR | S_IWUSR | S_IRGRP)

        props = self.file_system_client.get_file_properties(self.filename)

        assert_that(props.is_published, is_(True), "published")
        assert_that(props.is_public, is_(False), "public")

    def test_GIVEN_file_is_other_read_WHEN_from_path_THEN_properties_is_not_published_but_is_public(self):
        # this is a funny situation to be in
        os.chmod(self.filename, S_IRUSR | S_IWUSR | S_IROTH)

        props = self.file_system_client.get_file_properties(self.filename)

        assert_that(props.is_published, is_(False), "published")
        assert_that(props.is_public, is_(True), "public")

    def test_GIVEN_file_is_group_read_and_other_read_WHEN_from_path_THEN_properties_is_published_and_is_public(self):
        os.chmod(self.filename, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

        props = self.file_system_client.get_file_properties(self.filename)

        assert_that(props.is_published, is_(True), "published")
        assert_that(props.is_public, is_(True), "public")

    def test_GIVEN_file_is_not_read_WHEN_from_path_THEN_properties_is_not_published_but_is_not_public(self):
        os.chmod(self.filename, S_IRUSR | S_IWUSR)

        props = self.file_system_client.get_file_properties(self.filename)

        assert_that(props.is_published, is_(False), "published")
        assert_that(props.is_public, is_(False), "public")

    def test_GIVEN_dir_WHEN_create_directory_created(self):
        file_path = "tmp"

        self.file_system_client.create_dir(file_path)

        assert_that(os.path.isdir(os.path.join(self.root_dir, file_path)), "Path is not directory or doesn't exist")

    def test_GIVEN_dir_in_non_existant_dir_WHEN_create_THEN_directory_tree_created(self):
        file_path = "tmp/sub/sub/end"

        self.file_system_client.create_dir(file_path)

        assert_that(os.path.isdir(os.path.join(self.root_dir, file_path)), "Path is not directory or doesn't exist")

    def test_GIVEN_dir_already_exists_WHEN_create_directory_created(self):
        file_path = "tmp"
        self.file_system_client.create_dir(file_path)

        self.file_system_client.create_dir(file_path)

        assert_that(os.path.isdir(os.path.join(self.root_dir, file_path)), "Path is not directory or doesn't exist")

    def test_GIVEN_any_other_error_WHEN_create_THEN_exception_thrown_as_apache_file_error(self):
        self.file_system_client._filesystem_root_for_app = "/data"

        assert_that(calling(self.file_system_client.create_dir).with_args("1"), raises(FileSystemClientError))

    def test_GIVEN_dir_referenced_dir_outside_of_root_WHEN_create_directory_THEN_exception(self):
        file_path = "../john"

        assert_that(
            calling(self.file_system_client.create_dir).with_args(file_path),
            raises(Exception, "Code is trying to access a directory outside root"),
        )

    def test_GIVEN_file_WHEN_create_THEN_file_created(self):
        file_path = "tmp"

        fileobj = self.file_system_client.create_file(file_path)
        fileobj.write("hi")
        # close the handle so the test does not leak an open file
        self.file_system_client.close_file(fileobj)

        assert_that(os.path.isfile(os.path.join(self.root_dir, file_path)), "Path is not a file or doesn't exist")

    def test_GIVEN_file_exists_WHEN_create_THEN_non_returned(self):
        file_path = self.filename

        fileobj = self.file_system_client.create_file(file_path)

        assert_that(fileobj, none(), "File object is none")

    def test_GIVEN_file_in_non_writable_directory_WHEN_create_THEN_file_created_after_permissions_changed(self):

        self.create_file_and_sub_dir()
        file_path = os.path.join(self.model_run_path, "tmp")
        self.file_system_client.set_permissions(FileProperties(self.model_run_path, "nobody", False, False))

        fileobj = self.file_system_client.create_file(file_path)
        fileobj.write("hi")
        # close the handle so the test does not leak an open file
        self.file_system_client.close_file(fileobj)

        assert_that(os.path.isfile(os.path.join(self.root_dir, file_path)), "Path is not a file or doesn't exist")

    def test_GIVEN_dir_in_non_writable_directory_WHEN_create_THEN_file_created_after_permissions_changed(self):

        self.create_file_and_sub_dir()
        file_path = os.path.join(self.model_run_path, "tmp")
        self.file_system_client.set_permissions(FileProperties(self.model_run_path, "nobody", False, False))

        self.file_system_client.create_dir(file_path)

        assert_that(os.path.isdir(os.path.join(self.root_dir, file_path)), "Path is not directory or doesn't exist")

    def test_GIVEN_any_error_thrown_WHEN_create_THEN_FileSystemClientError(self):
        file_path = "doesntexists/file.txt"

        assert_that(calling(self.file_system_client.create_file).with_args(file_path), raises(FileSystemClientError))

    def test_GIVEN_file_WHEN_close_THEN_file_closed(self):
        file_path = "tmp"
        fileobj = self.file_system_client.create_file(file_path)

        self.file_system_client.close_file(fileobj)

        assert_that(fileobj.closed, "File is not closed")

    def test_GIVEN_exception_WHEN_close_THEN_FileSystemClientError(self):

        assert_that(calling(self.file_system_client.close_file).with_args(None), raises(FileSystemClientError))

    def test_GIVEN_file_WHEN_close_and_delete_THEN_file_deleted(self):
        file_path = "tmp"
        fileobj = self.file_system_client.create_file(file_path)
        fileobj.write("hi")

        self.file_system_client.close_and_delete_file(fileobj)

        assert_that(os.path.isfile(os.path.join(self.root_dir, file_path)), is_(False), "Path exists")

    def test_GIVEN_dir_with_file_WHEN_delete_directory_THEN_directory_deleted(self):
        file_path = "run1"
        subdir = "blah"
        self.file_system_client.create_dir(file_path)
        sub_dir_path = os.path.join(file_path, subdir)
        self.file_system_client.create_dir(sub_dir_path)
        f = self.file_system_client.create_file(os.path.join(sub_dir_path, "file"))
        f.write("hi there")
        self.file_system_client.close_file(f)

        self.file_system_client.delete_directory(file_path)

        assert_that(os.path.exists(os.path.join(self.root_dir, file_path)), is_(False), "Path still exists")

    def test_GIVEN_error_WHEN_delete_directory_THEN_exception(self):
        file_path = ""

        assert_that(
            calling(self.file_system_client.delete_directory).with_args(file_path), raises(FileSystemClientError)
        )

    def create_file_and_sub_dir(self):
        """
        Create run1/blah/file through the client and remember the run directory in self.model_run_path
        :return: relative path of the created file
        """
        self.model_run_path = "run1"
        subdir = "blah"
        self.file_system_client.create_dir(self.model_run_path)
        sub_dir_path = os.path.join(self.model_run_path, subdir)
        self.file_system_client.create_dir(sub_dir_path)
        file_path = os.path.join(sub_dir_path, "file")
        f = self.file_system_client.create_file(file_path)
        f.write("hi there")
        self.file_system_client.close_file(f)

        return file_path

    def test_GIVEN_dir_WHEN_set_permissions_FF_THEN_premissions_set(self):
        file_path = self.create_file_and_sub_dir()
        file_property = FileProperties(file_path, getpass.getuser(), False, False)

        self.file_system_client.set_permissions(file_property)

        assert_that(self.file_system_client.get_file_properties(file_path), is_(file_property), "File properties")

    def test_GIVEN_dir_WHEN_set_permissions_FT_THEN_premissions_set(self):
        file_path = self.create_file_and_sub_dir()
        file_property = FileProperties(file_path, getpass.getuser(), False, True)

        self.file_system_client.set_permissions(file_property)

        assert_that(self.file_system_client.get_file_properties(file_path), is_(file_property), "File properties")

    def test_GIVEN_dir_WHEN_set_permissions_TF_THEN_premissions_set(self):
        file_path = self.create_file_and_sub_dir()
        file_property = FileProperties(file_path, getpass.getuser(), True, False)

        self.file_system_client.set_permissions(file_property)

        assert_that(self.file_system_client.get_file_properties(file_path), is_(file_property), "File properties")

    def test_GIVEN_dir_WHEN_set_permissions_TT_THEN_premissions_set(self):
        file_path = self.create_file_and_sub_dir()
        file_property = FileProperties(file_path, "root", True, True)

        self.file_system_client.set_permissions(file_property)

        assert_that(self.file_system_client.get_file_properties(file_path), is_(file_property), "File properties")

    def test_GIVEN_dir_WHEN_list_THEN_directories_returned(self):
        self.create_file_and_sub_dir()

        dirs = self.file_system_client.list_dirs("")

        assert_that(dirs, contains(self.model_run_path), "Directories")
예제 #7
0
class FileSystemComparer(object):
    """
    Compare the list of directories from the data path on the file system with a list of models and detect changes

    On perform_analysis the following are populated:
        new_directories - with FileProperties for models that need to be copied
        deleted_directories  - with directory names which need deleting
        changed_directories - with FileProperties of directories which need their permissions updating
        existing_non_deleted_directories - directories which are neither new nor deleted
    """

    def __init__(
            self,
            config,
            file_system_client=None):
        """
        Constructor
        :param config: configuration for file system comparer
        :param file_system_client: the file system client; built from config when omitted
        :return: nothing
        """
        self._config = config
        self.data_path = self._config.get(CONFIG_DATA_PATH, CONFIG_DATA_SECTION)
        if file_system_client is not None:
            self._file_system_client = file_system_client
        else:
            self._file_system_client = FileSystemClient(self._config)
        # results stay unset until perform_analysis is run
        self.new_directories = None
        self.deleted_directories = None
        self.changed_directories = None
        self.existing_non_deleted_directories = None

    def perform_analysis(self, model_properties):
        """
        Perform the analysis of the file system compared to the model properties
        :param model_properties: model properties expected to appear
        :return: nothing but set internal properties for new, changed and deleted files
        """
        self.new_directories = []
        self.deleted_directories = []
        self.existing_non_deleted_directories = []
        self.changed_directories = []

        # raw string so that \d is the regex digit class rather than an invalid string escape
        run_directory_pattern = re.compile(r'run(\d+)')
        existing_run_ids = set()
        existing_directories = self._file_system_client.list_dirs(self.data_path)
        for existing_directory in existing_directories:
            match = run_directory_pattern.match(existing_directory)
            if match:
                existing_run_ids.add(int(match.group(1)))

        model_run_ids = set()
        file_properties_for_model_runs = {}
        for model_property in model_properties:
            model_run_id = model_property[JSON_MODEL_RUN_ID]
            model_run_ids.add(model_run_id)
            file_properties_for_model_runs[model_run_id] = FileProperties(
                self._create_model_dir(model_run_id),
                model_property[JSON_USER_NAME],
                model_property[JSON_IS_PUBLISHED],
                model_property.get(JSON_IS_PUBLIC, False))

        # in the model list but not on disk: needs copying
        for model_run_id in model_run_ids.difference(existing_run_ids):
            self.new_directories.append(file_properties_for_model_runs[model_run_id])

        # on disk but no longer in the model list: needs deleting
        for model_run_id in existing_run_ids.difference(model_run_ids):
            self.deleted_directories.append(self._create_model_dir(model_run_id))

        # in both: kept, and flagged as changed when the properties on disk differ
        existing_run_ids.intersection_update(model_run_ids)
        for model_run_id in existing_run_ids:
            file_properties_to_set = file_properties_for_model_runs[model_run_id]
            self.existing_non_deleted_directories.append(file_properties_to_set)
            current_file_properties = self._file_system_client.get_file_properties(file_properties_to_set.file_path)
            if current_file_properties != file_properties_to_set:
                self.changed_directories.append(file_properties_to_set)

    def _create_model_dir(self, model_run_id):
        """
        Create the model run dir path from the model run id
        :param model_run_id: model run id
        :return: the full path to the model run directory
        :raises AssertionError: if the model run id is not made up of digits only
        """
        #  ensure that the model id is a number so it can not possibly contain path elements
        model_run_id_dir = str(model_run_id)
        if not model_run_id_dir.isdigit():
            # raised explicitly so the check is not stripped when python runs with -O
            raise AssertionError("Model run id is not a number: {!r}".format(model_run_id))

        dir_name = "{}{}".format(MODEL_RUN_DIR_PREFIX, model_run_id)
        return os.path.join(self.data_path, dir_name)

    def add_extra_directories_to_sync(self):
        """
        Add extra directories to synchronise as public data
        The directories are appended to existing_non_deleted_directories; perform_analysis resets that list
        :return: nothing
        """
        if self.existing_non_deleted_directories is None:
            # perform_analysis has not run yet; start from an empty list instead of failing on None
            self.existing_non_deleted_directories = []
        owner = getuser()
        for dir_name in self._config.get(CONFIG_EXTRA_DIRS_TO_SYNC, CONFIG_DATA_SECTION).split():
            dir_properties = FileProperties(dir_name, owner, True, True)
            self.existing_non_deleted_directories.append(dir_properties)