Example #1
    def test_ignore(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir, mock_copy):
        """
        Test that passing an ignore callable causes files to be ignored

        The mock 'src' directory has the following structure:
            - src
               - dir1
                  - file1
               - dir2
                  - file2
               - file3
        """
        mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file2']]
        mock_isdir.side_effect = [True, True]
        mock_ignore = Mock()

        # Ignore dir1 and file3
        mock_ignore.side_effect = [['dir1', 'file3'], ['file2'], []]
        util.copytree('src', 'dst', ignore=mock_ignore)

        # Assert only 'src' and 'src/dir2' directories are visited
        mock_list_dir.assert_has_calls([call('src'), call('src/dir2')])

        # Assert os.path.isdir() is only called for entries that were not ignored
        mock_isdir.assert_has_calls([call('src/dir2')])

        # Assert copy() is never called, since every file was ignored
        self.assertFalse(mock_copy.called)

        # Assert that only dst and dir2 directories are created using makedirs()
        mock_makedirs.assert_has_calls([call('dst'), call('dst/dir2')])
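
The ignore callable the test mocks follows the same style of hook as the stdlib shutil.copytree: it is invoked once per directory with that directory and its contents and returns the names to skip. A minimal usage sketch against the stdlib API, assuming util.copytree accepts the same kind of callable (the patterns are illustrative):

import shutil

# Hypothetical usage: skip compiled files and any 'tmp' entries while copying.
# shutil.ignore_patterns builds a (directory, names) -> ignored_names callable,
# the contract the mocked ignore above stands in for.
ignore = shutil.ignore_patterns('*.pyc', 'tmp')
shutil.copytree('src', 'dst', ignore=ignore)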
Example #2
 def process_main(self):
     """
     Copy one directory to another.
     """
     if self.delete_before_copy:
         shutil.rmtree(self.target_dir, ignore_errors=True)
     copytree(self.source_dir, self.target_dir, symlinks=self.preserve_symlinks)
Example #3
    def test_symlinks(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir, mock_copy,
                      mock_symlink, mock_readlink):
        """
        Test that symlinks are created as symlinks

        The mock 'src' directory has the following structure:
            - src
               - dir1
                  - file1 <symlink to file3>
               - dir2
                  - file2
               - file3
        """
        mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file1'], ['file2']]
        mock_isdir.side_effect = [True, True, False, False]
        mock_islink.side_effect = [False, True, False, False, False]
        mock_readlink.side_effect = ['src/file3']
        util.copytree('src', 'dst', symlinks=True)

        # Assert that 'src/dir1/file1' is treated as symlink
        mock_readlink.assert_has_calls([call('src/dir1/file1')])
        mock_symlink.assert_has_calls([call('src/file3', 'dst/dir1/file1')])

        # Assert that all directories are visited
        mock_list_dir.assert_has_calls([call('src'), call('src/dir1'), call('src/dir2')])

        # Assert everything except the symlink is checked with os.path.isdir()
        mock_isdir.assert_has_calls([call('src/dir1'), call('src/dir2'), call('src/dir2/file2'),
                                     call('src/file3')])
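
With symlinks=True, the assertions show that a link is reproduced by reading its target and re-creating it at the destination, rather than copying the file it points to. Reduced to plain os calls, that step amounts to something like the following (the helper name is hypothetical):

import os

def copy_symlink(src_path, dst_path):
    # Recreate the link itself: read the target of the source link and
    # create an equivalent link at the destination, matching the
    # readlink()/symlink() call pair asserted above.
    target = os.readlink(src_path)
    os.symlink(target, dst_path)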
Example #4
    def test_error_limit(self, mock_list_dir, mock_isdir, mock_copy, mock_makedirs):
        """
        Make sure copytree does not collect an unbounded number of errors; 100 is the
        defined limit, after which it gives up.

        https://pulp.plan.io/issues/1808
        """
        mock_list_dir.return_value = (str(x) for x in range(110))
        mock_isdir.return_value = False
        mock_copy.side_effect = OSError('oops')

        with self.assertRaises(shutil.Error) as assertion:
            util.copytree('src', 'dst')

        errors = assertion.exception.args[0]
        # there should be 100 errors exactly, because that is the limit
        self.assertEqual(len(errors), 100)
        # ensure each error has the correct data
        for i, error in enumerate(errors):
            src, dst, why = error
            self.assertEqual('src/%d' % i, src)
            self.assertEqual('dst/%d' % i, dst)
            self.assertEqual('oops', why)
        # make sure 10 entries remain in the iterator, proving those copy operations were
        # never attempted.
        self.assertEqual(len(list(mock_list_dir.return_value)), 10)
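
The test pins down behavior rather than implementation: failures are collected as (src, dst, why) tuples, copying stops once 100 of them have accumulated, and a shutil.Error carrying the list is raised. A minimal sketch of that pattern follows; the constant and helper names are assumptions, not Pulp's actual code.

import shutil

ERROR_LIMIT = 100  # assumed name for the cap the test exercises

def copy_entries(entries, copy_one):
    # Collect (src, dst, why) tuples and give up after ERROR_LIMIT failures,
    # then report them all at once, mirroring the assertions above.
    errors = []
    for src, dst in entries:
        try:
            copy_one(src, dst)
        except OSError as why:
            errors.append((src, dst, str(why)))
            if len(errors) >= ERROR_LIMIT:
                break
    if errors:
        raise shutil.Error(errors)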
Example #5
    def test_recursion(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir, mock_copy,
                       mock_stat, mock_utime):
        """
        Check that copytree is called recursively on all directories within a tree

        The mock 'src' directory has the following structure:
            - src
               - dir1
                  - file1
               - dir2
                  - file2
               - file3
        """
        mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file1'], ['file2']]
        mock_isdir.side_effect = [True, False, True, False, False]
        util.copytree('src', 'dst')
        mock_list_dir.assert_has_calls([call('src'), call('src/dir1'), call('src/dir2')])
        mock_isdir.assert_has_calls([call('src/dir1'), call('src/dir1/file1'), call('src/dir2'),
                                     call('src/dir2/file2'), call('src/file3')])
        # Assert files are copied using copy()
        mock_copy.assert_has_calls([call('src/dir1/file1', 'dst/dir1/file1'),
                                    call('src/dir2/file2', 'dst/dir2/file2'),
                                    call('src/file3', 'dst/file3')])
        # Assert that directories are created using makedirs()
        mock_makedirs.assert_has_calls([call('dst'), call('dst/dir1'), call('dst/dir2')])
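
The call sequence asserted here (makedirs the destination, listdir the source, recurse into subdirectories, copy plain files) is the classic recursive copytree shape. A stripped-down sketch of that shape, offered as a reference point rather than Pulp's implementation:

import os
import shutil

def copytree_sketch(src, dst):
    # Create the destination, then walk the source one level at a time:
    # recurse into directories, copy() plain files -- the same call
    # sequence the test above asserts.
    os.makedirs(dst)
    for name in os.listdir(src):
        src_name = os.path.join(src, name)
        dst_name = os.path.join(dst, name)
        if os.path.isdir(src_name):
            copytree_sketch(src_name, dst_name)
        else:
            shutil.copy(src_name, dst_name)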
Example #6
    def process_main(self, item=None):
        """
        Publish a directory from the repo to a target directory.
        """

        # Use the timestamp as the name of the current master repository
        # directory. This allows us to identify when these were created as well
        # as having more than one side-by-side during the publishing process.
        timestamp_master_dir = os.path.join(self.master_publish_dir,
                                            self.parent.timestamp)

        # Since the directory is timestamped per publish/repo, the copytree could be
        # skipped when http & https are published to separate directories

        _logger.debug('Copying tree from %s to %s' % (self.source_dir, timestamp_master_dir))
        copytree(self.source_dir, timestamp_master_dir, symlinks=True)

        for source_relative_location, publish_location in self.publish_locations:
            if source_relative_location.startswith('/'):
                source_relative_location = source_relative_location[1:]

            timestamp_master_location = os.path.join(timestamp_master_dir, source_relative_location)
            timestamp_master_location = timestamp_master_location.rstrip('/')

            # Without the trailing '/'
            publish_location = publish_location.rstrip('/')

            # Create the parent directory of the published repository tree, if needed
            publish_dir_parent = os.path.dirname(publish_location)
            if not os.path.exists(publish_dir_parent):
                os.makedirs(publish_dir_parent, 0o750)

            if not self.only_publish_directory_contents:
                # Create a temporary symlink in the parent of the published directory tree
                tmp_link_name = os.path.join(publish_dir_parent, self.parent.timestamp)
                os.symlink(timestamp_master_location, tmp_link_name)

                # Rename the symlink to the official published location name.
                # This has two desirable effects:
                # 1. it will overwrite an existing link, if it's there
                # 2. the operation is atomic, instantly changing the published directory
                # NOTE: it's not easy (possible?) to directly edit the target of a symlink
                os.rename(tmp_link_name, publish_location)
            else:
                if not os.path.exists(publish_location):
                    os.makedirs(publish_location, 0o750)
                for file_name in os.listdir(timestamp_master_location):
                    tmp_link_name = os.path.join(publish_location, self.parent.timestamp)
                    master_source_file = os.path.join(timestamp_master_location, file_name)
                    os.symlink(master_source_file, tmp_link_name)
                    final_name = os.path.join(publish_location, file_name)
                    os.rename(tmp_link_name, final_name)

        # Clear out any previously published masters
        misc.clear_directory(self.master_publish_dir, skip_list=[self.parent.timestamp])
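
The temporary-symlink-plus-rename pattern explained in the comments above is worth isolating: on POSIX, os.rename() over an existing symlink replaces it atomically, so the published path always points at either the old tree or the new one, never a partially copied state. A minimal standalone sketch (the function and argument names are illustrative):

import os

def atomically_repoint(target_dir, publish_location, tmp_name):
    # Create a temporary symlink next to the final location, then rename it
    # into place; rename() replaces an existing symlink in one atomic step.
    tmp_link = os.path.join(os.path.dirname(publish_location), tmp_name)
    os.symlink(target_dir, tmp_link)
    os.rename(tmp_link, publish_location)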
Example #7
    def test_copy2_copystat_not_used(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir,
                                     mock_copy, mock_copy2, mock_copystat):
        """
        Test that only copy is used and copy2 and copystat are never called

        The mock 'src' directory has the following structure:
            - src
               - file
        """
        mock_list_dir.side_effect = [['file'], []]
        mock_isdir.side_effect = [False]
        util.copytree('src', 'dst')
        mock_copy.assert_called_with('src/file', 'dst/file')
        self.assertFalse(mock_copy2.called)
        self.assertFalse(mock_copystat.called)
Example #8
    def test_copy_mtime(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir,
                        mock_copy, mock_copy2, mock_copystat, mock_stat, mock_utime):
        """
        Test that mtime is copied when copytree is run

        The mock 'src' directory has the following structure:
            - src
               - file
        """
        mock_stat.return_value.st_atime = 1
        mock_stat.return_value.st_mtime = 2
        mock_list_dir.side_effect = [['file'], []]
        mock_isdir.side_effect = [False]
        util.copytree('src', 'dst')
        mock_utime.assert_called_with('dst/file', (1, 2))
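
This test, together with Example #7, shows that copytree reproduces timestamps itself with os.stat() and os.utime() rather than relying on copy2()/copystat(). Reduced to a single file, the asserted behavior looks roughly like this (the helper name is hypothetical):

import os
import shutil

def copy_with_times(src, dst):
    # Copy the file contents with copy(), then carry over the access and
    # modification times, which is exactly what mock_utime verifies above.
    shutil.copy(src, dst)
    st = os.stat(src)
    os.utime(dst, (st.st_atime, st.st_mtime))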
Example #9
    def publish_repo(self, repo, publish_conduit, config):
        """
        Publish the repository.

        :param repo:            metadata describing the repo
        :type  repo:            pulp.plugins.model.Repository
        :param publish_conduit: The conduit for publishing a repo
        :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
        :param config:          plugin configuration
        :type  config:          pulp.plugins.config.PluginConfiguration
        :return:                report describing the publish operation
        :rtype:                 pulp.plugins.model.PublishReport
        """
        progress_report = FilePublishProgressReport(publish_conduit)
        _logger.info(_('Beginning publish for repository <%(repo)s>') % {'repo': repo.id})

        try:
            progress_report.state = progress_report.STATE_IN_PROGRESS
            units = publish_conduit.get_units()

            # Set up an empty build_dir
            build_dir = os.path.join(repo.working_dir, BUILD_DIRNAME)
            # Let's erase the path at build_dir so we can be sure it's a clean directory
            self._rmtree_if_exists(build_dir)
            os.makedirs(build_dir)

            self.initialize_metadata(build_dir)

            try:
                # process each unit
                for unit in units:
                    links_to_create = self.get_paths_for_unit(unit)
                    self._symlink_unit(build_dir, unit, links_to_create)
                    self.publish_metadata_for_unit(unit)
            finally:
                # Finalize the processing
                self.finalize_metadata()

            # Let's unpublish, and then republish
            self.unpublish_repo(repo, config)

            hosting_locations = self.get_hosting_locations(repo, config)
            for location in hosting_locations:
                copytree(build_dir, location, symlinks=True)

            self.post_repo_publish(repo, config)

            # Clean up our build_dir
            self._rmtree_if_exists(build_dir)

            # Report that we are done
            progress_report.state = progress_report.STATE_COMPLETE
            return progress_report.build_final_report()
        except Exception as e:
            _logger.exception(e)
            # Something failed. Let's put an error message on the report
            progress_report.error_message = str(e)
            progress_report.traceback = traceback.format_exc()
            progress_report.state = progress_report.STATE_FAILED
            report = progress_report.build_final_report()
            return report
Example #10
    def publish_repo(self, repo, publish_conduit, config):
        """
        Publish the repository.

        :param repo:            metadata describing the repo
        :type  repo:            pulp.plugins.model.Repository
        :param publish_conduit: The conduit for publishing a repo
        :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
        :param config:          plugin configuration
        :type  config:          pulp.plugins.config.PluginConfiguration
        :return:                report describing the publish operation
        :rtype:                 pulp.plugins.model.PublishReport
        """
        _logger.info(
            _('Beginning publish for repository <%(repo)s>') %
            {'repo': repo.id})
        if not config.get("force_full",
                          False) and publish_conduit.last_publish:
            try:
                return self.publish_repo_fast_forward(repo, publish_conduit,
                                                      config)
            except FastForwardUnavailable:
                _logger.debug(
                    "Fast-forward publish bailed out, continuing normally")

        progress_report = FilePublishProgressReport(publish_conduit)

        try:
            progress_report.state = progress_report.STATE_IN_PROGRESS
            units = publish_conduit.get_units()

            # Set up an empty build_dir
            working_dir = common_utils.get_working_directory()
            build_dir = os.path.join(working_dir, BUILD_DIRNAME)
            os.makedirs(build_dir)

            self.initialize_metadata(build_dir)

            try:
                # process each unit
                for unit in units:
                    links_to_create = self.get_paths_for_unit(unit)
                    self._symlink_unit(build_dir, unit, links_to_create)
                    self.publish_metadata_for_unit(unit)
            finally:
                # Finalize the processing
                self.finalize_metadata()

            # Let's unpublish, and then republish
            self.unpublish_repo(repo, config)

            hosting_locations = self.get_hosting_locations(repo, config)
            for location in hosting_locations:
                copytree(build_dir, location, symlinks=True)

            self.post_repo_publish(repo, config)

            # Clean up our build_dir
            self._rmtree_if_exists(build_dir)

            # Report that we are done
            progress_report.state = progress_report.STATE_COMPLETE
            return progress_report.build_final_report()
        except Exception as e:
            _logger.exception(e)
            # Something failed. Let's put an error message on the report
            progress_report.error_message = str(e)
            progress_report.traceback = traceback.format_exc()
            progress_report.state = progress_report.STATE_FAILED
            report = progress_report.build_final_report()
            return report
Example #11
    def publish_repo_fast_forward(self, repo, publish_conduit, config):
        """
        Publish the repository incrementally (fast-forward), copying only newly added units.

        :param repo:            metadata describing the repo
        :type  repo:            pulp.plugins.model.Repository
        :param publish_conduit: The conduit for publishing a repo
        :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
        :param config:          plugin configuration
        :type  config:          pulp.plugins.config.PluginConfiguration
        :return:                report describing the publish operation
        :rtype:                 pulp.plugins.model.PublishReport
        """
        progress_report = FilePublishProgressReport(publish_conduit)

        try:
            progress_report.state = progress_report.STATE_IN_PROGRESS
            units = publish_conduit.get_units()

            # Set up an empty build_dir
            working_dir = common_utils.get_working_directory()
            build_dir = os.path.join(working_dir, BUILD_DIRNAME)

            self._rmtree_if_exists(build_dir)
            os.makedirs(build_dir)

            self.initialize_metadata(build_dir)
            unit_checksum_set = set()

            try:
                # process each unit
                for unit in units:
                    unit_checksum_set.add(unit.unit_key['checksum'])
                    self.publish_metadata_for_unit(unit)
            finally:
                # Finalize the processing
                self.finalize_metadata()

            # Only generate the newly added files and copy them to the publishing directories
            hosting_locations = self.get_hosting_locations(repo, config)
            for location in hosting_locations:
                unit_checksum_old_set = set()
                unit_over_path_map = {}
                metadata_filename = os.path.join(location, MANIFEST_FILENAME)
                if os.path.exists(metadata_filename):
                    with open(metadata_filename, 'r') as metadata_file:
                        for line in metadata_file:
                            fields = line.split(',')
                            checksum = fields[1]
                            unit_checksum_old_set.add(checksum)
                            if checksum not in unit_checksum_set:
                                unit_over_path_map[checksum] = fields[0]
                _logger.debug(
                    "%d items were in MANIFEST %s, which exists? %s." %
                    (len(unit_checksum_old_set), metadata_filename,
                     os.path.exists(metadata_filename)))

                # Copy incremental files into publishing directories
                checksum_absent_set = unit_checksum_set - unit_checksum_old_set
                _logger.debug("Increasing %d units" % len(checksum_absent_set))

                # If too many units were added, bail out so the repo is published with force_full
                max_increase_units = min(50000,
                                         len(units) / len(hosting_locations))
                if len(checksum_absent_set) > max_increase_units:
                    self._rmtree_if_exists(build_dir)
                    raise FastForwardUnavailable

                criteria = UnitAssociationCriteria(
                    unit_filters={
                        'checksum': {
                            "$in": list(checksum_absent_set)
                        }
                    },
                    unit_fields={'name', 'checksum', '_storage_path', 'size'})
                unit_absent_set = publish_conduit.get_units(criteria=criteria)
                for unit in unit_absent_set:
                    links_to_create = self.get_paths_for_unit(unit)
                    self._symlink_unit(build_dir, unit, links_to_create)

                # Remove modified and deleted files from publishing directories
                for checksum, unit_path in unit_over_path_map.items():
                    unit_path = os.path.join(location, unit_path)
                    if os.path.exists(unit_path):
                        os.remove(unit_path)
                        dir_name = os.path.dirname(unit_path)
                        if not os.listdir(dir_name):
                            os.removedirs(dir_name)
                    elif os.path.islink(unit_path):
                        os.unlink(unit_path)

                if len(unit_absent_set) > 0 or len(unit_over_path_map) > 0:
                    if os.path.exists(metadata_filename):
                        os.remove(metadata_filename)
                    copytree(build_dir, location, symlinks=True)

            self.post_repo_publish(repo, config)

            # Clean up our build_dir
            self._rmtree_if_exists(build_dir)

            # Report that we are done
            progress_report.state = progress_report.STATE_COMPLETE
            return progress_report.build_final_report()
        except Exception as e:
            _logger.exception(e)
            # Something failed. Let's put an error message on the report
            progress_report.error_message = str(e)
            progress_report.traceback = traceback.format_exc()
            progress_report.state = progress_report.STATE_FAILED
            report = progress_report.build_final_report()
            return report