def test_ignore(self, mock_list_dir, mock_makedirs, mock_islink, mock_isdir,
                mock_copy):
    """
    Test that passing an ignore callable causes files to be ignored.

    The mock 'src' directory has the following structure:
    - src
      - dir1
        - file1
      - dir2
        - file2
      - file3
    """
    mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file2']]
    mock_isdir.side_effect = [True, True]

    # The ignore callable reports dir1 and file3 as ignorable
    mock_ignore = Mock()
    mock_ignore.side_effect = [['dir1', 'file3'], ['file2'], []]

    util.copytree('src', 'dst', ignore=mock_ignore)

    # Only 'src' and 'src/dir2' directories should be visited
    mock_list_dir.assert_has_calls([call('src'), call('src/dir2')])
    # Only the not-ignored entries should be checked with os.path.isdir()
    mock_isdir.assert_has_calls([call('src/dir2')])
    # No regular files survived the ignore filter, so copy() is never used
    self.assertFalse(mock_copy.called)
    # Only dst and dir2 directories should be created with makedirs()
    mock_makedirs.assert_has_calls([call('dst'), call('dst/dir2')])
def process_main(self):
    """
    Copy one directory to another.

    When ``self.delete_before_copy`` is set, the target tree is removed
    first (errors during removal are ignored), then the source tree is
    copied over, honoring ``self.preserve_symlinks``.
    """
    if self.delete_before_copy:
        # Best-effort wipe of the destination before copying
        shutil.rmtree(self.target_dir, ignore_errors=True)
    copytree(self.source_dir, self.target_dir,
             symlinks=self.preserve_symlinks)
def test_symlinks(self, mock_list_dir, mock_makedirs, mock_islink,
                  mock_isdir, mock_copy, mock_symlink, mock_readlink):
    """
    Test that symlinks are created as symlinks.

    The mock 'src' directory has the following structure:
    - src
      - dir1
        - file1 <symlink to file3>
      - dir2
        - file2
      - file3
    """
    mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file1'],
                                 ['file2']]
    mock_isdir.side_effect = [True, True, False, False]
    mock_islink.side_effect = [False, True, False, False, False]
    mock_readlink.side_effect = ['src/file3']

    util.copytree('src', 'dst', symlinks=True)

    # 'src/dir1/file1' should be treated as a symlink and recreated as one
    mock_readlink.assert_has_calls([call('src/dir1/file1')])
    mock_symlink.assert_has_calls([call('src/file3', 'dst/dir1/file1')])
    # Every directory should be visited
    mock_list_dir.assert_has_calls([call('src'), call('src/dir1'),
                                    call('src/dir2')])
    # Everything except the symlink should be checked with isdir()
    mock_isdir.assert_has_calls([call('src/dir1'), call('src/dir2'),
                                 call('src/dir2/file2'), call('src/file3')])
def test_error_limit(self, mock_list_dir, mock_isdir, mock_copy,
                     mock_makedirs):
    """
    Make sure it doesn't collect an unbounded number of errors. 100 is the
    defined limit after which it gives up.

    https://pulp.plan.io/issues/1808
    """
    # A generator so we can later observe how much of it was consumed
    mock_list_dir.return_value = (str(x) for x in range(110))
    mock_isdir.return_value = False
    mock_copy.side_effect = OSError('oops')

    with self.assertRaises(shutil.Error) as assertion:
        util.copytree('src', 'dst')

    errors = assertion.exception.args[0]
    # there should be 100 errors exactly, because that is the limit
    self.assertEqual(len(errors), 100)
    # ensure each error carries the correct (src, dst, why) data
    for index, error in enumerate(errors):
        src, dst, why = error
        self.assertEqual('src/%d' % index, src)
        self.assertEqual('dst/%d' % index, dst)
        self.assertEqual('oops', why)
    # 10 entries must remain unconsumed in the iterator, proving those copy
    # operations were definitely not attempted.
    self.assertEqual(len(list(mock_list_dir.return_value)), 10)
def test_recursion(self, mock_list_dir, mock_makedirs, mock_islink,
                   mock_isdir, mock_copy, mock_stat, mock_utime):
    """
    Check that copytree is called recursively on all directories in a tree.

    The mock 'src' directory has the following structure:
    - src
      - dir1
        - file1
      - dir2
        - file2
      - file3
    """
    mock_list_dir.side_effect = [['dir1', 'dir2', 'file3'], ['file1'],
                                 ['file2']]
    mock_isdir.side_effect = [True, False, True, False, False]

    util.copytree('src', 'dst')

    # Every directory in the tree should be listed
    mock_list_dir.assert_has_calls([call('src'), call('src/dir1'),
                                    call('src/dir2')])
    # Every entry should be classified with isdir()
    mock_isdir.assert_has_calls([call('src/dir1'), call('src/dir1/file1'),
                                 call('src/dir2'), call('src/dir2/file2'),
                                 call('src/file3')])
    # Files are copied using copy()
    mock_copy.assert_has_calls([call('src/dir1/file1', 'dst/dir1/file1'),
                                call('src/dir2/file2', 'dst/dir2/file2'),
                                call('src/file3', 'dst/file3')])
    # Directories are created using makedirs()
    mock_makedirs.assert_has_calls([call('dst'), call('dst/dir1'),
                                    call('dst/dir2')])
def process_main(self, item=None):
    """
    Publish a directory from the repo to a target directory.

    A timestamped "master" copy of the source tree is made first; each
    publish location is then pointed at it via a symlink that is renamed
    into place (rename over an existing link is atomic).

    :param item: unused by this step; kept for step-API compatibility
    """
    # Use the timestamp as the name of the current master repository
    # directory. This allows us to identify when these were created as well
    # as having more than one side-by-side during the publishing process.
    timestamp_master_dir = os.path.join(self.master_publish_dir,
                                        self.parent.timestamp)
    # Given that it is timestamped for this publish/repo we could skip the
    # copytree for items where http & https are published to a separate
    # directory
    _logger.debug('Copying tree from %s to %s' % (self.source_dir,
                                                  timestamp_master_dir))
    copytree(self.source_dir, timestamp_master_dir, symlinks=True)

    for source_relative_location, publish_location in self.publish_locations:
        if source_relative_location.startswith('/'):
            # [1:] instead of the redundant [1::] step slice
            source_relative_location = source_relative_location[1:]

        timestamp_master_location = os.path.join(timestamp_master_dir,
                                                 source_relative_location)
        # Without the trailing '/'
        timestamp_master_location = timestamp_master_location.rstrip('/')
        publish_location = publish_location.rstrip('/')

        # Create the parent directory of the published repository tree, if needed
        publish_dir_parent = os.path.dirname(publish_location)
        if not os.path.exists(publish_dir_parent):
            # 0o750 is the portable octal spelling (0750 is Python-2-only)
            os.makedirs(publish_dir_parent, 0o750)

        if not self.only_publish_directory_contents:
            # Create a temporary symlink in the parent of the published
            # directory tree
            tmp_link_name = os.path.join(publish_dir_parent,
                                         self.parent.timestamp)
            os.symlink(timestamp_master_location, tmp_link_name)

            # Rename the symlink to the official published location name.
            # This has two desirable effects:
            # 1. it will overwrite an existing link, if it's there
            # 2. the operation is atomic, instantly changing the published
            #    directory
            # NOTE: it's not easy (possible?) to directly edit the target of
            # a symlink
            os.rename(tmp_link_name, publish_location)
        else:
            if not os.path.exists(publish_location):
                os.makedirs(publish_location, 0o750)
            # Link each file individually into the published directory,
            # again using the atomic symlink-then-rename trick per file.
            for file_name in os.listdir(timestamp_master_location):
                tmp_link_name = os.path.join(publish_location,
                                             self.parent.timestamp)
                master_source_file = os.path.join(timestamp_master_location,
                                                  file_name)
                os.symlink(master_source_file, tmp_link_name)
                final_name = os.path.join(publish_location, file_name)
                os.rename(tmp_link_name, final_name)

    # Clear out any previously published masters
    misc.clear_directory(self.master_publish_dir,
                         skip_list=[self.parent.timestamp])
def test_copy2_copystat_not_used(self, mock_list_dir, mock_makedirs,
                                 mock_islink, mock_isdir, mock_copy,
                                 mock_copy2, mock_copystat):
    """
    Test that only copy is used and copy2 and copystat are never called.

    The mock 'src' directory has the following structure:
    - src
      - file
    """
    mock_list_dir.side_effect = [['file'], []]
    mock_isdir.side_effect = [False]

    util.copytree('src', 'dst')

    # The plain copy() path must be taken...
    mock_copy.assert_called_with('src/file', 'dst/file')
    # ...and neither metadata-preserving variant may be touched
    self.assertFalse(mock_copy2.called)
    self.assertFalse(mock_copystat.called)
def test_copy_mtime(self, mock_list_dir, mock_makedirs, mock_islink,
                    mock_isdir, mock_copy, mock_copy2, mock_copystat,
                    mock_stat, mock_utime):
    """
    Test that mtime is copied when copytree is run.

    The mock 'src' directory has the following structure:
    - src
      - file
    """
    # stat() on the source reports atime=1, mtime=2
    mock_stat.return_value.st_atime = 1
    mock_stat.return_value.st_mtime = 2
    mock_list_dir.side_effect = [['file'], []]
    mock_isdir.side_effect = [False]

    util.copytree('src', 'dst')

    # Those timestamps must be applied to the destination via utime()
    mock_utime.assert_called_with('dst/file', (1, 2))
def publish_repo(self, repo, publish_conduit, config):
    """
    Publish the repository.

    Builds the repo in a scratch directory under the repo working dir,
    then unpublishes and republishes it into every hosting location.

    :param repo: metadata describing the repo
    :type  repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type  config: pulp.plugins.config.PluginConfiguration
    :return: report describing the publish operation
    :rtype:  pulp.plugins.model.PublishReport
    """
    progress_report = FilePublishProgressReport(publish_conduit)
    _logger.info(_('Beginning publish for repository <%(repo)s>')
                 % {'repo': repo.id})

    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        units = publish_conduit.get_units()

        # Set up an empty build_dir
        build_dir = os.path.join(repo.working_dir, BUILD_DIRNAME)
        # Let's erase the path at build_dir so we can be sure it's a clean
        # directory
        self._rmtree_if_exists(build_dir)
        os.makedirs(build_dir)

        self.initialize_metadata(build_dir)

        try:
            # process each unit
            for unit in units:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Let's unpublish, and then republish
        self.unpublish_repo(repo, config)

        hosting_locations = self.get_hosting_locations(repo, config)
        for location in hosting_locations:
            copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo, config)

        # Clean up our build_dir
        self._rmtree_if_exists(build_dir)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    # 'except Exception as e' replaces the Python-2-only 'except Exception, e'
    except Exception as e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report
def publish_repo(self, repo, publish_conduit, config):
    """
    Publish the repository.

    Attempts a fast-forward (incremental) publish first unless the
    'force_full' config flag is set or there is no previous publish; falls
    back to a full rebuild when the fast path bails out.

    :param repo: metadata describing the repo
    :type  repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type  config: pulp.plugins.config.PluginConfiguration
    :return: report describing the publish operation
    :rtype:  pulp.plugins.model.PublishReport
    """
    _logger.info(_('Beginning publish for repository <%(repo)s>')
                 % {'repo': repo.id})

    if not config.get("force_full", False) and publish_conduit.last_publish:
        try:
            return self.publish_repo_fast_forward(repo, publish_conduit,
                                                  config)
        except FastForwardUnavailable:
            _logger.debug(
                "Fast-forward publish bailed out, continuing normally")

    progress_report = FilePublishProgressReport(publish_conduit)
    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        units = publish_conduit.get_units()

        # Set up an empty build_dir
        working_dir = common_utils.get_working_directory()
        build_dir = os.path.join(working_dir, BUILD_DIRNAME)
        os.makedirs(build_dir)

        self.initialize_metadata(build_dir)

        try:
            # process each unit
            for unit in units:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Let's unpublish, and then republish
        self.unpublish_repo(repo, config)

        hosting_locations = self.get_hosting_locations(repo, config)
        for location in hosting_locations:
            copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo, config)

        # Clean up our build_dir
        self._rmtree_if_exists(build_dir)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    # 'except Exception as e' replaces the Python-2-only 'except Exception, e'
    except Exception as e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report
def publish_repo_fast_forward(self, repo, publish_conduit, config):
    """
    Publish the repository incrementally (fast-forward).

    Compares the checksums of the units to publish against the existing
    MANIFEST in each hosting location, symlinks only the units that are
    new, and removes units that disappeared. Raises FastForwardUnavailable
    when the delta is too large, so the caller can fall back to a full
    publish.

    :param repo: metadata describing the repo
    :type  repo: pulp.plugins.model.Repository
    :param publish_conduit: The conduit for publishing a repo
    :type  publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit
    :param config: plugin configuration
    :type  config: pulp.plugins.config.PluginConfiguration
    :return: report describing the publish operation
    :rtype:  pulp.plugins.model.PublishReport
    """
    progress_report = FilePublishProgressReport(publish_conduit)
    try:
        progress_report.state = progress_report.STATE_IN_PROGRESS
        units = publish_conduit.get_units()

        # Set up an empty build_dir
        working_dir = common_utils.get_working_directory()
        build_dir = os.path.join(working_dir, BUILD_DIRNAME)
        self._rmtree_if_exists(build_dir)
        os.makedirs(build_dir)

        self.initialize_metadata(build_dir)
        unit_checksum_set = set()

        try:
            # process each unit
            for unit in units:
                unit_checksum_set.add(unit.unit_key['checksum'])
                self.publish_metadata_for_unit(unit)
        finally:
            # Finalize the processing
            self.finalize_metadata()

        # Just generate increased files and copy them to publishing
        # directories
        hosting_locations = self.get_hosting_locations(repo, config)
        for location in hosting_locations:
            unit_checksum_old_set = set()
            unit_over_path_map = {}
            metadata_filename = os.path.join(location, MANIFEST_FILENAME)
            if os.path.exists(metadata_filename):
                with open(metadata_filename, 'r') as metadata_file:
                    # MANIFEST lines appear to be CSV with the relative
                    # path in field 0 and the checksum in field 1
                    # -- TODO confirm against the manifest writer
                    for line in metadata_file:
                        fields = line.split(',')
                        checksum = fields[1]
                        unit_checksum_old_set.add(checksum)
                        if checksum not in unit_checksum_set:
                            unit_over_path_map[checksum] = fields[0]
            _logger.debug(
                "%d items were in MANIFEST %s, which exists? %s."
                % (len(unit_checksum_old_set), metadata_filename,
                   os.path.exists(metadata_filename)))

            # Copy incremental files into publishing directories
            checksum_absent_set = unit_checksum_set - unit_checksum_old_set
            _logger.debug("Increasing %d units" % len(checksum_absent_set))

            # If added too many units, then publish repo with force_full.
            # Floor division keeps this an int on both Python 2 and 3
            # (plain '/' would yield a float on Python 3).
            max_increase_units = min(50000,
                                     len(units) // len(hosting_locations))
            if len(checksum_absent_set) > max_increase_units:
                self._rmtree_if_exists(build_dir)
                raise FastForwardUnavailable

            criteria = UnitAssociationCriteria(
                unit_filters={
                    'checksum': {
                        "$in": list(checksum_absent_set)
                    }
                },
                unit_fields={'name', 'checksum', '_storage_path', 'size'})
            unit_absent_set = publish_conduit.get_units(criteria=criteria)
            for unit in unit_absent_set:
                links_to_create = self.get_paths_for_unit(unit)
                self._symlink_unit(build_dir, unit, links_to_create)

            # Remove modified and deleted files from publishing directories
            for checksum, unit_path in unit_over_path_map.items():
                unit_path = os.path.join(location, unit_path)
                if os.path.exists(unit_path):
                    os.remove(unit_path)
                    dir_name = os.path.dirname(unit_path)
                    if not os.listdir(dir_name):
                        os.removedirs(dir_name)
                elif os.path.islink(unit_path):
                    # dangling symlink: exists() is False but islink() is True
                    os.unlink(unit_path)

            if len(unit_absent_set) > 0 or len(unit_over_path_map) > 0:
                if os.path.exists(metadata_filename):
                    os.remove(metadata_filename)
                copytree(build_dir, location, symlinks=True)

        self.post_repo_publish(repo, config)

        # Clean up our build_dir
        self._rmtree_if_exists(build_dir)

        # Report that we are done
        progress_report.state = progress_report.STATE_COMPLETE
        return progress_report.build_final_report()
    # 'except Exception as e' replaces the Python-2-only 'except Exception, e'
    except Exception as e:
        _logger.exception(e)
        # Something failed. Let's put an error message on the report
        progress_report.error_message = str(e)
        progress_report.traceback = traceback.format_exc()
        progress_report.state = progress_report.STATE_FAILED
        report = progress_report.build_final_report()
        return report