Example #1
0
    def extract_files(self, source_package, files_to_extract=None):
        """
        Extract files from the ``debian`` directory of the given source
        package and store each one as an ``ExtractedSourceFile``.

        :param source_package: The source package whose files are extracted.
        :type source_package: :class:`SourcePackage <pts.core.models.SourcePackage>`
        :param files_to_extract: Names of the files (relative to the
            ``debian`` directory) which should be extracted. When ``None``,
            ``ALL_FILES_TO_EXTRACT`` is used. Names which do not exist in the
            directory are silently skipped.
        :type files_to_extract: An iterable of file names
        """
        # The cache is created lazily on first use
        if self.cache is None:
            self.cache = AptCache()

        source_directory = self.cache.retrieve_source(
            source_package.source_package_name.name,
            source_package.version,
            debian_directory_only=True)
        debian_directory = os.path.join(source_directory, "debian")

        wanted_names = files_to_extract
        if wanted_names is None:
            wanted_names = self.ALL_FILES_TO_EXTRACT

        for file_name in wanted_names:
            file_path = os.path.join(debian_directory, file_name)
            if os.path.exists(file_path):
                with open(file_path, "r") as source_file:
                    wrapped_file = File(source_file)
                    ExtractedSourceFile.objects.create(
                        source_package=source_package,
                        extracted_file=wrapped_file,
                        name=file_name)
    def execute(self):
        """
        Runs the update: calls ``update_repositories`` on a fresh
        :class:`AptCache` and feeds the returned
        ``(updated_sources, updated_packages)`` pair to the sources, packages
        and dependency update steps, in that order.
        """
        self.apt_cache = AptCache()
        update_result = self.apt_cache.update_repositories(self.force_update)
        updated_sources, updated_packages = update_result

        self.update_sources_files(updated_sources)
        self.update_packages_files(updated_packages)
        self.update_dependencies()
Example #3
0
 def create_cache(self):
     """
     Builds the :class:`pts.core.utils.packages.AptCache` instance used by
     the tests and stores it in ``self.cache``. Several of its private
     methods are swapped for mocks and stubs to avoid HTTP calls.
     """
     cache = self.cache = AptCache()
     # These helpers only need to be callable; their results are not used
     for method_name in ('_get_apt_source_records',
                         '_extract_dpkg_source',
                         '_match_index_file_to_repository'):
         setattr(cache, method_name, mock.MagicMock())
     cache._get_format = mock.MagicMock(return_value='1.0')
     # The stub returns this very list, so tests can fill it in afterwards
     self.cached_files = []
     cache._get_all_cached_files = mock.MagicMock(
         return_value=self.cached_files)
Example #4
0
class AptCacheTests(TestCase):
    """
    Test suite for :class:`pts.core.utils.packages.AptCache`.
    """
    @staticmethod
    def stub_acquire(source_records, dest_dir, debian_dir_only, content):
        """
        Stand-in for the cache's ``_apt_acquire_package``: instead of
        acquiring anything it writes ``content`` to a file named ``temp`` in
        ``dest_dir``.
        """
        target_path = os.path.join(dest_dir, 'temp')
        with open(target_path, 'wb') as target:
            target.write(content)
        return None, 'ekrem'

    def create_cache(self):
        """
        Builds the :class:`pts.core.utils.packages.AptCache` instance used by
        the tests and stores it in ``self.cache``. Several of its private
        methods are swapped for mocks and stubs to avoid HTTP calls.
        """
        cache = self.cache = AptCache()
        # These helpers only need to be callable; their results are not used
        for method_name in ('_get_apt_source_records',
                            '_extract_dpkg_source',
                            '_match_index_file_to_repository'):
            setattr(cache, method_name, mock.MagicMock())
        cache._get_format = mock.MagicMock(return_value='1.0')
        # The stub returns this very list, so tests can fill it in afterwards
        self.cached_files = []
        cache._get_all_cached_files = mock.MagicMock(
            return_value=self.cached_files)

    def set_stub_acquire_content(self, content):
        """
        Makes the cache's acquire step write ``content`` to a file whenever
        ``retrieve_source`` is called.
        """
        acquire_stub = curry(AptCacheTests.stub_acquire, content=content)
        self.cache._apt_acquire_package = mock.MagicMock(
            side_effect=acquire_stub)

    def set_stub_cached_files_for_repository(self, repository, files):
        """
        Adds the given files to the stub list of cached files, named as if
        they belonged to the given repository.

        :param repository: The repository to which these files are associated.
        :type repository: :class:`Repository <pts.core.models.Repository>`
        :param files: List of cached file names. Each name is prefixed with
            an identifier built from the repository's URI and suite.
        """
        uri = repository.uri.rstrip('/')
        scheme = 'http://'
        if uri.startswith(scheme):
            uri = uri[len(scheme):]
        # Slashes are replaced by underscores in the cached file names
        prefix = (uri + '/' + repository.suite + '/').replace('/', '_')
        self.cached_files.extend(prefix + file_name for file_name in files)
        self.cache._match_index_file_to_repository.return_value = repository

    def assert_cache_size_equal(self, size):
        """
        Asserts that the cache currently reports the given size.
        """
        self.assertEqual(size, self.cache.cache_size)

    def test_cache_size_increase_after_acquire(self):
        """
        The cache size grows by the size of the newly acquired files.
        """
        with make_temp_directory('-pts-cache') as cache_dir, \
                self.settings(PTS_CACHE_DIRECTORY=cache_dir,
                              PTS_APT_CACHE_MAX_SIZE=10):
            self.create_cache()
            # Sanity check: a brand new directory has nothing cached in it
            self.assert_cache_size_equal(0)
            payload = b'a' * 5  # 5 bytes
            self.set_stub_acquire_content(payload)

            self.cache.retrieve_source('dummy-package', '1.0.0')

            self.assert_cache_size_equal(5)

    def test_cache_multiple_insert_no_remove(self):
        """
        Nothing is removed from the cache while the size limit is not
        exceeded.
        """
        with make_temp_directory('-pts-cache') as cache_dir, \
                self.settings(PTS_CACHE_DIRECTORY=cache_dir,
                              PTS_APT_CACHE_MAX_SIZE=10):
            self.create_cache()
            # Sanity check: a brand new directory has nothing cached in it
            self.assert_cache_size_equal(0)
            payload = b'a' * 5  # 5 bytes
            self.set_stub_acquire_content(payload)
            # First package
            self.cache.retrieve_source('dummy-package', '1.0.0')
            self.assert_cache_size_equal(5)
            # The same content is used for the second package
            self.set_stub_acquire_content(payload)

            self.cache.retrieve_source('package', '1.0.0')

            # Both files are kept
            self.assert_cache_size_equal(10)

    def test_clear_cache(self):
        """
        Packages are removed from the cache once it exceeds its allocated
        size.
        """
        with make_temp_directory('-pts-cache') as cache_dir, \
                self.settings(PTS_CACHE_DIRECTORY=cache_dir,
                              PTS_APT_CACHE_MAX_SIZE=10):
            self.create_cache()
            # Sanity check: a brand new directory has nothing cached in it
            self.assert_cache_size_equal(0)
            first_payload = b'a' * 11
            self.set_stub_acquire_content(first_payload)
            # The first package alone already exceeds the limit
            self.cache.retrieve_source('dummy-package', '1.0.0')
            self.assert_cache_size_equal(11)
            second_payload = b'a' * 7
            self.set_stub_acquire_content(second_payload)

            self.cache.retrieve_source('package', '1.0.0')

            # Only the second package's content is left in the cache
            self.assert_cache_size_equal(7)

    def test_get_sources_for_repository(self):
        """
        Only the cached Sources files are returned for a repository.
        """
        with make_temp_directory('-pts-cache') as cache_dir, \
                self.settings(PTS_CACHE_DIRECTORY=cache_dir):
            self.create_cache()
            repository = Repository.objects.create(
                name='stable',
                shorthand='stable',
                uri='http://cdn.debian.net/debian/dists',
                suite='stable')
            expected_names = [
                'main_source_Sources',
                'contrib_source_Sources',
            ]
            unrelated_names = [
                'Release',
                'main_binary-amd64_Packages',
            ]
            self.set_stub_cached_files_for_repository(
                repository, expected_names + unrelated_names)

            sources = self.cache.get_sources_files_for_repository(repository)

            self.assertEqual(len(expected_names), len(sources))
            for expected_name, returned_name in zip(expected_names, sources):
                self.assertTrue(returned_name.endswith(expected_name))

    def test_get_packages_for_repository(self):
        """
        Only the cached Packages files are returned for a repository.
        """
        with make_temp_directory('-pts-cache') as cache_dir, \
                self.settings(PTS_CACHE_DIRECTORY=cache_dir):
            self.create_cache()
            repository = Repository.objects.create(
                name='stable',
                shorthand='stable',
                uri='http://cdn.debian.net/debian/dists',
                suite='stable')
            expected_names = [
                'main_binary-amd64_Packages',
                'main_binary-i386_Packages',
            ]
            unrelated_names = [
                'Release',
                'main_source_Sources',
            ]
            self.set_stub_cached_files_for_repository(
                repository, expected_names + unrelated_names)

            packages = self.cache.get_packages_files_for_repository(repository)

            self.assertEqual(len(expected_names), len(packages))
            for expected_name, returned_name in zip(expected_names, packages):
                self.assertTrue(returned_name.endswith(expected_name))
class UpdateRepositoriesTask(PackageUpdateTask):
    """
    Performs an update of repository information.

    New (source and binary) packages are created if necessary and old ones are
    deleted. An event is emitted for each situation, allowing other tasks to
    perform updates based on updated package information.
    """
    # Names of the events which the methods of this task pass to
    # ``raise_event``.
    PRODUCES_EVENTS = (
        # Raised while processing ``Sources`` files
        'new-source-package',
        'new-source-package-version',
        'new-source-package-in-repository',
        'new-source-package-version-in-repository',

        # Raised when a binary package name had to be created
        'new-binary-package',

        # Source package no longer found in any repository
        'lost-source-package',
        # Source package version no longer found in the given repository
        'lost-source-package-version-in-repository',
        # A particular version of a source package no longer found in any repo
        'lost-version-of-source-package',
        # Binary package name no longer used by any source package
        'lost-binary-package',
    )

    def __init__(self, *args, **kwargs):
        """
        Forwards all arguments to the parent class and starts out with empty
        bookkeeping lists.
        """
        super(UpdateRepositoriesTask, self).__init__(*args, **kwargs)
        # Ids of the repository entries seen during the current update
        self._all_repository_entries = []
        self._all_packages = []

    def _clear_processed_repository_entries(self):
        self._all_repository_entries = []

    def _add_processed_repository_entry(self, repository_entry):
        self._all_repository_entries.append(repository_entry.id)

    def _extract_information_from_sources_entry(self, src_pkg, stanza):
        """
        Parses a ``Sources`` stanza and converts the parsed values into the
        corresponding model instances.

        Binary package names which do not exist yet are created and a
        ``new-binary-package`` event is raised for each of them. Missing
        ``UserEmail`` and ``ContributorName`` instances for the maintainer and
        the uploaders are created too.

        :param src_pkg: The source package described by the stanza. It is
            currently not used by this method.
        :param stanza: The ``Sources`` file entry to parse.
        :returns: The dict returned by
            ``extract_information_from_sources_entry`` in which the
            ``architectures``, ``binary_packages``, ``maintainer`` and
            ``uploaders`` values (when present) are replaced by model
            instances.
        """
        entry = extract_information_from_sources_entry(stanza)

        if 'architectures' in entry:
            # Map the list of architecture names to their objects
            # Discards any unknown architectures.
            entry['architectures'] = Architecture.objects.filter(
                name__in=entry['architectures'])

        if 'binary_packages' in entry:
            # Map the list of binary package names to a list of
            # BinaryPackageName instances, creating the missing ones.
            binary_package_names = entry['binary_packages']
            binaries = list(BinaryPackageName.objects.filter(
                name__in=binary_package_names))
            # A set keeps the membership test cheap; it also tracks the names
            # created below so that a name listed twice is only created (and
            # announced) once.
            known_names = set(binary.name for binary in binaries)
            for binary_name in binary_package_names:
                if binary_name in known_names:
                    continue
                package_name, _ = PackageName.objects.get_or_create(
                    name=binary_name)
                package_name.binary = True
                package_name.save()
                binaries.append(
                    BinaryPackageName.objects.get(name=binary_name))
                known_names.add(binary_name)
                self.raise_event('new-binary-package', {
                    'name': binary_name,
                })
            entry['binary_packages'] = binaries

        if 'maintainer' in entry:
            maintainer_email, _ = UserEmail.objects.get_or_create(
                email=entry['maintainer']['email'])
            maintainer = ContributorName.objects.get_or_create(
                contributor_email=maintainer_email,
                name=entry['maintainer'].get('name', ''))[0]
            entry['maintainer'] = maintainer

        if 'uploaders' in entry:
            uploader_entries = list(entry['uploaders'])
            uploader_emails = [
                uploader['email']
                for uploader in uploader_entries
            ]
            # Only one query is issued to find the already existing emails
            known_emails = {
                user_email.email: user_email
                for user_email in UserEmail.objects.filter(
                    email__in=uploader_emails)
            }
            uploaders = []
            for uploader in uploader_entries:
                email = uploader['email']
                contributor_email = known_emails.get(email)
                if contributor_email is None:
                    contributor_email = UserEmail.objects.create(email=email)
                    # Remember the new instance: if the same address shows up
                    # again in this stanza it must be reused, not re-created.
                    known_emails[email] = contributor_email
                uploaders.append(ContributorName.objects.get_or_create(
                    contributor_email=contributor_email,
                    name=uploader.get('name', ''))[0]
                )

            entry['uploaders'] = uploaders

        return entry

    def _extract_information_from_packages_entry(self, bin_pkg, stanza):
        """
        Parses a ``Packages`` stanza.

        :param bin_pkg: The binary package described by the stanza. It is
            currently not used by this method.
        :param stanza: The ``Packages`` file entry to parse.
        :returns: The value returned by
            ``extract_information_from_packages_entry`` for the stanza,
            unchanged.
        """
        return extract_information_from_packages_entry(stanza)

    def _update_sources_file(self, repository, sources_file):
        """
        Updates the tracked source packages of a repository based on a single
        ``Sources`` file.

        For every stanza which is not rejected by the vendor-provided
        ``allow_package`` function, the source package name and version are
        created if they do not exist yet, the version is added to the
        repository if it is not found there and the matching repository entry
        is recorded as processed. The corresponding ``new-source-package*``
        events are raised along the way.

        :param repository: The repository to which the file belongs.
        :param sources_file: The name of the ``Sources`` file to process.
        """
        # ``open`` in a ``with`` block guarantees that the file is closed once
        # all stanzas are read (and, unlike ``file``, it is not Python 2 only).
        with open(sources_file) as sources:
            for stanza in deb822.Sources.iter_paragraphs(sources):
                allow, implemented = vendor.call('allow_package', stanza)
                if allow is not None and implemented and not allow:
                    # The vendor-provided function indicates that the package
                    # should not be included
                    continue

                src_pkg_name, created = SourcePackageName.objects.get_or_create(
                    name=stanza['package']
                )
                if created:
                    self.raise_event('new-source-package', {
                        'name': src_pkg_name.name
                    })

                src_pkg, created_new_version = SourcePackage.objects.get_or_create(
                    source_package_name=src_pkg_name,
                    version=stanza['version']
                )
                if created_new_version:
                    self.raise_event('new-source-package-version', {
                        'name': src_pkg.name,
                        'version': src_pkg.version,
                        'pk': src_pkg.pk,
                    })
                    # Since it's a new version, extract package data from
                    # Sources
                    entry = self._extract_information_from_sources_entry(
                        src_pkg, stanza)
                    # Update the source package information based on the
                    # newly extracted data.
                    src_pkg.update(**entry)
                    src_pkg.save()

                if not repository.has_source_package(src_pkg):
                    # Does it have any version of the package?
                    if not repository.has_source_package_name(src_pkg.name):
                        self.raise_event('new-source-package-in-repository', {
                            'name': src_pkg.name,
                            'repository': repository.name,
                        })

                    # Add it to the repository
                    kwargs = {
                        'priority': stanza.get('priority', ''),
                        'section': stanza.get('section', ''),
                    }
                    entry = repository.add_source_package(src_pkg, **kwargs)
                    self.raise_event(
                        'new-source-package-version-in-repository', {
                            'name': src_pkg.name,
                            'version': src_pkg.version,
                            'repository': repository.name,
                        })
                else:
                    # We get the entry to mark that the package version is
                    # still in the repository.
                    entry = SourcePackageRepositoryEntry.objects.get(
                        repository=repository,
                        source_package=src_pkg
                    )

                self._add_processed_repository_entry(entry)

    def get_source_for_binary(self, stanza):
        """
        Determines the source package which builds the binary package
        described by a ``Packages`` entry.

        The ``source`` field is used when present, otherwise the binary
        package's own name. A ``source`` value of the form ``name (version)``
        also provides the source version; in all other cases the entry's
        ``version`` is used.

        :param stanza: a ``Packages`` file entry
        :returns: A ``(source_name, source_version)`` pair for the binary
            package described by the entry
        """
        if 'source' in stanza:
            source_field = stanza['source']
        else:
            source_field = stanza['package']

        # The Source field may carry an explicit version: "name (version)"
        versioned = re.match(r'(.+) \((.+)\)', source_field)
        if versioned:
            return versioned.group(1), versioned.group(2)

        return source_field, stanza['version']

    def _update_packages_file(self, repository, packages_file):
        """
        Updates the tracked binary packages of a repository based on a single
        ``Packages`` file.

        For every stanza, the binary package name, the matching source
        package and the binary package version are created if they do not
        exist yet, the binary package is added to the repository if it is not
        found there and the matching repository entry is recorded as
        processed.

        :param repository: The repository to which the file belongs.
        :param packages_file: The name of the ``Packages`` file to process.
        """
        # ``open`` in a ``with`` block guarantees that the file is closed once
        # all stanzas are read (and, unlike ``file``, it is not Python 2 only).
        with open(packages_file) as packages:
            for stanza in deb822.Packages.iter_paragraphs(packages):
                bin_pkg_name, _ = BinaryPackageName.objects.get_or_create(
                    name=stanza['package']
                )
                # Find the matching SourcePackage for the binary package
                source_name, source_version = self.get_source_for_binary(stanza)
                src_pkg, _ = SourcePackage.objects.get_or_create(
                    source_package_name=SourcePackageName.objects.get_or_create(
                        name=source_name)[0],
                    version=source_version)

                bin_pkg, created_new_version = BinaryPackage.objects.get_or_create(
                    binary_package_name=bin_pkg_name,
                    version=stanza['version'],
                    source_package=src_pkg
                )
                if created_new_version:
                    # Since it's a new version, extract package data from
                    # Packages
                    entry = self._extract_information_from_packages_entry(
                        bin_pkg, stanza)
                    # Update the binary package information based on the
                    # newly extracted data.
                    bin_pkg.update(**entry)
                    bin_pkg.save()

                if not repository.has_binary_package(bin_pkg):
                    # Add it to the repository
                    architecture, _ = Architecture.objects.get_or_create(
                        name=stanza['architecture'])
                    kwargs = {
                        'priority': stanza.get('priority', ''),
                        'section': stanza.get('section', ''),
                        'architecture': architecture,
                    }
                    entry = repository.add_binary_package(bin_pkg, **kwargs)
                else:
                    # We get the entry to mark that the package version is
                    # still in the repository.
                    entry = BinaryPackageRepositoryEntry.objects.get(
                        repository=repository,
                        binary_package=bin_pkg)

                self._add_processed_repository_entry(entry)

    def _remove_query_set_if_count_zero(self, qs, count_field, event_generator=None):
        """
        Deletes those elements of the given query set for which the count of
        ``count_field`` is ``0``.

        :param qs: The candidate instances; only the ones with a 0 count of
            ``count_field`` are deleted.
        :type qs: :class:`QuerySet <django.db.models.query.QuerySet>`

        :param count_field: The name of the field which is counted for each
            instance in ``qs``.
        :type count_field: string

        :param event_generator: A ``callable`` which is given a model instance
            about to be deleted and returns a ``(name, arguments)`` pair
            describing the event which should be raised for it. No events
            are raised when it is not given.
        :type event_generator: ``callable``
        """
        unreferenced = qs.annotate(
            count=models.Count(count_field)
        ).filter(count=0)

        # Events are raised before the instances are deleted
        if event_generator:
            for item in unreferenced:
                event = event_generator(item)
                self.raise_event(*event)

        unreferenced.delete()

    def _remove_obsolete_packages(self):
        """
        Deletes source package versions, source package names and binary
        package names (in that order) which are no longer referenced, raising
        a ``lost-*`` event for each deleted instance.
        """
        def lost_version_event(source_package):
            return 'lost-version-of-source-package', {
                'name': source_package.name,
                'version': source_package.version,
            }

        def lost_source_event(package):
            return 'lost-source-package', {
                'name': package.name,
            }

        def lost_binary_event(binary_package_name):
            return 'lost-binary-package', {
                'name': binary_package_name.name,
            }

        cleanup_steps = (
            # Package versions which no longer exist in any repository.
            (SourcePackage.objects.all(),
             'repository',
             lost_version_event),
            # Names which no longer exist.
            (SourcePackageName.objects.all(),
             'source_package_versions',
             lost_source_event),
            # Binary package names which are no longer used by any source
            # package.
            (BinaryPackageName.objects.all(),
             'sourcepackage',
             lost_binary_event),
        )
        for candidates, count_field, event_generator in cleanup_steps:
            self._remove_query_set_if_count_zero(
                candidates, count_field, event_generator)

    def _update_repository_entries(self, all_entries_qs, event_generator=None):
        """
        Deletes the repository entries which were not recorded as processed
        during the last update and then resets the list of processed entries.
        If the ``event_generator`` argument is provided, the event it returns
        is raised for each entry before the entries are deleted.

        :param all_entries_qs: All currently existing entries; the ones
            recorded as processed are kept, the rest is deleted.
        :type all_entries_qs: :class:`QuerySet <django.db.models.query.QuerySet>`
        :param event_generator: Takes a repository entry as a parameter and
            returns a two-tuple of ``(event_name, event_arguments)``.
        :type event_generator: callable
        """
        # Everything seen in the last update stays; the remainder is stale
        stale_entries = all_entries_qs.exclude(
            id__in=self._all_repository_entries)

        if event_generator:
            for stale_entry in stale_entries:
                event = event_generator(stale_entry)
                self.raise_event(*event)

        stale_entries.delete()
        self._clear_processed_repository_entries()

    def extract_package_versions(self, file_name):
        """
        Collects the versions of all packages listed in a Deb822 formatted
        file.

        :param file_name: The name of the file from which package versions
            should be extracted.
        :type file_name: string
        :returns: A dict mapping each package name to the list of versions
            found for it, in the order in which they appear in the file.
        """
        versions_by_package = {}
        with open(file_name, 'r') as packages_file:
            for stanza in deb822.Deb822.iter_paragraphs(packages_file):
                package_name = stanza['package']
                version = stanza['version']
                versions_by_package.setdefault(package_name, []).append(version)

        return versions_by_package

    def _mark_file_not_processed(self, repository, file_name, entry_manager):
        """
        Records the package versions listed in an unchanged ``Sources`` or
        ``Packages`` file as processed, so that their repository entries are
        not deleted after the update.

        :param repository: The repository to which the file is associated
        :type repository: :class:`Repository <pts.core.models.Repository>`
        :param file_name: The name of the file whose packages should be saved
        :param entry_manager: The manager instance which handles the package
            entries. It must provide ``filter_by_package_name``.
        :type entry_manager: :class:`Manager <django.db.models.Manager>`
        """
        versions_by_package = self.extract_package_versions(file_name)

        # A single DB query retrieves this repository's entries for all
        # packages named in the file
        candidate_entries = (
            entry_manager
            .filter_by_package_name(versions_by_package.keys())
            .filter(repository=repository)
            .select_related()
        )
        for entry in candidate_entries:
            # Keep only the entries whose version is listed in the file.
            # NOTE(review): the package name is read through
            # ``entry.source_package`` -- confirm that this also works for
            # the entries of ``Packages`` files.
            if entry.version in versions_by_package[entry.source_package.name]:
                self._add_processed_repository_entry(entry)

    def group_files_by_repository(self, cached_files):
        """
        Groups file names by the repository they belong to.

        :param cached_files: A list of ``(repository, file_name)`` pairs
        :returns: A dict mapping each repository to the list of all file
            names given for it, in their original order.
        """
        files_by_repository = {}
        for repository, file_name in cached_files:
            files_by_repository.setdefault(repository, []).append(file_name)

        return files_by_repository

    def update_sources_files(self, updated_sources):
        """
        Updates the tracked source packages based on the updated Sources
        files. All the changes are made within a single transaction.

        :param updated_sources: A list of ``(repository, sources_file_name)``
            pairs giving the Sources files which were updated and should be
            used to update the PTS tracked information too.
        """
        def lost_version_event(entry):
            # Describes the event raised for each entry removed from a
            # repository
            return 'lost-source-package-version-in-repository', {
                'name': entry.source_package.name,
                'version': entry.source_package.version,
                'repository': entry.repository.name,
            }

        files_by_repository = self.group_files_by_repository(updated_sources)

        with transaction.commit_on_success():
            for repository, updated_files in files_by_repository.items():
                # Step 1: process the files which changed
                for sources_file in updated_files:
                    self._update_sources_file(repository, sources_file)

                # Step 2: the packages of unchanged files still exist
                cached_files = self.apt_cache.get_sources_files_for_repository(
                    repository)
                unchanged_files = [
                    cached_file
                    for cached_file in cached_files
                    if cached_file not in updated_files
                ]
                for sources_file in unchanged_files:
                    self._mark_file_not_processed(
                        repository,
                        sources_file,
                        SourcePackageRepositoryEntry.objects)

                # Step 3: drop the entries not seen in steps 1 and 2
                repository_entries = SourcePackageRepositoryEntry.objects.filter(
                    repository=repository)
                self._update_repository_entries(
                    repository_entries, lost_version_event)

            # With all repositories handled, remove whatever is no longer
            # found in at least one of them.
            self._remove_obsolete_packages()

    def update_packages_files(self, updated_packages):
        """
        Performs an update of tracked packages based on the updated Packages
        files.

        :param updated_packages: A list of
            ``(repository, packages_file_name)`` pairs giving the Packages
            files which were updated and should be used to update the PTS
            tracked information too.
        """
        # Group all files by repository to which they belong
        repository_files = self.group_files_by_repository(updated_packages)

        for repository, packages_files in repository_files.items():
            # First update package information based on updated files
            for packages_file in packages_files:
                self._update_packages_file(repository, packages_file)

            # Mark package versions found in un-updated files as still existing
            all_packages = self.apt_cache.get_packages_files_for_repository(
                repository)
            for packages_file in all_packages:
                if packages_file not in packages_files:
                    # The entry manager is a required argument of
                    # ``_mark_file_not_processed``; leaving it out raises a
                    # ``TypeError`` as soon as an un-updated file is found.
                    # NOTE(review): confirm that the binary entry manager
                    # provides ``filter_by_package_name`` and that
                    # ``_mark_file_not_processed`` can read the package name
                    # of a binary entry.
                    self._mark_file_not_processed(
                        repository,
                        packages_file,
                        BinaryPackageRepositoryEntry.objects)

            # When all the files for the repository are handled, update
            # which packages are still found in it.
            self._update_repository_entries(
                BinaryPackageRepositoryEntry.objects.filter(
                    repository=repository))

    def _update_dependencies_for_source(self,
                                        stanza,
                                        dependency_types):
        """
        Updates the dependencies for a source package based on the ones found
        in the given ``Packages`` or ``Sources`` stanza.

        :param source_name: The name of the source package for which the
            dependencies are updated.
        :param stanza: The ``Packages`` or ``Sources`` entry
        :param dependency_type: A list of dependency types which should be
            considered (e.g. Build-Depends, Recommends, etc.)
        :param source_to_binary_deps: The dictionary which should be updated
            with the new dependencies. Maps source names to a list of dicts
            each describing a dependency.
        """
        binary_dependencies = []
        for dependency_type in dependency_types:
            # The Deb822 instance is case sensitive when it comes to relations
            dependencies = stanza.relations.get(dependency_type.lower(), ())

            for dependency in itertools.chain(*dependencies):
                binary_name = dependency['name']
                binary_dependencies.append({
                    'dependency_type': dependency_type,
                    'binary': binary_name,
                })

        return binary_dependencies

    def update_dependencies(self):
        """
        Updates source-to-source package dependencies stemming from
        build dependencies and their binary packages' dependencies.

        Only the default repository is considered; when there is no default
        repository the method returns without doing anything. Otherwise, all
        existing ``SourcePackageDeps`` instances are deleted and replaced by
        the newly computed ones.
        """
        # Build the dependency mapping
        try:
            default_repository = Repository.objects.get(default=True)
        except Repository.DoesNotExist:
            return

        sources_files = self.apt_cache.get_sources_files_for_repository(
            default_repository)
        packages_files = self.apt_cache.get_packages_files_for_repository(
            default_repository)

        # Maps a binary package name to the set of source package names which
        # were found to build it.
        bin_to_src = {}
        # Maps a source package name to a list of dicts each describing a
        # dependency on a binary package.
        source_to_binary_deps = {}

        # First builds a list of binary dependencies of all source packages
        # based on the Sources file.
        source_dependency_types = ('Build-Depends', 'Build-Depends-Indep')
        for sources_file in sources_files:
            # The file must stay open for the duration of the iteration and
            # is closed as soon as all of its stanzas are processed.
            with open(sources_file) as sources_fd:
                for stanza in deb822.Sources.iter_paragraphs(sources_fd):
                    source_name = stanza['package']

                    binaries = itertools.chain.from_iterable(
                        stanza.relations['binary'])
                    for binary in binaries:
                        sources_set = bin_to_src.setdefault(
                            binary['name'], set())
                        sources_set.add(source_name)

                    dependencies = source_to_binary_deps.setdefault(
                        source_name, [])
                    dependencies.extend(self._update_dependencies_for_source(
                        stanza,
                        source_dependency_types))

        # Then a list of binary dependencies based on the Packages file.
        binary_dependency_types = (
            'Depends',
            'Recommends',
            'Suggests',
        )
        for packages_file in packages_files:
            with open(packages_file) as packages_fd:
                for stanza in deb822.Packages.iter_paragraphs(packages_fd):
                    binary_name = stanza['package']
                    # Only the name of the source package is needed here.
                    source_name, _source_version = (
                        self.get_source_for_binary(stanza))

                    sources_set = bin_to_src.setdefault(binary_name, set())
                    sources_set.add(source_name)

                    new_dependencies = self._update_dependencies_for_source(
                        stanza,
                        binary_dependency_types)
                    # Remember which binary package of the source package
                    # introduced the dependency.
                    for dependency in new_dependencies:
                        dependency['source_binary'] = binary_name
                    dependencies = source_to_binary_deps.setdefault(
                        source_name, [])
                    dependencies.extend(new_dependencies)

        # The binary packages are matched with their source packages and each
        # source to source dependency created.
        all_sources = {
            source.name: source
            for source in SourcePackageName.objects.all()
        }
        # Keeps a list of SourcePackageDeps instances which are to be bulk
        # created in the end.
        dependency_instances = []

        for source_name, dependencies in source_to_binary_deps.items():
            if source_name not in all_sources:
                continue

            # All dependencies for the current source package: maps the name
            # of the source package depended upon to a dict which maps each
            # dependency type to the list of dependency descriptions.
            all_dependencies = {}
            for dependency in dependencies:
                binary_name = dependency['binary']
                # The type becomes the key under which the description is
                # stored, so it is removed from the description itself.
                dependency_type = dependency.pop('dependency_type')
                if binary_name not in bin_to_src:
                    continue

                for source_dependency in bin_to_src[binary_name]:
                    # A source package depending on its own binaries is not
                    # recorded as a dependency.
                    if source_name == source_dependency:
                        continue

                    source_dependencies = all_dependencies.setdefault(
                        source_dependency, {})
                    type_dependencies = source_dependencies.setdefault(
                        dependency_type, [])
                    if dependency not in type_dependencies:
                        type_dependencies.append(dependency)

            # Create the dependency instances for the current source package.
            for dependency_name, details in all_dependencies.items():
                if dependency_name not in all_sources:
                    continue

                build_dep = any(
                    dependency_type in details
                    for dependency_type in source_dependency_types)
                binary_dep = any(
                    dependency_type in details
                    for dependency_type in binary_dependency_types)
                dependency_instances.append(
                    SourcePackageDeps(
                        source=all_sources[source_name],
                        dependency=all_sources[dependency_name],
                        build_dep=build_dep,
                        binary_dep=binary_dep,
                        repository=default_repository,
                        details=details))

        # Replace all the existing dependencies with the newly computed ones.
        # NOTE(review): nothing in this method opens a transaction around the
        # delete and the bulk create -- confirm whether the caller provides
        # one.
        SourcePackageDeps.objects.all().delete()
        SourcePackageDeps.objects.bulk_create(dependency_instances)

    @clear_all_events_on_exception
    def execute(self):
        """
        Runs the task: refreshes the repositories through a new
        :class:`AptCache` and processes the results.

        The two values returned by the cache's ``update_repositories`` are
        passed on to :meth:`update_sources_files` and
        :meth:`update_packages_files` respectively, after which the
        dependencies are updated.
        """
        cache = AptCache()
        # The other methods of the task reach the cache through the instance.
        self.apt_cache = cache

        refreshed = cache.update_repositories(self.force_update)
        sources, packages = refreshed

        self.update_sources_files(sources)
        self.update_packages_files(packages)
        self.update_dependencies()
Example #6
0
class ExtractSourcePackageFiles(BaseTask):
    """
    A task which extracts some files from a new source package version.
    The extracted files are:

    - debian/changelog
    - debian/copyright
    - debian/rules
    - debian/control
    - debian/watch
    """

    DEPENDS_ON_EVENTS = ("new-source-package-version",)

    PRODUCES_EVENTS = ("source-files-extracted",)

    # Names of the files, relative to the debian/ directory, to extract.
    ALL_FILES_TO_EXTRACT = ("changelog", "copyright", "rules", "control", "watch")

    def __init__(self, *args, **kwargs):
        super(ExtractSourcePackageFiles, self).__init__(*args, **kwargs)
        # The AptCache is only created once the first extraction is requested.
        self.cache = None

    def extract_files(self, source_package, files_to_extract=None):
        """
        Extract files for just the given source package.

        Files which do not exist in the retrieved ``debian`` directory are
        silently skipped. An ``ExtractedSourceFile`` instance is created for
        each file which is found.

        :type source_package: :class:`SourcePackage <pts.core.models.SourcePackage>`
        :type files_to_extract: An iterable of file names which should be
            extracted. When ``None``, all files found in
            :attr:`ALL_FILES_TO_EXTRACT` are extracted.
        """
        if self.cache is None:
            self.cache = AptCache()

        source_directory = self.cache.retrieve_source(
            source_package.source_package_name.name, source_package.version, debian_directory_only=True
        )
        debian_directory = os.path.join(source_directory, "debian")

        if files_to_extract is None:
            files_to_extract = self.ALL_FILES_TO_EXTRACT

        for file_name in files_to_extract:
            file_path = os.path.join(debian_directory, file_name)
            if not os.path.exists(file_path):
                continue
            # The model instance is created while the file is still open.
            with open(file_path, "r") as f:
                extracted_file = File(f)
                ExtractedSourceFile.objects.create(
                    source_package=source_package, extracted_file=extracted_file, name=file_name
                )

    def _execute_initial(self):
        """
        When the task is directly ran, instead of relying on events to know
        which packages' source files should be retrieved, the task scans all
        existing packages and adds any missing source packages for each of
        them.

        A failure to extract the files of one package is logged and does not
        stop the processing of the remaining packages.
        """
        # First remove all source files which are no longer to be included.
        qs = ExtractedSourceFile.objects.exclude(name__in=self.ALL_FILES_TO_EXTRACT)
        qs.delete()

        # Retrieves the packages and all the associated files with each of them
        # in only two db queries. ``prefetch_related`` returns a new queryset
        # instead of modifying the existing one, so its result must be kept.
        source_packages = SourcePackage.objects.all().prefetch_related("extracted_source_files")

        # Find the difference of packages and extract only those for each
        # package
        for source_package in source_packages:
            already_extracted = {
                extracted_file.name for extracted_file in source_package.extracted_source_files.all()
            }
            files_to_extract = [
                file_name for file_name in self.ALL_FILES_TO_EXTRACT if file_name not in already_extracted
            ]
            if files_to_extract:
                try:
                    self.extract_files(source_package, files_to_extract)
                except Exception:
                    # Best effort: log the problem and move on to the next
                    # package. KeyboardInterrupt and SystemExit are
                    # deliberately not caught here.
                    logger.exception(
                        "Problem extracting source files for"
                        " {pkg} version {ver}".format(pkg=source_package, ver=source_package.version)
                    )

    def execute(self):
        """
        Extracts the source files of the packages the task should process.

        When the task is the initial task, all existing source packages are
        scanned (see :meth:`_execute_initial`). Otherwise, only the source
        packages whose primary keys are given by the ``pk`` argument of the
        received events are processed, and the ``source-files-extracted``
        event is raised in the end.
        """
        if self.is_initial_task():
            return self._execute_initial()

        # When the task is not the initial task, then all the packages it
        # should process should come from received events.
        new_version_pks = [event.arguments["pk"] for event in self.get_all_events()]
        source_packages = SourcePackage.objects.filter(pk__in=new_version_pks)
        source_packages = source_packages.select_related()

        for source_package in source_packages:
            try:
                self.extract_files(source_package)
            except Exception:
                # Best effort: a failure for one package is logged and must
                # not prevent the others from being processed.
                logger.exception(
                    "Problem extracting source files for"
                    " {pkg} version {ver}".format(pkg=source_package, ver=source_package.version)
                )

        self.raise_event("source-files-extracted")