Example #1
0
    def test_DoneDeleteUnreferencedButNotExpiredAliases(self):
        # LibraryFileAliases can be removed only after they have expired.
        # If an explicit expiry is set and in recent past (currently up to
        # one week ago), the files hang around.

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in the
        # recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should not remove our
        # example aliases, as one is unreferenced with a NULL expiry and
        # the other is unreferenced with an expiry in the recent past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are still there
        self.ztm.begin()
        # Our recently expired LibraryFileAlias is still available.
        LibraryFileAlias.get(self.f1_id)
Example #2
0
    def test_cronscript(self):
        script_path = os.path.join(
                config.root, 'cronscripts', 'librarian-gc.py'
                )
        cmd = [sys.executable, script_path, '-q']
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        (script_output, _empty) = process.communicate()
        self.failUnlessEqual(
            process.returncode, 0, 'Error: %s' % script_output)
        self.failUnlessEqual(script_output, '')

        # Make sure that our example files have been garbage collected
        self.ztm.begin()
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

        # And make sure stuff that *is* referenced remains
        LibraryFileAlias.get(2)
        cur = cursor()
        cur.execute("SELECT count(*) FROM LibraryFileAlias")
        count = cur.fetchone()[0]
        self.failIfEqual(count, 0)
        cur.execute("SELECT count(*) FROM LibraryFileContent")
        count = cur.fetchone()[0]
        self.failIfEqual(count, 0)
Example #3
0
    def test_DeleteUnreferencedAliases2(self):
        # Don't delete LibraryFileAliases accessed recently

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # We now have two aliases sharing the same content.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertEqual(f1.content, f2.content)

        # Flag one of our LibraryFileAliases as being recently created
        f1.date_created = self.recent_past

        del f1
        del f2
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove
        # the alias with the ID self.f2_id, but the other should stay,
        # as it was accessed recently.
        librariangc.delete_unreferenced_aliases(self.con)

        self.ztm.begin()
        LibraryFileAlias.get(self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)
Example #4
0
    def test_DeleteUnreferencedAliases(self):
        self.ztm.begin()

        # Confirm that our sample files are there.
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        # Grab the content IDs related to these
        # unreferenced LibraryFileAliases
        c1_id = f1.contentID
        c2_id = f2.contentID
        del f1, f2
        self.ztm.abort()

        # Delete unreferenced aliases
        librariangc.delete_unreferenced_aliases(self.con)

        # This should have committed
        self.ztm.begin()

        # Confirm that the LibaryFileContents are still there.
        LibraryFileContent.get(c1_id)
        LibraryFileContent.get(c2_id)

        # But the LibraryFileAliases should be gone
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)
Example #5
0
    def test_MergeDuplicates(self):
        # Merge the duplicates
        librariangc.merge_duplicates(self.con)

        # merge_duplicates should have committed
        self.ztm.begin()
        self.ztm.abort()

        # Confirm that the duplicates have been merged
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.failUnlessEqual(f1.contentID, f2.contentID)
Example #6
0
 def test__getURLForDownload(self):
     # This protected method is used by getFileByAlias. It is supposed to
     # use the internal host and port rather than the external, proxied
     # host and port. This is to provide relief for our own issues with the
     # problems reported in bug 317482.
     #
     # (Set up:)
     client = LibrarianClient()
     alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                               'text/plain')
     config.push(
         'test config',
         textwrap.dedent('''\
             [librarian]
             download_host: example.org
             download_port: 1234
             '''))
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = 'http://example.org:1234/'
         download_url = client._getURLForDownload(alias_id)
         self.assertTrue(
             download_url.startswith(expected_host),
             'expected %s to start with %s' % (download_url, expected_host))
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(  # Prevent a ProgrammingError
             LibrarianClient._getURLForDownload)
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop('test config')
Example #7
0
 def test_restricted_getURLForDownload(self):
     # The RestrictedLibrarianClient should use the
     # restricted_download_host and restricted_download_port, but is
     # otherwise identical to the behaviour of the LibrarianClient discussed
     # and demonstrated above.
     #
     # (Set up:)
     client = RestrictedLibrarianClient()
     alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                               'text/plain')
     config.push(
         'test config',
         textwrap.dedent('''\
             [librarian]
             restricted_download_host: example.com
             restricted_download_port: 5678
             '''))
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = 'http://example.com:5678/'
         download_url = client._getURLForDownload(alias_id)
         self.assertTrue(
             download_url.startswith(expected_host),
             'expected %s to start with %s' % (download_url, expected_host))
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(  # Prevent a ProgrammingError
             RestrictedLibrarianClient._getURLForDownload)
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop('test config')
Example #8
0
    def getAlias(self, aliasid, token, path):
        """Returns a LibraryFileAlias, or raises LookupError.

        A LookupError is raised if no record with the given ID exists
        or if not related LibraryFileContent exists.

        :param token: The token for the file. If None no token is present.
            When a token is supplied, it is looked up with path.
        :param path: The path the request is for, unused unless a token
            is supplied; when supplied it must match the token. The
            value of path is expected to be that from a twisted request.args
            e.g. /foo/bar.
        """
        restricted = self.restricted
        if token and path:
            # with a token and a path we may be able to serve restricted files
            # on the public port.
            store = session_store()
            token_found = store.find(TimeLimitedToken,
                SQL("age(created) < interval '1 day'"),
                TimeLimitedToken.token == token,
                TimeLimitedToken.path==path).is_empty()
            store.reset()
            if token_found:
                raise LookupError("Token stale/pruned/path mismatch")
            else:
                restricted = True
        alias = LibraryFileAlias.selectOne(And(
            LibraryFileAlias.id == aliasid,
            LibraryFileAlias.contentID == LibraryFileContent.q.id,
            LibraryFileAlias.restricted == restricted))
        if alias is None:
            raise LookupError("No file alias with LibraryFileContent")
        return alias
Example #9
0
    def addAlias(self, fileid, filename, mimetype, expires=None):
        """Add an alias, and return its ID.

        If a matching alias already exists, it will return that ID instead.
        """
        return LibraryFileAlias(
            contentID=fileid, filename=filename, mimetype=mimetype,
            expires=expires, restricted=self.restricted).id
Example #10
0
    def test_deleteWellExpiredAliases(self):
        # LibraryFileAlias records that are expired are unlinked from their
        # content.

        # Flag one of our LibraryFileAliases with an expiry date in the past
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure the well expired f1 is still there, but has no content.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assert_(f1.content is None)
        # f2 should still have content, as it isn't flagged for expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assert_(f2.content is not None)
Example #11
0
    def getAlias(self, aliasid, token, path):
        """Returns a LibraryFileAlias, or raises LookupError.

        A LookupError is raised if no record with the given ID exists
        or if not related LibraryFileContent exists.

        :param aliasid: A `LibraryFileAlias` ID.
        :param token: The token for the file. If None no token is present.
            When a token is supplied, it is looked up with path.
        :param path: The path the request is for, unused unless a token
            is supplied; when supplied it must match the token. The
            value of path is expected to be that from a twisted request.args
            e.g. /foo/bar.
        """
        restricted = self.restricted
        if token and path:
            # With a token and a path we may be able to serve restricted files
            # on the public port.
            if isinstance(token, Macaroon):
                # Macaroons have enough other constraints that they don't
                # need to be path-specific; it's simpler and faster to just
                # check the alias ID.
                token_ok = threads.blockingCallFromThread(
                    default_reactor, self._verifyMacaroon, token, aliasid)
            else:
                # The URL-encoding of the path may have changed somewhere
                # along the line, so reencode it canonically. LFA.filename
                # can't contain slashes, so they're safe to leave unencoded.
                # And urllib.quote erroneously excludes ~ from its safe set,
                # while RFC 3986 says it should be unescaped and Chromium
                # forcibly decodes it in any URL that it sees.
                #
                # This needs to match url_path_quote.
                normalised_path = urllib.quote(urllib.unquote(path),
                                               safe='/~+')
                store = session_store()
                token_ok = not store.find(
                    TimeLimitedToken, SQL("age(created) < interval '1 day'"),
                    TimeLimitedToken.token
                    == hashlib.sha256(token).hexdigest(), TimeLimitedToken.path
                    == normalised_path).is_empty()
                store.reset()
            if token_ok:
                restricted = True
            else:
                raise LookupError("Token stale/pruned/path mismatch")
        alias = LibraryFileAlias.selectOne(
            And(LibraryFileAlias.id == aliasid,
                LibraryFileAlias.contentID == LibraryFileContent.q.id,
                LibraryFileAlias.restricted == restricted))
        if alias is None:
            raise LookupError("No file alias with LibraryFileContent")
        return alias
Example #12
0
    def test_ignoreRecentlyExpiredAliases(self):
        # LibraryFileAlias records that have expired recently are not
        # garbage collected.

        # Flag one of our LibraryFileAliases with an expiry date in the
        # recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past  # Within stay of execution.
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure f1 is still there and has content. This ensures that
        # our stay of execution is still working.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assert_(f1.content is not None)
        # f2 should still have content, as it isn't flagged for expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assert_(f2.content is not None)
Example #13
0
    def test_addFile_hashes(self):
        # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
        # LibraryFileContent record.
        data = "i am some data"
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()

        client = LibrarianClient()
        lfa = LibraryFileAlias.get(client.addFile("file", len(data), StringIO(data), "text/plain"))

        self.assertEqual(md5, lfa.content.md5)
        self.assertEqual(sha1, lfa.content.sha1)
        self.assertEqual(sha256, lfa.content.sha256)
Example #14
0
    def _makeDupes(self):
        """Create two duplicate LibraryFileContent entries with one
        LibraryFileAlias each. Return the two LibraryFileAlias ids as a
        tuple.
        """
        # Connect to the database as a user with file upload privileges,
        # in this case the PostgreSQL default user who happens to be an
        # administrator on launchpad development boxes.
        switch_dbuser('testadmin')
        ztm = self.layer.txn

        ztm.begin()
        # Add some duplicate files
        content = 'This is some content'
        f1_id = self.client.addFile(
                'foo.txt', len(content), StringIO(content), 'text/plain',
                )
        f1 = LibraryFileAlias.get(f1_id)
        f2_id = self.client.addFile(
                'foo.txt', len(content), StringIO(content), 'text/plain',
                )
        f2 = LibraryFileAlias.get(f2_id)

        # Make sure the duplicates really are distinct
        self.failIfEqual(f1_id, f2_id)
        self.failIfEqual(f1.contentID, f2.contentID)

        f1.date_created = self.ancient_past
        f2.date_created = self.ancient_past
        f1.content.datecreated = self.ancient_past
        f2.content.datecreated = self.ancient_past

        del f1, f2

        ztm.commit()

        return f1_id, f2_id
Example #15
0
    def test_addFile_hashes(self):
        # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
        # LibraryFileContent record.
        data = 'i am some data'
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()

        client = LibrarianClient()
        lfa = LibraryFileAlias.get(
            client.addFile('file', len(data), StringIO(data), 'text/plain'))

        self.assertEqual(md5, lfa.content.md5)
        self.assertEqual(sha1, lfa.content.sha1)
        self.assertEqual(sha256, lfa.content.sha256)
def prepareHoaryForUploads(test):
    """Prepare ubuntu/hoary to receive uploads.

    Ensure ubuntu/hoary is ready to receive and build new uploads in
    the RELEASE pocket (they are auto-overridden to the 'universe'
    component).
    """
    ubuntu = getUtility(IDistributionSet)['ubuntu']
    hoary = ubuntu['hoary']

    # Allow uploads to the universe component.
    universe = getUtility(IComponentSet)['universe']
    ComponentSelection(distroseries=hoary, component=universe)

    # Create a fake hoary/i386 chroot.
    fake_chroot = LibraryFileAlias.get(1)
    hoary['i386'].addOrUpdateChroot(fake_chroot)

    LaunchpadZopelessLayer.txn.commit()
Example #17
0
    def _getAlias(self, aliasID, secure=False):
        """Retrieve the `LibraryFileAlias` with the given id.

        :param aliasID: A unique ID for the alias.
        :param secure: Controls the behaviour when looking up restricted
            files.  If False restricted files are only permitted when
            self.restricted is True.  See `getURLForAlias`.
        :returns: A `LibraryFileAlias`.
        :raises: `DownloadFailed` if the alias is invalid or
            inaccessible.
        """
        from lp.services.librarian.model import LibraryFileAlias
        from sqlobject import SQLObjectNotFound
        try:
            lfa = LibraryFileAlias.get(aliasID)
        except SQLObjectNotFound:
            lfa = None

        if lfa is None:
            raise DownloadFailed('Alias %d not found' % aliasID)
        self._checkAliasAccess(lfa, secure=secure)

        return lfa
Example #18
0
    def test_DeleteUnreferencedAndWellExpiredAliases(self):
        # LibraryFileAliases can be removed after they have expired

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in the past
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove our
        # example aliases, as one is unreferenced with a NULL expiry and
        # the other is unreferenced with an expiry in the past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are gone
        self.ztm.begin()
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)
Example #19
0
    def _getAlias(self, aliasID, secure=False):
        """Retrieve the `LibraryFileAlias` with the given id.

        :param aliasID: A unique ID for the alias.
        :param secure: Controls the behaviour when looking up restricted
            files.  If False restricted files are only permitted when
            self.restricted is True.  See `getURLForAlias`.
        :returns: A `LibraryFileAlias`.
        :raises: `DownloadFailed` if the alias is invalid or
            inaccessible.
        """
        from lp.services.librarian.model import LibraryFileAlias
        from sqlobject import SQLObjectNotFound
        try:
            lfa = LibraryFileAlias.get(aliasID)
        except SQLObjectNotFound:
            lfa = None

        if lfa is None:
            raise DownloadFailed('Alias %d not found' % aliasID)
        self._checkAliasAccess(lfa, secure=secure)

        return lfa
Example #20
0
 def test__getURLForDownload(self):
     # This protected method is used by getFileByAlias. It is supposed to
     # use the internal host and port rather than the external, proxied
     # host and port. This is to provide relief for our own issues with the
     # problems reported in bug 317482.
     #
     # (Set up:)
     client = LibrarianClient()
     alias_id = client.addFile("sample.txt", 6, StringIO("sample"), "text/plain")
     config.push(
         "test config",
         textwrap.dedent(
             """\
             [librarian]
             download_host: example.org
             download_port: 1234
             """
         ),
     )
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = "http://example.org:1234/"
         download_url = client._getURLForDownload(alias_id)
         self.failUnless(
             download_url.startswith(expected_host), "expected %s to start with %s" % (download_url, expected_host)
         )
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(LibrarianClient._getURLForDownload)  # Prevent a ProgrammingError
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop("test config")
Example #21
0
 def test_restricted_getURLForDownload(self):
     # The RestrictedLibrarianClient should use the
     # restricted_download_host and restricted_download_port, but is
     # otherwise identical to the behavior of the LibrarianClient discussed
     # and demonstrated above.
     #
     # (Set up:)
     client = RestrictedLibrarianClient()
     alias_id = client.addFile("sample.txt", 6, StringIO("sample"), "text/plain")
     config.push(
         "test config",
         textwrap.dedent(
             """\
             [librarian]
             restricted_download_host: example.com
             restricted_download_port: 5678
             """
         ),
     )
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = "http://example.com:5678/"
         download_url = client._getURLForDownload(alias_id)
         self.failUnless(
             download_url.startswith(expected_host), "expected %s to start with %s" % (download_url, expected_host)
         )
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(RestrictedLibrarianClient._getURLForDownload)  # Prevent a ProgrammingError
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop("test config")
Example #22
0
    def addFile(self,
                name,
                size,
                file,
                contentType,
                expires=None,
                debugID=None,
                allow_zero_length=False):
        """Add a file to the librarian.

        :param name: Name to store the file as
        :param size: Size of the file
        :param file: File-like object with the content in it
        :param contentType: mime-type, e.g. text/plain
        :param expires: Expiry time of file. See LibrarianGarbageCollection.
            Set to None to only expire when it is no longer referenced.
        :param debugID: Optional.  If set, causes extra logging for this
            request on the server, which will be marked with the value
            given.
        :param allow_zero_length: If True permit zero length files.
        :returns: aliasID as an integer
        :raises UploadFailed: If the server rejects the upload for some
            reason.
        """
        if file is None:
            raise TypeError('Bad File Descriptor: %s' % repr(file))
        if allow_zero_length:
            min_size = -1
        else:
            min_size = 0
        if size <= min_size:
            raise UploadFailed('Invalid length: %d' % size)

        name = six.ensure_binary(name)

        # Import in this method to avoid a circular import
        from lp.services.librarian.model import LibraryFileContent
        from lp.services.librarian.model import LibraryFileAlias

        self._connect()
        try:
            # Get the name of the database the client is using, so that
            # the server can check that the client is using the same
            # database as the server.
            store = IMasterStore(LibraryFileAlias)
            databaseName = self._getDatabaseName(store)

            # Generate new content and alias IDs.
            # (we'll create rows with these IDs later, but not yet)
            contentID = store.execute(
                "SELECT nextval('libraryfilecontent_id_seq')").get_one()[0]
            aliasID = store.execute(
                "SELECT nextval('libraryfilealias_id_seq')").get_one()[0]

            # Send command
            self._sendLine('STORE %d %s' % (size, name))

            # Send headers
            self._sendHeader('Database-Name', databaseName)
            self._sendHeader('File-Content-ID', contentID)
            self._sendHeader('File-Alias-ID', aliasID)

            if debugID is not None:
                self._sendHeader('Debug-ID', debugID)

            # Send blank line. Do not check for a response from the
            # server when no data will be sent. Otherwise
            # _checkError() might consume the "200" response which
            # is supposed to be read below in this method.
            self._sendLine('', check_for_error_responses=(size > 0))

            # Prepare to the upload the file
            md5_digester = hashlib.md5()
            sha1_digester = hashlib.sha1()
            sha256_digester = hashlib.sha256()
            bytesWritten = 0

            # Read in and upload the file 64kb at a time, by using the two-arg
            # form of iter (see
            # /usr/share/doc/python/html/library/functions.html#iter).
            for chunk in iter(lambda: file.read(1024 * 64), ''):
                self.state.f.write(chunk)
                bytesWritten += len(chunk)
                md5_digester.update(chunk)
                sha1_digester.update(chunk)
                sha256_digester.update(chunk)

            assert bytesWritten == size, (
                'size is %d, but %d were read from the file' %
                (size, bytesWritten))
            self.state.f.flush()

            # Read response
            response = self.state.f.readline().strip()
            if response != '200':
                raise UploadFailed('Server said: ' + response)

            # Add rows to DB
            content = LibraryFileContent(id=contentID,
                                         filesize=size,
                                         sha256=sha256_digester.hexdigest(),
                                         sha1=sha1_digester.hexdigest(),
                                         md5=md5_digester.hexdigest())
            LibraryFileAlias(id=aliasID,
                             content=content,
                             filename=name.decode('UTF-8'),
                             mimetype=contentType,
                             expires=expires,
                             restricted=self.restricted)

            Store.of(content).flush()

            assert isinstance(aliasID, (int, long)), \
                    "aliasID %r not an integer" % (aliasID, )
            return aliasID
        finally:
            self._close()