Beispiel #1
0
class url_source(source):
    """
    A simple URL based source that parses HTML to find references to
    kernel files.

    The documents to scan are registered through add_url(); each one is
    stored in self.urls together with the compiled regex used to pick the
    kernel file links out of that document.
    """
    # matches a path whose last component contains a '.', i.e. one that
    # looks like it ends in a file extension
    _extension_pattern = re.compile(r'.*\.[^/.]+$')

    def __init__(self, database, prefix):
        """
        @param database: handed unchanged to the source base class
        @param prefix: stored as self.prefix (add_url() describes relative
                URLs as being relative to it)
        """
        super(url_source, self).__init__(database)
        self.prefix = prefix
        # list of (url, compiled pattern) tuples filled in by add_url()
        self.urls = []


    def add_url(self, url, pattern):
        """
        Add a URL path to a HTML document with links to kernel files.

        @param url: URL path to a HTML file with links to kernel files
                (can be either an absolute URL or one relative to self.prefix)
        @param pattern: regex pattern to filter kernel files links out of
                all other links found in the HTML document
        """
        # if it does not have an extension then it's a directory and it needs
        # a trailing '/'. NOTE: there are some false positives such as
        # directories named "v2.6" where ".6" will be assumed to be extension.
        # In order for these to work the caller must provide a trailing /
        if url[-1:] != '/' and not self._extension_pattern.match(url):
            url = url + '/'
        self.urls.append((url, re.compile(pattern)))


    @staticmethod
    def _get_item(url):
        """
        Get a database.item object by fetching relevant HTTP information
        from the document pointed to by the given url.

        @param url: URL of the document to inspect
        @return: database.item(url, size, timestamp), where size is the
                Content-Length header as an int (-1 if the header is absent
                or empty) and timestamp is the Date header converted with
                time.mktime() (0 if the header is missing or unparsable);
                None if opening the URL raised IOError
        """
        try:
            response = urllib2.urlopen(url)
        except IOError as err:
            # file is referenced but does not exist
            print('WARNING: %s' % err)
            return None

        # only the headers are needed, so release the connection right away
        try:
            info = response.info()
        finally:
            response.close()

        size = info.get('content-length')
        if size:
            size = int(size)
        else:
            size = -1

        # getdate() returns None when the header is missing or cannot be
        # parsed; time.mktime(None) would raise TypeError, so fall back to 0
        # before converting.
        # NOTE(review): 'date' is normally the time the response was
        # generated, not the file's modification time -- confirm intended.
        date_tuple = info.getdate('date')
        if date_tuple:
            timestamp = int(time.mktime(date_tuple))
        else:
            timestamp = 0

        return database.item(url, size, timestamp)
Beispiel #2
0
    def _setup_merge_dictionary(self):
        """
        Prepare a dict_database and record the mock expectations shared by
        the merge_dictionary tests.

        Stubs get_dictionary and _aquire_lock on a fresh dict_database, then
        records, in order: _aquire_lock returning 3, get_dictionary
        returning self._db_contents, tempfile.mkstemp returning
        (4, 'tmpfile') and os.fdopen(4, 'wb') returning the mock file.

        @return: tuple (db, new_files, all_files) where new_files holds the
                entries to merge and all_files is self._db_contents updated
                with new_files
        """
        # setup
        db = database.dict_database(self._path)
        for method_name in ('get_dictionary', '_aquire_lock'):
            self.god.stub_function(db, method_name)

        new_files = dict(
            (name, database.item(name, size, timestamp))
            for name, size, timestamp in (('file3', 30, 30000),
                                          ('file4', 40, 40000)))
        all_files = dict(self._db_contents)
        all_files.update(new_files)

        # record (the order of these expectations is significant)
        db._aquire_lock.expect_call().and_return(3)
        db.get_dictionary.expect_call().and_return(self._db_contents)
        mkstemp_call = database.tempfile.mkstemp.expect_call(
            prefix=self._path, dir='')
        mkstemp_call.and_return((4, 'tmpfile'))
        fdopen_call = database.os.fdopen.expect_call(4, 'wb')
        fdopen_call.and_return(self._file_instance)

        return db, new_files, all_files
    def _setup_merge_dictionary(self):
        """
        Common fixture for the merge_dictionary tests.

        Creates a dict_database with get_dictionary and _aquire_lock
        stubbed out and records the calls expected up to the point where the
        temporary file has been opened for writing.

        @return: tuple (db, new_files, all_files); all_files is the stored
                contents (self._db_contents) combined with new_files
        """
        # setup
        db = database.dict_database(self._path)
        stub = self.god.stub_function
        stub(db, 'get_dictionary')
        stub(db, '_aquire_lock')

        new_files = {}
        for name, size, timestamp in (('file3', 30, 30000),
                                      ('file4', 40, 40000)):
            new_files[name] = database.item(name, size, timestamp)
        all_files = dict(self._db_contents, **new_files)

        # record
        lock_call = db._aquire_lock.expect_call()
        lock_call.and_return(3)
        read_call = db.get_dictionary.expect_call()
        read_call.and_return(self._db_contents)
        temp_call = database.tempfile.mkstemp.expect_call(prefix=self._path,
                                                          dir='')
        temp_call.and_return((4, 'tmpfile'))
        open_call = database.os.fdopen.expect_call(4, 'wb')
        open_call.and_return(self._file_instance)

        return db, new_files, all_files
Beispiel #4
0
    def _parse_output(self, output, prefix):
        """
        Parse rsync's "ls -l" style output and return a dictionary of
        database.item indexed by the "name" field.
        """
        regex = re.compile(
            '-[rwx-]{9} +(\d+) (\d{4}/\d\d/\d\d \d\d:\d\d:\d\d) (.*)')
        res = {}
        for line in output.splitlines():
            match = regex.match(line)
            if match:
                groups = match.groups()
                timestamp = time.mktime(time.strptime(groups[1],
                                        '%Y/%m/%d %H:%M:%S'))
                if prefix:
                    fname = '%s/%s' % (prefix, groups[2])
                else:
                    fname = groups[2]

                item = database.item(fname, int(groups[0]), int(timestamp))
                res[item.name] = item

        return res
Beispiel #5
0
    def _get_item(url):
        """
        Get a database.item object by fetching relevant HTTP information
        from the document pointed to by the given url.

        @param url: URL of the document to inspect
        @return: database.item(url, size, timestamp), where size is the
                Content-Length header as an int (-1 if the header is absent
                or empty) and timestamp is the Date header converted with
                time.mktime() (0 if the header is missing or unparsable);
                None if opening the URL raised IOError
        """
        try:
            response = urllib2.urlopen(url)
        except IOError as err:
            # file is referenced but does not exist
            print('WARNING: %s' % err)
            return None

        # only the headers are needed, so release the connection right away
        try:
            info = response.info()
        finally:
            response.close()

        size = info.get('content-length')
        if size:
            size = int(size)
        else:
            size = -1

        # getdate() returns None when the header is missing or cannot be
        # parsed; time.mktime(None) would raise TypeError, so fall back to 0
        # before converting.
        # NOTE(review): 'date' is normally the time the response was
        # generated, not the file's modification time -- confirm intended.
        date_tuple = info.getdate('date')
        if date_tuple:
            timestamp = int(time.mktime(date_tuple))
        else:
            timestamp = 0

        return database.item(url, size, timestamp)
Beispiel #6
0
    def get_new_files(self, _stat_func=os.stat):
        """
        Main function, see source.get_new_files().

        Builds a database.item for every entry listed in self._path and
        returns whatever self._get_new_files() makes of that dictionary.

        @param _stat_func: Used for unit testing, if we stub os.stat in the
                unit test then unit test failures get reported confusingly
                because the unit test framework tries to stat() the unit test
                file.
        """
        found = {}
        for entry in os.listdir(self._path):
            entry_path = os.path.join(self._path, entry)
            try:
                info = _stat_func(entry_path)
            except OSError:
                # The entry may have been removed or renamed after the
                # directory was listed; ignore it.
                pass
            else:
                # keyed by the bare entry name, while the item itself
                # carries the joined path
                found[entry] = database.item(entry_path, info.st_size,
                                             int(info.st_mtime))

        return self._get_new_files(found)
Beispiel #7
0
class dict_database_unittest(unittest.TestCase):
    """
    Tests for database.dict_database.

    The cPickle, tempfile and os functions reached through the database
    module are stubbed in setUp(), and each test records the calls it
    expects before exercising the code (record, then playback).
    """
    _path = 'somepath.db'

    # contents the stubbed database is made to report
    _db_contents = dict(
        (name, database.item(name, size, timestamp))
        for name, size, timestamp in (('file1', 10, 10000),
                                      ('file2', 20, 20000)))

    def setUp(self):
        """Create the mock controller and stub the functions under test."""
        self.god = mock.mock_god()

        stub_targets = (
            (database.cPickle, 'load'),
            (database.cPickle, 'dump'),
            (database.tempfile, 'mkstemp'),
            (database.os, 'fdopen'),
            (database.os, 'close'),
            (database.os, 'rename'),
            (database.os, 'unlink'),
        )
        for namespace, func_name in stub_targets:
            self.god.stub_function(namespace, func_name)

        self._open_mock = self.god.create_mock_function('open')
        self._file_instance = self.god.create_mock_class(file, 'file')

    def tearDown(self):
        """Undo every stub installed through self.god."""
        self.god.unstub_all()

    def test_get_dictionary_no_file(self):
        """get_dictionary() yields {} when opening the file raises IOError."""
        # record
        open_call = self._open_mock.expect_call(self._path, 'rb')
        open_call.and_raises(IOError('blah'))

        # playback
        db = database.dict_database(self._path)
        loaded = db.get_dictionary(_open_func=self._open_mock)
        self.assertEqual(loaded, {})

        self.god.check_playback()

    def test_get_dictionary(self):
        """get_dictionary() returns what cPickle.load gives for the file."""
        # record
        open_call = self._open_mock.expect_call(self._path, 'rb')
        open_call.and_return(self._file_instance)
        load_call = database.cPickle.load.expect_call(self._file_instance)
        load_call.and_return(self._db_contents)
        self._file_instance.close.expect_call()

        # playback
        db = database.dict_database(self._path)
        loaded = db.get_dictionary(_open_func=self._open_mock)
        self.assertEqual(loaded, self._db_contents)

        self.god.check_playback()

    def _setup_merge_dictionary(self):
        """
        Shared fixture for the merge_dictionary tests.

        Stubs get_dictionary and _aquire_lock on a new dict_database and
        records the calls expected up to opening the temporary file.

        @return: tuple (db, new_files, all_files); all_files is
                self._db_contents updated with new_files
        """
        # setup
        db = database.dict_database(self._path)
        for method_name in ('get_dictionary', '_aquire_lock'):
            self.god.stub_function(db, method_name)

        new_files = dict(
            (name, database.item(name, size, timestamp))
            for name, size, timestamp in (('file3', 30, 30000),
                                          ('file4', 40, 40000)))
        all_files = dict(self._db_contents)
        all_files.update(new_files)

        # record
        # 3 is the value the tests later expect to be passed to os.close()
        db._aquire_lock.expect_call().and_return(3)
        db.get_dictionary.expect_call().and_return(self._db_contents)
        mkstemp_call = database.tempfile.mkstemp.expect_call(
            prefix=self._path, dir='')
        mkstemp_call.and_return((4, 'tmpfile'))
        fdopen_call = database.os.fdopen.expect_call(4, 'wb')
        fdopen_call.and_return(self._file_instance)

        return db, new_files, all_files

    def test_merge_dictionary(self):
        """
        A successful merge dumps the combined dictionary, closes the file,
        renames 'tmpfile' to the database path and closes descriptor 3.
        """
        db, new_files, all_files = self._setup_merge_dictionary()

        pickle_module = database.cPickle
        pickle_module.dump.expect_call(
            all_files, self._file_instance,
            protocol=pickle_module.HIGHEST_PROTOCOL)
        self._file_instance.close.expect_call()
        database.os.rename.expect_call('tmpfile', self._path)
        database.os.close.expect_call(3)

        # playback
        db.merge_dictionary(new_files)
        self.god.check_playback()

    def test_merge_dictionary_disk_full(self):
        """
        When dump and close both fail, 'tmpfile' is unlinked, descriptor 3
        is closed and merge_dictionary() raises.
        """
        db, new_files, all_files = self._setup_merge_dictionary()
        err = Exception('fail')

        pickle_module = database.cPickle
        dump_call = pickle_module.dump.expect_call(
            all_files, self._file_instance,
            protocol=pickle_module.HIGHEST_PROTOCOL)
        dump_call.and_raises(err)
        close_call = self._file_instance.close.expect_call()
        close_call.and_raises(err)
        database.os.unlink.expect_call('tmpfile')
        database.os.close.expect_call(3)

        # playback
        self.assertRaises(Exception, db.merge_dictionary, new_files)
        self.god.check_playback()