def test_slave_reconnect_after_outage(self):
    '''The slave is again used once it becomes available.'''
    # With pgbouncer down, a request for the slave Store falls back
    # to handing out the master Store.
    self.pgbouncer_fixture.stop()
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIs(master_store, slave_store)
    # Bring the slave back up.  Reconnection is only attempted at the
    # next transaction boundary, so abort first; after that a real,
    # distinct slave Store is handed out again.
    self.pgbouncer_fixture.start()
    transaction.abort()
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)
def test_reportApprovalConflict_sets_error_output_just_once(self):
    # Repeated occurrence of the same approval conflict will not
    # result in repeated setting of error_output.
    series = self.factory.makeProductSeries()
    domain = self.factory.getUniqueString()
    templates = [
        self.factory.makePOTemplate(
            productseries=series, translation_domain=domain)
        for counter in range(3)
        ]
    entry = removeSecurityProxy(
        self.factory.makeTranslationImportQueueEntry())
    entry.reportApprovalConflict(domain, len(templates), templates)
    original_error = entry.error_output
    # Commit so the error output is visible from the slave store.
    transaction.commit()
    # Try reporting the conflict again, with the templates
    # reshuffled to see if reportApprovalConflict can be fooled into
    # thinking it's a different error. Make as sure as we can that
    # entry.error_output is not modified.
    slave_entry = ISlaveStore(entry).get(
        TranslationImportQueueEntry, entry.id)
    slave_entry.setErrorOutput = FakeMethod()
    slave_entry.reportApprovalConflict(
        domain, len(templates), reversed(templates))
    self.assertEqual(original_error, slave_entry.error_output)
    self.assertIn(domain, original_error)
    # setErrorOutput was stubbed out; it must not have been called.
    self.assertEqual(0, slave_entry.setErrorOutput.call_count)
def main(self): """See `LaunchpadScript`.""" # Avoid circular imports. from lp.registry.model.product import Product from lp.registry.model.productseries import ProductSeries errorlog.globalErrorUtility.configure(self.config_name) if self.options.no_fudge: self.fudge_factor = timedelta(0) self.logger.info("Exporting to translations branches.") self.store = ISlaveStore(Product) product_join = Join( ProductSeries, Product, ProductSeries.product == Product.id) productseries = self.store.using(product_join).find( ProductSeries, And( Product.translations_usage == ServiceUsage.LAUNCHPAD, ProductSeries.translations_branch != None)) # Anything deterministic will do, and even that is only for # testing. productseries = productseries.order_by(ProductSeries.id) bzrserver = get_rw_server() bzrserver.start_server() try: self._exportToBranches(productseries) finally: bzrserver.stop_server()
def test_exportToStaleBranch(self):
    # Attempting to export to a stale branch marks it for scanning.
    self.useBzrBranches(direct_database=False)
    exporter = ExportTranslationsToBranch(test_args=[])
    exporter.logger = BufferLogger()
    productseries = self.factory.makeProductSeries()
    db_branch, tree = self.create_branch_and_tree(
        product=productseries.product)
    removeSecurityProxy(productseries).translations_branch = db_branch
    # Make the database's idea of the tip revision stale.
    db_branch.last_mirrored_id = 'stale-id'
    db_branch.last_scanned_id = db_branch.last_mirrored_id
    self.becomeDbUser('translationstobranch')
    self.assertFalse(db_branch.pending_writes)
    self.assertNotEqual(
        db_branch.last_mirrored_id, tree.branch.last_revision())
    # The export code works on a Branch from the slave store. It
    # shouldn't stop the scan request.
    slave_series = ISlaveStore(productseries).get(
        ProductSeries, productseries.id)
    exporter._exportToBranch(slave_series)
    self.assertEqual(
        db_branch.last_mirrored_id, tree.branch.last_revision())
    # The stale branch has been queued for a fresh scan.
    self.assertTrue(db_branch.pending_writes)
    matches = MatchesRegex(
        "(.|\n)*WARNING Skipped .* due to stale DB info, and scheduled a "
        "new scan.")
    self.assertThat(exporter.logger.getLogBuffer(), matches)
def fetch_team_participation_info(log):
    """Fetch people, teams, memberships and participations."""
    # All queries run in batches of 10000 rows against the slave store.
    query = partial(
        execute_long_query, ISlaveStore(TeamParticipation), log, 10000)

    people = dict(query(
        "SELECT id, name FROM Person"
        " WHERE teamowner IS NULL"
        " AND merged IS NULL"))
    teams = dict(query(
        "SELECT id, name FROM Person"
        " WHERE teamowner IS NOT NULL"
        " AND merged IS NULL"))

    # Map each team to the set of its direct, active members.
    team_memberships = defaultdict(set)
    for team, person in query(
            "SELECT team, person FROM TeamMembership"
            " WHERE status in %s" % quote(ACTIVE_STATES)):
        team_memberships[team].add(person)

    # Map each team to everyone participating in it, direct or not.
    team_participations = defaultdict(set)
    for team, person in query("SELECT team, person FROM TeamParticipation"):
        team_participations[team].add(person)

    # Don't hold any locks.
    transaction.commit()

    return people, teams, team_memberships, team_participations
def fetchProjectsForDisplay(self):
    """See `ITranslationGroup`."""
    # Avoid circular imports.
    from lp.registry.model.product import (
        Product,
        ProductWithLicenses,
        )
    join_spec = [
        Product,
        LeftJoin(LibraryFileAlias, LibraryFileAlias.id == Product.iconID),
        LeftJoin(
            LibraryFileContent,
            LibraryFileContent.id == LibraryFileAlias.contentID),
        ]
    selected = (
        Product,
        ProductWithLicenses.composeLicensesColumn(),
        LibraryFileAlias,
        LibraryFileContent,
        )
    rows = ISlaveStore(Product).using(*join_spec).find(
        selected,
        Product.translationgroupID == self.id,
        Product.active == True)
    rows = rows.order_by(Product.displayname)
    # Wrap each product with its licenses.  The icon alias/content
    # columns are selected only to pre-load them in a single query.
    return [
        ProductWithLicenses(product, tuple(licenses))
        for product, licenses, icon_alias, icon_content in rows]
def _put(log, swift_connection, lfc_id, container, obj_name, fs_path):
    """Copy a single file from disk into Swift, verifying checksums.

    Small files are uploaded as a single object; files larger than
    MAX_SWIFT_OBJECT_SIZE are uploaded as numbered segments plus an
    'X-Object-Manifest' object that stitches them together.

    :param log: Logger used to report corruption and cleanup failures.
    :param swift_connection: An open Swift connection.
    :param lfc_id: The LibraryFileContent.id being migrated; its db md5
        is the reference checksum.
    :param container: Target Swift container name.
    :param obj_name: Target Swift object name.
    :param fs_path: Path of the file on the local filesystem.
    :raises AssertionError: if the disk, database and Swift md5 hashes
        do not all agree.
    """
    fs_size = os.path.getsize(fs_path)
    # HashStream computes the md5 of everything read through it.
    fs_file = HashStream(open(fs_path, 'rb'))

    db_md5_hash = ISlaveStore(LibraryFileContent).get(
        LibraryFileContent, lfc_id).md5

    assert hasattr(fs_file, 'tell') and hasattr(fs_file, 'seek'), '''
        File not rewindable
        '''

    if fs_size <= MAX_SWIFT_OBJECT_SIZE:
        swift_md5_hash = swift_connection.put_object(
            container, obj_name, fs_file, fs_size)
        disk_md5_hash = fs_file.hash.hexdigest()
        if not (disk_md5_hash == db_md5_hash == swift_md5_hash):
            log.error(
                "LibraryFileContent({0}) corrupt. "
                "disk md5={1}, db md5={2}, swift md5={3}".format(
                    lfc_id, disk_md5_hash, db_md5_hash, swift_md5_hash))
            # Remove the corrupt upload; best effort only.
            try:
                swift_connection.delete_object(container, obj_name)
            except Exception:
                log.exception('Failed to delete corrupt file from Swift')
            raise AssertionError('md5 mismatch')
    else:
        # Large file upload. Create the segments first, then the
        # manifest. This order prevents partial downloads, and lets us
        # detect interrupted uploads and clean up.
        segment = 0
        while fs_file.tell() < fs_size:
            # The segment name format allows at most 10000 segments.
            assert segment <= 9999, 'Insane number of segments'
            seg_name = '%s/%04d' % (obj_name, segment)
            seg_size = min(fs_size - fs_file.tell(), MAX_SWIFT_OBJECT_SIZE)
            # Wrap the window of the file in its own HashStream so the
            # per-segment md5 can be verified against Swift's answer.
            md5_stream = HashStream(fs_file, length=seg_size)
            swift_md5_hash = swift_connection.put_object(
                container, seg_name, md5_stream, seg_size)
            segment_md5_hash = md5_stream.hash.hexdigest()
            assert swift_md5_hash == segment_md5_hash, (
                "LibraryFileContent({0}) segment {1} upload corrupted".format(
                    lfc_id, segment))
            segment = segment + 1

        disk_md5_hash = fs_file.hash.hexdigest()
        if disk_md5_hash != db_md5_hash:
            # We don't have to delete the uploaded segments, as Librarian
            # Garbage Collection handles this for us.
            log.error(
                "Large LibraryFileContent({0}) corrupt. "
                "disk md5={1}, db_md5={2}".format(
                    lfc_id, disk_md5_hash, db_md5_hash))
            raise AssertionError('md5 mismatch')

        # The manifest object is empty; it only points at the segment
        # prefix so Swift serves the concatenation of all segments.
        manifest = '{0}/{1}/'.format(
            urllib.quote(container), urllib.quote(obj_name))
        manifest_headers = {'X-Object-Manifest': manifest}
        swift_connection.put_object(
            container, obj_name, '', 0, headers=manifest_headers)
def test_startup_with_no_slave(self):
    '''An attempt is made for the first time to connect to a slave.'''
    # The slave is down before the process ever connects to it.
    self.pgbouncer_fixture.stop()
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    # The master and slave Stores are the same object.
    self.assertIs(master_store, slave_store)
def get_distroseries_pofiles(self, series, date=None, component=None,
                             languagepack=None):
    """See `IVPOExport`.

    Selects `POFiles` based on the 'series', last modified 'date',
    archive 'component', and whether it belongs to a 'languagepack'
    """
    tables = [
        POFile,
        POTemplate,
        ]
    # Base conditions: current templates in the given series, joined
    # to their POFiles.
    conditions = [
        POTemplate.distroseries == series,
        POTemplate.iscurrent == True,
        POFile.potemplate == POTemplate.id,
        ]
    if date is not None:
        # Only include files whose template or translations changed
        # after the given date.
        conditions.append(Or(
            POTemplate.date_last_updated > date,
            POFile.date_changed > date))
    if component is not None:
        # Restrict to templates whose source package is currently
        # published in the named component of the series' main archive.
        tables.extend([
            SourcePackagePublishingHistory,
            Component,
            ])
        conditions.extend([
            SourcePackagePublishingHistory.distroseries == series,
            SourcePackagePublishingHistory.component == Component.id,
            POTemplate.sourcepackagename ==
                SourcePackagePublishingHistory.sourcepackagenameID,
            Component.name == component,
            SourcePackagePublishingHistory.dateremoved == None,
            SourcePackagePublishingHistory.archive == series.main_archive,
            ])
    if languagepack:
        conditions.append(POTemplate.languagepack == True)

    # Use the slave store. We may want to write to the distroseries
    # to register a language pack, but not to the translation data
    # we retrieve for it.
    query = ISlaveStore(POFile).using(*tables).find(
        POFile, And(*conditions))

    # Order by POTemplate. Caching in the export scripts can be
    # much more effective when consecutive POFiles belong to the
    # same POTemplate, e.g. they'll have the same POTMsgSets.
    sort_list = [POFile.potemplateID, POFile.languageID]
    return query.order_by(sort_list).config(distinct=True)
def determineCandidates(self):
    """Find all distinct BugTask targets with their cached names.

    Returns a list of (target, set_of_cached_names) pairs, where target
    is a tuple of IDs from the columns in target_columns.
    """
    # Read-only work; the slave store is good enough.
    rows = ISlaveStore(BugTask).find(target_columns).config(distinct=True)
    names_by_target = defaultdict(set)
    for row in rows:
        # The last column is the cached name; the rest form the target.
        names_by_target[row[:-1]].add(row[-1])
    return list(names_by_target.iteritems())
def test_slave_shutdown_between_transactions(self):
    '''Slave is shutdown in between transactions.'''
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)
    transaction.abort()
    self.pgbouncer_fixture.stop()
    # The process doesn't notice the slave going down, and things
    # will fail the next time the slave is used.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)
    self.assertRaises(DisconnectionError, slave_store.get, Person, 1)
    # But now it has been discovered the socket is no longer
    # connected to anything, next transaction we get a master
    # Store when we ask for a slave.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIs(master_store, slave_store)
def test_slave_shutdown_during_transaction(self):
    '''Slave is shutdown while running, but we can recover.'''
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIsNot(master_store, slave_store)
    self.pgbouncer_fixture.stop()
    # The transaction fails if the slave store is used. Robust
    # processes will handle this and retry (even if just means exit
    # and wait for the next scheduled invocation).
    self.assertRaises(DisconnectionError, slave_store.get, Person, 1)
    transaction.abort()
    # But in the next transaction, we get the master Store if we ask
    # for the slave Store so we can continue.
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    self.assertIs(master_store, slave_store)
def search(self, text):
    """See `ILanguageSet`."""
    # An empty search term yields no result set at all.
    if not text:
        return None
    text = ensure_unicode(text).lower()
    # Match case-insensitively against either the language code or
    # its English name.
    code_match = Language.code.lower().contains_string(text)
    name_match = Language.englishname.lower().contains_string(text)
    return ISlaveStore(Language).find(
        Language, Or(code_match, name_match)).order_by(
            Language.englishname)
def test_addFile_uses_master(self):
    # addFile is a write operation, so it should always use the
    # master store, even if the slave is the default. Close the
    # slave store and try to add a file, verifying that the master
    # is used.
    client = LibrarianClient()
    ISlaveStore(LibraryFileAlias).close()
    with SlaveDatabasePolicy():
        alias_id = client.addFile(
            'sample.txt', 6, StringIO('sample'), 'text/plain')
    # Commit so the upload is visible to the Librarian.
    transaction.commit()
    f = client.getFileByAlias(alias_id)
    self.assertEqual(f.read(), 'sample')
def _getHeadRequest(self):
    """Return oldest request on the queue."""
    # Due to replication lag, it's possible that the slave store
    # still has copies of requests that have already been completed
    # and deleted from the master store. So first get the oldest
    # request that is "live," i.e. still present on the master
    # store.
    oldest_live = self._getOldestLiveRequest()
    if oldest_live is None:
        return None
    # Then fetch that same request from the slave store, where the
    # export will actually be read.
    return ISlaveStore(POExportRequest).find(
        POExportRequest,
        POExportRequest.id == oldest_live.id).one()
def test_can_shutdown_slave_only(self):
    '''Confirm that this TestCase's test infrastructure works as needed.
    '''
    master_store = IMasterStore(Person)
    slave_store = ISlaveStore(Person)
    # Both Stores work when pgbouncer is up.
    master_store.get(Person, 1)
    slave_store.get(Person, 1)
    # Slave Store breaks when pgbouncer is torn down. Master Store
    # is fine.
    self.pgbouncer_fixture.stop()
    master_store.get(Person, 2)
    self.assertRaises(DisconnectionError, slave_store.get, Person, 2)
def test_load_with_store(self):
    # load() can use an alternative store.
    db_object = self.factory.makeComponent()
    # Commit so the database object is available in both master
    # and slave stores.
    transaction.commit()
    # Master store.
    master_store = IMasterStore(db_object)
    [db_object_from_master] = bulk.load(
        Component, [db_object.id], store=master_store)
    self.assertEqual(Store.of(db_object_from_master), master_store)
    # Slave store.
    slave_store = ISlaveStore(db_object)
    [db_object_from_slave] = bulk.load(
        Component, [db_object.id], store=slave_store)
    self.assertEqual(Store.of(db_object_from_slave), slave_store)
def check_teamparticipation_circular(log):
    """Check circular references.

    There can be no mutual participation between teams.
    """
    # Self-join TeamParticipation against itself to find pairs of
    # teams that each participate in the other.
    query = """
        SELECT tp.team, tp2.team
        FROM TeamParticipation AS tp, TeamParticipation AS tp2
        WHERE tp.team = tp2.person
        AND tp.person = tp2.team
        AND tp.id != tp2.id;
        """
    found = list(ISlaveStore(TeamParticipation).execute(query))
    if len(found) > 0:
        raise LaunchpadScriptFailure(
            "Circular references found: %s" % found)
def getBuildQueueSizes(self):
    """See `IBuilderSet`."""
    # NULL virtualized is treated as virtualized.
    virt_column = Coalesce(BuildQueue.virtualized, True)
    queue_rows = ISlaveStore(BuildQueue).find(
        (Count(), Sum(BuildQueue.estimated_duration), Processor,
         virt_column),
        Processor.id == BuildQueue.processorID,
        BuildQueue.status == BuildQueueStatus.WAITING).group_by(
            Processor, virt_column)
    # Bucket the per-processor totals by virtualization.
    sizes = {'virt': {}, 'nonvirt': {}}
    for count, duration, processor, virtualized in queue_rows:
        bucket = 'nonvirt' if virtualized is False else 'virt'
        sizes[bucket][processor.name] = (count, duration)
    return sizes
def getProductsWithInfo(num_products=None):
    """See `IBranchCloud`."""
    author_count = Func("distinct", RevisionCache.revision_author_id)
    commits = Alias(Count(RevisionCache.revision_id))
    # Only consider commits from the last 30 days.
    epoch = datetime.now(pytz.UTC) - timedelta(days=30)
    # It doesn't matter if this query is even a whole day out of date, so
    # use the slave store.
    result = ISlaveStore(RevisionCache).find(
        (Product.name, commits, Count(author_count),
         Max(RevisionCache.revision_date)),
        RevisionCache.product == Product.id,
        Not(RevisionCache.private),
        RevisionCache.revision_date >= epoch)
    result = result.group_by(Product.name).order_by(Desc(commits))
    if num_products:
        result.config(limit=num_products)
    return result
def test_gen_reload_queries_with_mixed_stores(self):
    # gen_reload_queries() returns one query for each distinct
    # store even for the same object type.
    db_object = self.factory.makeComponent()
    db_object_type = bulk.get_type(db_object)
    # Commit so the database object is available in both master
    # and slave stores.
    transaction.commit()
    # The same row loaded via two different stores gives two
    # distinct objects.
    db_objects = set(
        (IMasterStore(db_object).get(db_object_type, db_object.id),
         ISlaveStore(db_object).get(db_object_type, db_object.id)))
    db_queries = list(bulk.gen_reload_queries(db_objects))
    # One query per store.  (Use assertEqual; failUnlessEqual is a
    # deprecated unittest alias.)
    self.assertEqual(2, len(db_queries))
    db_objects_loaded = set()
    for db_query in db_queries:
        objects = set(db_query)
        # None of these objects should have been loaded before.
        self.assertEqual(
            set(), objects.intersection(db_objects_loaded))
        db_objects_loaded.update(objects)
    # Between them, the queries must return exactly the objects we
    # started with.
    self.assertEqual(db_objects, db_objects_loaded)
def rough_length(self):
    """See `IRangeFactory`."""
    # Avoid circular imports.
    from lp.services.librarian.model import LibraryFileAlias
    # get_select_expr() requires at least one column as a parameter.
    # getOrderBy() already knows about columns that can appear
    # in the result set, so let's use them. Moreover, for SELECT
    # DISTINCT queries, each column used for sorting must appear
    # in the result.
    if self.empty_resultset:
        return 0
    columns = [plain_expression(column) for column in self.getOrderBy()]
    select = removeSecurityProxy(
        self.plain_resultset).get_select_expr(*columns)
    # Ask the database planner for its row estimate instead of
    # counting, which would be expensive.
    explain = 'EXPLAIN ' + convert_storm_clause_to_string(select)
    result = ISlaveStore(LibraryFileAlias).execute(explain)
    # Raw string: "\d" and "\s" are regex escapes, not string escapes.
    _rows_re = re.compile(r"rows=(\d+)\swidth=")
    first_line = result.get_one()[0]
    match = _rows_re.search(first_line)
    if match is None:
        raise RuntimeError(
            "Unexpected EXPLAIN output %s" % repr(first_line))
    return int(match.group(1))
def check_preconditions(options):
    """Try to ensure that it's safe to run.

    This script must not run on a production server, or anything
    remotely like it.
    """
    store = ISlaveStore(ComponentSelection)
    # Just a guess, but dev systems aren't likely to have ids this high
    # in this table. Production data does.
    looks_like_production = (
        get_max_id(store, "TranslationMessage") >= 1000000)
    if looks_like_production and not options.force:
        raise DoNotRunOnProduction(
            "Refusing to delete Ubuntu data unless you --force me.")

    # For some configs it's just absolutely clear this script shouldn't
    # run. Don't even accept --force there.
    forbidden_configs = re.compile('(edge|lpnet|production)')
    current_config = os.getenv('LPCONFIG', 'an unknown config')
    if forbidden_configs.match(current_config):
        raise DoNotRunOnProduction(
            "I won't delete Ubuntu data on %s and you can't --force me."
            % current_config)
def fetchDistrosForDisplay(self):
    """See `ITranslationGroup`."""
    # Avoid circular imports.
    from lp.registry.model.distribution import Distribution
    join_spec = [
        Distribution,
        LeftJoin(
            LibraryFileAlias,
            LibraryFileAlias.id == Distribution.iconID),
        LeftJoin(
            LibraryFileContent,
            LibraryFileContent.id == LibraryFileAlias.contentID),
        ]
    selected = (
        Distribution,
        LibraryFileAlias,
        LibraryFileContent,
        )
    rows = ISlaveStore(Distribution).using(*join_spec).find(
        selected,
        Distribution.translationgroupID == self.id).order_by(
            Distribution.display_name)
    # Only the Distribution itself is of interest; the icon columns
    # are selected purely to pre-load them.
    return DecoratedResultSet(rows, operator.itemgetter(0))
def getRequest(self):
    """See `IPOExportRequestSet`."""
    # Exports happen off the slave store. To ensure that export
    # does not happen until requests have been replicated to the
    # slave, they are read primarily from the slave even though they
    # are deleted on the master afterwards.
    head = self._getHeadRequest()
    if head is None:
        return None, None, None, None

    # Gather every request in the same batch as the head request:
    # same person, format and creation time.
    requests = ISlaveStore(POExportRequest).find(
        POExportRequest,
        POExportRequest.person == head.person,
        POExportRequest.format == head.format,
        POExportRequest.date_created == head.date_created).order_by(
            POExportRequest.potemplateID)

    request_ids = []
    sources = []
    for request in requests:
        request_ids.append(request.id)
        # Each request targets either a POFile or a whole template.
        sources.append(request.pofile or request.potemplate)

    return head.person, sources, head.format, request_ids
def fetchProjectGroupsForDisplay(self):
    """See `ITranslationGroup`."""
    # Avoid circular imports.
    from lp.registry.model.projectgroup import ProjectGroup
    join_spec = [
        ProjectGroup,
        LeftJoin(
            LibraryFileAlias,
            LibraryFileAlias.id == ProjectGroup.iconID),
        LeftJoin(
            LibraryFileContent,
            LibraryFileContent.id == LibraryFileAlias.contentID),
        ]
    selected = (
        ProjectGroup,
        LibraryFileAlias,
        LibraryFileContent,
        )
    rows = ISlaveStore(ProjectGroup).using(*join_spec).find(
        selected,
        ProjectGroup.translationgroupID == self.id,
        ProjectGroup.active == True).order_by(ProjectGroup.display_name)
    # Only the ProjectGroup itself is of interest; the icon columns
    # are selected purely to pre-load them.
    return DecoratedResultSet(rows, operator.itemgetter(0))
def test_master_slave_fast_downtime_rollout(self):
    '''Parts of your app can keep working during a fast downtime update.
    '''
    # Everything is running happily.
    master_store = IMasterStore(Person)
    self.assertTrue(self.store_is_master(master_store))
    self.assertTrue(self.store_is_working(master_store))
    slave_store = ISlaveStore(Person)
    self.assertTrue(self.store_is_slave(slave_store))
    self.assertTrue(self.store_is_working(slave_store))

    # But fast downtime is about to happen.

    # Replication is stopped on the slave, and lag starts
    # increasing.

    # All connections to the master are killed so database schema
    # updates can be applied.
    self.pgbouncer_cur.execute('DISABLE %s' % self.master_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.master_dbname)

    # Of course, slave connections are unaffected.
    self.assertTrue(self.store_is_working(slave_store))

    # But attempts to use a master store will fail.
    self.assertFalse(self.store_is_working(master_store))
    transaction.abort()

    # After schema updates have been made to the master, it is
    # reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.master_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.master_dbname)

    # And the slaves taken down, and replication reenabled so the
    # schema updates can replicate.
    self.pgbouncer_cur.execute('DISABLE %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.slave_dbname)

    # The master store is working again.
    master_store = IMasterStore(Person)
    self.assertTrue(self.store_is_master(master_store))
    self.assertTrue(self.store_is_working(master_store))

    # The next attempt at accessing the slave store will fail
    # with a DisconnectionError.
    slave_store = ISlaveStore(Person)
    self.assertTrue(self.store_is_slave(slave_store))
    self.assertRaises(
        DisconnectionError, slave_store.execute, 'SELECT TRUE')
    transaction.abort()

    # But if we handle that and retry, we can continue.
    # Now the failed connection has been detected, the next Store
    # we are handed is a master Store instead of a slave.
    slave_store = ISlaveStore(Person)
    self.assertTrue(self.store_is_master(slave_store))
    self.assertTrue(self.store_is_working(slave_store))

    # Once replication has caught up, the slave is reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.slave_dbname)

    # And next transaction, we are back to normal.
    transaction.abort()
    master_store = IMasterStore(Person)
    self.assertTrue(self.store_is_master(master_store))
    self.assertTrue(self.store_is_working(master_store))
    slave_store = ISlaveStore(Person)
    self.assertTrue(self.store_is_slave(slave_store))
    self.assertTrue(self.store_is_working(slave_store))
def get_stacked_branches(): """Iterate over all branches that, according to the db, are stacked.""" # Avoiding circular import. from lp.code.model.branch import Branch return ISlaveStore(Branch).find(Branch, Not(Branch.stacked_on == None))
def test_slave_only_fast_downtime_rollout(self):
    '''You can always access a working slave store during fast downtime.
    '''
    # Everything is running happily.
    store = ISlaveStore(Person)
    original_store = store
    self.assertTrue(self.store_is_working(store))
    self.assertTrue(self.store_is_slave(store))

    # But fast downtime is about to happen.

    # Replication is stopped on the slave, and lag starts
    # increasing.

    # All connections to the master are killed so database schema
    # updates can be applied.
    self.pgbouncer_cur.execute('DISABLE %s' % self.master_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.master_dbname)

    # Of course, slave connections are unaffected.
    self.assertTrue(self.store_is_working(store))

    # After schema updates have been made to the master, it is
    # reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.master_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.master_dbname)

    # And the slaves taken down, and replication reenabled so the
    # schema updates can replicate.
    self.pgbouncer_cur.execute('DISABLE %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('KILL %s' % self.slave_dbname)

    # The next attempt at accessing the slave store will fail
    # with a DisconnectionError.
    self.assertRaises(DisconnectionError, store.execute, 'SELECT TRUE')
    transaction.abort()

    # But if we handle that and retry, we can continue.
    # Now the failed connection has been detected, the next Store
    # we are handed is a master Store instead of a slave.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_master(store))
    self.assertIsNot(ISlaveStore(Person), original_store)

    # But alas, it might not work the first transaction. If it has
    # been earlier, its connection was killed by pgbouncer earlier
    # but it hasn't noticed yet.
    self.assertFalse(self.store_is_working(store))
    transaction.abort()

    # Next retry attempt, everything is fine using the master
    # connection, even though our code only asked for a slave.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_master(store))
    self.assertTrue(self.store_is_working(store))

    # The original Store is busted though. You cannot reuse Stores
    # across transaction bounderies because you might end up using
    # the wrong Store.
    self.assertFalse(self.store_is_working(original_store))
    transaction.abort()

    # Once replication has caught up, the slave is reenabled.
    self.pgbouncer_cur.execute('RESUME %s' % self.slave_dbname)
    self.pgbouncer_cur.execute('ENABLE %s' % self.slave_dbname)

    # And next transaction, we are back to normal.
    store = ISlaveStore(Person)
    self.assertTrue(self.store_is_working(store))
    self.assertTrue(self.store_is_slave(store))
    self.assertIs(original_store, store)
def to_swift(log, start_lfc_id=None, end_lfc_id=None, remove_func=False):
    '''Copy a range of Librarian files from disk into Swift.

    start and end identify the range of LibraryFileContent.id to
    migrate (inclusive).

    If remove_func is set, it is called for every file after being
    copied into Swift.
    '''
    swift_connection = connection_pool.get()
    fs_root = os.path.abspath(config.librarian_server.root)

    if start_lfc_id is None:
        start_lfc_id = 1
    if end_lfc_id is None:
        # Maximum id capable of being stored on the filesystem - ffffffff
        end_lfc_id = 0xffffffff

    log.info("Walking disk store {0} from {1} to {2}, inclusive".format(
        fs_root, start_lfc_id, end_lfc_id))

    # On-disk paths encode the id as hex, so the id range maps to a
    # lexicographic path range.
    start_fs_path = filesystem_path(start_lfc_id)
    end_fs_path = filesystem_path(end_lfc_id)

    # Walk the Librarian on disk file store, searching for matching
    # files that may need to be copied into Swift. We need to follow
    # symlinks as they are being used span disk partitions.
    for dirpath, dirnames, filenames in scandir.walk(
            fs_root, followlinks=True):

        # Don't recurse if we know this directory contains no matching
        # files.
        if (start_fs_path[:len(dirpath)] > dirpath
                or end_fs_path[:len(dirpath)] < dirpath):
            dirnames[:] = []
            continue
        else:
            # We need to descend in order, making it possible to resume
            # an aborted job.
            dirnames.sort()

        log.debug('Scanning {0} for matching files'.format(dirpath))

        _filename_re = re.compile('^[0-9a-f]{2}$')

        for filename in sorted(filenames):
            fs_path = os.path.join(dirpath, filename)

            # Skip any files with names that are not two hex digits.
            # This is noise in the filesystem database.
            if _filename_re.match(filename) is None:
                log.debug('Skipping noise %s' % fs_path)
                continue

            if fs_path < start_fs_path:
                continue
            if fs_path > end_fs_path:
                break

            # Skip files which have been modified recently, as they
            # may be uploads still in progress.
            if os.path.getmtime(fs_path) > time.time() - ONE_DAY:
                log.debug('Skipping recent upload %s' % fs_path)
                continue

            # Reverse engineer the LibraryFileContent.id from the
            # file's path. Warn about and skip bad filenames.
            rel_fs_path = fs_path[len(fs_root) + 1:]
            hex_lfc = ''.join(rel_fs_path.split('/'))
            if len(hex_lfc) != 8:
                log.warning(
                    'Filename length fail, skipping {0}'.format(fs_path))
                continue
            try:
                lfc = int(hex_lfc, 16)
            except ValueError:
                log.warning(
                    'Invalid hex fail, skipping {0}'.format(fs_path))
                continue

            log.debug('Found {0} ({1})'.format(lfc, filename))

            # Files with no database record are orphans; leave them
            # for other tooling to clean up.
            if ISlaveStore(LibraryFileContent).get(
                    LibraryFileContent, lfc) is None:
                log.info("{0} exists on disk but not in the db".format(
                    lfc))
                continue

            container, obj_name = swift_location(lfc)

            # Ensure the target container exists.
            try:
                quiet_swiftclient(
                    swift_connection.head_container, container)
                log.debug2('{0} container already exists'.format(
                    container))
            except swiftclient.ClientException as x:
                if x.http_status != 404:
                    raise
                log.info('Creating {0} container'.format(container))
                swift_connection.put_container(container)

            # If the object already exists in Swift, sanity-check its
            # size (manifests are exempt: their size is 0) rather than
            # re-uploading.
            try:
                headers = quiet_swiftclient(
                    swift_connection.head_object, container, obj_name)
                log.debug("{0} already exists in Swift({1}, {2})".format(
                    lfc, container, obj_name))
                if ('X-Object-Manifest' not in headers and
                        int(headers['content-length'])
                        != os.path.getsize(fs_path)):
                    raise AssertionError(
                        '{0} has incorrect size in Swift'.format(lfc))
            except swiftclient.ClientException as x:
                if x.http_status != 404:
                    raise
                log.info('Putting {0} into Swift ({1}, {2})'.format(
                    lfc, container, obj_name))
                _put(log, swift_connection, lfc, container, obj_name,
                     fs_path)

            if remove_func:
                remove_func(fs_path)