def close_account(username, log):
    """Close a person's account.

    Scrub all personally-identifiable information for the person matching
    ``username`` (looked up by Launchpad name or, case-insensitively, by
    email address), leaving only a placeholder 'removed<id>' row plus an
    auditable trail, then verify no disallowed references remain.

    Return True on success.  Raise LaunchpadScriptFailure if the user does
    not exist, is a team, or is still referenced by tables that are not in
    the skip list.
    """
    store = IMasterStore(Person)
    janitor = getUtility(ILaunchpadCelebrities).janitor
    cur = cursor()
    # Every FK chain that (directly or indirectly) references person.id;
    # used both to typo-check the skip list and for the final audit.
    references = list(postgresql.listReferences(cur, 'person', 'id'))
    postgresql.check_indirect_references(references)

    # Find the person by name, or failing that by email address
    # (case-insensitive).
    person = store.using(
        Person,
        LeftJoin(EmailAddress, Person.id == EmailAddress.personID)).find(
            Person,
            Or(Person.name == username,
               Lower(EmailAddress.email) == Lower(username))).one()
    if person is None:
        raise LaunchpadScriptFailure("User %s does not exist" % username)
    person_name = person.name

    # We don't do teams
    if person.is_team:
        raise LaunchpadScriptFailure("%s is a team" % person_name)

    log.info("Closing %s's account" % person_name)

    def table_notification(table):
        # Progress-logging helper, called before each table is touched.
        log.debug("Handling the %s table" % table)

    # All names starting with 'removed' are blacklisted, so this will always
    # succeed.
    new_name = 'removed%d' % person.id

    # Some references can safely remain in place and link to the cleaned-out
    # Person row.
    skip = {
        # These references express some kind of audit trail.  The actions in
        # question still happened, and in some cases the rows may still have
        # functional significance (e.g. subscriptions or access grants), but
        # we no longer identify the actor.
        ('accessartifactgrant', 'grantor'),
        ('accesspolicygrant', 'grantor'),
        ('binarypackagepublishinghistory', 'removed_by'),
        ('branch', 'registrant'),
        ('branchmergeproposal', 'merge_reporter'),
        ('branchmergeproposal', 'merger'),
        ('branchmergeproposal', 'queuer'),
        ('branchmergeproposal', 'registrant'),
        ('branchmergeproposal', 'reviewer'),
        ('branchsubscription', 'subscribed_by'),
        ('bug', 'owner'),
        ('bug', 'who_made_private'),
        ('bugactivity', 'person'),
        ('bugnomination', 'decider'),
        ('bugnomination', 'owner'),
        ('bugtask', 'owner'),
        ('bugsubscription', 'subscribed_by'),
        ('codeimport', 'owner'),
        ('codeimport', 'registrant'),
        ('codeimportevent', 'person'),
        ('faq', 'last_updated_by'),
        ('featureflagchangelogentry', 'person'),
        ('gitactivity', 'changee'),
        ('gitactivity', 'changer'),
        ('gitrepository', 'registrant'),
        ('gitrule', 'creator'),
        ('gitrulegrant', 'grantor'),
        ('gitsubscription', 'subscribed_by'),
        ('message', 'owner'),
        ('messageapproval', 'disposed_by'),
        ('messageapproval', 'posted_by'),
        ('packagecopyrequest', 'requester'),
        ('packagediff', 'requester'),
        ('packageupload', 'signing_key_owner'),
        ('personlocation', 'last_modified_by'),
        ('persontransferjob', 'major_person'),
        ('persontransferjob', 'minor_person'),
        ('poexportrequest', 'person'),
        ('pofile', 'lasttranslator'),
        ('pofiletranslator', 'person'),
        ('product', 'registrant'),
        ('question', 'answerer'),
        ('questionreopening', 'answerer'),
        ('questionreopening', 'reopener'),
        ('snapbuild', 'requester'),
        ('sourcepackagepublishinghistory', 'creator'),
        ('sourcepackagepublishinghistory', 'removed_by'),
        ('sourcepackagepublishinghistory', 'sponsor'),
        ('sourcepackagerecipebuild', 'requester'),
        ('sourcepackagerelease', 'creator'),
        ('sourcepackagerelease', 'maintainer'),
        ('sourcepackagerelease', 'signing_key_owner'),
        ('specification', 'approver'),
        ('specification', 'completer'),
        ('specification', 'drafter'),
        ('specification', 'goal_decider'),
        ('specification', 'goal_proposer'),
        ('specification', 'last_changed_by'),
        ('specification', 'starter'),
        ('structuralsubscription', 'subscribed_by'),
        ('teammembership', 'acknowledged_by'),
        ('teammembership', 'proposed_by'),
        ('teammembership', 'reviewed_by'),
        ('translationimportqueueentry', 'importer'),
        ('translationmessage', 'reviewer'),
        ('translationmessage', 'submitter'),
        ('translationrelicensingagreement', 'person'),
        ('usertouseremail', 'recipient'),
        ('usertouseremail', 'sender'),
        ('xref', 'creator'),
        # This is maintained by trigger functions and a garbo job.  It
        # doesn't need to be updated immediately.
        ('bugsummary', 'viewed_by'),
        # XXX cjwatson 2019-05-02 bug=1827399: This is suboptimal because it
        # does retain some personal information, but it's currently hard to
        # deal with due to the size and complexity of references to it.  We
        # can hopefully provide a garbo job for this eventually.
        ('revisionauthor', 'person'),
        }

    # Guard against typos: every skip entry must name a real FK reference
    # to Person.
    reference_names = {
        (src_tab, src_col) for src_tab, src_col, _, _, _, _ in references}
    for src_tab, src_col in skip:
        if (src_tab, src_col) not in reference_names:
            raise AssertionError(
                "%s.%s is not a Person reference; possible typo?"
                % (src_tab, src_col))

    # XXX cjwatson 2018-11-29: Registrants could possibly be left as-is, but
    # perhaps we should pretend that the registrant was ~registry in that
    # case instead?

    # Remove the EmailAddress.  This is the most important step, as
    # people requesting account removal seem to primarily be interested
    # in ensuring we no longer store this information.
    table_notification('EmailAddress')
    store.find(EmailAddress, EmailAddress.personID == person.id).remove()

    # Clean out personal details from the Person table
    table_notification('Person')
    person.display_name = 'Removed by request'
    person.name = new_name
    person.homepage_content = None
    person.icon = None
    person.mugshot = None
    person.hide_email_addresses = False
    person.registrant = None
    person.logo = None
    person.creation_rationale = PersonCreationRationale.UNKNOWN
    person.creation_comment = None

    # Keep the corresponding PersonSettings row, but reset everything to the
    # defaults.
    table_notification('PersonSettings')
    store.find(PersonSettings, PersonSettings.personID == person.id).set(
        selfgenerated_bugnotifications=DEFAULT,
        # XXX cjwatson 2018-11-29: These two columns have NULL defaults, but
        # perhaps shouldn't?
        expanded_notification_footers=False,
        require_strong_email_authentication=False)
    skip.add(('personsettings', 'person'))

    # Remove almost everything from the Account row and the corresponding
    # OpenIdIdentifier rows, preserving only a minimal audit trail.
    if person.account is not None:
        table_notification('Account')
        account = removeSecurityProxy(person.account)
        account.displayname = 'Removed by request'
        account.creation_rationale = AccountCreationRationale.UNKNOWN
        person.setAccountStatus(
            AccountStatus.CLOSED, janitor, "Closed using close-account.")

        table_notification('OpenIdIdentifier')
        store.find(
            OpenIdIdentifier,
            OpenIdIdentifier.account_id == account.id).remove()

    # Reassign their bugs
    table_notification('BugTask')
    store.find(BugTask, BugTask.assigneeID == person.id).set(assigneeID=None)

    # Reassign questions assigned to the user, and close all their questions
    # in non-final states since nobody else can.
    table_notification('Question')
    store.find(Question, Question.assigneeID == person.id).set(assigneeID=None)
    owned_non_final_questions = store.find(
        Question, Question.ownerID == person.id,
        Question.status.is_in([
            QuestionStatus.OPEN,
            QuestionStatus.NEEDSINFO,
            QuestionStatus.ANSWERED,
            ]))
    owned_non_final_questions.set(
        status=QuestionStatus.SOLVED,
        whiteboard=(
            'Closed by Launchpad due to owner requesting account removal'))
    skip.add(('question', 'owner'))

    # Remove rows from tables in simple cases in the given order
    removals = [
        # Trash their email addresses.  People who request complete account
        # removal would be unhappy if they reregistered with their old email
        # address and this resurrected their deleted account, as the email
        # address is probably the piece of data we store that they were most
        # concerned with being removed from our systems.
        # NOTE(review): EmailAddress rows were already removed above via the
        # store; this entry looks redundant (but harmless) — confirm.
        ('EmailAddress', 'person'),

        # Trash their codes of conduct and GPG keys
        ('SignedCodeOfConduct', 'owner'),
        ('GpgKey', 'owner'),

        # Subscriptions and notifications
        ('BranchSubscription', 'person'),
        ('BugMute', 'person'),
        ('BugNotificationRecipient', 'person'),
        ('BugSubscription', 'person'),
        ('BugSubscriptionFilterMute', 'person'),
        ('GitSubscription', 'person'),
        ('MailingListSubscription', 'person'),
        ('QuestionSubscription', 'person'),
        ('SpecificationSubscription', 'person'),
        ('StructuralSubscription', 'subscriber'),

        # Personal stuff, freeing up the namespace for others who want to
        # play or just to remove any fingerprints identifying the user.
        ('IrcId', 'person'),
        ('JabberId', 'person'),
        ('WikiName', 'person'),
        ('PersonLanguage', 'person'),
        ('PersonLocation', 'person'),
        ('SshKey', 'person'),

        # Karma
        ('Karma', 'person'),
        ('KarmaCache', 'person'),
        ('KarmaTotalCache', 'person'),

        # Team memberships
        ('TeamMembership', 'person'),
        ('TeamParticipation', 'person'),

        # Contacts
        ('AnswerContact', 'person'),

        # Pending items in queues
        ('POExportRequest', 'person'),

        # Access grants
        ('AccessArtifactGrant', 'grantee'),
        ('AccessPolicyGrant', 'grantee'),
        ('ArchivePermission', 'person'),
        ('GitRuleGrant', 'grantee'),
        ('SharingJob', 'grantee'),

        # Soyuz reporting
        ('LatestPersonSourcePackageReleaseCache', 'creator'),
        ('LatestPersonSourcePackageReleaseCache', 'maintainer'),

        # "Affects me too" information
        ('BugAffectsPerson', 'person'),
        ]
    for table, person_id_column in removals:
        table_notification(table)
        # Table/column names come from the hard-coded list above, so the %
        # interpolation is safe; the person id is passed as a bind parameter.
        store.execute("""
            DELETE FROM %(table)s WHERE %(person_id_column)s = ?
            """ % {
                'table': table,
                'person_id_column': person_id_column,
            }, (person.id, ))

    # Trash Sprint Attendance records in the future.
    table_notification('SprintAttendance')
    store.execute("""
        DELETE FROM SprintAttendance
        USING Sprint
        WHERE Sprint.id = SprintAttendance.sprint
            AND attendee = ?
            AND Sprint.time_starts > CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
        """, (person.id, ))
    # Any remaining past sprint attendance records can harmlessly refer to
    # the placeholder person row.
    skip.add(('sprintattendance', 'attendee'))

    # generate_ppa_htaccess currently relies on seeing active
    # ArchiveAuthToken rows so that it knows which ones to remove from
    # .htpasswd files on disk in response to the cancellation of the
    # corresponding ArchiveSubscriber rows; but even once PPA authorisation
    # is handled dynamically, we probably still want to have the per-person
    # audit trail here.
    archive_subscriber_ids = set(
        store.find(
            ArchiveSubscriber.id,
            ArchiveSubscriber.subscriber_id == person.id,
            ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT))
    if archive_subscriber_ids:
        getUtility(IArchiveSubscriberSet).cancel(
            archive_subscriber_ids, janitor)
    skip.add(('archivesubscriber', 'subscriber'))
    skip.add(('archiveauthtoken', 'person'))

    # Remove hardware submissions.
    table_notification('HWSubmissionDevice')
    store.execute("""
        DELETE FROM HWSubmissionDevice
        USING HWSubmission
        WHERE HWSubmission.id = HWSubmissionDevice.submission
            AND owner = ?
        """, (person.id, ))
    table_notification('HWSubmission')
    store.find(HWSubmission, HWSubmission.ownerID == person.id).remove()

    has_references = False

    # Check for active related projects, and skip inactive ones.
    for col in 'bug_supervisor', 'driver', 'owner':
        # Raw SQL because otherwise using Product._owner while displaying it
        # as Product.owner is too fiddly.
        result = store.execute("""
            SELECT COUNT(*) FROM product WHERE active AND %(col)s = ?
            """ % {'col': col}, (person.id, ))
        count = result.get_one()[0]
        if count:
            log.error(
                "User %s is still referenced by %d product.%s values"
                % (person_name, count, col))
            has_references = True
        skip.add(('product', col))
    for col in 'driver', 'owner':
        count = store.find(
            ProductSeries, ProductSeries.product == Product.id,
            Product.active, getattr(ProductSeries, col) == person).count()
        if count:
            log.error(
                "User %s is still referenced by %d productseries.%s values"
                % (person_name, count, col))
            has_references = True
        skip.add(('productseries', col))

    # Closing the account will only work if all references have been handled
    # by this point.  If not, it's safer to bail out.  It's OK if this
    # doesn't work in all conceivable situations, since some of them may
    # require careful thought and decisions by a human administrator.
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        if (src_tab, src_col) in skip:
            continue
        result = store.execute("""
            SELECT COUNT(*) FROM %(src_tab)s WHERE %(src_col)s = ?
            """ % {
                'src_tab': src_tab,
                'src_col': src_col,
            }, (person.id, ))
        count = result.get_one()[0]
        if count:
            log.error(
                "User %s is still referenced by %d %s.%s values"
                % (person_name, count, src_tab, src_col))
            has_references = True
    if has_references:
        raise LaunchpadScriptFailure(
            "User %s is still referenced" % person_name)

    return True
def __init__(self, con): self.con = con # Database connection to use self.total_deleted = 0 # Running total self.index = 1 log.info("Deleting unreferenced LibraryFileAliases") cur = con.cursor() drop_tables(cur, "ReferencedLibraryFileAlias") cur.execute(""" CREATE TEMPORARY TABLE ReferencedLibraryFileAlias ( alias integer) """) # Determine what columns link to LibraryFileAlias # references = [(table, column), ...] references = [ tuple(ref[:2]) for ref in listReferences(cur, 'libraryfilealias', 'id') if ref[0] != 'libraryfiledownloadcount' ] assert len(references) > 10, ( 'Database introspection returned nonsense') log.debug( "Found %d columns referencing LibraryFileAlias", len(references)) # Find all relevant LibraryFileAlias references and fill in # ReferencedLibraryFileAlias for table, column in references: cur.execute(""" INSERT INTO ReferencedLibraryFileAlias SELECT LibraryFileAlias.id FROM LibraryFileAlias, %(table)s WHERE LibraryFileAlias.id = %(table)s.%(column)s """ % { 'table': quoteIdentifier(table), 'column': quoteIdentifier(column)}) log.debug("%s.%s references %d LibraryFileContent rows." % ( table, column, cur.rowcount)) con.commit() log.debug("Calculating unreferenced LibraryFileAlias set.") drop_tables(cur, "UnreferencedLibraryFileAlias") cur.execute(""" CREATE TEMPORARY TABLE UnreferencedLibraryFileAlias ( id serial PRIMARY KEY, alias integer UNIQUE) """) # Calculate the set of unreferenced LibraryFileAlias. # We also exclude all unexpired records - we don't remove them # even if they are unlinked. We currently don't remove stuff # until it has been expired for more than one week, but we will # change this if disk space becomes short and it actually will # make a noticeable difference. We handle excluding recently # created content here rather than earlier when creating the # ReferencedLibraryFileAlias table to handle uploads going on # while this script is running. 
cur.execute(""" INSERT INTO UnreferencedLibraryFileAlias (alias) SELECT id AS alias FROM LibraryFileAlias WHERE content IS NULL OR ((expires IS NULL OR expires < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - interval '1 week' ) AND date_created < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - interval '1 week' ) EXCEPT SELECT alias FROM ReferencedLibraryFileAlias """) con.commit() drop_tables(cur, "ReferencedLibraryFileAlias") cur.execute( "SELECT COALESCE(max(id),0) FROM UnreferencedLibraryFileAlias") self.max_id = cur.fetchone()[0] log.debug( "%d unreferenced LibraryFileContent to remove." % self.max_id) con.commit()
def merge_people(from_person, to_person, reviewer, delete=False):
    """Helper for merge and delete methods.

    Transfers every database reference from ``from_person`` to
    ``to_person`` (via per-table ``_merge*`` helpers plus a catch-all
    UPDATE), flags ``from_person`` as merged, and renames it with a
    '-merged' suffix.  Unless ``delete`` is True, sends a merge
    notification to ``to_person``.
    """
    # since we are doing direct SQL manipulation, make sure all
    # changes have been flushed to the database
    store = Store.of(from_person)
    store.flush()
    if (from_person.is_team and not to_person.is_team
            or not from_person.is_team and to_person.is_team):
        raise AssertionError("Users cannot be merged with teams.")
    if from_person.is_team and reviewer is None:
        # NOTE(review): message wording looks garbled ("Team merged
        # require"); presumably "Team merges require a reviewer." — confirm
        # before changing, since tests may match the exact string.
        raise AssertionError("Team merged require a reviewer.")
    if getUtility(IArchiveSet).getPPAOwnedByPerson(
            from_person,
            statuses=[ArchiveStatus.ACTIVE,
                      ArchiveStatus.DELETING]) is not None:
        raise AssertionError(
            'from_person has a ppa in ACTIVE or DELETING status')
    from_person_branches = getUtility(IAllBranches).ownedBy(from_person)
    if not from_person_branches.isPrivate().is_empty():
        raise AssertionError('from_person has private branches.')
    if from_person.is_team:
        _purgeUnmergableTeamArtifacts(from_person, to_person, reviewer)
    if not getUtility(
            IEmailAddressSet).getByPerson(from_person).is_empty():
        raise AssertionError('from_person still has email addresses.')

    # Get a database cursor.
    cur = cursor()

    # These table.columns will be skipped by the 'catch all'
    # update performed later
    skip = [
        # The AccessPolicy.person reference is to allow private teams to
        # see their own +junk branches. We don't allow merges for teams who
        # own private branches so we can skip this column.
        ('accesspolicy', 'person'),
        ('teammembership', 'person'),
        ('teammembership', 'team'),
        ('teamparticipation', 'person'),
        ('teamparticipation', 'team'),
        ('personlanguage', 'person'),
        ('person', 'merged'),
        ('personsettings', 'person'),
        ('emailaddress', 'person'),
        # Polls are not carried over when merging teams.
        ('poll', 'team'),
        # We can safely ignore the mailinglist table as there's a sanity
        # check above which prevents teams with associated mailing lists
        # from being merged.
        ('mailinglist', 'team'),
        # I don't think we need to worry about the votecast and vote
        # tables, because a real human should never have two profiles
        # in Launchpad that are active members of a given team and voted
        # in a given poll. -- GuilhermeSalgado 2005-07-07
        # We also can't afford to change poll results after they are
        # closed -- StuartBishop 20060602
        ('votecast', 'person'),
        ('vote', 'person'),
        ('translationrelicensingagreement', 'person'),
        # These are ON DELETE CASCADE and maintained by triggers.
        ('bugsummary', 'viewed_by'),
        ('bugsummaryjournal', 'viewed_by'),
        ('latestpersonsourcepackagereleasecache', 'creator'),
        ('latestpersonsourcepackagereleasecache', 'maintainer'),
        ]

    references = list(postgresql.listReferences(cur, 'person', 'id'))

    # Sanity check. If we have an indirect reference, it must
    # be ON DELETE CASCADE. We only have one case of this at the moment,
    # but this code ensures we catch any new ones added incorrectly.
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        # If the ref_tab and ref_col is not Person.id, then we have
        # an indirect reference. Ensure the update action is 'CASCADE'
        # NOTE(review): to match the comment ("is not Person.id") this
        # condition arguably needs `or` rather than `and`; as written, an
        # indirect reference whose referenced column happens to be named
        # 'id' is never checked.  Confirm intent before changing.
        if ref_tab != 'person' and ref_col != 'id':
            if updact != 'c':
                raise RuntimeError(
                    '%s.%s reference to %s.%s must be ON UPDATE CASCADE'
                    % (src_tab, src_col, ref_tab, ref_col))

    # These rows are in a UNIQUE index, and we can only move them
    # to the new Person if there is not already an entry. eg. if
    # the destination and source persons are both subscribed to a bug,
    # we cannot change the source persons subscription. We just leave them
    # as noise for the time being.

    to_id = to_person.id
    from_id = from_person.id

    # Update PersonLocation, which is a Person-decorator table.
    _merge_person_decoration(
        to_person, from_person, skip, 'PersonLocation', 'person',
        ['last_modified_by', ])

    # Update GPGKey. It won't conflict, but our sanity checks don't
    # know that.
    cur.execute(
        'UPDATE GPGKey SET owner=%(to_id)d WHERE owner=%(from_id)d'
        % vars())
    skip.append(('gpgkey', 'owner'))

    _mergeAccessArtifactGrant(cur, from_id, to_id)
    _mergeAccessPolicyGrant(cur, from_id, to_id)
    skip.append(('accessartifactgrant', 'grantee'))
    skip.append(('accesspolicygrant', 'grantee'))

    # Update the Branches that will not conflict, and fudge the names of
    # ones that *do* conflict.
    _mergeBranches(from_person, to_person)
    skip.append(('branch', 'owner'))

    _mergeBranchMergeQueues(cur, from_id, to_id)
    skip.append(('branchmergequeue', 'owner'))

    _mergeSourcePackageRecipes(from_person, to_person)
    skip.append(('sourcepackagerecipe', 'owner'))

    _mergeMailingListSubscriptions(cur, from_id, to_id)
    skip.append(('mailinglistsubscription', 'person'))

    _mergeBranchSubscription(cur, from_id, to_id)
    skip.append(('branchsubscription', 'person'))

    _mergeBugAffectsPerson(cur, from_id, to_id)
    skip.append(('bugaffectsperson', 'person'))

    _mergeAnswerContact(cur, from_id, to_id)
    skip.append(('answercontact', 'person'))

    _mergeQuestionSubscription(cur, from_id, to_id)
    skip.append(('questionsubscription', 'person'))

    _mergeBugNotificationRecipient(cur, from_id, to_id)
    skip.append(('bugnotificationrecipient', 'person'))

    # We ignore BugSubscriptionFilterMutes.
    skip.append(('bugsubscriptionfiltermute', 'person'))

    # We ignore BugMutes.
    skip.append(('bugmute', 'person'))

    _mergeStructuralSubscriptions(cur, from_id, to_id)
    skip.append(('structuralsubscription', 'subscriber'))

    _mergeSpecificationSubscription(cur, from_id, to_id)
    skip.append(('specificationsubscription', 'person'))

    _mergeSprintAttendance(cur, from_id, to_id)
    skip.append(('sprintattendance', 'attendee'))

    _mergePOExportRequest(cur, from_id, to_id)
    skip.append(('poexportrequest', 'person'))

    _mergeTranslationMessage(cur, from_id, to_id)
    skip.append(('translationmessage', 'submitter'))
    skip.append(('translationmessage', 'reviewer'))

    # Handle the POFileTranslator cache by doing nothing. As it is
    # maintained by triggers, the data migration has already been done
    # for us when we updated the source tables.
    skip.append(('pofiletranslator', 'person'))

    _mergeTranslationImportQueueEntry(cur, from_id, to_id)
    skip.append(('translationimportqueueentry', 'importer'))

    # XXX cprov 2007-02-22 bug=87098:
    # Since we only allow one PPA for each user,
    # we can't reassign the old user archive to the new user.
    # It need to be done manually, probably by reasinning all publications
    # to the old PPA to the new one, performing a careful_publishing on it
    # and removing the old one from disk.
    skip.append(('archive', 'owner'))

    _mergeCodeReviewVote(cur, from_id, to_id)
    skip.append(('codereviewvote', 'reviewer'))

    _mergeKarmaCache(cur, from_id, to_id, from_person.karma)
    skip.append(('karmacache', 'person'))
    skip.append(('karmatotalcache', 'person'))

    _mergeDateCreated(cur, from_id, to_id)

    _mergeLoginTokens(cur, from_id, to_id)
    skip.append(('logintoken', 'requester'))

    # Sanity check. If we have a reference that participates in a
    # UNIQUE index, it must have already been handled by this point.
    # We can tell this by looking at the skip list.
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        uniques = postgresql.listUniques(cur, src_tab, src_col)
        if len(uniques) > 0 and (src_tab, src_col) not in skip:
            raise NotImplementedError(
                '%s.%s reference to %s.%s is in a UNIQUE index '
                'but has not been handled' % (
                    src_tab, src_col, ref_tab, ref_col))

    # Handle all simple cases
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        if (src_tab, src_col) in skip:
            continue
        cur.execute('UPDATE %s SET %s=%d WHERE %s=%d' % (
            src_tab, src_col, to_person.id, src_col, from_person.id))

    _mergeTeamMembership(cur, from_id, to_id)
    _mergeProposedInvitedTeamMembership(cur, from_id, to_id)

    # Flag the person as merged
    cur.execute('''
        UPDATE Person SET merged=%(to_id)d WHERE id=%(from_id)d
        ''' % vars())

    # Append a -merged suffix to the person's name, probing for an unused
    # numbered variant if the plain '-merged' name is already taken.
    name = base = "%s-merged" % from_person.name.encode('ascii')
    cur.execute("SELECT id FROM Person WHERE name = %s" % sqlvalues(name))
    i = 1
    while cur.fetchone():
        name = "%s%d" % (base, i)
        cur.execute(
            "SELECT id FROM Person WHERE name = %s" % sqlvalues(name))
        i += 1
    cur.execute(
        "UPDATE Person SET name = %s WHERE id = %s"
        % sqlvalues(name, from_person))

    # Since we've updated the database behind Storm's back,
    # flush its caches.
    store.invalidate()

    # Move OpenId Identifiers from the merged account to the new
    # account.
    if from_person.account is not None and to_person.account is not None:
        store.execute("""
            UPDATE OpenIdIdentifier SET account=%s WHERE account=%s
            """ % sqlvalues(to_person.accountID, from_person.accountID))

    if delete:
        # We don't notify anyone about deletes.
        return

    # Inform the user of the merge changes.
    if to_person.is_team:
        mail_text = get_email_template(
            'team-merged.txt', app='registry')
        subject = 'Launchpad teams merged'
    else:
        mail_text = get_email_template(
            'person-merged.txt', app='registry')
        subject = 'Launchpad accounts merged'
    mail_text = mail_text % {
        'dupename': from_person.name,
        'person': to_person.name,
        }
    getUtility(IPersonNotificationSet).addNotification(
        to_person, subject, mail_text)
def merge_people(from_person, to_person, reviewer, delete=False):
    """Helper for merge and delete methods.

    NOTE(review): this is a second, formatting-variant copy of
    ``merge_people`` also defined earlier in this file; at import time this
    later definition shadows the earlier one.  Consider deduplicating.

    Transfers every database reference from ``from_person`` to
    ``to_person`` (via per-table ``_merge*`` helpers plus a catch-all
    UPDATE), flags ``from_person`` as merged, and renames it with a
    '-merged' suffix.  Unless ``delete`` is True, sends a merge
    notification to ``to_person``.
    """
    # since we are doing direct SQL manipulation, make sure all
    # changes have been flushed to the database
    store = Store.of(from_person)
    store.flush()
    if (from_person.is_team and not to_person.is_team
            or not from_person.is_team and to_person.is_team):
        raise AssertionError("Users cannot be merged with teams.")
    if from_person.is_team and reviewer is None:
        # NOTE(review): message wording looks garbled ("Team merged
        # require"); presumably "Team merges require a reviewer."
        raise AssertionError("Team merged require a reviewer.")
    if getUtility(IArchiveSet).getPPAOwnedByPerson(from_person, statuses=[
            ArchiveStatus.ACTIVE, ArchiveStatus.DELETING]) is not None:
        raise AssertionError(
            'from_person has a ppa in ACTIVE or DELETING status')
    from_person_branches = getUtility(IAllBranches).ownedBy(from_person)
    if not from_person_branches.isPrivate().is_empty():
        raise AssertionError('from_person has private branches.')
    if from_person.is_team:
        _purgeUnmergableTeamArtifacts(from_person, to_person, reviewer)
    if not getUtility(IEmailAddressSet).getByPerson(from_person).is_empty():
        raise AssertionError('from_person still has email addresses.')

    # Get a database cursor.
    cur = cursor()

    # These table.columns will be skipped by the 'catch all'
    # update performed later
    skip = [
        # The AccessPolicy.person reference is to allow private teams to
        # see their own +junk branches. We don't allow merges for teams who
        # own private branches so we can skip this column.
        ('accesspolicy', 'person'),
        ('teammembership', 'person'),
        ('teammembership', 'team'),
        ('teamparticipation', 'person'),
        ('teamparticipation', 'team'),
        ('personlanguage', 'person'),
        ('person', 'merged'),
        ('personsettings', 'person'),
        ('emailaddress', 'person'),
        # Polls are not carried over when merging teams.
        ('poll', 'team'),
        # We can safely ignore the mailinglist table as there's a sanity
        # check above which prevents teams with associated mailing lists
        # from being merged.
        ('mailinglist', 'team'),
        # I don't think we need to worry about the votecast and vote
        # tables, because a real human should never have two profiles
        # in Launchpad that are active members of a given team and voted
        # in a given poll. -- GuilhermeSalgado 2005-07-07
        # We also can't afford to change poll results after they are
        # closed -- StuartBishop 20060602
        ('votecast', 'person'),
        ('vote', 'person'),
        ('translationrelicensingagreement', 'person'),
        # These are ON DELETE CASCADE and maintained by triggers.
        ('bugsummary', 'viewed_by'),
        ('bugsummaryjournal', 'viewed_by'),
        ('latestpersonsourcepackagereleasecache', 'creator'),
        ('latestpersonsourcepackagereleasecache', 'maintainer'),
        ]

    references = list(postgresql.listReferences(cur, 'person', 'id'))

    # Sanity check. If we have an indirect reference, it must
    # be ON DELETE CASCADE. We only have one case of this at the moment,
    # but this code ensures we catch any new ones added incorrectly.
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        # If the ref_tab and ref_col is not Person.id, then we have
        # an indirect reference. Ensure the update action is 'CASCADE'
        # NOTE(review): to match the comment ("is not Person.id") this
        # condition arguably needs `or` rather than `and`; as written, an
        # indirect reference whose referenced column happens to be named
        # 'id' is never checked.  Confirm intent before changing.
        if ref_tab != 'person' and ref_col != 'id':
            if updact != 'c':
                raise RuntimeError(
                    '%s.%s reference to %s.%s must be ON UPDATE CASCADE'
                    % (src_tab, src_col, ref_tab, ref_col))

    # These rows are in a UNIQUE index, and we can only move them
    # to the new Person if there is not already an entry. eg. if
    # the destination and source persons are both subscribed to a bug,
    # we cannot change the source persons subscription. We just leave them
    # as noise for the time being.

    to_id = to_person.id
    from_id = from_person.id

    # Update PersonLocation, which is a Person-decorator table.
    _merge_person_decoration(to_person, from_person, skip,
        'PersonLocation', 'person', ['last_modified_by', ])

    # Update GPGKey. It won't conflict, but our sanity checks don't
    # know that.
    cur.execute(
        'UPDATE GPGKey SET owner=%(to_id)d WHERE owner=%(from_id)d' % vars())
    skip.append(('gpgkey', 'owner'))

    _mergeAccessArtifactGrant(cur, from_id, to_id)
    _mergeAccessPolicyGrant(cur, from_id, to_id)
    skip.append(('accessartifactgrant', 'grantee'))
    skip.append(('accesspolicygrant', 'grantee'))

    # Update the Branches that will not conflict, and fudge the names of
    # ones that *do* conflict.
    _mergeBranches(from_person, to_person)
    skip.append(('branch', 'owner'))

    _mergeBranchMergeQueues(cur, from_id, to_id)
    skip.append(('branchmergequeue', 'owner'))

    _mergeSourcePackageRecipes(from_person, to_person)
    skip.append(('sourcepackagerecipe', 'owner'))

    _mergeMailingListSubscriptions(cur, from_id, to_id)
    skip.append(('mailinglistsubscription', 'person'))

    _mergeBranchSubscription(cur, from_id, to_id)
    skip.append(('branchsubscription', 'person'))

    _mergeBugAffectsPerson(cur, from_id, to_id)
    skip.append(('bugaffectsperson', 'person'))

    _mergeAnswerContact(cur, from_id, to_id)
    skip.append(('answercontact', 'person'))

    _mergeQuestionSubscription(cur, from_id, to_id)
    skip.append(('questionsubscription', 'person'))

    _mergeBugNotificationRecipient(cur, from_id, to_id)
    skip.append(('bugnotificationrecipient', 'person'))

    # We ignore BugSubscriptionFilterMutes.
    skip.append(('bugsubscriptionfiltermute', 'person'))

    # We ignore BugMutes.
    skip.append(('bugmute', 'person'))

    _mergeStructuralSubscriptions(cur, from_id, to_id)
    skip.append(('structuralsubscription', 'subscriber'))

    _mergeSpecificationSubscription(cur, from_id, to_id)
    skip.append(('specificationsubscription', 'person'))

    _mergeSprintAttendance(cur, from_id, to_id)
    skip.append(('sprintattendance', 'attendee'))

    _mergePOExportRequest(cur, from_id, to_id)
    skip.append(('poexportrequest', 'person'))

    _mergeTranslationMessage(cur, from_id, to_id)
    skip.append(('translationmessage', 'submitter'))
    skip.append(('translationmessage', 'reviewer'))

    # Handle the POFileTranslator cache by doing nothing. As it is
    # maintained by triggers, the data migration has already been done
    # for us when we updated the source tables.
    skip.append(('pofiletranslator', 'person'))

    _mergeTranslationImportQueueEntry(cur, from_id, to_id)
    skip.append(('translationimportqueueentry', 'importer'))

    # XXX cprov 2007-02-22 bug=87098:
    # Since we only allow one PPA for each user,
    # we can't reassign the old user archive to the new user.
    # It need to be done manually, probably by reasinning all publications
    # to the old PPA to the new one, performing a careful_publishing on it
    # and removing the old one from disk.
    skip.append(('archive', 'owner'))

    _mergeCodeReviewVote(cur, from_id, to_id)
    skip.append(('codereviewvote', 'reviewer'))

    _mergeKarmaCache(cur, from_id, to_id, from_person.karma)
    skip.append(('karmacache', 'person'))
    skip.append(('karmatotalcache', 'person'))

    _mergeDateCreated(cur, from_id, to_id)

    _mergeLoginTokens(cur, from_id, to_id)
    skip.append(('logintoken', 'requester'))

    # Sanity check. If we have a reference that participates in a
    # UNIQUE index, it must have already been handled by this point.
    # We can tell this by looking at the skip list.
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        uniques = postgresql.listUniques(cur, src_tab, src_col)
        if len(uniques) > 0 and (src_tab, src_col) not in skip:
            raise NotImplementedError(
                '%s.%s reference to %s.%s is in a UNIQUE index '
                'but has not been handled' % (
                    src_tab, src_col, ref_tab, ref_col))

    # Handle all simple cases
    for src_tab, src_col, ref_tab, ref_col, updact, delact in references:
        if (src_tab, src_col) in skip:
            continue
        cur.execute('UPDATE %s SET %s=%d WHERE %s=%d' % (
            src_tab, src_col, to_person.id, src_col, from_person.id))

    _mergeTeamMembership(cur, from_id, to_id)
    _mergeProposedInvitedTeamMembership(cur, from_id, to_id)

    # Flag the person as merged
    cur.execute('''
        UPDATE Person SET merged=%(to_id)d WHERE id=%(from_id)d
        ''' % vars())

    # Append a -merged suffix to the person's name, probing for an unused
    # numbered variant if the plain '-merged' name is already taken.
    name = base = "%s-merged" % from_person.name.encode('ascii')
    cur.execute("SELECT id FROM Person WHERE name = %s" % sqlvalues(name))
    i = 1
    while cur.fetchone():
        name = "%s%d" % (base, i)
        cur.execute(
            "SELECT id FROM Person WHERE name = %s" % sqlvalues(name))
        i += 1
    cur.execute(
        "UPDATE Person SET name = %s WHERE id = %s"
        % sqlvalues(name, from_person))

    # Since we've updated the database behind Storm's back,
    # flush its caches.
    store.invalidate()

    # Move OpenId Identifiers from the merged account to the new
    # account.
    if from_person.account is not None and to_person.account is not None:
        store.execute("""
            UPDATE OpenIdIdentifier SET account=%s WHERE account=%s
            """ % sqlvalues(to_person.accountID, from_person.accountID))

    if delete:
        # We don't notify anyone about deletes.
        return

    # Inform the user of the merge changes.
    if to_person.is_team:
        mail_text = get_email_template('team-merged.txt', app='registry')
        subject = 'Launchpad teams merged'
    else:
        mail_text = get_email_template('person-merged.txt', app='registry')
        subject = 'Launchpad accounts merged'
    mail_text = mail_text % {
        'dupename': from_person.name,
        'person': to_person.name,
        }
    getUtility(IPersonNotificationSet).addNotification(
        to_person, subject, mail_text)
def main(): parser = OptionParser() db_options(parser) parser.add_option( "-f", "--from", dest="from_date", default=None, metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)") parser.add_option( "-u", "--until", dest="until_date", default=None, metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)") options, args = parser.parse_args() if len(args) > 0: parser.error("Too many command line arguments.") # Handle date filters. We use LibraryFileContent.datecreated rather # than LibraryFileAlias.datecreated as this report is about actual # disk space usage. A new row in the database linking to a # previously existing file in the Librarian takes up no new space. if options.from_date is not None: from_date = 'AND LFC.datecreated >= %s' % sqlvalues( options.from_date) else: from_date = '' if options.until_date is not None: until_date = 'AND LFC.datecreated <= %s' % sqlvalues( options.until_date) else: until_date = '' con = connect() cur = con.cursor() # Collect direct references to the LibraryFileAlias table. references = set( (from_table, from_column) # Note that listReferences is recursive, which we don't # care about in this simple report. We also ignore the # irrelevant constraint type update and delete flags. 
for from_table, from_column, to_table, to_column, update, delete in listReferences(cur, 'libraryfilealias', 'id') if to_table == 'libraryfilealias' ) totals = set() for referring_table, referring_column in sorted(references): if referring_table == 'libraryfiledownloadcount': continue quoted_referring_table = quoteIdentifier(referring_table) quoted_referring_column = quoteIdentifier(referring_column) cur.execute(""" SELECT COALESCE(SUM(filesize), 0), pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)), COUNT(*) FROM ( SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s WHERE LFC.id = LFA.content AND LFA.id = %s.%s AND ( LFA.expires IS NULL OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC') %s %s ORDER BY LFC.id ) AS Whatever """ % ( quoted_referring_table, quoted_referring_table, quoted_referring_column, from_date, until_date)) total_bytes, formatted_size, num_files = cur.fetchone() totals.add((total_bytes, referring_table, formatted_size, num_files)) for total_bytes, tab_name, formatted_size, num_files in sorted( totals, reverse=True): print '%-10s %s in %d files' % (formatted_size, tab_name, num_files) return 0
def main(): parser = OptionParser() db_options(parser) parser.add_option("-f", "--from", dest="from_date", default=None, metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)") parser.add_option("-u", "--until", dest="until_date", default=None, metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)") options, args = parser.parse_args() if len(args) > 0: parser.error("Too many command line arguments.") # Handle date filters. We use LibraryFileContent.datecreated rather # than LibraryFileAlias.datecreated as this report is about actual # disk space usage. A new row in the database linking to a # previously existing file in the Librarian takes up no new space. if options.from_date is not None: from_date = 'AND LFC.datecreated >= %s' % sqlvalues(options.from_date) else: from_date = '' if options.until_date is not None: until_date = 'AND LFC.datecreated <= %s' % sqlvalues( options.until_date) else: until_date = '' con = connect() cur = con.cursor() # Collect direct references to the LibraryFileAlias table. references = set( (from_table, from_column) # Note that listReferences is recursive, which we don't # care about in this simple report. We also ignore the # irrelevant constraint type update and delete flags. 
for from_table, from_column, to_table, to_column, update, delete in listReferences(cur, 'libraryfilealias', 'id') if to_table == 'libraryfilealias') totals = set() for referring_table, referring_column in sorted(references): if referring_table == 'libraryfiledownloadcount': continue quoted_referring_table = quoteIdentifier(referring_table) quoted_referring_column = quoteIdentifier(referring_column) cur.execute(""" SELECT COALESCE(SUM(filesize), 0), pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)), COUNT(*) FROM ( SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s WHERE LFC.id = LFA.content AND LFA.id = %s.%s AND ( LFA.expires IS NULL OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC') %s %s ORDER BY LFC.id ) AS Whatever """ % (quoted_referring_table, quoted_referring_table, quoted_referring_column, from_date, until_date)) total_bytes, formatted_size, num_files = cur.fetchone() totals.add((total_bytes, referring_table, formatted_size, num_files)) for total_bytes, tab_name, formatted_size, num_files in sorted( totals, reverse=True): print '%-10s %s in %d files' % (formatted_size, tab_name, num_files) return 0