Example #1
 def migrate2(self):
     # fix the dirnames and basenames column types for postgresql
     cu = self.db.cursor()
     if self.db.driver == 'postgresql':
         logMe(2, "fixing column types for pathfields")
         cu.execute("create table saveDirnames as select dirnameId, dirname from Dirnames")
         cu.execute("create table saveBasenames as select basenameId, basename from Basenames")
         self.db.dropForeignKey("FilePaths", "dirnameId")
         self.db.dropForeignKey("FilePaths", "basenameId")
         cu.execute("drop table Dirnames")
         cu.execute("drop table Basenames")
         self.db.loadSchema()
         schema.createTroves(self.db, createIndex = False)
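         # restore the saved rows; index creation was deferred above so the
         # bulk load is not slowed down by index maintenance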
         cu.execute("select dirnameId, dirname from saveDirnames")
         self.db.bulkload("Dirnames", ( (x[0], cu.binary(x[1])) for x in cu.fetchall() ),
                          ["dirnameId", "dirname"])
         cu.execute("select basenameId, basename from saveBasenames")
         self.db.bulkload("Basenames", ( (x[0], cu.binary(x[1])) for x in cu.fetchall() ),
                          ["basenameId", "basename"])
         schema.createTroves(self.db, createIndex = True)
         self.db.addForeignKey("FilePaths", "dirnameId", "Dirnames", "dirnameId")
         self.db.addForeignKey("FilePaths", "basenameId", "Basenames", "basenameId")
         cu.execute("drop table saveDirnames")
         cu.execute("drop table saveBasenames")
         self.db.analyze("Dirnames")
         self.db.analyze("Basenames")
         self.db.setAutoIncrement("Dirnames", "dirnameId")
         self.db.setAutoIncrement("Basenames", "basenameId")
     return True
Example #2
 def migrate2(self):
     # fix the dirnames and basenames column types for postgresql
     cu = self.db.cursor()
     if self.db.driver == 'postgresql':
         logMe(2, "fixing column types for pathfields")
         cu.execute("create table saveDirnames as select dirnameId, dirname from Dirnames")
         cu.execute("create table saveBasenames as select basenameId, basename from Basenames")
         cu.execute("create table savePrefixes as select dirnameId, prefixId from Prefixes")
         self.db.dropForeignKey("FilePaths", "dirnameId")
         self.db.dropForeignKey("FilePaths", "basenameId")
         cu.execute("drop table Prefixes")
         cu.execute("drop table Dirnames")
         cu.execute("drop table Basenames")
         self.db.loadSchema()
         schema.createTroves(self.db, createIndex = False)
         cu.execute("select dirnameId, dirname from saveDirnames")
         self.db.bulkload("Dirnames", ( (x[0], cu.binary(x[1])) for x in cu.fetchall() ),
                          ["dirnameId", "dirname"])
         cu.execute("select basenameId, basename from saveBasenames")
         self.db.bulkload("Basenames", ( (x[0], cu.binary(x[1])) for x in cu.fetchall() ),
                          ["basenameId", "basename"])
         cu.execute("insert into Prefixes(dirnameId, prefixId) "
                    "select dirnameId, prefixId from savePrefixes")
         schema.createTroves(self.db, createIndex = True)
         self.db.addForeignKey("FilePaths", "dirnameId", "Dirnames", "dirnameId")
         self.db.addForeignKey("FilePaths", "basenameId", "Basenames", "basenameId")
         cu.execute("drop table saveDirnames")
         cu.execute("drop table saveBasenames")
         cu.execute("drop table savePrefixes")
         self.db.analyze("Dirnames")
         self.db.analyze("Basenames")
         self.db.setAutoIncrement("Dirnames", "dirnameId")
         self.db.setAutoIncrement("Basenames", "basenameId")
     # fix the missing dirnames/prefixes links
     schema.setupTempTables(self.db)
     logMe(2, "looking for missing dirnames/prefixes links")
     cu = self.db.cursor()
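     # find dirnames referenced by FilePaths that have no matching Prefixes entry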
     cu.execute("""select distinct d.dirnameId, d.dirname
     from Dirnames as d
     join ( select fp.dirnameId as dirnameId
            from FilePaths as fp
            left join Prefixes as p using(dirnameId)
            where p.dirnameId is null ) as dq using(dirnameId) """)
     ret = cu.fetchall()
     if ret:
         logMe(2, "fixing missing dirnames/prefixes links in %d dirnames" % (len(ret),))
         trovestore.addPrefixesFromList(self.db, ret)
     self.db.analyze("Prefixes")
     return True
Example #3
 def fixRedirects(self, repos):
     logMe(2, "removing dep provisions from redirects...")
     self.db.loadSchema()
     # avoid creating this index until we've had a chance to check the path indexes
     self.db.tables["TroveFiles"].append("TroveFilesPathIdx")
     # remove dependency provisions from redirects -- the conary 1.0
     # branch would set redirects to provide their own names. this doesn't
     # clean up the dependency table; that would only matter on a trove
     # which was cooked as only a redirect in the repository; any other
     # instances would still need the depId anyway
     cu = self.db.cursor()
     cu.execute("delete from provides where instanceId in "
                "(select instanceId from instances "
                "where troveType=? and isPresent=1)",
                trove.TROVE_TYPE_REDIRECT)
     # need to make sure TroveRedirects is defined...
     schema.createTroves(self.db)
     # loop over redirects...
     cu.execute("select instanceId from instances "
                "where troveType=? and isPresent=1",
                trove.TROVE_TYPE_REDIRECT)
     for (instanceId,) in cu:
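         # recompute each redirect's signature, since its provides changed above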
         self.fixTroveSig(repos, instanceId)
Example #4
 def fixRedirects(self, repos):
     logMe(2, "removing dep provisions from redirects...")
     self.db.loadSchema()
     # avoid creating this index until we've had a chance to check the path indexes
     self.db.tables["TroveFiles"].append("TroveFilesPathIdx")
     # remove dependency provisions from redirects -- the conary 1.0
     # branch would set redirects to provide their own names. this doesn't
     # clean up the dependency table; that would only matter on a trove
     # which was cooked as only a redirect in the repository; any other
     # instances would still need the depId anyway
     cu = self.db.cursor()
     cu.execute(
         "delete from provides where instanceId in "
         "(select instanceId from instances "
         "where troveType=? and isPresent=1)", trove.TROVE_TYPE_REDIRECT)
     # need to make sure TroveRedirects is defined...
     schema.createTroves(self.db)
     # loop over redirects...
     cu.execute(
         "select instanceId from instances "
         "where troveType=? and isPresent=1", trove.TROVE_TYPE_REDIRECT)
     for (instanceId, ) in cu:
         self.fixTroveSig(repos, instanceId)
Example #5
    def _createFilePaths(self):
        logMe(2, "splitting paths in dirnames and basenames")
        cu = self.db.cursor()
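        # work table holding each path split into its dirname and basename parts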
        cu.execute("""
        create table tmpDirnames (
            filePathId %(PRIMARYKEY)s,
            dirname %(PATHTYPE)s,
            basename %(PATHTYPE)s
        ) """ % self.db.keywords)
        # save a copy of FilePaths before updating the table definition
        cu.execute("""create table oldFilePaths as
        select filePathId, pathId, path from FilePaths""")
        self.db.createIndex("oldFilePaths", "oldFilePathsIdx", "filePathId",
                            check=False, unique=True)
        self.db.analyze("oldFilePaths")

        # drop the FK constraint from TroveFiles into FilePaths
        self.db.loadSchema()
        self.db.dropForeignKey("TroveFiles", name = "TroveFiles_filePathId_fk")
        # create Dirnames, Basenames and the new FilePaths tables
        cu.execute("drop table FilePaths")
        self.db.loadSchema()
        schema.createTroves(self.db, createIndex=False)

        # this is to avoid processing too many entries at once...
        sliceSize = 200000
        analyze = 1
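        # copy rows over in slices: the left join selects only the entries not
        # yet present in tmpDirnames, so each pass picks up the next batch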
        while True:
            cu.execute("""
            select fp.filePathId, fp.path from oldFilePaths as fp
            left join tmpDirnames as d using(filePathId)
            where d.filePathId is null limit ?""", sliceSize)
            tmpl = [ (_fpid, os.path.split(_path)) for _fpid,_path in cu.fetchall() ]
            if not tmpl:
                break # no more entries found
            self.db.bulkload("tmpDirnames", [ (x[0], x[1][0], x[1][1]) for x in tmpl ],
                             ["filePathId", "dirname", "basename"])
            # don't analyze too often for speed reasons
            analyze -= 1
            if not analyze:
                analyze = 5
                self.db.analyze("tmpDirnames")
        self.db.createIndex("tmpDirnames", "tmpDirnames_dirname_idx", "dirname",
                            check = False)
        logMe(2, "extracting unique dirnames and basenames...")
        self.db.analyze("tmpDirnames")
        # the '' and '/' dirnames should already be in the Dirnames table
        cu.execute("""
        insert into Dirnames(dirname)
        select distinct dirname from tmpDirnames
        order by dirname """)
        self.db.analyze("Dirnames")
        cu.execute("""
        insert into Basenames(basename)
        select distinct basename from tmpDirnames
        order by basename """)
        self.db.analyze("Basenames")

        logMe(2, "generating the new FilePaths table...")
        cu.execute("""insert into FilePaths(filePathId, dirnameId, basenameId, pathId)
        select fp.filePathId, d.dirnameId, b.basenameId, fp.pathId
        from oldFilePaths as fp
        join tmpDirnames as td using(filePathId)
        join Dirnames as d on td.dirname = d.dirname
        join Basenames as b on td.basename = b.basename """)
        cu.execute("drop table oldFilePaths")
        cu.execute("drop table tmpDirnames")
        # fix the autoincrement primary key value on the new FilePaths
        cu.execute("select max(filePathId) from FilePaths")
        maxId = cu.fetchone()[0]
        if maxId:
            self.db.setAutoIncrement("FilePaths", "filePathId", maxId)
        self.db.analyze("FilePaths")

        # re-enable the FK constraint and create indexes
        logMe(3, "adding foreign key constraints...")
        self.db.addForeignKey("TroveFiles", "filePathId", "FilePaths", "filePathId")
        self.db.analyze("TroveFiles")
        schema.createTroves(self.db)
Example #6
 def fixDuplicatePaths(self, repos):
     logMe(2, "checking database for duplicate path entries...")
     cu = self.db.cursor()
     # we'll have to do a full table scan on TroveFiles. no way
     # around it...
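     # tmpDups will hold one row for each (instanceId, path) pair that occurs
     # more than once within the same trove instance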
     cu.execute("""
     create temporary table tmpDups(
         instanceId integer,
         path %(PATHTYPE)s,
         counter integer
     ) %(TABLEOPTS)s""" % self.db.keywords)
     logMe(2, "looking for troves with duplicate paths...")
     # sqlite has a real challenge dealing with large datasets
     if self.db.driver == 'sqlite':
         cu2 = self.db.cursor()
         cu.execute("select distinct instanceId from TroveFiles")
         # so we split this into many very small tasks
         for (instanceId,) in cu:
             cu2.execute("""
             insert into tmpDups (instanceId, path, counter)
             select instanceId, path, count(*)
             from TroveFiles
             where instanceId = ?
             group by instanceId, path
             having count(*) > 1""", instanceId)
     else: # other backends should be able to process this in one shot
         cu.execute("""
         insert into tmpDups (instanceId, path, counter)
         select instanceId, path, count(*)
         from TroveFiles
         group by instanceId, path
         having count(*) > 1""")
     counter = cu.execute("select count(*) from tmpDups").fetchall()[0][0]
     if counter > 0:
         # drop the old index, if any
         self.db.loadSchema()
         self.db.dropIndex("TroveFiles", "TroveFilesPathIdx")
     logMe(3, "detected %d duplicates" % (counter,))
     # loop over every duplicate and apply the appropriate fix
     cu.execute("select instanceId, path from tmpDups")
     for (instanceId, path) in cu.fetchall():
         cu.execute("""select distinct
         instanceId, streamId, versionId, pathId, path
         from trovefiles where instanceId = ? and path = ?
         order by streamId, versionId, pathId""", (instanceId, path))
         ret = cu.fetchall()
         # delete all the duplicates and put the first one back
         cu.execute("delete from trovefiles "
                    "where instanceId = ? and path = ?",
                    (instanceId, path))
         # if the duplicate rows differ, keep the first (oldest) one, since it
         # is most likely the original
         cu.execute("insert into trovefiles "
                    "(instanceId, streamId, versionId, pathId, path) "
                    "values (?,?,?,?,?)", tuple(ret[0]))
         if len(ret) > 1:
             # need to recompute the sha1 - we might have changed the trove manifest
             # if the records were different
             self.fixTroveSig(repos, instanceId)
     # recreate the indexes and triggers - including new path
     # index for TroveFiles.  Also recreates the indexes table.
     logMe(2, 'Recreating indexes... (this could take a while)')
     cu.execute("drop table tmpDups")
     self.db.loadSchema()
     schema.createTroves(self.db)
     logMe(2, 'Indexes created.')
Example #7
 def migrate2(self):
     # fix the dirnames and basenames column types for postgresql
     cu = self.db.cursor()
     if self.db.driver == 'postgresql':
         logMe(2, "fixing column types for pathfields")
         cu.execute(
             "create table saveDirnames as select dirnameId, dirname from Dirnames"
         )
         cu.execute(
             "create table saveBasenames as select basenameId, basename from Basenames"
         )
         cu.execute(
             "create table savePrefixes as select dirnameId, prefixId from Prefixes"
         )
         self.db.dropForeignKey("FilePaths", "dirnameId")
         self.db.dropForeignKey("FilePaths", "basenameId")
         cu.execute("drop table Prefixes")
         cu.execute("drop table Dirnames")
         cu.execute("drop table Basenames")
         self.db.loadSchema()
         schema.createTroves(self.db, createIndex=False)
         cu.execute("select dirnameId, dirname from saveDirnames")
         self.db.bulkload("Dirnames",
                          ((x[0], cu.binary(x[1])) for x in cu.fetchall()),
                          ["dirnameId", "dirname"])
         cu.execute("select basenameId, basename from saveBasenames")
         self.db.bulkload("Basenames",
                          ((x[0], cu.binary(x[1])) for x in cu.fetchall()),
                          ["basenameId", "basename"])
         cu.execute("insert into Prefixes(dirnameId, prefixId) "
                    "select dirnameId, prefixId from savePrefixes")
         schema.createTroves(self.db, createIndex=True)
         self.db.addForeignKey("FilePaths", "dirnameId", "Dirnames",
                               "dirnameId")
         self.db.addForeignKey("FilePaths", "basenameId", "Basenames",
                               "basenameId")
         cu.execute("drop table saveDirnames")
         cu.execute("drop table saveBasenames")
         cu.execute("drop table savePrefixes")
         self.db.analyze("Dirnames")
         self.db.analyze("Basenames")
         self.db.setAutoIncrement("Dirnames", "dirnameId")
         self.db.setAutoIncrement("Basenames", "basenameId")
     # fix the missing dirnames/prefixes links
     schema.setupTempTables(self.db)
     logMe(2, "looking for missing dirnames/prefixes links")
     cu = self.db.cursor()
     cu.execute("""select distinct d.dirnameId, d.dirname
     from Dirnames as d
     join ( select fp.dirnameId as dirnameId
            from FilePaths as fp
            left join Prefixes as p using(dirnameId)
            where p.dirnameId is null ) as dq using(dirnameId) """)
     ret = cu.fetchall()
     if ret:
         logMe(
             2, "fixing missing dirnames/prefixes links in %d dirnames" %
             (len(ret), ))
         trovestore.addPrefixesFromList(self.db, ret)
     self.db.analyze("Prefixes")
     return True
Example #8
    def _createFilePaths(self):
        logMe(2, "splitting paths in dirnames and basenames")
        cu = self.db.cursor()
        cu.execute("""
        create table tmpDirnames (
            filePathId %(PRIMARYKEY)s,
            dirname %(PATHTYPE)s,
            basename %(PATHTYPE)s
        ) """ % self.db.keywords)
        # save a copy of FilePaths before updating the table definition
        cu.execute("""create table oldFilePaths as
        select filePathId, pathId, path from FilePaths""")
        self.db.createIndex("oldFilePaths",
                            "oldFilePathsIdx",
                            "filePathId",
                            check=False,
                            unique=True)
        self.db.analyze("oldFilePaths")

        # drop the FK constraint from TroveFiles into FilePaths
        self.db.loadSchema()
        self.db.dropForeignKey("TroveFiles", name="TroveFiles_filePathId_fk")
        # create Dirnames, Basenames and the new FilePaths tables
        cu.execute("drop table FilePaths")
        self.db.loadSchema()
        schema.createTroves(self.db, createIndex=False)

        # this is to avoid processing too many entries at once...
        sliceSize = 200000
        analyze = 1
        while True:
            cu.execute(
                """
            select fp.filePathId, fp.path from oldFilePaths as fp
            left join tmpDirnames as d using(filePathId)
            where d.filePathId is null limit ?""", sliceSize)
            tmpl = [(_fpid, os.path.split(_path))
                    for _fpid, _path in cu.fetchall()]
            if not tmpl:
                break  # no more entries found
            self.db.bulkload("tmpDirnames",
                             [(x[0], x[1][0], x[1][1]) for x in tmpl],
                             ["filePathId", "dirname", "basename"])
            # don't analyze too often for speed reasons
            analyze -= 1
            if not analyze:
                analyze = 5
                self.db.analyze("tmpDirnames")
        self.db.createIndex("tmpDirnames",
                            "tmpDirnames_dirname_idx",
                            "dirname",
                            check=False)
        logMe(2, "extracting unique dirnames and basenames...")
        self.db.analyze("tmpDirnames")
        # the '' and '/' dirnames should already be in the Dirnames table
        cu.execute("""
        insert into Dirnames(dirname)
        select distinct dirname from tmpDirnames
        order by dirname """)
        self.db.analyze("Dirnames")
        cu.execute("""
        insert into Basenames(basename)
        select distinct basename from tmpDirnames
        order by basename """)
        self.db.analyze("Basenames")

        logMe(2, "generating the new FilePaths table...")
        cu.execute(
            """insert into FilePaths(filePathId, dirnameId, basenameId, pathId)
        select fp.filePathId, d.dirnameId, b.basenameId, fp.pathId
        from oldFilePaths as fp
        join tmpDirnames as td using(filePathId)
        join Dirnames as d on td.dirname = d.dirname
        join Basenames as b on td.basename = b.basename """)
        cu.execute("drop table oldFilePaths")
        cu.execute("drop table tmpDirnames")
        # fix the autoincrement primary key value on the new FilePaths
        cu.execute("select max(filePathId) from FilePaths")
        maxId = cu.fetchone()[0]
        if maxId:
            self.db.setAutoIncrement("FilePaths", "filePathId", maxId)
        self.db.analyze("FilePaths")

        # re-enable the FK constraint and create indexes
        logMe(3, "adding foreign key constraints...")
        self.db.addForeignKey("TroveFiles", "filePathId", "FilePaths",
                              "filePathId")
        self.db.analyze("TroveFiles")
        schema.createTroves(self.db)
Example #9
 def fixDuplicatePaths(self, repos):
     logMe(2, "checking database for duplicate path entries...")
     cu = self.db.cursor()
     # we'll have to do a full table scan on TroveFiles. no way
     # around it...
     cu.execute("""
     create temporary table tmpDups(
         instanceId integer,
         path %(PATHTYPE)s,
         counter integer
     ) %(TABLEOPTS)s""" % self.db.keywords)
     logMe(2, "looking for troves with duplicate paths...")
     # sqlite has a real challenge dealing with large datasets
     if self.db.driver == 'sqlite':
         cu2 = self.db.cursor()
         cu.execute("select distinct instanceId from TroveFiles")
         # so we split this into many very small tasks
         for (instanceId, ) in cu:
             cu2.execute(
                 """
             insert into tmpDups (instanceId, path, counter)
             select instanceId, path, count(*)
             from TroveFiles
             where instanceId = ?
             group by instanceId, path
             having count(*) > 1""", instanceId)
     else:  # other backends should be able to process this in one shot
         cu.execute("""
         insert into tmpDups (instanceId, path, counter)
         select instanceId, path, count(*)
         from TroveFiles
         group by instanceId, path
         having count(*) > 1""")
     counter = cu.execute("select count(*) from tmpDups").fetchall()[0][0]
     if counter > 0:
         # drop the old index, if any
         self.db.loadSchema()
         self.db.dropIndex("TroveFiles", "TroveFilesPathIdx")
     logMe(3, "detected %d duplicates" % (counter, ))
     # loop over every duplicate and apply the appropriate fix
     cu.execute("select instanceId, path from tmpDups")
     for (instanceId, path) in cu.fetchall():
         cu.execute(
             """select distinct
         instanceId, streamId, versionId, pathId, path
         from trovefiles where instanceId = ? and path = ?
         order by streamId, versionId, pathId""", (instanceId, path))
         ret = cu.fetchall()
         # delete all the duplicates and put the first one back
         cu.execute(
             "delete from trovefiles "
             "where instanceId = ? and path = ?", (instanceId, path))
         # if the duplicate rows differ, keep the first (oldest) one, since it
         # is most likely the original
         cu.execute(
             "insert into trovefiles "
             "(instanceId, streamId, versionId, pathId, path) "
             "values (?,?,?,?,?)", tuple(ret[0]))
         if len(ret) > 1:
             # need to recompute the sha1 - we might have changed the trove manifest
             # if the records were different
             self.fixTroveSig(repos, instanceId)
     # recreate the indexes and triggers - including new path
     # index for TroveFiles.  Also recreates the indexes table.
     logMe(2, 'Recreating indexes... (this could take a while)')
     cu.execute("drop table tmpDups")
     self.db.loadSchema()
     schema.createTroves(self.db)
     logMe(2, 'Indexes created.')