Example #1
def PerformClean():
    """Perform the cleaning of files on the local repository."""
    global repositories
    global filesToKeep

    logger.info("## Clean Mode ##")
    print()

    cleanRepositories = []

    # 1. Ensure that the Repositories are actually on disk
    for repository in repositories:
        if os.path.isdir(
                f"{Settings.MirrorPath()}/{SanitiseUri(repository.Uri)}/dists/{repository.Distribution}"
        ):
            cleanRepositories.append(repository)
        else:
            logger.debug(
                f"Repository not found on disk: {SanitiseUri(repository.Uri)} {repository.Distribution}"
            )

    # 2. Get the Release files for each of the Repositories
    releaseFiles = []
    for repository in cleanRepositories:
        releaseFiles += repository.GetReleaseFiles()

    for releaseFile in releaseFiles:
        filesToKeep.append(os.path.normpath(SanitiseUri(releaseFile)))

    # 3. Parse the Release files for the list of Index files that are on Disk
    indexFiles = []
    for repository in cleanRepositories:
        indexFiles += repository.ParseReleaseFilesFromLocalMirror()

    for indexFile in indexFiles:
        filesToKeep.append(os.path.normpath(SanitiseUri(indexFile)))

    # 4. Generate list of all files on disk according to the Index files
    logger.info("Reading all Packages...")
    fileList = []
    for repository in tqdm.tqdm(cleanRepositories,
                                position=0,
                                unit=" repo",
                                desc="Repositories ",
                                leave=False):
        fileList += repository.ParseIndexFilesFromLocalMirror()

    # Packages potentially add duplicates - remove duplicates now
    requiredFiles = list(set(filesToKeep)) + [x.Filename for x in fileList]  # type: list[str]

    os.chdir(Settings.MirrorPath())

    Clean(cleanRepositories, requiredFiles)
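
Note: the listing above leans on a SanitiseUri helper that is not shown. A minimal sketch of what it plausibly does, assuming its only job is to turn a mirror URI into a path-safe relative string (hypothetical, not the project's actual implementation):

import re

def SanitiseUri(uri: str) -> str:
    # Hypothetical sketch: strip the scheme and any credentials so the
    # URI can double as a relative path under the mirror root.
    uri = re.sub(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", "", uri)  # drop http://, ftp://, ...
    uri = re.sub(r"^[^/]*?@", "", uri, count=1)            # drop user:pass@ if present
    return uri.rstrip("/")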
Example #2
def Clean(repositories: list, requiredFiles: list):
    # 5. Determine which files are in the mirror, but not listed in the Index files
    items = []  # type: list[str]
    logger.info("\tCompiling list of files to clean...")
    uris = {repository.Uri for repository in repositories}
    for uri in tqdm.tqdm(uris,
                         position=0,
                         unit=" repo",
                         desc="Repositories ",
                         leave=False):
        walked = []  # type: list[str]
        for root, _, files in tqdm.tqdm(os.walk(SanitiseUri(uri)),
                                        position=1,
                                        unit=" fso",
                                        desc="FSO          ",
                                        leave=False,
                                        delay=0.5):
            for file in tqdm.tqdm(files,
                                  position=2,
                                  unit=" file",
                                  desc="Files        ",
                                  leave=False,
                                  delay=0.5):
                walked.append(os.path.join(root, file))

        logger.debug(f"{SanitiseUri(uri)}: Walked {len(walked)} items")
        items += [
            x for x in walked if os.path.normpath(x) not in requiredFiles
            and not os.path.islink(x)
        ]

    logger.debug(f"Found {len(items)} which can be freed")
    for item in items:
        logger.debug(item)

    # 6. Calculate size of items to clean
    if items:
        logger.info("\tCalculating space savings...")
        clearSize = 0
        for file in tqdm.tqdm(items, unit=" files", leave=False):
            clearSize += os.path.getsize(file)
    else:
        logger.info("\tNo files eligible to clean")
        return

    if Settings.Test():
        logger.info(
            f"\tFound {ConvertSize(clearSize)} in {len(items)} files and directories that could be freed."
        )
        return

    logger.info(
        f"\t{ConvertSize(clearSize)} in {len(items)} files and directories will be freed..."
    )

    # 7. Clean files
    for item in items:
        os.remove(item)
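
One performance note on Clean: requiredFiles is a plain list, so the `not in requiredFiles` test inside the walk is linear per file. A behaviour-preserving tweak (a sketch, not from the original source) pre-computes a set for O(1) lookups:

# Sketch: build a set once, before walking, so each membership test is O(1).
requiredSet = set(requiredFiles)

items += [
    x for x in walked
    if os.path.normpath(x) not in requiredSet and not os.path.islink(x)
]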
Example #3
    def _ProcessIndex(self, indexRoot: str, index: str, skipUpdateCheck: bool) -> list[Package]:
        """
            Processes each package listed in the Index file.

            For each Package that is found in the Index file,
            it is checked to see whether the file exists in the
            local mirror, and if not, adds it to the collection
            for download.

            If the file does exist, checks based on the filesize
            to determine if the file has been updated.
        """

        packageList = [] # type: list[Package]

        path = SanitiseUri(self.Uri)

        indexFile = Index(f"{indexRoot}/{index}")
        indexFile.Read()
        logging.debug(f"Processing Index file: {indexRoot}/{index}")

        packages = indexFile.GetPackages() # type: list[dict[str,str]]

        mirror = Settings.MirrorPath() + "/" + path
        
        for package in tqdm.tqdm(packages,
                                 position=2,
                                 unit=" pkgs",
                                 desc="Packages     ",
                                 leave=False,
                                 delay=0.5):
            if "Filename" in package:
                # Packages Index
                filename = package["Filename"]

                if filename.startswith("./"):
                    filename = filename[2:]

                packageList.append(
                    Package(
                        os.path.normpath(f"{path}/{filename}"),
                        int(package["Size"]),
                        skipUpdateCheck or not self._NeedUpdate(
                            os.path.normpath(f"{mirror}/{filename}"),
                            int(package["Size"]))))
            else:
                # Sources Index
                for key, value in package.items():
                    if "Files" in key:
                        files = list(filter(None, value.splitlines()))  # type: list[str]
                        for file in files:
                            directory = package["Directory"]
                            sourceFile = file.split(" ")

                            size = int(sourceFile[1])
                            filename = sourceFile[2]

                            if filename.startswith("./"):
                                filename = filename[2:]

                            # Use the per-file size parsed from the Files entry;
                            # Sources stanzas do not carry a stanza-level Size field.
                            packageList.append(
                                Package(
                                    os.path.normpath(f"{path}/{directory}/{filename}"),
                                    size,
                                    skipUpdateCheck or not self._NeedUpdate(
                                        os.path.normpath(f"{mirror}/{directory}/{filename}"),
                                        size)))

        stale = [x for x in packageList if not x.Latest]
        if stale:
            logger.debug(f"Packages to update ({len(stale)}):")
            for pkg in stale:
                logger.debug(f"\t{pkg.Filename}")

        return packageList
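
The size-based update check described in the docstring is delegated to _NeedUpdate, which is not listed here. A minimal sketch of the logic it presumably implements (hypothetical):

import os

def _NeedUpdate(self, path: str, size: int) -> bool:
    # Hypothetical sketch: a file needs downloading if it is missing,
    # or if its on-disk size differs from the size listed in the Index.
    if not os.path.isfile(path):
        return True
    return os.path.getsize(path) != size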
Example #4
def main(conf: str, test: bool, clean: bool):
    """A tool to mirror Debian Repositories for use as a local mirror."""

    global repositories
    global filesToKeep

    startTime = time.perf_counter()

    ConfigureLogger()

    logger.info("Starting Refrapt process")

    configData = GetConfig(conf)

    # Parse the configuration file
    Settings.Parse(configData)
    logging.getLogger().setLevel(Settings.LogLevel())

    # Allow the command line Test argument to override the setting in the configuration file
    if test:
        Settings.EnableTest()

    if Settings.Test():
        logger.info("## Running in Test mode ##\n")

    repositories = GetRepositories(configData)

    if not repositories:
        logger.info(
            "No Repositories found in configuration file. Application exiting."
        )
        sys.exit()

    # Create working directories
    Path(Settings.MirrorPath()).mkdir(parents=True, exist_ok=True)
    Path(Settings.SkelPath()).mkdir(parents=True, exist_ok=True)
    Path(Settings.VarPath()).mkdir(parents=True, exist_ok=True)

    Downloader.Init()

    # Change to the Skel directory for working repository structure
    os.chdir(Settings.SkelPath())

    # Check for any "-lock" files.
    for file in os.listdir(Settings.VarPath()):
        if "Download-lock" in file:
            # A download was in progress and interrupted. This means a
            # partial download will be sitting on the drive. Remove
            # it to guarantee that it will be fully downloaded.
            uri = None
            with open(f"{Settings.VarPath()}/{file}") as f:
                uri = f.readline().strip()

            uri = SanitiseUri(uri)
            if os.path.isfile(f"{Settings.MirrorPath()}/{uri}"):
                os.remove(f"{Settings.MirrorPath()}/{uri}")
            elif os.path.isfile(f"{Settings.VarPath()}/{uri}"):
                os.remove(f"{Settings.VarPath()}/{uri}")
            logger.info(f"Removed incomplete download {uri}")
        if appLockFile in file:
            # Refrapt was interrupted during processing. Recently
            # downloaded files may not be marked as Modified, so
            # force processing of all files to ensure nothing is
            # missed.
            logger.info(
                "The previous Refrapt run was interrupted. Full processing will be performed to ensure completeness"
            )
            Settings.SetForce()

    # Delete existing /var files
    logger.info("Removing previous /var files...")
    for item in os.listdir(Settings.VarPath()):
        os.remove(f"{Settings.VarPath()}/{item}")

    # Create a lock file for the Application
    with FileLock(f"{Settings.VarPath()}/{appLockFile}.lock"):
        with open(f"{Settings.VarPath()}/{appLockFile}", "w+") as f:
            pass

        print()
        if clean:
            PerformClean()
        else:
            PerformMirroring()

    # Lock file no longer required
    os.remove(f"{Settings.VarPath()}/{appLockFile}")
    if os.path.isfile(f"{Settings.VarPath()}/{appLockFile}.lock"):
        # Requires manual deletion on Unix
        os.remove(f"{Settings.VarPath()}/{appLockFile}.lock")

    print()
    logger.info(
        f"Refrapt completed in {datetime.timedelta(seconds=round(time.perf_counter() - startTime))}"
    )
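
The "-lock" scan in main() implies a simple crash-recovery protocol on the download side: write a marker file naming the URI before transferring, and delete it on success, so a surviving marker identifies a partial download. A sketch of what the writer half might look like (hypothetical; the real Downloader is not shown, and Settings is the document's own class):

import os

def _DownloadWithMarker(uri: str):
    # Hypothetical sketch: the marker survives only if the process dies
    # mid-transfer, so the next run can find it and delete the partial
    # download it names.
    marker = f"{Settings.VarPath()}/Download-lock.{os.getpid()}"
    with open(marker, "w") as f:
        f.write(uri)
    try:
        pass  # perform the actual transfer of `uri` here
    finally:
        os.remove(marker)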
Example #5
def PerformMirroring():
    """Perform the main mirroring function of this application."""

    global repositories
    global filesToKeep

    filesToDownload = []  # type: list[Package]

    logger.info(f"Processing {len(repositories)} Repositories...")

    # 1. Get the Release files for each of the Repositories
    releaseFiles = []
    for repository in repositories:
        releaseFiles += repository.GetReleaseFiles()

    logger.debug("Adding Release Files to filesToKeep:")
    for releaseFile in releaseFiles:
        logger.debug(f"\t{SanitiseUri(releaseFile)}")
        filesToKeep.append(os.path.normpath(SanitiseUri(releaseFile)))

    logger.info(
        f"Compiled a list of {len(releaseFiles)} Release files for download")
    Downloader.Download(releaseFiles, UrlType.Release)

    # 2. Parse the Release files for the list of Index files to download
    indexFiles = []
    for repository in repositories:
        indexFiles += repository.ParseReleaseFilesFromRemote()

    logger.debug("Adding Index Files to filesToKeep:")
    for indexFile in indexFiles:
        logger.debug(f"\t{SanitiseUri(indexFile)}")
        filesToKeep.append(os.path.normpath(SanitiseUri(indexFile)))

    print()
    logger.info(
        f"Compiled a list of {len(indexFiles)} Index files for download")
    Downloader.Download(indexFiles, UrlType.Index)

    # Record timestamps of downloaded files to later determine which files
    # have changed, and therefore need to be processed
    for repository in repositories:
        repository.Timestamp()

    # 3. Unzip each of the Packages / Sources indices and obtain a list of all files to download
    print()
    logger.info(f"Decompressing Packages / Sources Indices...")
    for repository in tqdm.tqdm(repositories,
                                position=0,
                                unit=" repo",
                                desc="Repositories "):
        repository.DecompressIndexFiles()

    # 4. Parse all Index files (Package or Source) to collate all files that need to be downloaded
    print()
    logger.info("Building file list...")
    for repository in tqdm.tqdm([x for x in repositories if x.Modified],
                                position=0,
                                unit=" repo",
                                desc="Repositories ",
                                leave=False):
        filesToDownload += repository.ParseIndexFiles()

    # Packages potentially add duplicate downloads, slowing down the rest
    # of the process. To counteract, remove duplicates now
    filesToKeep = list(
        set(filesToKeep)) + [x.Filename for x in filesToDownload]

    logger.debug(f"Files to keep: {len(filesToKeep)}")
    for file in filesToKeep:
        logger.debug(f"\t{file}")

    # 5. Perform the main download of Binary and Source files
    downloadSize = ConvertSize(
        sum([x.Size for x in filesToDownload if not x.Latest]))
    logger.info(
        f"Compiled a list of {len([x for x in filesToDownload if not x.Latest])} Binary and Source files of size {downloadSize} for download"
    )

    os.chdir(Settings.MirrorPath())
    if not Settings.Test():
        Downloader.Download([x.Filename for x in filesToDownload],
                            UrlType.Archive)

    # 6. Copy Skel to Main Archive
    if not Settings.Test():
        print()
        logger.info("Copying Skel to Mirror")
        for indexUrl in tqdm.tqdm(filesToKeep, unit=" files"):
            skelFile = f"{Settings.SkelPath()}/{SanitiseUri(indexUrl)}"
            if os.path.isfile(skelFile):
                mirrorFile = f"{Settings.MirrorPath()}/{SanitiseUri(indexUrl)}"
                copy = True
                if os.path.isfile(mirrorFile):
                    # Compare timestamps to avoid copying files that have not changed
                    skelTimestamp = os.path.getmtime(Path(skelFile))
                    mirrorTimestamp = os.path.getmtime(Path(mirrorFile))
                    copy = skelTimestamp > mirrorTimestamp

                if copy:
                    os.makedirs(Path(mirrorFile).parent.absolute(),
                                exist_ok=True)
                    shutil.copyfile(skelFile, mirrorFile)

    # 7. Remove any unused files
    print()
    if Settings.CleanEnabled():
        PostMirrorClean()
    else:
        logger.info("Skipping Clean")

    if Settings.Test():
        # Remove Release Files and Index Files added to /skel to ensure normal processing
        # next time the application is run, otherwise the app will think it has all
        # the latest files downloaded, when actually it only has the latest /skel Index files
        print()
        os.chdir(Settings.SkelPath())

        logger.info("Test mode - Removing Release and Index files from /skel")
        for skelFile in releaseFiles + indexFiles:
            file = os.path.normpath(
                f"{Settings.SkelPath()}/{SanitiseUri(skelFile)}")
            if os.path.isfile(file):
                os.remove(file)
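
ConvertSize is used throughout but never listed. A plausible minimal implementation, assuming binary (base-1024) units (hypothetical, not the project's actual helper):

import math

def ConvertSize(sizeBytes: int) -> str:
    # Hypothetical sketch: format a byte count as a human-readable string.
    if sizeBytes == 0:
        return "0 B"
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    i = min(int(math.log(sizeBytes, 1024)), len(units) - 1)
    return f"{round(sizeBytes / 1024 ** i, 2)} {units[i]}"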
Example #6
    def Add(self, component: str, file: str):
        """Add a file to the collection for a given Component."""
        self._sourceCollection[component][SanitiseUri(file)] = Timestamp()
Example #7
    def Add(self, component: str, architecture: str, file: str):
        """Add a file to the collection for a given Component and Architecture."""
        self._packageCollection[component][architecture][SanitiseUri(file)] = Timestamp()
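
Both Add methods index straight into nested mappings without existence checks, which suggests the collections auto-create missing levels. A defaultdict-based sketch of how _packageCollection might be built (hypothetical; the real initialiser is not shown):

from collections import defaultdict

# Hypothetical sketch: a two-level mapping that creates missing
# component/architecture entries on first access.
packageCollection = defaultdict(lambda: defaultdict(dict))
packageCollection["main"]["amd64"]["deb.debian.org/debian/pool/example.deb"] = 1700000000.0  # e.g. a Timestamp() value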
Example #8
    def _ParseReleaseFiles(self, rootPath: str) -> list:
        """
            Get a list of all Index files from the Release file.

            Section 1.2 of the DebianRepository Format document states:
            - "Servers shall provide the InRelease file, and might provide a Release files and its signed counterparts"
            - https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files

            Therefore default to parsing the InRelease file.

            For the purposes of identifying which package indexes are required for download,
            the MD5Sum, SHA1 and SHA256 fields are parsed.

            Section 1.2.10 states:
            - "Those fields shall be multi-line fields containing multiple lines of whitespace separated data. 
               Each line shall contain;
                - The checksum of the file in the format corresponding to the field
                - The size of the file (integer >= 0)
                - The filename relative to the directory of the Release file

              Each datum must be separated by one or more whitespace characters."
            - https://wiki.debian.org/DebianRepository/Format#MD5Sum.2C_SHA1.2C_SHA256
        """

        baseUrl = self._uri + "/"
        if self._components:
            baseUrl += "dists/" + self._distribution + "/"

        inReleaseFilePath = rootPath + "/" + SanitiseUri(baseUrl) + "/InRelease"
        releaseFilePath   = rootPath + "/" + SanitiseUri(baseUrl) + "/Release"

        # Default to InRelease
        releaseFileToRead = inReleaseFilePath

        if not os.path.isfile(inReleaseFilePath):
            # Fall back to Release
            releaseFileToRead = releaseFilePath

        checksums = False

        indexFiles = []

        with open(releaseFileToRead) as f:
            for line in f:
                if ("SHA256:" in line or "SHA1:" in line or "MD5Sum:" in line) and "Hash:" not in line:
                    checksumType = line.replace(":", "").strip()
                    checksums = False

                if checksums:
                    if re.search("^ +(.*)$", line):
                        parts = list(filter(None, line.split(" ")))

                        # parts[0] = checksum
                        # parts[1] = size
                        # parts[2] = filename

                        if len(parts) != 3:
                            logger.warning(f"Malformed checksum line '{line}' in {releaseFileToRead}")
                            continue

                        checksum = parts[0].strip()
                        filename = parts[2].rstrip()

                        if self._repositoryType == RepositoryType.Bin:
                            for architecture in self._architectures:
                                if Settings.Contents():
                                    if re.match(rf"Contents-{architecture}", filename):
                                        indexFiles.append(f"{baseUrl}{filename}")

                                if self._components:
                                    for component in self._components:
                                        if Settings.Contents():
                                            if re.search(rf"{component}/Contents-{architecture}", filename):
                                                indexFiles.append(f"{baseUrl}{filename}")

                                        binaryByHash = rf"{baseUrl}{component}/binary-{architecture}/by-hash/{checksumType}/{checksum}"

                                        if re.match(rf"{component}/binary-{architecture}/Release", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(binaryByHash)

                                        if re.match(rf"{component}/binary-{architecture}/Packages", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")

                                            if re.match(rf"{component}/binary-{architecture}/Packages[^./]*(\.gz|\.bz2|\.xz|$)$", filename):
                                                self._packageCollection.Add(component, architecture, f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(binaryByHash)

                                        if re.match(rf"{component}/cnf/Commands-{architecture}", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(rf"{baseUrl}{component}/cnf/by-hash/{checksumType}/{checksum}")

                                        i18nByHash = rf"{baseUrl}{component}/i18n/by-hash/{checksumType}/{checksum}"

                                        if re.match(rf"{component}/i18n/cnf/Commands-{architecture}", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(i18nByHash)

                                        if re.match(rf"{component}/i18n/Index", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(i18nByHash)

                                        for language in Settings.Language():
                                            if re.match(rf"{component}/i18n/Translation-{language}", filename):
                                                indexFiles.append(f"{baseUrl}{filename}")
                                                if Settings.ByHash():
                                                    indexFiles.append(i18nByHash)

                                        if re.match(rf"{component}/dep11/(Components-{architecture}\.yml|icons-[^./]+\.tar)", filename):
                                            indexFiles.append(f"{baseUrl}{filename}")
                                            if Settings.ByHash():
                                                indexFiles.append(f"{baseUrl}{component}/dep11/by-hash/{checksumType}/{checksum}")
                                else:
                                    indexFiles.append(f"{baseUrl}{filename}")
                                    self._packageCollection.Add("Flat", architecture, f"{baseUrl}{filename}")

                        elif self._repositoryType == RepositoryType.Src:
                            for component in self._components:
                                if re.match(rf"{component}/source/Release", filename):
                                    indexFiles.append(f"{baseUrl}{filename}")

                                if re.match(rf"{component}/source/Sources[^./]*(\.gz|\.bz2|\.xz|$)$", filename):
                                    indexFiles.append(f"{baseUrl}{filename}")
                                    self._sourceCollection.Add(component, f"{baseUrl}{filename}")
                    else:
                        checksums = False
                else:
                    checksums = "SHA256:" in line or "SHA1:" in line or "MD5Sum:" in line

        if self._repositoryType == RepositoryType.Bin:
            self._packageCollection.DetermineCurrentTimestamps()
        elif self._repositoryType == RepositoryType.Src:
            self._sourceCollection.DetermineCurrentTimestamps()

        # Remove duplicates caused by reading multiple listings for each checksum type.
        # Entries keep their full URI form here; callers normalise them into disk
        # paths via SanitiseUri and os.path.normpath.
        return list(set(indexFiles))
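
For reference, the checksum blocks this parser consumes follow the three-column format quoted in the docstring. A standalone minimal reader for just that structure (a sketch under the same format assumptions, not a replacement for the method above):

import re

def IterChecksumEntries(releaseFilePath: str):
    # Sketch: yield (checksumType, checksum, size, filename) tuples from the
    # MD5Sum / SHA1 / SHA256 blocks of a Release or InRelease file.
    checksumType = None
    with open(releaseFilePath) as f:
        for line in f:
            header = re.match(r"^(MD5Sum|SHA1|SHA256):\s*$", line)
            if header:
                checksumType = header.group(1)
            elif checksumType and line.startswith(" "):
                parts = line.split()
                if len(parts) == 3:
                    yield checksumType, parts[0], int(parts[1]), parts[2]
            else:
                checksumType = None  # any other line ends the block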