def collect_individual_checksums(self):
    """This step grabs all of the small checksums files for the release,
    filters out any unwanted files from within them, and adds the remainder
    to self.checksums for subsequent steps to use.

    Keys are discovered by listing the bucket under ``self.file_prefix``;
    ``.beet`` files are preferred over ``.checksums`` files when any exist.
    Aborts via ``self.fatal`` on duplicate entries or missing hash formats.
    """
    bucket = self._get_bucket()
    self.info("File prefix is: {}".format(self.file_prefix))

    # Temporary holding place for checksums; list.append is safe to call
    # from the worker threads without extra locking.
    raw_checksums = []

    def worker(item):
        # Download one checksums file and stash its raw contents.
        self.debug("Downloading {}".format(item))
        # TODO: It would be nice to download the associated .asc file
        # and verify against it.
        sums = bucket.get_key(item).get_contents_as_string()
        raw_checksums.append(sums)

    def find_checksums_files():
        # Collect candidate key names, bucketed by format; prefer the
        # beet format when any .beet files are present.
        self.info("Getting key names from bucket")
        checksum_files = {"beets": [], "checksums": []}
        for key in bucket.list(prefix=self.file_prefix):
            if key.key.endswith(".checksums"):
                self.debug("Found checksums file: {}".format(key.key))
                checksum_files["checksums"].append(key.key)
            elif key.key.endswith(".beet"):
                self.debug("Found beet file: {}".format(key.key))
                checksum_files["beets"].append(key.key)
            else:
                self.debug("Ignoring non-checksums file: {}".format(
                    key.key))
        if checksum_files["beets"]:
            self.log("Using beet format")
            return checksum_files["beets"]
        else:
            self.log("Using checksums format")
            return checksum_files["checksums"]

    pool = ThreadPool(self.config["parallelization"])
    pool.map(worker, find_checksums_files())

    for c in raw_checksums:
        # .items() instead of Py2-only .iteritems(): identical iteration
        # behavior, and forward-compatible with Python 3.
        for f, info in parse_checksums_file(c).items():
            for pattern in self.config["includes"]:
                if re.search(pattern, f):
                    if f in self.checksums:
                        self.fatal(
                            "Found duplicate checksum entry for {}, don't know which one to pick."
                            .format(f))
                    if not set(self.config["formats"]) <= set(
                            info["hashes"]):
                        self.fatal(
                            "Missing necessary format for file {}".format(
                                f))
                    self.debug("Adding checksums for file: {}".format(f))
                    self.checksums[f] = info
                    break
            else:
                # No include pattern matched this file.
                self.debug("Ignoring checksums for file: {}".format(f))
def collect_individual_checksums(self):
    """This step grabs all of the small checksums files for the release,
    filters out any unwanted files from within them, and adds the remainder
    to self.checksums for subsequent steps to use.

    Connects anonymously to S3, lists keys under the computed file prefix,
    and downloads every ``.checksums`` file in parallel. Aborts via
    ``self.fatal`` on duplicate entries or missing hash formats.
    """
    from boto.s3.connection import S3Connection

    bucket_name = self._get_bucket_name()
    file_prefix = self._get_file_prefix()
    self.info("Bucket name is: {}".format(bucket_name))
    self.info("File prefix is: {}".format(file_prefix))

    self.info("Connecting to S3")
    # Anonymous connection: the release candidates bucket is public.
    conn = S3Connection(anon=True)
    self.debug("Successfully connected to S3")
    candidates = conn.get_bucket(bucket_name)

    # Temporary holding place for checksums; list.append is safe to call
    # from the worker threads without extra locking.
    raw_checksums = []

    def worker(item):
        # Download one checksums file and stash its raw contents.
        self.debug("Downloading {}".format(item))
        # TODO: It would be nice to download the associated .asc file
        # and verify against it.
        sums = candidates.get_key(item).get_contents_as_string()
        raw_checksums.append(sums)

    def find_checksums_files():
        # Lazily yield the key names of all .checksums files under the
        # release prefix.
        self.info("Getting key names from bucket")
        for key in candidates.list(prefix=file_prefix):
            if key.key.endswith(".checksums"):
                self.debug("Found checksums file: {}".format(key.key))
                yield key.key
            else:
                self.debug("Ignoring non-checksums file: {}".format(
                    key.key))

    pool = ThreadPool(self.config["parallelization"])
    pool.map(worker, find_checksums_files())

    for c in raw_checksums:
        # .items() instead of Py2-only .iteritems(): identical iteration
        # behavior, and forward-compatible with Python 3.
        for f, info in parse_checksums_file(c).items():
            for pattern in self.config["includes"]:
                if re.search(pattern, f):
                    if f in self.checksums:
                        self.fatal(
                            "Found duplicate checksum entry for {}, don't know which one to pick."
                            .format(f))
                    if not set(self.config["formats"]) <= set(
                            info["hashes"]):
                        self.fatal(
                            "Missing necessary format for file {}".format(
                                f))
                    self.debug("Adding checksums for file: {}".format(f))
                    self.checksums[f] = info
                    break
            else:
                # No include pattern matched this file.
                self.debug("Ignoring checksums for file: {}".format(f))
def collect_individual_checksums(self):
    """This step grabs all of the small checksums files for the release,
    filters out any unwanted files from within them, and adds the remainder
    to self.checksums for subsequent steps to use.

    Keys are discovered by listing the bucket under ``self.file_prefix``;
    ``.beet`` files are preferred over ``.checksums`` files when any exist.
    Aborts via ``self.fatal`` on duplicate entries or missing hash formats.
    """
    bucket = self._get_bucket()
    self.info("File prefix is: {}".format(self.file_prefix))

    # Temporary holding place for checksums; list.append is safe to call
    # from the worker threads without extra locking.
    raw_checksums = []

    def worker(item):
        # Download one checksums file and stash its raw contents.
        self.debug("Downloading {}".format(item))
        # TODO: It would be nice to download the associated .asc file
        # and verify against it.
        sums = bucket.get_key(item).get_contents_as_string()
        raw_checksums.append(sums)

    def find_checksums_files():
        # Collect candidate key names, bucketed by format; prefer the
        # beet format when any .beet files are present.
        self.info("Getting key names from bucket")
        checksum_files = {"beets": [], "checksums": []}
        for key in bucket.list(prefix=self.file_prefix):
            if key.key.endswith(".checksums"):
                self.debug("Found checksums file: {}".format(key.key))
                checksum_files["checksums"].append(key.key)
            elif key.key.endswith(".beet"):
                self.debug("Found beet file: {}".format(key.key))
                checksum_files["beets"].append(key.key)
            else:
                self.debug("Ignoring non-checksums file: {}".format(key.key))
        if checksum_files["beets"]:
            self.log("Using beet format")
            return checksum_files["beets"]
        else:
            self.log("Using checksums format")
            return checksum_files["checksums"]

    pool = ThreadPool(self.config["parallelization"])
    pool.map(worker, find_checksums_files())

    for c in raw_checksums:
        # .items() instead of Py2-only .iteritems(): identical iteration
        # behavior, and forward-compatible with Python 3.
        for f, info in parse_checksums_file(c).items():
            for pattern in self.config["includes"]:
                if re.search(pattern, f):
                    if f in self.checksums:
                        self.fatal("Found duplicate checksum entry for {}, "
                                   "don't know which one to pick.".format(f))
                    if not set(self.config["formats"]) <= set(info["hashes"]):
                        self.fatal("Missing necessary format for file {}".format(f))
                    self.debug("Adding checksums for file: {}".format(f))
                    self.checksums[f] = info
                    break
            else:
                # No include pattern matched this file.
                self.debug("Ignoring checksums for file: {}".format(f))
def collect_individual_checksums(self):
    """This step grabs all of the small checksums files for the release,
    filters out any unwanted files from within them, and adds the remainder
    to self.checksums for subsequent steps to use.

    Connects anonymously to S3, lists keys under the computed file prefix,
    and downloads every ``.checksums`` file in parallel. Aborts via
    ``self.fatal`` on duplicate entries or missing hash formats.
    """
    from boto.s3.connection import S3Connection

    bucket_name = self._get_bucket_name()
    file_prefix = self._get_file_prefix()
    self.info("Bucket name is: {}".format(bucket_name))
    self.info("File prefix is: {}".format(file_prefix))

    self.info("Connecting to S3")
    # Anonymous connection: the release candidates bucket is public.
    conn = S3Connection(anon=True)
    self.debug("Successfully connected to S3")
    candidates = conn.get_bucket(bucket_name)

    # Temporary holding place for checksums; list.append is safe to call
    # from the worker threads without extra locking.
    raw_checksums = []

    def worker(item):
        # Download one checksums file and stash its raw contents.
        self.debug("Downloading {}".format(item))
        # TODO: It would be nice to download the associated .asc file
        # and verify against it.
        sums = candidates.get_key(item).get_contents_as_string()
        raw_checksums.append(sums)

    def find_checksums_files():
        # Lazily yield the key names of all .checksums files under the
        # release prefix.
        self.info("Getting key names from bucket")
        for key in candidates.list(prefix=file_prefix):
            if key.key.endswith(".checksums"):
                self.debug("Found checksums file: {}".format(key.key))
                yield key.key
            else:
                self.debug("Ignoring non-checksums file: {}".format(key.key))

    pool = ThreadPool(self.config["parallelization"])
    pool.map(worker, find_checksums_files())

    for c in raw_checksums:
        # .items() instead of Py2-only .iteritems(): identical iteration
        # behavior, and forward-compatible with Python 3.
        for f, info in parse_checksums_file(c).items():
            for pattern in self.config["includes"]:
                if re.search(pattern, f):
                    if f in self.checksums:
                        self.fatal("Found duplicate checksum entry for {}, don't know which one to pick.".format(f))
                    if not set(self.config["formats"]) <= set(info["hashes"]):
                        self.fatal("Missing necessary format for file {}".format(f))
                    self.debug("Adding checksums for file: {}".format(f))
                    self.checksums[f] = info
                    break
            else:
                # No include pattern matched this file.
                self.debug("Ignoring checksums for file: {}".format(f))