def fetch(self):
    """Download and extract the dataset."""
    home = self.home()
    if not os.path.exists(home):
        os.makedirs(home)

    base_url = "http://pics-from-sam.s3.amazonaws.com/"

    # Archives: each entry is (s3_key, sha1) or (s3_key, sha1, subdir).
    # A 2-tuple extracts directly into the data home; a 3-tuple into subdir.
    for entry in self.S3_ARCHIVES:
        if len(entry) == 2:
            key, sha1 = entry
            target_dir = home
        else:
            key, sha1, subdir = entry
            target_dir = os.path.join(home, subdir)

        archive_path = os.path.join(home, key.split("/")[-1])
        if not os.path.exists(archive_path):
            # credentials have to be properly set in ~/.boto
            # or environment variables
            url = base_url + key
            print("downloading %s" % url)
            download_boto(url, self.credentials, archive_path, sha1=sha1)
        # Extraction runs even when the archive was already on disk, so a
        # previously-interrupted extract is retried on the next fetch.
        extract(archive_path, target_dir, sha1=sha1, verbose=True)

    # Loose (non-archive) files: each entry is (s3_key, sha1); the key's
    # directory components are recreated under the data home.
    for key, sha1 in self.S3_FILES:
        parent_dir = os.path.join(home, "/".join(key.split("/")[:-1]))
        if not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)
        file_path = os.path.join(home, key)
        if not os.path.exists(file_path):
            url = base_url + key
            print("downloading %s" % url)
            download_boto(url, self.credentials, file_path, sha1=sha1)
def install(self, local_fer2013):
    """
    Verify SHA1 and copy given file into .skdata cache directory.

    Parameters
    ----------
    local_fer2013 : str
        Path to a locally downloaded copy of the fer2013 archive; its
        SHA1 must match the module-level ``TGZ_SHA1`` or ``verify_sha1``
        raises before anything is copied.

    Side effects: creates the cache directory if needed, copies the
    archive to ``self.home(TGZ_FILENAME)`` under a file lock, and
    extracts it into ``self.home()``.
    """
    verify_sha1(local_fer2013, TGZ_SHA1)
    if not os.path.isdir(self.home()):
        os.makedirs(self.home())
    lock = lockfile.FileLock(self.home())
    if lock.is_locked():
        # Informational only; the `with lock` below still blocks until
        # the lock is actually released.
        # Fix: logger.warn is a deprecated alias of logger.warning.
        logger.warning('%s is locked, waiting for release' % self.home())
    with lock:
        shutil.copyfile(local_fer2013, self.home(TGZ_FILENAME))
        extract(self.home(TGZ_FILENAME), self.home())