def fetch(self, download_if_missing=True):
    """Download and extract the dataset into ``self.home()``.

    Fetches the pair-label files listed in ``PAIRS_FILENAMES`` and the
    image archive ``self.URL``, all under a file lock so concurrent
    processes do not clobber each other.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, raise IOError instead of downloading any file that
        is not already present.  (Added for consistency with the other
        ``fetch`` implementations in this module; the default preserves
        the previous behavior.)

    Raises
    ------
    IOError
        If ``download_if_missing`` is False and a required file is absent.
    """
    home = self.home()
    lock = lockfile.FileLock(home)
    if lock.is_locked():
        log.warn('%s is locked, waiting for release' % home)
    with lock:
        # -- download pair labels
        for fname, sha1 in PAIRS_FILENAMES:
            url = path.join(PAIRS_BASE_URL, fname)
            basename = path.basename(url)
            filename = path.join(home, basename)
            if not path.exists(filename):
                if not download_if_missing:
                    raise IOError("'%s' does not exist!" % filename)
                if not path.exists(home):
                    os.makedirs(home)
                download(url, filename, sha1=sha1)
        # -- download and extract images
        url = self.URL
        sha1 = self.SHA1
        output_dirname = self.home('images')
        if not path.exists(output_dirname):
            os.makedirs(output_dirname)
        # -- various disruptions might cause this to fail
        #    but if any process gets as far as writing the completion
        #    marker, then it should be all good.
        done_marker = os.path.join(output_dirname, 'completion_marker')
        if not path.exists(done_marker):
            if not download_if_missing:
                raise IOError("'%s' does not exist!" % done_marker)
            download_and_extract(url, output_dirname, sha1=sha1)
            open(done_marker, 'w').close()
def fetch(self, download_if_missing=True):
    """Download and extract the dataset into ``self.home()``.

    Iterates over ``self.FILES``; each archive is considered done once
    its ``<basename>.marker`` file exists.  Archives whose URL contains
    'extra' are only fetched when ``self.need_extra`` is set.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, never download: raise IOError if the data directory
        is entirely absent, and return silently if only some archives
        are missing.

    Raises
    ------
    IOError
        If ``download_if_missing`` is False and ``self.home()`` does
        not exist.
    """
    home = self.home()
    # Bug fix: this used to raise unconditionally whenever
    # download_if_missing was False — even with the data fully present —
    # and the message claimed the directory "exists".  Only raise when
    # the data directory is actually missing.
    if not download_if_missing and not path.exists(home):
        raise IOError("'%s' does not exist!" % home)
    lock = lockfile.FileLock(home)
    if lock.is_locked():
        log.warn('%s is locked, waiting for release' % home)
    with lock:
        # items() rather than iteritems() so the code also runs on Python 3.
        for fkey, (fname, sha1) in self.FILES.items():
            url = path.join(BASE_URL, fname)
            basename = path.basename(url)
            archive_filename = self.home(basename)
            # The .marker file records that this archive finished
            # downloading; its absence means a missing or partial file.
            marker = self.home(basename + '.marker')
            if ('extra' not in url) or self.need_extra:
                if not path.exists(marker):
                    if not download_if_missing:
                        return
                    if not path.exists(home):
                        os.makedirs(home)
                    download(url, archive_filename, sha1=sha1)
                    open(marker, 'w').close()
def fetch(self, download_if_missing=True):
    """Download the dataset files into ``self.home()``.

    Every entry of the module-level ``urls`` mapping is downloaded and
    verified against the md5 recorded in ``md5s``.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, raise IOError if the data directory does not
        already exist instead of creating it.

    Raises
    ------
    IOError
        If ``download_if_missing`` is False and the data is absent.
    """
    home = self.home()
    if not os.path.exists(home):
        if not download_if_missing:
            # Typo fixed: message previously read "does not exists!".
            raise IOError("'%s' does not exist!" % home)
        # Previously this path raised NotImplementedError, which made the
        # download loop below unreachable on a fresh machine; create the
        # directory and fall through to the downloads instead.
        os.makedirs(home)
    # download() verifies each file against its recorded md5.
    for filename, url in urls.items():
        download(url, self.home(filename), md5=md5s[filename])
    # NOTE: an unreachable cut-and-paste block (marked "XXX REST IS CUT
    # AND PASTE FROM ELSEWHERE" by the original author) that followed the
    # return here has been removed.
def fetch(self, download_if_missing=True):
    """Download the dataset archives into ``self.home()``.

    Each entry of ``self.FILES`` is downloaded (and sha1-verified by
    ``download``) unless the archive file already exists locally.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, never download: raise IOError if the data directory
        is entirely absent, and return silently if only some archives
        are missing.

    Raises
    ------
    IOError
        If ``download_if_missing`` is False and ``self.home()`` does
        not exist.
    """
    home = self.home()
    # Bug fix: this used to raise unconditionally whenever
    # download_if_missing was False — even with the data fully present —
    # and the message claimed the directory "exists".
    if not download_if_missing and not path.exists(home):
        raise IOError("'%s' does not exist!" % home)
    # items() rather than iteritems() so the code also runs on Python 3.
    for fkey, (fname, sha1) in self.FILES.items():
        url = path.join(BASE_URL, fname)
        basename = path.basename(url)
        archive_filename = path.join(home, basename)
        if not path.exists(archive_filename):
            if not download_if_missing:
                return
            if not path.exists(home):
                os.makedirs(home)
            download(url, archive_filename, sha1=sha1)
def fetch(self, download_if_missing=True):
    """Download the dataset archive and extract it under ``self.home()``.

    The archive at ``self.URL`` is downloaded (sha1-verified) if absent,
    then extracted unless the ``self.SUBDIR`` directory already exists.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, never download: raise IOError if the data directory
        is entirely absent, and return silently if only the archive is
        missing.

    Raises
    ------
    IOError
        If ``download_if_missing`` is False and ``self.home()`` does
        not exist.
    """
    home = self.home()
    # Bug fix: this used to raise unconditionally whenever
    # download_if_missing was False — even with the data fully present —
    # and the message claimed the directory "exists".
    if not download_if_missing and not os.path.exists(home):
        raise IOError("'%s' does not exist!" % home)
    # download archive
    url = self.URL
    sha1 = self.SHA1
    basename = os.path.basename(url)
    archive_filename = os.path.join(home, basename)
    if not os.path.exists(archive_filename):
        if not download_if_missing:
            return
        if not os.path.exists(home):
            os.makedirs(home)
        download(url, archive_filename, sha1=sha1)
    # extract it — the presence of SUBDIR marks a completed extraction
    if not os.path.exists(self.home(self.SUBDIR)):
        extract(archive_filename, home, sha1=sha1, verbose=True)
def fetch(self, download_if_missing=True):
    """Download any missing or corrupt dataset files.

    First ensures the ``md5sums`` index file is present and itself
    uncorrupted, then verifies every item listed by ``self._get_meta()``
    against its recorded md5, re-downloading files that are missing
    (when required) or whose checksum is wrong.

    Parameters
    ----------
    download_if_missing : bool, optional (default True)
        When False, do nothing at all (no directory creation, no
        verification, no downloads).
    """
    if not download_if_missing:
        return
    if not os.path.exists(self.home()):
        os.makedirs(self.home())

    def checkmd5md5():
        # Guard the md5sums index itself with a hard-coded digest of
        # the expected file ("md5 of the md5s").
        # Bug fix: the file handle was previously leaked
        # (open(...).read() with no close).
        with open(self.home('md5sums'), 'rb') as f:
            md5sums = f.read()
        md5md5 = hashlib.md5(md5sums).hexdigest()
        if md5md5 != 'da55092603cb2628e91e759aec79f654':
            # print() with a single argument behaves identically on
            # Python 2 and 3; the old `print 'x'` statement was 2-only.
            print('Re-downloading corrupt md5sums file')
            download(self.BASE_URL + 'md5sums', self.home('md5sums'))

    try:
        checkmd5md5()
    except IOError:
        # md5sums missing entirely: fetch it, then re-verify.
        download(self.BASE_URL + 'md5sums', self.home('md5sums'))
        checkmd5md5()

    meta = self._get_meta()
    for ii, item in enumerate(meta):
        # Only the first n_item_limit items are mandatory
        # (None means everything is required).
        if self.n_item_limit is None:
            required = True
        else:
            required = ii < self.n_item_limit
        try:
            with open(self.home(item['basename']), 'rb') as f:
                data = f.read()
            if hashlib.md5(data).hexdigest() != item['md5']:
                # -- ignore 'required' flag for incorrect files
                print('Re-downloading incorrect file %s'
                      % item['basename'])
                download(self.BASE_URL + item['basename'],
                         self.home(item['basename']),
                         md5=item['md5'])
            # TODO: catch ctrl-C, check md5,
            # and remove partial download
        except IOError:
            if required:
                download(self.BASE_URL + item['basename'],
                         self.home(item['basename']),
                         md5=item['md5'])
def checkmd5md5():
    # NOTE(review): this appears to be a free-standing duplicate of the
    # helper nested inside fetch() above; it references `self` but takes
    # no such parameter, so calling it as written would raise NameError.
    # Presumably dead code or a paste left over from refactoring — confirm
    # against the enclosing context before relying on it.
    md5sums = open(self.home('md5sums'), 'rb').read()
    md5md5 = hashlib.md5(md5sums).hexdigest()
    # The literal below is the expected md5 digest of the md5sums file
    # itself; a mismatch means the index is corrupt and must be refetched.
    if md5md5 != 'da55092603cb2628e91e759aec79f654':
        print 'Re-downloading corrupt md5sums file'
        download(self.BASE_URL + 'md5sums', self.home('md5sums'))