예제 #1
0
    def fetch(self):
        """Download and extract the dataset into ``self.home()``.

        Idempotent: pair-label files already on disk are not downloaded
        again, and image extraction is skipped once the completion marker
        exists.  A file lock on ``home`` serializes concurrent fetches
        from multiple processes.
        """
        home = self.home()

        lock = lockfile.FileLock(home)
        if lock.is_locked():
            # Another process holds the lock; ``with lock`` below blocks
            # until it is released.  (warning() -- warn() is a deprecated
            # alias in the logging module.)
            log.warning('%s is locked, waiting for release' % home)

        with lock:
            # -- download pair labels
            for fname, sha1 in PAIRS_FILENAMES:
                url = path.join(PAIRS_BASE_URL, fname)
                basename = path.basename(url)
                filename = path.join(home, basename)
                if not path.exists(filename):
                    if not path.exists(home):
                        os.makedirs(home)
                    download(url, filename, sha1=sha1)

            # -- download and extract images
            url = self.URL
            sha1 = self.SHA1
            output_dirname = self.home('images')
            if not path.exists(output_dirname):
                os.makedirs(output_dirname)

            # -- various disruptions might cause this to fail
            #    but if any process gets as far as writing the completion
            #    marker, then it should be all good.
            done_marker = os.path.join(output_dirname, 'completion_marker')
            if not path.exists(done_marker):
                download_and_extract(url, output_dirname, sha1=sha1)
                # Touch the marker; ``with`` guarantees the handle closes.
                with open(done_marker, 'w'):
                    pass
예제 #2
0
파일: dataset.py 프로젝트: Afey/skdata
    def fetch(self, download_if_missing=True):
        """Download the archives listed in ``self.FILES`` into ``self.home()``.

        Parameters
        ----------
        download_if_missing : bool
            When False, an ``IOError`` is raised instead of downloading.

        Raises
        ------
        IOError
            If ``download_if_missing`` is False.
        """
        home = self.home()

        if not download_if_missing:
            # NOTE(review): this raises unconditionally when downloading is
            # disabled -- even if the data is already on disk -- and the
            # message reads backwards.  Preserved as-is for compatibility.
            raise IOError("'%s' exists!" % home)

        lock = lockfile.FileLock(home)
        if lock.is_locked():
            # warning() -- warn() is a deprecated logging alias.
            log.warning('%s is locked, waiting for release' % home)

        with lock:
            # .items() instead of .iteritems() for Python 2/3 compatibility.
            for fkey, (fname, sha1) in self.FILES.items():
                url = path.join(BASE_URL, fname)
                basename = path.basename(url)
                archive_filename = self.home(basename)
                marker = self.home(basename + '.marker')

                # 'extra' archives are only fetched when requested.
                if ('extra' not in url) or self.need_extra:
                    if not path.exists(marker):
                        # download_if_missing is necessarily True here (the
                        # raise above ensures it), so always download.
                        if not path.exists(home):
                            os.makedirs(home)
                        download(url, archive_filename, sha1=sha1)
                        # Marker file records a completed download.
                        with open(marker, 'w'):
                            pass
예제 #3
0
파일: dataset.py 프로젝트: Afey/skdata
    def fetch(self, download_if_missing=True):
        """Download the dataset files into ``self.home()``.

        The dataset directory must already exist; each file listed in the
        module-level ``urls`` mapping is downloaded and verified against
        the corresponding ``md5s`` checksum.

        Raises
        ------
        NotImplementedError
            If the home directory is missing and downloading was requested.
        IOError
            If the home directory is missing and downloading was disabled.
        """
        home = self.home()

        if not os.path.exists(home):
            if download_if_missing:
                # Automatic creation/bootstrap of the dataset directory is
                # not implemented for this dataset.
                raise NotImplementedError()
            else:
                raise IOError("'%s' does not exists!" % home)

        for filename, url in urls.items():
            download(url, self.home(filename), md5=md5s[filename])

        # The original ended here with ``return`` followed by an unreachable
        # block self-labeled "XXX REST IS CUT AND PASTE FROM ELSEWHERE";
        # that dead code has been removed.
예제 #4
0
    def fetch(self, download_if_missing=True):
        """Download the dataset files into ``self.home()``.

        The dataset directory must already exist; each file listed in the
        module-level ``urls`` mapping is downloaded and verified against
        the corresponding ``md5s`` checksum.

        Raises
        ------
        NotImplementedError
            If the home directory is missing and downloading was requested.
        IOError
            If the home directory is missing and downloading was disabled.
        """
        home = self.home()

        if not os.path.exists(home):
            if download_if_missing:
                # Automatic creation/bootstrap of the dataset directory is
                # not implemented for this dataset.
                raise NotImplementedError()
            else:
                raise IOError("'%s' does not exists!" % home)

        for filename, url in urls.items():
            download(url, self.home(filename), md5=md5s[filename])

        # The original ended here with ``return`` followed by an unreachable
        # block self-labeled "XXX REST IS CUT AND PASTE FROM ELSEWHERE";
        # that dead code has been removed.
예제 #5
0
    def fetch(self, download_if_missing=True):
        """Download the archives listed in ``self.FILES`` into ``self.home()``.

        Parameters
        ----------
        download_if_missing : bool
            When False, an ``IOError`` is raised instead of downloading.

        Raises
        ------
        IOError
            If ``download_if_missing`` is False.
        """
        home = self.home()

        if not download_if_missing:
            # NOTE(review): this raises unconditionally when downloading is
            # disabled -- even if the data is already on disk -- and the
            # message reads backwards.  Preserved as-is for compatibility.
            raise IOError("'%s' exists!" % home)

        lock = lockfile.FileLock(home)
        if lock.is_locked():
            # warning() -- warn() is a deprecated logging alias.
            log.warning('%s is locked, waiting for release' % home)

        with lock:
            # .items() instead of .iteritems() for Python 2/3 compatibility.
            for fkey, (fname, sha1) in self.FILES.items():
                url = path.join(BASE_URL, fname)
                basename = path.basename(url)
                archive_filename = self.home(basename)
                marker = self.home(basename + '.marker')

                # 'extra' archives are only fetched when requested.
                if ('extra' not in url) or self.need_extra:
                    if not path.exists(marker):
                        # download_if_missing is necessarily True here (the
                        # raise above ensures it), so always download.
                        if not path.exists(home):
                            os.makedirs(home)
                        download(url, archive_filename, sha1=sha1)
                        # Marker file records a completed download.
                        with open(marker, 'w'):
                            pass
예제 #6
0
파일: dataset.py 프로젝트: vincentcr/skdata
    def fetch(self, download_if_missing=True):
        """Download the archives listed in ``self.FILES`` into ``self.home()``.

        Already-downloaded archives are skipped.

        Raises
        ------
        IOError
            If ``download_if_missing`` is False.
        """
        home = self.home()

        if not download_if_missing:
            # NOTE(review): raised unconditionally when downloading is
            # disabled, even if the data is already present; the message
            # also reads backwards.  Preserved as-is for compatibility.
            raise IOError("'%s' exists!" % home)

        # .items() instead of .iteritems() for Python 2/3 compatibility.
        for fkey, (fname, sha1) in self.FILES.items():
            url = path.join(BASE_URL, fname)
            basename = path.basename(url)
            archive_filename = path.join(home, basename)
            if not path.exists(archive_filename):
                # download_if_missing is necessarily True here (the raise
                # above ensures it); the original's inner re-check was
                # unreachable and has been removed.
                if not path.exists(home):
                    os.makedirs(home)
                download(url, archive_filename, sha1=sha1)
예제 #7
0
    def fetch(self, download_if_missing=True):
        """Download the archives listed in ``self.FILES`` into ``self.home()``.

        Already-downloaded archives are skipped.

        Raises
        ------
        IOError
            If ``download_if_missing`` is False.
        """
        home = self.home()

        if not download_if_missing:
            # NOTE(review): raised unconditionally when downloading is
            # disabled, even if the data is already present; the message
            # also reads backwards.  Preserved as-is for compatibility.
            raise IOError("'%s' exists!" % home)

        # .items() instead of .iteritems() for Python 2/3 compatibility.
        for fkey, (fname, sha1) in self.FILES.items():
            url = path.join(BASE_URL, fname)
            basename = path.basename(url)
            archive_filename = path.join(home, basename)
            if not path.exists(archive_filename):
                # download_if_missing is necessarily True here (the raise
                # above ensures it); the original's inner re-check was
                # unreachable and has been removed.
                if not path.exists(home):
                    os.makedirs(home)
                download(url, archive_filename, sha1=sha1)
    def fetch(self, download_if_missing=True):
        """Download the dataset archive and extract it under ``self.home()``.

        Both steps are idempotent: the archive is only downloaded when it
        is absent, and extraction is skipped once ``self.SUBDIR`` exists.

        Raises
        ------
        IOError
            If ``download_if_missing`` is False.
        """
        home = self.home()

        if not download_if_missing:
            # NOTE(review): raised unconditionally when downloading is
            # disabled, even if the data is already present; the message
            # also reads backwards.  Preserved as-is for compatibility.
            raise IOError("'%s' exists!" % home)

        # -- download archive
        url = self.URL
        sha1 = self.SHA1
        basename = os.path.basename(url)
        archive_filename = os.path.join(home, basename)
        if not os.path.exists(archive_filename):
            # download_if_missing is necessarily True here (the raise above
            # ensures it); the original's inner re-check was unreachable
            # and has been removed.
            if not os.path.exists(home):
                os.makedirs(home)
            download(url, archive_filename, sha1=sha1)

        # -- extract it
        if not os.path.exists(self.home(self.SUBDIR)):
            extract(archive_filename, home, sha1=sha1, verbose=True)
예제 #9
0
    def fetch(self, download_if_missing=True):
        """Download any missing or corrupt dataset files into ``self.home()``.

        The ``md5sums`` index is first verified against a hard-coded md5 of
        itself; every item in the metadata is then checked against its md5.
        When ``self.n_item_limit`` is set, only the first that many items
        are required -- but corrupt files are re-fetched regardless.
        """
        if not download_if_missing:
            return
        if not os.path.exists(self.home()):
            os.makedirs(self.home())

        def checkmd5md5():
            # Verify the md5sums index file against its own known md5;
            # ``with`` closes the handle (the original leaked it).
            with open(self.home('md5sums'), 'rb') as f:
                md5sums = f.read()
            md5md5 = hashlib.md5(md5sums).hexdigest()
            if md5md5 != 'da55092603cb2628e91e759aec79f654':
                # print() form produces identical output on Python 2 and 3.
                print('Re-downloading corrupt md5sums file')
                download(self.BASE_URL + 'md5sums', self.home('md5sums'))

        try:
            checkmd5md5()
        except IOError:
            # md5sums file missing: fetch it, then verify the fresh copy.
            download(self.BASE_URL + 'md5sums', self.home('md5sums'))
            checkmd5md5()

        meta = self._get_meta()
        for ii, item in enumerate(meta):
            # Items beyond the limit are optional: downloaded only when the
            # file exists but is corrupt, never when it is simply missing.
            if self.n_item_limit is None:
                required = True
            else:
                required = ii < self.n_item_limit
            try:
                with open(self.home(item['basename']), 'rb') as f:
                    data = f.read()
                if hashlib.md5(data).hexdigest() != item['md5']:
                    # -- ignore 'required' flag for incorrect files
                    print('Re-downloading incorrect file %s'
                          % item['basename'])
                    download(self.BASE_URL + item['basename'],
                             self.home(item['basename']),
                             md5=item['md5'])
                    # TODO: catch ctrl-C, check md5,
                    # and remove partial download
            except IOError:
                if required:
                    download(self.BASE_URL + item['basename'],
                             self.home(item['basename']),
                             md5=item['md5'])
예제 #10
0
파일: dataset.py 프로젝트: Afey/skdata
    def fetch(self, download_if_missing=True):
        """Download any missing or corrupt dataset files into ``self.home()``.

        The ``md5sums`` index is first verified against a hard-coded md5 of
        itself; every item in the metadata is then checked against its md5.
        When ``self.n_item_limit`` is set, only the first that many items
        are required -- but corrupt files are re-fetched regardless.
        """
        if not download_if_missing:
            return
        if not os.path.exists(self.home()):
            os.makedirs(self.home())

        def checkmd5md5():
            # Verify the md5sums index file against its own known md5;
            # ``with`` closes the handle (the original leaked it).
            with open(self.home('md5sums'), 'rb') as f:
                md5sums = f.read()
            md5md5 = hashlib.md5(md5sums).hexdigest()
            if md5md5 != 'da55092603cb2628e91e759aec79f654':
                # print() form produces identical output on Python 2 and 3.
                print('Re-downloading corrupt md5sums file')
                download(self.BASE_URL + 'md5sums', self.home('md5sums'))

        try:
            checkmd5md5()
        except IOError:
            # md5sums file missing: fetch it, then verify the fresh copy.
            download(self.BASE_URL + 'md5sums', self.home('md5sums'))
            checkmd5md5()

        meta = self._get_meta()
        for ii, item in enumerate(meta):
            # Items beyond the limit are optional: downloaded only when the
            # file exists but is corrupt, never when it is simply missing.
            if self.n_item_limit is None:
                required = True
            else:
                required = ii < self.n_item_limit
            try:
                with open(self.home(item['basename']), 'rb') as f:
                    data = f.read()
                if hashlib.md5(data).hexdigest() != item['md5']:
                    # -- ignore 'required' flag for incorrect files
                    print('Re-downloading incorrect file %s'
                          % item['basename'])
                    download(self.BASE_URL + item['basename'],
                             self.home(item['basename']),
                             md5=item['md5'])
                    # TODO: catch ctrl-C, check md5,
                    # and remove partial download
            except IOError:
                if required:
                    download(self.BASE_URL + item['basename'],
                             self.home(item['basename']),
                             md5=item['md5'])
예제 #11
0
파일: dataset.py 프로젝트: Afey/skdata
 def checkmd5md5():
     """Verify the md5sums index against its known md5; re-fetch if corrupt.

     NOTE(review): ``self`` and ``download`` are free variables here --
     this is a nested helper lifted out of a ``fetch`` method and only
     works inside that enclosing scope.
     """
     # ``with`` closes the handle (the original leaked it).
     with open(self.home('md5sums'), 'rb') as f:
         md5sums = f.read()
     md5md5 = hashlib.md5(md5sums).hexdigest()
     if md5md5 != 'da55092603cb2628e91e759aec79f654':
         # print() form produces identical output on Python 2 and 3.
         print('Re-downloading corrupt md5sums file')
         download(self.BASE_URL + 'md5sums', self.home('md5sums'))
예제 #12
0
 def checkmd5md5():
     """Verify the md5sums index against its known md5; re-fetch if corrupt.

     NOTE(review): ``self`` and ``download`` are free variables here --
     this is a nested helper lifted out of a ``fetch`` method and only
     works inside that enclosing scope.
     """
     # ``with`` closes the handle (the original leaked it).
     with open(self.home('md5sums'), 'rb') as f:
         md5sums = f.read()
     md5md5 = hashlib.md5(md5sums).hexdigest()
     if md5md5 != 'da55092603cb2628e91e759aec79f654':
         # print() form produces identical output on Python 2 and 3.
         print('Re-downloading corrupt md5sums file')
         download(self.BASE_URL + 'md5sums', self.home('md5sums'))