Example #1
    def test_md5(self):

        from ckcache import new_cache, md5_for_file
        from ckcache.filesystem import make_metadata

        fn = self.make_test_file()

        md5 = md5_for_file(fn)

        cache = new_cache(os.path.join(self.root, 'fscache'))

        cache.put(fn, 'foo1')

        abs_path = cache.path('foo1')

        self.assertEquals(md5, cache.md5('foo1'))

        cache = new_cache(os.path.join(self.root, 'compressioncache'))

        cache.put(fn, 'foo2', metadata=make_metadata(fn))

        abs_path = cache.path('foo2')

        self.assertEquals(md5, cache.md5('foo2'))

        os.remove(fn)
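
The tests in this collection pass several different argument forms to new_cache: a bare directory path (Example #1), a dict with a dir key (Example #2), an http or s3 URL (Examples #3 and #20), and the dict produced by parse_cache_string (Example #8). A minimal sketch of the equivalent forms; the /tmp/ckcache-demo path is hypothetical:

    # Hedged sketch of the argument forms seen in these examples; the
    # /tmp/ckcache-demo path is hypothetical.
    from ckcache import new_cache, parse_cache_string

    c1 = new_cache('/tmp/ckcache-demo/fs')                      # bare path
    c2 = new_cache(dict(dir='/tmp/ckcache-demo/fs'))            # config dict
    c3 = new_cache(parse_cache_string('/tmp/ckcache-demo/fs'))  # parsed cache string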
Example #2
    def test_alt_cache(self):

        from ckcache.multi import AltReadCache


        testfile = self.new_rand_file(os.path.join(self.root, 'testfile'), size=2)

        fs1 = new_cache(dict(dir=os.path.join(self.root, 'fs1')))
        fs2 = new_cache(dict(dir=os.path.join(self.root, 'fs2')))

        fs2.put(testfile, 'fs2', {'foo': 'bar'})

        self.assertFalse(fs1.has('fs2'))
        self.assertTrue(fs2.has('fs2'))

        arc = AltReadCache(fs1, fs2)
        self.assertTrue(arc.has('fs2'))
        self.assertEquals(['/tmp/ckcache-test/fs2'], arc.list()['fs2']['caches'])

        self.assertEquals('/tmp/ckcache-test/fs1/fs2', arc.get('fs2'))

        # Now the fs1 cache should have the file too
        self.assertTrue(fs1.has('fs2'))
        self.assertTrue(fs2.has('fs2'))

        self.assertIn('foo', fs1.metadata('fs2'))
Example #3
    def test_http(self):

        http_cache = new_cache('http://devtest.sandiegodata.org/jdoc')

        self.assertTrue(bool(http_cache.has('library.json')))
        self.assertFalse(http_cache.has('missing'))

        with http_cache.get_stream('library.json') as f:
            import json
            d = json.load(f)

            self.assertIn('bundles', d.keys())

        file_cache = new_cache(os.path.join(self.root,'fc'))
        file_cache.upstream = http_cache
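        # With an upstream attached, has() and get_stream() on the local
        # cache fall through to the http cache for missing keys.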

        self.assertTrue(bool(file_cache.has('library.json')))
        self.assertFalse(file_cache.has('missing'))

        with file_cache.get_stream('library.json') as f:
            import json

            d = json.load(f)

            self.assertIn('bundles', d.keys())
Example #4
    def test_fallback(self):
        from ckcache import FallbackFlo
        import gzip

        o1 = new_cache(os.path.join(self.root,'o1'))
        o2 = new_cache(os.path.join(self.root, 'o2'))

        testfile = self.make_test_file()

        if not os.path.exists(o1._cache_dir):
            os.makedirs(o1._cache_dir)

        o1.put(testfile,'x')
        o2.put(testfile, 'x')

        with self.assertRaises(IOError):
            with gzip.GzipFile(fileobj=o1.get_stream('x')) as f:
                print f.read()

        ff = FallbackFlo(gzip.GzipFile(fileobj=o1.get_stream('x')), o2.get_stream('x'))

        self.assertEquals(1048576,  len(ff.read()))

        ff = FallbackFlo(gzip.GzipFile(fileobj=o1.get_stream('x')), gzip.GzipFile(fileobj=o2.get_stream('x')))

        with self.assertRaises(IOError):
            self.assertEquals(1048576, len(ff.read()))
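
The pattern being tested: FallbackFlo reads from its first file-like object and, when that read raises IOError (here, because the plain stream isn't gzipped), falls back to the second; if both fail, the IOError propagates. A minimal sketch of the idea; an illustration of the shape, not ckcache's actual implementation:

    # Hedged sketch of the fallback pattern exercised above; not the real
    # FallbackFlo, just the shape of it.
    class FallbackRead(object):
        def __init__(self, primary, fallback):
            self.primary = primary
            self.fallback = fallback

        def read(self, *args):
            try:
                return self.primary.read(*args)
            except IOError:
                # Primary failed; a failure here propagates to the caller.
                return self.fallback.read(*args)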
Example #5
    def test_attachment(self):
        from ckcache import new_cache


        testfile = self.new_rand_file(os.path.join(self.root, 'testfile'))

        fs1 = new_cache(dict(dir=os.path.join(self.root, 'fs1')))

        fs3 = new_cache(dict(dir=os.path.join(self.root, 'fs3')))

        fs3.put(testfile, 'tf')
        self.assertTrue(fs3.has('tf'))
        fs3.remove('tf')
        self.assertFalse(fs3.has('tf'))

        # Show that attachment works, and that deletes propagate.
        fs3.attach(fs1)
        fs3.put(testfile, 'tf')
        self.assertTrue(fs3.has('tf'))
        self.assertTrue(fs1.has('tf'))
        fs3.remove('tf', propagate=True)
        self.assertFalse(fs3.has('tf'))
        self.assertFalse(fs1.has('tf'))
        fs3.detach()

        # Show detachment works
        fs3.attach(fs1)
        fs3.put(testfile, 'tf')
        self.assertTrue(fs3.has('tf'))
        self.assertTrue(fs1.has('tf'))
        fs3.detach()
        fs3.remove('tf', propagate=True)
        self.assertFalse(fs3.has('tf'))
        self.assertTrue(fs1.has('tf'))
Example #6
def _new_library(config):
    from database import LibraryDb
    from ckcache import new_cache
    from sqlalchemy.exc import OperationalError
    from boto.exception import S3ResponseError

    cache = new_cache(config['filesystem'])

    database = LibraryDb(**dict(config['database']))

    try:
        database.create()
    except OperationalError as e:
        from ..dbexceptions import DatabaseError

        raise DatabaseError('Failed to create {} : {}'.format(database.dsn, e.message))

    if 'upstream' in config:
        raise DeprecationWarning("Upstream no longer allowed in configuration")

    remotes = {}

    for name, remote in config.get('remotes', {}).items():

        try:
            remotes[name] = new_cache(remote)
        except S3ResponseError as e:
            from ..util import get_logger

            logger = get_logger(__name__)
            logger.error('Failed to init cache {} : {}; {} '.format(name, str(remote.bucket), e))

    for i, remote in enumerate(remotes.values()):
        remote.set_priority(i)

    source_dir = config.get('source', None)

    if 'documentation' in config:
        doc_cache = new_cache(config['documentation'])
    else:
        doc_cache = cache.subcache('_doc')

    if 'warehouses' in config:
        warehouse_cache = new_cache(config['warehouses'])
    else:
        warehouse_cache = cache.subcache('warehouses')

    l = Library(cache=cache,
                doc_cache=doc_cache,
                warehouse_cache=warehouse_cache,
                database=database,
                name=config['_name'] if '_name' in config else 'NONE',
                remotes=remotes,
                source_dir=source_dir)

    return l
Example #7
    def meta_sources(self):
        """Read the data stored in S3 to generate a list of sources. """
        from ckcache import new_cache

        c = new_cache(self.metadata.sources.base.url)

        for e in c.list():
            try:
                year, fn = e.split('/')
            except ValueError:  # Got an S3 directory, but no files
                continue

            # rsplit off the extension; fn.strip('.xml') would strip any of
            # the characters '.', 'x', 'm' and 'l' from both ends of the name.
            _, year2, dsd, type_, state = fn.rsplit('.', 1)[0].split('_')

            key = "{}_{}_{}".format(year, state.lower(), type_.lower())

            self.metadata.sources[key] = dict(
                url=c.path(e, public_url=True),
                description="{} {} {}".format(year, state, type_)
            )

            self.update_configuration()
Example #8
    def test_delay(self):
        from ckcache import parse_cache_string, new_cache, copy_file_or_flo

        testfile = self.new_rand_file(os.path.join(self.root, 'testfile'), size=20)

        config = parse_cache_string('/tmp/ckcache-test/logging#log')

        cache = new_cache(config)

        print cache

        for i in range(1,5):
            rel_path = 'async_iter/{}'.format(i)
            with cache.put_stream(rel_path) as f:
                copy_file_or_flo(testfile, f)
                print "Wrote to", rel_path

        #self.assertTrue(cache.has(rel_path))

        #print cache.path(rel_path)
        #print cache.upstream.path(rel_path)

        #cache.remove(rel_path)

        #self.assertFalse(cache.has(rel_path))

        for e in cache.list_log():
            print e.path, e.time, e.cache
            e.remove()
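
A note on the cache string: the fragment after '#' selects a wrapper around the base cache. '#compress' (Example #10) gzips what is written; '#log' (above) records each write so list_log() can replay and clean up the entries. A short sketch with hypothetical paths:

    # Hedged sketch of the fragment syntax used in these tests; the paths
    # are hypothetical.
    from ckcache import new_cache

    plain = new_cache('/tmp/ckcache-demo/d1')                 # plain filesystem cache
    compressed = new_cache('/tmp/ckcache-demo/d1#compress')   # gzip-on-write wrapper
    logged = new_cache('/tmp/ckcache-demo/d1#log')            # writes recorded for list_log()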
Example #9
    def test_multi_cache(self):
        from ckcache import new_cache
        from ckcache.multi import MultiCache


        testfile = self.new_rand_file(os.path.join(self.root, 'testfile'), size=2)

        fs1 = new_cache(dict(dir=os.path.join(self.root, 'fs1')))
        fs2 = new_cache(dict(dir=os.path.join(self.root, 'fs2')))
        fs3 = new_cache(dict(dir=os.path.join(self.root, 'fs3')))

        caches = [fs1, fs2, fs3]

        for i, cache in enumerate(caches, 1):
            cache.put(testfile, 'fs' + str(i), metadata={'i': i})
            j = (i + 1) % len(caches)

            caches[j].put(testfile, 'fs' + str(i), metadata={'i': i})

        mc = MultiCache(caches)
        ls = mc.list()

        self.assertEqual(3, len(ls))
        self.assertIn('fs1', ls)
        self.assertIn('fs3', ls)

        self.assertIn('/tmp/ckcache-test/fs1', ls['fs1']['caches'])
        self.assertIn('/tmp/ckcache-test/fs3', ls['fs1']['caches'])

        mc2 = MultiCache([fs1, fs2])
        ls = mc2.list()
        self.assertEqual(3, len(ls))
        self.assertIn('fs1', ls)
        self.assertIn('fs3', ls)

        mc.put(testfile, 'mc1')
        ls = mc.list()
        self.assertEqual(4, len(ls))
        self.assertIn('mc1', ls)

        # Put should have gone to first cache
        mc2 = MultiCache([fs2, fs3])
        ls = mc2.list()
        self.assertEqual(3, len(ls))
        self.assertNotIn('mc1', ls)
        self.assertIn('fs1', ls)
        self.assertIn('fs3', ls)
Example #10
    def test_compression(self):

        from ckcache import new_cache, md5_for_file, copy_file_or_flo

        comp_cache = new_cache(os.path.join(self.root, 'compressioncache#compress'))

        print comp_cache

        test_file_name = 'test_file'

        fn = self.make_test_file()
        cf = comp_cache.put(fn, test_file_name)

        with open(cf) as stream:
            from ckcache.sgzip import GzipFile

            stream = GzipFile(stream)

            uncomp_cache = new_cache(os.path.join(self.root,'uncomp'))

            uncomp_stream = uncomp_cache.put_stream('decomp')

            copy_file_or_flo(stream, uncomp_stream)

        uncomp_stream.close()

        dcf = uncomp_cache.get('decomp')

        self.assertEquals(md5_for_file(fn), md5_for_file(dcf))

        with comp_cache.get_stream(test_file_name) as f:
            print len(f.read())

        with uncomp_cache.get_stream('decomp') as f:
            print len(f.read())


        with comp_cache.put_stream('comp2') as s:
            copy_file_or_flo(fn, s)

        self.assertTrue(comp_cache.has('comp2'))

        os.remove(fn)
Example #11
    def get_cache_by_name(self, name):
        from dbexceptions import ConfigurationError
        from ckcache import new_cache

        config = self.config.filesystem(name)

        if not config:
            raise ConfigurationError('No filesystem cache by name of {}'.format(name))

        return new_cache(config)
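
Hypothetical usage of this helper, assuming a filesystem section named 'downloads' is configured (as in Example #20's download method):

    # Illustrative only; 'downloads' is an assumed configuration entry.
    cache = self.get_cache_by_name('downloads')
    path = cache.get('some/file')  # a local path, or None if absent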
Example #12
    def get_cache_by_name(self, name):
        from dbexceptions import ConfigurationError
        from ckcache import new_cache

        config = self.config.filesystem(name)

        if not config:
            raise ConfigurationError(
                'No filesystem cache by name of {}'.format(name))

        return new_cache(config)
Example #13
    def test_s3(self):

        #ambry.util.get_logger('ambry.filesystem').setLevel(logging.DEBUG)
        # Set up the test directory and make some test files. 
        from ckcache import new_cache
        
        root = self.rc.group('filesystem').root
        os.makedirs(root)
                
        testfile = os.path.join(root,'testfile')
        
        with open(testfile,'w+') as f:
            for i in range(1024):
                f.write('.'*1023)
                f.write('\n')
         
        #fs = self.bundle.filesystem
        #local = fs.get_cache('downloads')
        
        cache = new_cache(self.rc.filesystem('s3'))
        repo_dir = cache.cache_dir
      
        print "Repo Dir: {}".format(repo_dir)
      
        for i in range(0,10):
            global_logger.info("Putting "+str(i))
            cache.put(testfile,'many'+str(i))
        
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many1')))   
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
                
        p = cache.get('many1')
        self.assertTrue(p is not None)
                
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many1')))   
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
        
        p = cache.get('many2')
        self.assertTrue(p is not None)
                
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))      
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many7'))) 
 
        p = cache.get('many3')
        self.assertTrue(p is not None)
                
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many3')))      
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many7'))) 
Example #14
    def test_http_compressed(self):
        from ckcache import copy_file_or_flo

        http_cache = new_cache('http://s3.sandiegodata.org/library')

        for x in http_cache.list().keys():
            if 'example' in x:
                print x

        with http_cache.get_stream('example.com/random-0.0.2.db') as f:

            with open('/tmp/foo.db', 'wb') as fout:
                copy_file_or_flo(f, fout)
Example #15
    def test_s3(self):

        #ambry.util.get_logger('ambry.filesystem').setLevel(logging.DEBUG)
        # Set up the test directory and make some test files.
        from ckcache import new_cache

        root = self.rc.group('filesystem').root
        os.makedirs(root)

        testfile = os.path.join(root, 'testfile')

        with open(testfile, 'w+') as f:
            for i in range(1024):
                f.write('.' * 1023)
                f.write('\n')

        #fs = self.bundle.filesystem
        #local = fs.get_cache('downloads')

        cache = new_cache(self.rc.filesystem('s3'))
        repo_dir = cache.cache_dir

        for i in range(0, 10):
            global_logger.info("Putting " + str(i))
            cache.put(testfile, 'many' + str(i))

        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many1')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

        p = cache.get('many1')
        self.assertTrue(p is not None)

        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many1')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

        p = cache.get('many2')
        self.assertTrue(p is not None)

        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many7')))

        p = cache.get('many3')
        self.assertTrue(p is not None)

        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many3')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many7')))
Example #16
    def test_s3(self):
        try:
            cache = new_cache(self.rc.filesystem('s3'))
        except ConfigurationError:
            raise unittest.SkipTest('devtest.sandiegodata.org bucket is not configured')

        repo_dir = cache.cache_dir

        # Set up the test directory and make some test files.

        root = self.rc.group('filesystem').root
        os.makedirs(root)

        testfile = os.path.join(root, 'testfile')

        with open(testfile, 'w+') as f:
            for i in range(1024):
                f.write('.' * 1023)
                f.write('\n')

        for i in range(0, 10):
            global_logger.info('Putting ' + str(i))
            cache.put(testfile, 'many' + str(i))

        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many1')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

        p = cache.get('many1')
        self.assertTrue(p is not None)

        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many1')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))

        p = cache.get('many2')
        self.assertTrue(p is not None)

        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many7')))

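        # Getting many3 below pulls it into the local directory and evicts
        # many7, showing the local layer keeps a bounded set of files.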
        p = cache.get('many3')
        self.assertTrue(p is not None)

        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many3')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many7')))
Example #17
    def test_accounts(self):

        config = dict(
            dir=self.subpath('ta_d1'),
            upstream=dict(
                dir=self.subpath('ta_d2'),
                upstream='s3://devtest.sandiegodata.org/test'
            )
        )

        c = new_cache(config)

        with c.put_stream('foobar', metadata=dict(foo='bar')) as f:
            f.write("bar baz")

        self.assertTrue(c.has('foobar'))
Example #18
    def test_dict(self):
        from ckcache import new_cache
        from ckcache.dictionary import DictCache

        testfile = self.new_rand_file(os.path.join(self.root, 'testfile'), size=20)

        base_cache = new_cache('/tmp/ckcache-test/dict#log')

        d = DictCache(base_cache)

        d[1] = ['one', 1]
        d[2] = ['two', 2]
        d[3] = ['three', 3]

        print d[1]

        for x in d:
            print x, d[x][0]
Example #19
    def run(self):
        from ckcache import new_cache, copy_file_or_flo
        from Queue import Empty
        global keep_alive
        global upload_queue

        while keep_alive and not upload_queue.empty():

            try:
                (rel_path, cache_string, buffer) = upload_queue.get(False)

                print "Send ", rel_path, cache_string

                cache = new_cache(cache_string)
                with cache.put_stream(rel_path) as s:
                    copy_file_or_flo(buffer, s)

                upload_queue.task_done()

            except Empty:
                break
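
A note on the queue pattern above: get(False) is a non-blocking read that raises Empty once the queue is drained, and the empty() check is advisory only, so the except Empty clause is still required when several workers share the queue. A standalone sketch using only the stdlib:

    # Sketch of the drain pattern, independent of ckcache (Python 2 stdlib).
    from Queue import Queue, Empty

    q = Queue()
    q.put('a')

    while not q.empty():
        try:
            item = q.get(False)  # non-blocking; raises Empty if already drained
            q.task_done()
        except Empty:
            break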
Example #20
    def download(self, url, test_f=None, unzip=False):
        """Context manager to download a file, return it for us, and delete it
        when done.

        url may also be a key for the sources metadata


        Will store the downloaded file into the cache defined
        by filesystem.download

        """

        import tempfile
        import urlparse
        import urllib2
        import urllib

        cache = self.get_cache_by_name('downloads')
        parsed = urlparse.urlparse(str(url))

        # If the URL doesn't parse as a URL, then it is a name of a source.
        if (not parsed.scheme and url in self.bundle.metadata.sources):

            source_entry = self.bundle.metadata.sources.get(url)

            # If a conversion exists, load it; otherwise, get the original URL
            if source_entry.conversion:
                url = source_entry.conversion
            else:
                url = source_entry.url
            parsed = urlparse.urlparse(str(url))

        if parsed.scheme == 'file':
            return parsed.path

        elif parsed.scheme == 's3':
            # To keep the rest of the code simple, we'll use the S3 cache to generate a signed URL, then
            # download that through the normal process.
            from ckcache import new_cache

            s3cache = new_cache("s3://{}".format(parsed.netloc.strip('/')))

            url = s3cache.path(urllib.unquote_plus(parsed.path.strip('/')))
            parsed = urlparse.urlparse(str(url))
            use_hash = False
        else:
            use_hash = True

        #file_path = parsed.netloc+'/'+urllib.quote_plus(parsed.path.replace('/','_'),'_')
        file_path = os.path.join(parsed.netloc, parsed.path.strip('/'))

        # S3 signed URLs carry a timestamp in the query, so hashing the query would defeat caching
        if use_hash and parsed.query:
            import hashlib

            hash = hashlib.sha224(parsed.query).hexdigest()
            file_path = os.path.join(file_path, hash)

        file_path = file_path.strip('/')

        # We download to a temp file, then move it into place when
        # done. This allows the code to detect and correct partial
        # downloads.
        download_path = os.path.join(
            tempfile.gettempdir(),
            file_path +
            ".download")

        def test_zip_file(f):
            if not os.path.exists(f):
                raise Exception("Test zip file does not exist: {} ".format(f))

            try:
                with zipfile.ZipFile(f) as zf:
                    return zf.testzip() is None
            except zipfile.BadZipfile:
                return False

        if test_f == 'zip':
            test_f = test_zip_file

        for attempts in range(3):

            if attempts > 0:
                self.bundle.error("Retrying download of {}".format(url))

            cached_file = None
            out_file = None
            excpt = None

            try:

                cached_file = cache.get(file_path)
                size = os.stat(cached_file).st_size if cached_file else None

                if cached_file and size:

                    out_file = cached_file

                    if test_f and not test_f(out_file):
                        cache.remove(file_path, True)
                        raise DownloadFailedError(
                            "Cached Download didn't pass test function " +
                            url)

                else:

                    self.bundle.log("Downloading " + url)
                    self.bundle.log(
                        "  --> " +
                        cache.path(
                            file_path,
                            missing_ok=True))

                    resp = urllib2.urlopen(url)
                    headers = resp.info()  # @UnusedVariable

                    if resp.getcode() is not None and resp.getcode() != 200:
                        raise DownloadFailedError(
                            "Failed to download {}: code: {} ".format(
                                url, resp.getcode()))

                    try:
                        out_file = cache.put(resp, file_path)
                    except:
                        self.bundle.error(
                            "Caught exception, deleting download file")
                        cache.remove(file_path, propagate=True)
                        raise

                    if test_f and not test_f(out_file):
                        cache.remove(file_path, propagate=True)
                        raise DownloadFailedError(
                            "Download didn't pass test function " +
                            url)

                break

            except KeyboardInterrupt:
                print "\nRemoving Files! \n Wait for deletion to complete! \n"
                cache.remove(file_path, propagate=True)
                raise
            except DownloadFailedError as e:
                self.bundle.error("Failed:  " + str(e))
                excpt = e
            except IOError as e:
                self.bundle.error(
                    "Failed to download " +
                    url +
                    " to " +
                    file_path +
                    " : " +
                    str(e))
                excpt = e
            except urllib.ContentTooShortError as e:
                self.bundle.error("Content too short for " + url)
                excpt = e
            except zipfile.BadZipfile as e:
                # Code that uses the yield value -- like the filesystem.unzip method --
                # can throw exceptions that will propagate to here. Unexpected, but very useful.
                # We should probably create a FileNotValueError, but I'm lazy.
                self.bundle.error("Got an invalid zip file for " + url)
                cache.remove(file_path, propagate=True)
                excpt = e

            except Exception as e:
                self.bundle.error(
                    "Unexpected download error '" +
                    str(e) +
                    "' when downloading " +
                    str(url))
                cache.remove(file_path, propagate=True)
                raise

        if download_path and os.path.exists(download_path):
            os.remove(download_path)

        if excpt:
            raise excpt

        if unzip:

            if isinstance(unzip, bool):
                return self.unzip(out_file)
            elif unzip == 'dir':
                return self.unzip_dir(out_file)
            else:
                return self.unzip_dir(out_file, regex=unzip)

        else:
            return out_file
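
A hypothetical call to the method above, assuming a bundle whose 'downloads' cache is configured; the URL is illustrative. test_f='zip' swaps in the built-in zip check, and unzip=True hands the downloaded file to self.unzip():

    # Illustrative usage only; the URL and bundle context are assumed.
    path = self.download('http://example.org/data.zip', test_f='zip', unzip=True)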
Example #21
    def download(self, url, test_f=None, unzip=False):
        """Context manager to download a file, return it for us, and delete it
        when done.

        url may also be a key for the sources metadata


        Will store the downloaded file into the cache defined
        by filesystem.download

        """

        import tempfile
        import urlparse
        import urllib2
        import urllib

        cache = self.get_cache_by_name('downloads')
        parsed = urlparse.urlparse(str(url))

        # If the URL doesn't parse as a URL, then it is a name of a source.
        if (not parsed.scheme and url in self.bundle.metadata.sources):

            source_entry = self.bundle.metadata.sources.get(url)

            # If a conversion exists, load it; otherwise, get the original URL
            if source_entry.conversion:
                url = source_entry.conversion
            else:
                url = source_entry.url
            parsed = urlparse.urlparse(str(url))

        if parsed.scheme == 'file' or not parsed.scheme:
            return parsed.path

        elif parsed.scheme == 's3':
            # To keep the rest of the code simple, we'll use the S3 cache to generate a signed URL, then
            # download that through the normal process.
            from ckcache import new_cache, parse_cache_string

            bucket = parsed.netloc.strip('/')

            cache_url = "s3://{}".format(bucket)

            config = parse_cache_string(cache_url)

            config['account'] = self.config.account(bucket)

            import pprint
            pprint.pprint(config)

            s3cache = new_cache(config)

            url = s3cache.path(urllib.unquote_plus(parsed.path.strip('/')))
            parsed = urlparse.urlparse(str(url))
            use_hash = False
        else:
            use_hash = True

        #file_path = parsed.netloc+'/'+urllib.quote_plus(parsed.path.replace('/','_'),'_')
        file_path = os.path.join(parsed.netloc, parsed.path.strip('/'))

        # S3 signed URLs carry a timestamp in the query, so hashing the query would defeat caching
        if use_hash and parsed.query:
            import hashlib

            hash = hashlib.sha224(parsed.query).hexdigest()
            file_path = os.path.join(file_path, hash)

        file_path = file_path.strip('/')

        # We download to a temp file, then move it into place when
        # done. This allows the code to detect and correct partial
        # downloads.
        download_path = os.path.join(tempfile.gettempdir(),
                                     file_path + ".download")

        def test_zip_file(f):
            if not os.path.exists(f):
                raise Exception("Test zip file does not exist: {} ".format(f))

            try:
                with zipfile.ZipFile(f) as zf:
                    return zf.testzip() is None
            except zipfile.BadZipfile:
                return False

        if test_f == 'zip':
            test_f = test_zip_file

        for attempts in range(3):

            if attempts > 0:
                self.bundle.error("Retrying download of {}".format(url))

            cached_file = None
            out_file = None
            excpt = None

            try:

                cached_file = cache.get(file_path)
                size = os.stat(cached_file).st_size if cached_file else None

                if cached_file and size:

                    out_file = cached_file

                    if test_f and not test_f(out_file):
                        cache.remove(file_path, True)
                        raise DownloadFailedError(
                            "Cached Download didn't pass test function " + url)

                else:

                    self.bundle.log("Downloading " + url)
                    self.bundle.log("  --> " +
                                    cache.path(file_path, missing_ok=True))

                    resp = urllib2.urlopen(url)
                    headers = resp.info()  # @UnusedVariable

                    if resp.getcode() is not None and resp.getcode() != 200:
                        raise DownloadFailedError(
                            "Failed to download {}: code: {} ".format(
                                url, resp.getcode()))

                    try:
                        out_file = cache.put(resp, file_path)
                    except:
                        self.bundle.error(
                            "Caught exception, deleting download file")
                        cache.remove(file_path, propagate=True)
                        raise

                    if test_f and not test_f(out_file):
                        cache.remove(file_path, propagate=True)
                        raise DownloadFailedError(
                            "Download didn't pass test function " + url)

                break

            except KeyboardInterrupt:
                print "\nRemoving Files! \n Wait for deletion to complete! \n"
                cache.remove(file_path, propagate=True)
                raise
            except DownloadFailedError as e:
                self.bundle.error("Failed:  " + str(e))
                excpt = e
            except IOError as e:
                self.bundle.error("Failed to download " + url + " to " +
                                  file_path + " : " + str(e))
                excpt = e
            except urllib.ContentTooShortError as e:
                self.bundle.error("Content too short for " + url)
                excpt = e
            except zipfile.BadZipfile as e:
                # Code that uses the yield value -- like the filesystem.unzip method --
                # can throw exceptions that will propagate to here. Unexpected, but very useful.
                # We should probably create a FileNotValueError, but I'm lazy.
                self.bundle.error("Got an invalid zip file for " + url)
                cache.remove(file_path, propagate=True)
                excpt = e

            except Exception as e:
                self.bundle.error("Unexpected download error '" + str(e) +
                                  "' when downloading " + str(url))
                cache.remove(file_path, propagate=True)
                raise

        if download_path and os.path.exists(download_path):
            os.remove(download_path)

        if excpt:
            raise excpt

        if unzip:

            if isinstance(unzip, bool):
                return self.unzip(out_file)
            elif unzip == 'dir':
                return self.unzip_dir(out_file)
            else:
                return self.unzip_dir(out_file, regex=unzip)

        else:
            return out_file
Example #22
def _new_library(config):
    from ckcache import new_cache
    from database import LibraryDb
    from sqlalchemy.exc import OperationalError

    cache = new_cache(config['filesystem'])

    database = LibraryDb(**dict(config['database']))

    try:
        database.create()
    except OperationalError as e:
        from ..dbexceptions import DatabaseError

        raise DatabaseError('Failed to create {} : {}'.format(database.dsn,
                                                              e.message))

    if 'upstream' in config:
        raise DeprecationWarning("Upstream no longer allowed in configuration")

    root = config['root']

    remotes = [new_cache(remote) for remote in config.get('remotes', [])]

    for i, remote in enumerate(remotes):
        remote.set_priority(i)

    source_dir = config.get('source', None)

    hostport = config.get('host', None)

    if hostport:
        if ':' in hostport:
            host, port = hostport.split(':')
        else:
            host = hostport
            port = 80
    else:
        host = None
        port = 80

    if 'documentation' in config:
        doc_cache = new_cache(config['documentation'])
    else:
        doc_cache = cache.subcache('_doc')

    if 'warehouses' in config:
        warehouse_cache = new_cache(config['warehouses'])
    else:
        warehouse_cache = cache.subcache('warehouses')

    l = Library(cache=cache,
                doc_cache=doc_cache,
                warehouse_cache=warehouse_cache,
                database=database,
                name=config['_name'] if '_name' in config else 'NONE',
                remotes=remotes,
                require_upload=config.get('require_upload', None),
                source_dir=source_dir,
                host=host,
                port=port,
                urlhost=config.get('urlhost', None))

    return l
Example #23
def fscache():
    from ckcache import parse_cache_string, new_cache

    cache_config = parse_cache_string(app_config['cache'])
    return new_cache(cache_config)
Example #24
    def remove(self):
        from ckcache import new_cache

        # self.cache is a cache config string; rebuild the cache from it,
        # then remove this entry's file.
        cache = new_cache(self.cache)
        cache.remove(self.rel_path)
Example #25
def _new_library(config):
    from ckcache import new_cache
    from database import LibraryDb
    from sqlalchemy.exc import OperationalError

    cache = new_cache(config['filesystem'])

    database = LibraryDb(**dict(config['database']))

    try:
        database.create()
    except OperationalError as e:
        from ..dbexceptions import DatabaseError

        raise DatabaseError('Failed to create {} : {}'.format(database.dsn,
                                                              e.message))

    if 'upstream' in config:
        raise DeprecationWarning("Upstream no longer allowed in configuration")

    root = config['root']

    remotes = {name: new_cache(remote) for name, remote in config.get('remotes', {}).items()}

    for i, remote in enumerate(remotes.values()):
        remote.set_priority(i)

    source_dir = config.get('source', None)

    hostport = config.get('host', None)

    if hostport:
        if ':' in hostport:
            host, port = hostport.split(':')
        else:
            host = hostport
            port = 80
    else:
        host = None
        port = 80

    if 'documentation' in config:
        doc_cache = new_cache(config['documentation'])
    else:
        doc_cache = cache.subcache('_doc')

    if 'warehouses' in config:
        warehouse_cache = new_cache(config['warehouses'])
    else:
        warehouse_cache = cache.subcache('warehouses')

    l = Library(cache=cache,
                doc_cache=doc_cache,
                warehouse_cache=warehouse_cache,
                database=database,
                name=config['_name'] if '_name' in config else 'NONE',
                remotes=remotes,
                require_upload=config.get('require_upload', None),
                source_dir=source_dir,
                host=host,
                port=port,
                urlhost=config.get('urlhost', None))

    return l
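
For reference, a hypothetical config dict covering the keys _new_library reads above; every value here is illustrative, and the database entry is whatever kwargs LibraryDb accepts:

    # Hypothetical configuration; keys mirror those read by _new_library,
    # values are placeholders only.
    config = {
        '_name': 'demo',
        'root': '/tmp/library-demo',
        'filesystem': '/tmp/library-demo/cache',
        'database': {},  # kwargs for LibraryDb; left empty here
        'remotes': {'origin': 's3://bucket.example.org/library'},
        'host': 'localhost:8080',
        'source': '/tmp/library-demo/source',
    }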