def testListBucket(self):
        bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
        foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
        filenames = bars + foos
        for filename in filenames:
            self.CreateFile(filename)

        bucket = cloudstorage.listbucket(BUCKET + '/test/')
        self.assertEqual(filenames, [stat.filename for stat in bucket])

        bucket = cloudstorage.listbucket(BUCKET + '/test/', max_keys=1)
        stats = list(bucket)
        self.assertEqual(1, len(stats))
        stat = stats[0]
        content = ''.join(DEFAULT_CONTENT)
        self.assertEqual(filenames[0], stat.filename)
        self.assertEqual(len(content), stat.st_size)
        self.assertEqual(hashlib.md5(content).hexdigest(), stat.etag)

        bucket = cloudstorage.listbucket(BUCKET + '/test/',
                                         marker=BUCKET + '/test/foo0',
                                         max_keys=1)
        stats = [stat for stat in bucket]
        self.assertEqual(1, len(stats))
        stat = stats[0]
        self.assertEqual(foos[1], stat.filename)
Example #2
    def delete_files_in_folder(self, folder, skip=[]):
        bucketFolder = self.getPath(folder)
        log.info("bucketFolder %s" % bucketFolder)
        startdel = datetime.datetime.now()
        delcount = 0
        page_size = 100
        stats = cloudstorage.listbucket(bucketFolder, max_keys=page_size)
        files = []
        while True:
            count = 0
            for stat in stats:
                count += 1
                fname = stat.filename
                delete = True
                if skip:
                    for s in skip:
                        if s in fname:
                            delete = False
                            log.info("SKIPPING: %s" % fname)
                            break
                if delete:
                    files.append(stat.filename)
                
            for f in files:
                delcount += 1
                self._delete_file(f)

            if count != page_size or count == 0:
                break
            stats = cloudstorage.listbucket(bucketFolder, max_keys=page_size,marker=stat.filename)
            files = []
        
        log.info("Cloudstorage: deleted %s files in %s seconds" % (delcount, (datetime.datetime.now() - startdel)))
        return delcount
Example #4
  def list_bucket(self, bucket):
    """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
      bucket: bucket.
    """
    self.response.write('Creating more files for listbucket...\n')
    self.create_file(bucket + '/foo1')
    self.create_file(bucket + '/foo2')
    self.response.write('\nListbucket result:\n')

    page_size = 1
    stats = gcs.listbucket(bucket, max_keys=page_size)
    while True:
      count = 0
      for stat in stats:
        count += 1
        self.response.write(repr(stat))
        self.response.write('\n')

      if count != page_size or count == 0:
        break
      last_filename = stat.filename[len(bucket)+1:]
      stats = gcs.listbucket(bucket, max_keys=page_size, marker=last_filename)
  def testListBucket(self):
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
      self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET + '/test/')
    self.assertEqual(filenames, [stat.filename for stat in bucket])

    bucket = cloudstorage.listbucket(BUCKET + '/test/', max_keys=1)
    stats = list(bucket)
    self.assertEqual(1, len(stats))
    stat = stats[0]
    content = ''.join(DEFAULT_CONTENT)
    self.assertEqual(filenames[0], stat.filename)
    self.assertEqual(len(content), stat.st_size)
    self.assertEqual(hashlib.md5(content).hexdigest(), stat.etag)

    bucket = cloudstorage.listbucket(BUCKET + '/test/',
                                     marker=BUCKET + '/test/foo0',
                                     max_keys=1)
    stats = [stat for stat in bucket]
    self.assertEqual(1, len(stats))
    stat = stats[0]
    self.assertEqual(foos[1], stat.filename)
  def testListBucketWithDelimiter(self):
    filenames = ['/bar',
                 '/foo0', '/foo1',
                 '/foo/a', '/foo/b/bb', '/foo/b/bbb', '/foo/c/c',
                 '/foo1/a',
                 '/foo2/a', '/foo2/b',
                 '/foo3/a']
    def FullyQualify(n):
      return BUCKET + n
    fullnames = [FullyQualify(n) for n in filenames]
    for n in fullnames:
      self.CreateFile(n)

    bucket = cloudstorage.listbucket(BUCKET + '/foo',
                                     delimiter='/',
                                     max_keys=5)
    expected = [FullyQualify(n) for n in ['/foo/', '/foo0', '/foo1',
                                          '/foo1/', '/foo2/']]
    self.assertEqual(expected, [stat.filename for stat in bucket])

    bucket = cloudstorage.listbucket(BUCKET + '/foo/',
                                     delimiter='/',
                                     max_keys=2)
    expected = [FullyQualify(n) for n in ['/foo/a', '/foo/b/']]
    self.assertEqual(expected, [stat.filename for stat in bucket])
    def testListBucketWithDelimiter(self):
        filenames = [
            '/bar', '/foo0', '/foo1', '/foo/a', '/foo/b/bb', '/foo/b/bbb',
            '/foo/c/c', '/foo1/a', '/foo2/a', '/foo2/b', '/foo3/a'
        ]

        def FullyQualify(n):
            return BUCKET + n

        fullnames = [FullyQualify(n) for n in filenames]
        for n in fullnames:
            self.CreateFile(n)

        bucket = cloudstorage.listbucket(BUCKET + '/foo',
                                         delimiter='/',
                                         max_keys=5)
        expected = [
            FullyQualify(n)
            for n in ['/foo/', '/foo0', '/foo1', '/foo1/', '/foo2/']
        ]
        self.assertEqual(expected, [stat.filename for stat in bucket])

        bucket = cloudstorage.listbucket(BUCKET + '/foo/',
                                         delimiter='/',
                                         max_keys=2)
        expected = [FullyQualify(n) for n in ['/foo/a', '/foo/b/']]
        self.assertEqual(expected, [stat.filename for stat in bucket])
Example #8
    def list_bucket(self, bucket):
        """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
      bucket: bucket.
    """
        self.response.write('Listbucket result:\n')

        page_size = 1
        stats = gcs.listbucket(bucket + '/foo', max_keys=page_size)
        while True:
            count = 0
            for stat in stats:
                count += 1
                self.response.write(repr(stat))
                self.response.write('\n')

            if count != page_size or count == 0:
                break
            # pylint: disable=undefined-loop-variable
            stats = gcs.listbucket(bucket + '/foo',
                                   max_keys=page_size,
                                   marker=stat.filename)
Example #9
    def list_bucket(self, bucket):
        """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
      bucket: bucket.
    """
        self.response.write('Creating more files for listbucket...\n')
        self.create_file(bucket + '/foo1')
        self.create_file(bucket + '/foo2')
        self.response.write('\nListbucket result:\n')

        page_size = 1
        stats = gcs.listbucket(bucket, max_keys=page_size)
        while True:
            count = 0
            for stat in stats:
                count += 1
                self.response.write(repr(stat))
                self.response.write('\n')

            if count != page_size or count == 0:
                break
            last_filename = stat.filename[len(bucket) + 1:]
            stats = gcs.listbucket(bucket,
                                   max_keys=page_size,
                                   marker=last_filename)
Example #10
 def list_bucket_directory_mode(self, bucket):
     print 'Listbucket directory mode result'
     for stat in gcs.listbucket(bucket + '/b', delimiter='/'):
         print stat
         if stat.is_dir:
             for subdir_file in gcs.listbucket(stat.filename,
                                               delimiter='/'):
                 print subdir_file
Example #11
 def list_bucket_directory_mode(self, bucket):
   self.response.write('Listbucket directory mode result:\n')
   for stat in gcs.listbucket(bucket + '/b', delimiter='/'):
     self.response.write('%r' % stat)
     self.response.write('\n')
     if stat.is_dir:
       for subdir_file in gcs.listbucket(stat.filename, delimiter='/'):
         self.response.write('  %r' % subdir_file)
         self.response.write('\n')
Example #12
 def list_bucket_directory_mode(self, bucket):
   self.response.write('Listbucket directory mode result:\n')
   for stat in gcs.listbucket(bucket + '/b', delimiter='/'):
     self.response.write('%r' % stat)
     self.response.write('\n')
     if stat.is_dir:
       for subdir_file in gcs.listbucket(stat.filename, delimiter='/'):
         self.response.write('  %r' % subdir_file)
         self.response.write('\n')
Example #13
 def list_bucket_directory_mode(self, bucket):
     self.response.write('Listbucket directory mode result:\n')
     for stat in cloudstorage.listbucket(bucket + '/b', delimiter='/'):
         self.response.write(stat)
         self.response.write('\n')
         if stat.is_dir:
             for subdir_file in cloudstorage.listbucket(
                     stat.filename, delimiter='/'):
                 self.response.write('  {}'.format(subdir_file))
                 self.response.write('\n')
 def list_bucket_directory_mode(self, bucket):
     self.response.write('Listbucket directory mode result:\n')
     for stat in cloudstorage.listbucket(bucket + '/b', delimiter='/'):
         self.response.write(stat)
         self.response.write('\n')
         if stat.is_dir:
             for subdir_file in cloudstorage.listbucket(stat.filename,
                                                        delimiter='/'):
                 self.response.write('  {}'.format(subdir_file))
                 self.response.write('\n')
Example #15
    def testRemoveGarbage(self):
        """Make sure abandoned files get removed."""
        writer_spec = {
            self.WRITER_CLS.BUCKET_NAME_PARAM: "unused",
            self.WRITER_CLS.TMP_BUCKET_NAME_PARAM: "test"
        }
        mapreduce_state = self.create_mapreduce_state(
            output_params=writer_spec)
        shard_state = self.create_shard_state(1)
        ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
        context.Context._set(ctx)

        writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                        shard_state.shard_number, 0)
        writer.begin_slice(None)

        # our shard
        our_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-1-very-random"
        f = cloudstorage.open(our_file, "w")
        f.write("foo?")
        f.close()

        # not our shard
        their_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-3-very-random"
        f = cloudstorage.open(their_file, "w")
        f.write("bar?")
        f.close()

        # unrelated file
        real_file = "/test/this_things_should_survive"
        f = cloudstorage.open(real_file, "w")
        f.write("yes, foobar!")
        f.close()

        # Make sure bogus file still exists
        names = [l.filename for l in cloudstorage.listbucket("/test")]
        self.assertTrue(our_file in names)
        self.assertTrue(their_file in names)
        self.assertTrue(real_file in names)

        # slice end should clean up the garbage
        writer = self._serialize_and_deserialize(writer)

        names = [l.filename for l in cloudstorage.listbucket("/test")]
        self.assertFalse(our_file in names)
        self.assertTrue(their_file in names)
        self.assertTrue(real_file in names)

        # finalize shouldn't change anything
        writer.finalize(ctx, shard_state)
        self.assertFalse(our_file in names)
        self.assertTrue(their_file in names)
        self.assertTrue(real_file in names)
Example #16
 def list_bucket(self, bucket):
     page_size = 1
     stats = gcs.listbucket(bucket, max_keys=page_size)
     while True:
         count = 0
         for stat in stats:
             count += 1
             self.response.write(repr(stat))
             self.response.write('\n')
         if count != page_size or count == 0:
             break
         stats = gcs.listbucket(bucket,
                                max_keys=page_size,
                                marker=stat.filename)
  def testRemoveGarbage(self):
    """Make sure abandoned files get removed."""
    writer_spec = {self.WRITER_CLS.BUCKET_NAME_PARAM: "unused",
                   self.WRITER_CLS.TMP_BUCKET_NAME_PARAM: "test"}
    mapreduce_state = self.create_mapreduce_state(output_params=writer_spec)
    shard_state = self.create_shard_state(1)
    ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                    shard_state.shard_number, 0)
    writer.begin_slice(None)

    # our shard
    our_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-1-very-random"
    f = cloudstorage.open(our_file, "w")
    f.write("foo?")
    f.close()

    # not our shard
    their_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-3-very-random"
    f = cloudstorage.open(their_file, "w")
    f.write("bar?")
    f.close()

    # unrelated file
    real_file = "/test/this_things_should_survive"
    f = cloudstorage.open(real_file, "w")
    f.write("yes, foobar!")
    f.close()

    # Make sure bogus file still exists
    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertTrue(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # slice end should clean up the garbage
    writer = self._serialize_and_deserialize(writer)

    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # finalize shouldn't change anything
    writer.finalize(ctx, shard_state)
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)
Example #18
def get_filenames():
    page_size = 1
    stats = gcs.listbucket(bucket, max_keys=page_size)
    names = []
    while True:
        count = 0
        for stat in stats:
            count += 1
            names.append(repr(stat.filename)[1:-1])
        if count != page_size or count == 0:
            break
        stats = gcs.listbucket(bucket,
                               max_keys=page_size,
                               marker=stat.filename)
    return names
Example #19
    def get(self):
        """Regelmäßig von Cron aufzurufen."""
        if not gaetkconfig.BIGQUERY_PROJECT:
            return self.return_text('BIGQUERY_PROJECT not provided, exiting')

        if not gaetkconfig.BACKUP_BUCKET:
            bucket = get_default_gcs_bucket_name()
        else:
            bucket = gaetkconfig.BACKUP_BUCKET

        bucketpath = '/'.join((bucket, get_application_id()))
        bucketpath = '/{}/'.format(bucketpath.strip('/'))
        logger.info('searching backups in %r', bucketpath)

        objs = cloudstorage.listbucket(bucketpath, delimiter=b'/')
        subdirs = sorted((obj.filename for obj in objs if obj.is_dir),
                         reverse=True)
        # Find Path of newest available backup
        # typical path:
        # '/appengine-backups-eu-nearline/hudoraexpress/2017-05-02/ag9...EM.ArtikelBild.backup_info'
        dirs = {}
        for subdir in subdirs:
            try:
                datum = convert_to_date(subdir.rstrip('/').split('/')[-1])
            except ValueError:
                continue
            else:
                dirs[datum] = subdir

        if not dirs:
            raise RuntimeError('No Datastore Backup found in %r' %
                               (bucketpath))

        datum = max(dirs)
        if datum < datetime.date.today() - datetime.timedelta(days=7):
            raise exc.RuntimeError(
                'Latest Datastore Backup in {!r} is way too old!'.format(
                    bucketpath))

        countdown = 1
        subdir = dirs[datum]
        logger.info('Uploading Backup %s from directory %s', datum, subdir)
        regexp = re.compile(subdir + r'([\w-]+)\.(\w+)\.backup_info')
        for obj in cloudstorage.listbucket(subdir):
            if regexp.match(obj.filename):
                defer(upload_backup_file, obj.filename, _countdown=countdown)
                countdown += 2
        self.response.write('ok, countdown=%d\n' % (countdown))
Example #20
def get_latest_data_location(device):
    # type: (Device) -> str
    """
    Gets the url path to the latest data entry from a given device
    :param device: device datastore object
    :return: string
    """
    serial = device.serial_num
    base_path = BUCKET_PREFIX.format(serial)

    filenames = []

    # This is probably wrong too. Damnit.
    for statinfo in gcs.listbucket(base_path, retry_params=READ_RETRY):
        filenames.append(statinfo.filename)

    split_filenames = [os.path.splitext(f) for f in filenames]

    with_index = [(int(os.path.split(f[0])[1]), f) for f in split_filenames]
    sorted_filenames = sorted(with_index, key=lambda e: e[0], reverse=True)

    try:
        index, split_filename = sorted_filenames[0]
    except IndexError:
        raise RuntimeError("There are no files in the storage.")

    filename = split_filename[0] + split_filename[1]

    return filename
 def get(self):
     try:
         ch=0
         cacheFlag=self.request.get("cacheFlag")
         print("cache flag recieved",cacheFlag);
         if str(cacheFlag)=="false":
             stats = gcs.listbucket(bucket_name)
             print("accessing gcs bucket");
             count = 0
             for stat in stats:
                 self.response.write(stat.filename+";")
                 count += 1
             print("total no of files in GCS: " + str(count))
             if count == 0:
                 self.response.write("No data found :404")
         else:
             print("in else part");
             filecontent = memcache.get('Master')
             print(filecontent)
             if filecontent:
                 print("fetching memcache list")
                 for files in filecontent:
                     filedata = memcache.get(files)
                     if filedata != None:
                         self.response.write(files+";")
                     else:
                         count = -1
         count = None
         stats = None
     except Exception as e:
         print type(e)
         print "server exception"
Example #22
 def FileExists(self, bucketname, filename):
     stats = cloudstorage.listbucket(bucketname)
     exist = False
     for stat in stats:
         if bucketname+'/'+filename in stat.filename:
             exist = True
     return (exist)
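
 # A possibly cheaper alternative (a sketch, not part of the original snippet):
 # cloudstorage.stat() on the exact object name avoids iterating over the whole
 # bucket, at the cost of only matching the complete filename.
 def FileExistsByStat(self, bucketname, filename):
     try:
         cloudstorage.stat(bucketname + '/' + filename)
         return True
     except cloudstorage.NotFoundError:
         return False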
Example #23
 def getFiles():
     stats = gcs.listbucket(FileManager.getBucketName() + '/')
     files = []
     for stat in stats:
         items = stat.filename.split('/')
         files.append(items[-1])
     return files
Example #24
    def get(self):
        bucket_name = 'virtualproctor'

        self.response.headers['Content-Type'] = 'application/json'

        bucket = '/' + bucket_name

        try:
            stats = gcs.listbucket(bucket)
            ten_minutes = datetime.timedelta(minutes=10)
            ten_minutes_ago = datetime.datetime.now() - ten_minutes
            names = []
            for x in stats:
                create_time = datetime.datetime.fromtimestamp(x.st_ctime)
                if ten_minutes_ago < create_time:
                    name = x.filename.replace('/virtualproctor/',
                                              '').split('.')
                    image = {
                        'classroom': name[0],
                        'student': name[1],
                        'created': x.st_ctime
                    }
                    names.append(image)
            self.response.write(json.dumps(names))

        except Exception as e:
            logging.exception(e)
            self.response.write('{"error": "oops"}')
Example #25
 def get_files(self):
     """Get files from GCS"""
     files = []
     stats = gcs.listbucket('/%s/backups' % BACKUP_BUCKET, max_keys=100)
     for stat in stats:
         files.append(stat)
     return files
Example #26
 def get_files(self):
     """Get files from GCS"""
     files = []
     stats = gcs.listbucket('/%s/backups' % BACKUP_BUCKET, max_keys=100)
     for stat in stats:
         files.append(stat)
     return files
 def sync_table_only_ids(self):
     del self.table_only_ids[:]  # table = [] would break the references!
     stats = gcs.listbucket('%s/%s' % (bucket, self._path))
     for stat in stats:
         id = stat.filename
         self.table_only_ids.append({'id': id})
     return self.table_only_ids
Example #28
def hello():
    from google.appengine.api import app_identity
    import cloudstorage as gcs

    # Get the default bucket name
    default_bucket_name = app_identity.get_default_gcs_bucket_name()

    out = "Default bucket : " + default_bucket_name

    ## Write to cloud
    filename = '/' + default_bucket_name + '/new.txt'
    gcs_file = gcs.open(filename, 'w', content_type='text/plain')
    gcs_file.write('abcde\n')
    gcs_file.write('Hello!' + '\n')
    gcs_file.close()

    ## Read from cloud
    gcs_file = gcs.open(filename)
    contents = gcs_file.read()
    gcs_file.close()

    out += "<p>Contents :</p>"
    out += "<p>" + contents + "</p>"

    # Files CANNOT be appended, as the objects are immutable. To append, you have to read, modify the contents, and overwrite.

    #List items
    blist = gcs.listbucket('/' + default_bucket_name)
    print(list(blist))

    return out
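
# The comment above notes that GCS objects are immutable, so an "append" has to
# be emulated by reading the current contents and rewriting the object. Below is
# a minimal sketch of that read-modify-rewrite pattern (the function name, the
# filename, and the appended text are placeholders, not part of the original):
def append_to_gcs_file(filename, extra_text):
    import cloudstorage as gcs

    # Read the existing contents.
    gcs_file = gcs.open(filename)
    contents = gcs_file.read()
    gcs_file.close()

    # Rewrite the object with the old contents plus the new data.
    gcs_file = gcs.open(filename, 'w', content_type='text/plain')
    gcs_file.write(contents + extra_text)
    gcs_file.close()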
Example #29
    def _next_file(self):
        """Find next filename.

    self._filenames may need to be expanded via listbucket.

    Returns:
      None if no more file is left. Filename otherwise.
    """
        while True:
            if self._bucket_iter:
                try:
                    return self._bucket_iter.next().filename
                except StopIteration:
                    self._bucket_iter = None
                    self._bucket = None
            if self._index >= len(self._filenames):
                return
            filename = self._filenames[self._index]
            self._index += 1
            if self._delimiter is None or not filename.endswith(
                    self._delimiter):
                return filename
            self._bucket = cloudstorage.listbucket(filename,
                                                   delimiter=self._delimiter)
            self._bucket_iter = iter(self._bucket)
Example #30
    def get(self):
        bucket_name = os.environ.get(
            'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())

        self.response.headers['Content-Type'] = 'text/plain'
        self.response.write('Demo GCS Application running from Version: ' +
                            os.environ['CURRENT_VERSION_ID'] + '\n')
        self.response.write('Using bucket name: ' + bucket_name + '\n\n')

        self.response.write('Listbucket results:\n')

        bucket = '/' + bucket_name
        page_size = 100
        stats = gcs.listbucket(bucket + '/', max_keys=page_size)
        self.response.write('Stats for #%s entries\n' % stats)
        while True:
            count = 0
            for stat in stats:
                count += 1
                self.response.write('Item #%d' % count)
                self.response.write('\n')
                self.response.write(repr(stat))
                self.response.write('\n')
                self.response.write('Count = %d\n' % count)

            if count != page_size or count == 0:
                self.response.write('Breaking = %d\n' % count)
                break
Example #31
def list_bucket(collective):
    """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
    bucket: bucket.
    """
    bucket = '/ahtme-music/' + collective

    page_size = 10
    stats = gcs.listbucket(bucket, max_keys=page_size)

    files = []
    while True:
        count = 0
        for stat in stats:
            count += 1
            stat.filename = stat.filename.split('/')[-1]
            files.append(stat)
            # print stat
            # self.response.write(repr(stat))
            # self.response.write('\n')

        if count != page_size or count == 0:
            break
        # stats = gcs.listbucket(bucket, max_keys=page_size,
        #                        marker=stat.filename)
    return files
Example #32
    def listdir(self, dir_name):
        """Lists all files in a directory.

        Args:
            dir_name: str. The directory whose files should be listed. This
                should not start with '/' or end with '/'.

        Returns:
            list(str). A lexicographically-sorted list of filenames.
        """
        if dir_name.endswith('/') or dir_name.startswith('/'):
            raise IOError(
                'The dir_name should not start with / or end with / : %s' %
                (dir_name))

        # The trailing slash is necessary to prevent non-identical directory
        # names with the same prefix from matching, e.g. /abcd/123.png should
        # not match a query for files under /abc/.
        prefix = '%s' % utils.vfs_construct_path('/', self._assets_path,
                                                 dir_name)
        if not prefix.endswith('/'):
            prefix += '/'
        # The prefix now ends and starts with '/'.
        bucket_name = app_identity_services.get_gcs_resource_bucket_name()
        # The path entered should be of the form, /bucket_name/prefix.
        path = '/%s%s' % (bucket_name, prefix)
        stats = cloudstorage.listbucket(path)
        files_in_dir = []
        for stat in stats:
            files_in_dir.append(stat.filename)
        return files_in_dir
 def get(self):
     try:
         ch=0
         cacheFlag=self.request.get("cacheFlag")
         print("cache flag recieved",cacheFlag);
         # cacheFlag = False; Fetch list from GCS
         if str(cacheFlag)=="false":
             stats = gcs.listbucket(bucket_name)
             print("accessing gcs bucket");
             count = 0
             for stat in stats:
                 self.response.write(stat.filename+";")
                 count += 1
             print("total no of files in GCS: " + str(count))
             if count == 0:
                 self.response.write("No data found :404")
         # cacheFlag = False; Fetch list from Memcache
         else:
             print("in else part");
             filecontent = memcache.get('Master')
             print(filecontent)
             if filecontent:
                 print("fetching memcache list")
                 for files in filecontent:
                     filedata = memcache.get(files)
                     if filedata != None:
                         self.response.write(files+";")
                     else:
                         count = -1
         count = None
         stats = None
     except Exception as e:
         print type(e)
         print "server exception"
Example #34
 def GetAllPaths(self, prefix, max_keys=None, marker=None, delimiter=None):
     return (f.filename[len(self.bucket) + 1:]
             for f in cloudstorage.listbucket(self.bucket,
                                              prefix=prefix,
                                              max_keys=max_keys,
                                              marker=marker,
                                              delimiter=delimiter))
Example #35
 def get(self):
     u = self.user_info
     username = u['name']
     # storage params
     bucketlist = gcs.listbucket(Rbucket)
     params = {'username': username}
     rbac(self, 'gcs', params)
  def get(self):
 
    #login
    usr = users.get_current_user()
    if not usr:
      url = users.create_login_url(self.request.uri)
      url_linktext = 'Login'
      self.redirect(users.create_login_url(self.request.uri))
    else:
      url = users.create_logout_url(self.request.uri)
      url_linktext = 'Logout'
      
      #testing users deb model
      userlist = User.query().fetch(5)
      
      #get files from bucket for the user
      bucket_name = "/"+os.environ.get('BUCKET_NAME',app_identity.get_default_gcs_bucket_name())+"/"+str(usr)
      l_files=gcs.listbucket(bucket_name)

      #get shared files of the user
      sh_files = SharedFile.query(SharedFile.recipients == usr.email())
      result = sh_files.fetch(1000)

      template_values = {
        'url': url,
        'url_linktext': url_linktext,
        'user_name': usr,
        'files': l_files,
        'users': userlist,
        'shared_files': sh_files,
      }

      template = JINJA_ENVIRONMENT.get_template('index.html')
      self.response.write(template.render(template_values))
Example #37
def search_file_in_bucket(bucket_name, folder_name, file_name):
    """Search through all files with in folder_name."""
    buck = cloudstorage.listbucket("/"+bucket_name+"/"+folder_name, marker='/my_bucket/'+folder_name+'/')
    for blob in buck:
        if blob.filename == file_name:
            return True
    return False
Example #38
 def exists_attachments_for_entity_key(entity_key):
     bucket_name = os.environ.get(
         'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
     files = gcs.listbucket("/" + bucket_name + "/" + entity_key)
     for file in files:
         return True
     return False
Example #39
 def listdir(self, name):
     """
     TODO collect directories
     """
     return [], [
         obj.filename for obj in cloudstorage.listbucket(self.path(name))
     ]
    def list_gcs_file_names(cls, bucket=None, folder='/'):
        """ Example usage :  for gcs_filename, filename in BlobFiles.list_gcs_file_names(folder='/upload') """

        for obj in gcs.listbucket('/%s%s' % (bucket or app_identity.get_default_gcs_bucket_name(), folder)):
            pbf = cls._query(cls.gcs_filename == obj.filename).get(projection=cls.filename)
            # yield result: the gcs_filename from GCS and the corresponding filename from BlobFiles
            yield obj.filename, (pbf.filename if pbf else '')
    def get(self, backup_date):
        # Make sure the requested backup exists
        backup_bucket = self.get_backup_bucket()
        backup_dir = "/{}/{}/".format(backup_bucket, backup_date)

        backup_files = cloudstorage.listbucket(backup_dir)
        bucket_prefix = "/{}/".format(backup_bucket)
        count = 0
        for bfile in backup_files:
            if bfile.is_dir:
                continue

            count += 1
            fname = bfile.filename
            path = fname[len(bucket_prefix):]
            taskqueue.add(
                url='/backend-tasks/backup/archive/file',
                params={
                    'bucket': backup_bucket,
                    'object': path,
                },
                queue_name='backups',
                method='POST')

        self.response.out.write("Enqueued updates for {} files".format(count))
    def post(self):
        # get args
        self.start_cursor = self.request.get('cursor')
        self.filtering_event_key = self.request.get('event')
        self.filename = self.request.get('filename')
        self.csv_header = self.request.get('csv_header')
        self.worker_url = self.request.get('worker_url')

        self.event = Event.get(self.filtering_event_key) if self.filtering_event_key else None

        # get (base) query, skip query to cursor, filter for sites
        query = self.get_base_query()
        if self.start_cursor:
            query.with_cursor(self.start_cursor)
        fetched_sites = query.fetch(limit=self.sites_per_task)
        sites = self.filter_sites(fetched_sites)

        # write part of csv file to GCS
        csv_part_gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + self.filename + '.part.' + self.start_cursor,
            'w',
            content_type='text/csv'
        )
        self._write_csv_rows(csv_part_gcs_fd, sites)
        csv_part_gcs_fd.close()

        # decide what to do next
        self.end_cursor = query.cursor()
        if self.end_cursor and self.start_cursor != self.end_cursor:
            # chain to next task
            taskqueue.add(
                url=self.worker_url,
                params=self.get_continuation_param_dict(),
                retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
            )
        else:
            # finish file: combine parts and deduplicate lines
            logging.info(u"Deduplicating to create %s ..." % self.filename)

            sio = StringIO()
            path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
            for gcs_file_stat in cloudstorage.listbucket(path_prefix):
                csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
                for line in csv_part_gcs_fd:
                    sio.write(line)
                csv_part_gcs_fd.close()
            sio.seek(0)
            deduplicated_lines = set(line for line in sio)

            # write csv header and deduplicated lines to new file
            csv_complete_gcs_fd = cloudstorage.open(
                BUCKET_NAME + '/' + self.filename,
                'w',
                content_type='text/csv'
            )
            csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
            for line in deduplicated_lines:
                csv_complete_gcs_fd.write(line)
            csv_complete_gcs_fd.close()
Example #43
def GetCachedResults2(job):
  filename = _GetCloudStorageName(job.job_id)
  results = cloudstorage.listbucket(filename)

  for _ in results:
    return 'https://storage.cloud.google.com' + filename

  return None
        def list_bucket(self, bucket):
            self.response.write('Listbucket result:\n')

            page_size = 1
            stats = gcs.listbucket(bucket + '/foo', max_keys=page_size)
            while True:
                count = 0
                for stat in stats:
                    count += 1
                    self.response.write(repr(stat))
                    self.response.write('\n')

                if count != page_size or count == 0:
                    break
                stats = gcs.listbucket(bucket + '/foo',
                                       max_keys=page_size,
                                       marker=stat.filename)
  def testListBucketPickle(self):
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
      self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET + '/test/')
    self.AssertListBucketEqual(filenames, bucket)

    bucket = cloudstorage.listbucket(BUCKET + '/test/', max_keys=2)
    self.AssertListBucketEqual(bars[:2], bucket)

    bucket = cloudstorage.listbucket(BUCKET + '/test/',
                                     marker=BUCKET + '/test/bar2',
                                     max_keys=2)
    self.AssertListBucketEqual(foos[:2], bucket)
Example #46
    def sizes(self):
        """
        Returns a list of all available sizes of this photo.
        """

        inodes = cloudstorage.listbucket(path_prefix="/{}/{}".format(
                        self.GCS_BUCKET, self.path))
        return [inode.filename.split("-")[-1] for inode in inodes]
def delete_file():
    # GCS helpfully decodes UTF-8 for you, which is a bit odd because it
    # won't accept unicode when creating an object.
    for stat in cloudstorage.listbucket(folder):
        target = stat.filename.encode('utf-8')
        cloudstorage.delete(target)

    return bottle.redirect('/list')
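
# A minimal sketch of the str/unicode asymmetry described in the comment above
# (the bucket and object names here are made up): the name is passed as a UTF-8
# encoded byte string when the object is created, while listbucket() later hands
# the same name back as a unicode string, hence the encode() before delete().
def create_file_with_unicode_name():
    name = u'/demo-bucket/se\u00f1al.txt'
    gcs_file = cloudstorage.open(name.encode('utf-8'), 'w',
                                 content_type='text/plain')
    gcs_file.write('payload')
    gcs_file.close()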
  def testMapReduce(self):
    # Prepare test data
    bucket_name = "testbucket"
    job_name = "test_job"
    entity_count = 200

    for i in range(entity_count):
      TestEntity(data=str(i)).put()
      TestEntity(data=str(i)).put()

    # Run Mapreduce
    p = mapreduce_pipeline.MapreducePipeline(
        job_name,
        __name__ + ".test_mapreduce_map",
        __name__ + ".test_mapreduce_reduce",
        input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
        output_writer_spec=(
            output_writers.__name__ + "._GoogleCloudStorageRecordOutputWriter"),
        mapper_params={
            "entity_kind": __name__ + "." + TestEntity.__name__,
            "bucket_name": bucket_name
        },
        reducer_params={
            "output_writer": {
                "bucket_name": bucket_name
            },
        },
        shards=16)
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    self.assertEquals(1, len(self.emails))
    self.assertTrue(self.emails[0][1].startswith(
        "Pipeline successful:"))

    # Verify reduce output.
    p = mapreduce_pipeline.MapreducePipeline.from_id(p.pipeline_id)
    self.assertEqual(model.MapreduceState.RESULT_SUCCESS,
                     p.outputs.result_status.value)
    output_data = []
    for output_file in p.outputs.default.value:
      with cloudstorage.open(output_file) as f:
        for record in records.RecordsReader(f):
          output_data.append(record)

    expected_data = [
        str((str(d), ["", ""])) for d in range(entity_count)]
    expected_data.sort()
    output_data.sort()
    self.assertEquals(expected_data, output_data)

    # Verify that mapreduce doesn't leave intermediate files behind.
    temp_file_stats = cloudstorage.listbucket("/" + bucket_name)
    for stat in temp_file_stats:
      if stat.filename:
        self.assertFalse(
            stat.filename.startswith("/%s/%s-shuffle-" %
                                     (bucket_name, job_name)))
Example #49
	def get(self):
		u = self.user_info
		username = u['name']
		# storage params
		bucketlist = gcs.listbucket(Rbucket)
		params = {
		'username': username
		}
		rbac(self, 'gcs', params)
Example #50
 def listdir(filename, recursive=True):
     bucket, prefix = filename[1:].split('/', 1)
     bucket = '/' + bucket
     names = set()
     for item in cloudstorage.listbucket(bucket, prefix=prefix):
         name = item.filename[len(bucket) + len(prefix) + 1:]
         if name and (recursive or '/' not in name):
             names.add(name)
     return list(names)
    def rmtree(self, path):
        if path != "":
            path_prefix = self.location + "/" + path + "/"
        else:
            path_prefix = self.location + "/"
        bucketContents = gcs.listbucket(path_prefix=path_prefix)

        for entry in bucketContents:
            gcs.delete(entry.filename)
Example #52
 def __deleteBlob(cls,key):
     """ Private method to delete a blobstore file from key
         param @key is String
     """
     r = gcs.listbucket(cls.__bucket_name)
     for a in r:    
         if a.filename[len(cls.__bucket_name):]==key:
             gcs.delete(a.filename)
             break
  def _runTest(self, num_shards):
    entity_count = 1000
    bucket_name = "bucket"
    tmp_bucket_name = "tmp_bucket"
    job_name = "test_map"

    for _ in range(entity_count):
      TestEntity().put()

    mapreduce_id = control.start_map(
        job_name,
        __name__ + ".test_handler_yield_key_str",
        DATASTORE_READER_NAME,
        {
            "entity_kind": __name__ + "." + TestEntity.__name__,
            "output_writer": {
                "bucket_name": bucket_name,
                "tmp_bucket_name": tmp_bucket_name,
            },
        },
        shard_count=num_shards,
        output_writer_spec=self.WRITER_NAME)

    test_support.execute_until_empty(self.taskqueue)
    mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
    filenames = self.WRITER_CLS.get_filenames(mapreduce_state)

    self.assertEqual(num_shards, len(set(filenames)))
    total_entries = 0
    for shard in range(num_shards):
      self.assertTrue(filenames[shard].startswith("/%s/%s" % (bucket_name,
                                                              job_name)))
      data = cloudstorage.open(filenames[shard]).read()
      # strip() is used to remove the last newline of each file so that split()
      # does not return extraneous empty entries.
      total_entries += len(data.strip().split("\n"))
    self.assertEqual(entity_count, total_entries)

    # no files left in tmpbucket
    self.assertFalse(list(cloudstorage.listbucket("/%s" % tmp_bucket_name)))
    # and only expected files in regular bucket
    files_in_bucket = [
        f.filename for f in cloudstorage.listbucket("/%s" % bucket_name)]
    self.assertEquals(filenames, files_in_bucket)
  def testListBucketCompatibility(self):
    """Test listbucket's old interface still works."""
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
      self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET, prefix='test/', marker='test/foo')
    self.assertEqual(foos, [stat.filename for stat in bucket])
def _get_files():
    files = []
    for f in cloudstorage.listbucket(OLD_BUCKET):
        split = f.filename.split('/protocol-logs-')[1].split('.')
        date_str = split[0]
        date = datetime.strptime(date_str, '%Y%m%d')
        if split[-1] == 'processed' or date < datetime(2016, 3, 17):
            continue
        files.append(f.filename)
    return files
Example #56
    def list_bucket(self, bucket):
        """Create several files and paginate through them."""

        self.response.write('Listbucket result:\n')

        # Production apps should set page_size to a practical value.
        page_size = 1
        stats = cloudstorage.listbucket(bucket + '/foo', max_keys=page_size)
        while True:
            count = 0
            for stat in stats:
                count += 1
                self.response.write(repr(stat))
                self.response.write('\n')

            if count != page_size or count == 0:
                break
            stats = cloudstorage.listbucket(
                bucket + '/foo', max_keys=page_size, marker=stat.filename)
def list_files():
    list_retry_params = gcs.RetryParams(initial_delay=.25, max_retries=0, urlfetch_timeout=.25)
    files = set()
    try:
        for file in gcs.listbucket("/{}/".format(BUCKET_NAME), retry_params=list_retry_params):
            files.add(file.filename)
    except gcs.TimeoutError:
        pass
    finally:
        return files
Example #58
def listing():
	listbucket = []
	bucketContent = gcs.listbucket(bucket,
	                               marker=None,
	                               max_keys=None,
	                               delimiter=None,
	                               retry_params=None)
	for entry in bucketContent:
		listbucket.append(entry.filename)
	return listbucket