Example #1
def check(key):
	try:
		gcs.stat(key, retry_params=write_retry_params)
		return True
	except Exception:
		return False
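The snippet above assumes a write_retry_params object defined elsewhere in the module. A minimal sketch of such a definition with this client library (the tuning values are illustrative):

import cloudstorage as gcs

# Retry/backoff policy for individual GCS calls.
write_retry_params = gcs.RetryParams(backoff_factor=1.1)

# Optionally install a policy as the library-wide default.
gcs.set_default_retry_params(
    gcs.RetryParams(initial_delay=0.2, max_delay=5.0,
                    backoff_factor=2, max_retry_period=15))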
Example #2
def migrate(image):
	"""Copies blobs stored in Blobstore over to a GCS bucket.

	Args:
		image: main.Image instance representing a single entity in the Datastore.

	This does not delete migrated (old) blobs so it is safe to run the job
	multiple times.
	"""
	if image.blob_key and not image.gs_key:
		blob_info = blobstore.get(image.blob_key)
		if not blob_info:
			image.blob_key = None
		else:
			gs_key = '/'.join(['', BUCKET, blob_info.filename])
			try:
				gcs.stat(gs_key)
			except gcs.NotFoundError:
				reader = blobstore.BlobReader(blob_info)
				with gcs.open(gs_key, 'w', content_type=blob_info.content_type) as f:
					while True:
						data = reader.read(1024**2)
						if not data:
							break
						f.write(data)
			blob_gs_key = blobstore.create_gs_key('/gs'+gs_key)
			image.url = images_api.get_serving_url(blob_gs_key, secure_url=True)
			image.gs_key = gs_key
		yield op.db.Put(image)
		if image.gs_key:
			yield op.counters.Increment('Migrated')
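The yield op.db.Put / op.counters style above indicates an appengine-mapreduce mapper. A sketch of one way to kick it off programmatically, assuming the standard mapreduce library layout; the handler path "main.migrate", entity kind "main.Image", and shard count are illustrative:

from mapreduce import control

control.start_map(
    name="Migrate Blobstore images to GCS",
    handler_spec="main.migrate",
    reader_spec="mapreduce.input_readers.DatastoreInputReader",
    mapper_parameters={"entity_kind": "main.Image"},
    shard_count=8)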
Example #3
 def _is_gs_file_present(self, gs_path):
   """True if given GS file exists."""
   try:
     cloudstorage.stat(
         filename=gs_path,
         retry_params=self._retry_params)
   except cloudstorage.NotFoundError:
     return False
   return True
Example #4
 def check_backup_exists(self, backup_bucket, file_name):
     try:
         cloudstorage.stat("/{}/{}".format(backup_bucket, file_name))
     except Exception as e:
         logging.info("Unable to find backup {} in GCS bucket {} - {}".format(
             file_name,
             backup_bucket,
             e))
         self.abort(400, "Unable to check backup exists")
Example #5
def exists(bucket, filename):
    """
    Return True if the filename exists in cloudstorage. False otherwise.
    """
    destination = get_cloudstorage_filename(bucket, filename)
    try:
        cloudstorage.stat(destination)
        return True
    except cloudstorage.NotFoundError:
        return False
Example #6
def _get_bucket_file_handle(date):
    bucket = '/rogerthat-protocol-logs'
    counter = 0
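    # Probe ascending sequence numbers until stat() raises NotFoundError,
    # i.e. the first name not taken yet (note the stat-then-open pattern
    # is racy if two writers run concurrently).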
    while True:
        counter += 1
        file_name = "%s/protocol-logs-%04d%02d%02d.%d" % (bucket, date.year, date.month, date.day, counter)
        try:
            cloudstorage.stat(file_name)
        except cloudstorage.NotFoundError:
            break
    return cloudstorage.open(file_name, 'w', content_type='text/plain')
Example #7
 def serveProxyCached(self , url):
     #does the url not exist in the bucket?
     try:
         gcs.stat(url)
     except NotFoundError:
         #download the data and store it into the bucket.
         result = urlfetch.fetch(url)
         if result.status_code == 200:
             #todo: store it
             pass
         else:
             return webapp2.abort(510)
Example #8
def _file_exists(destination):
  """Checks if a file exists.
    Tries to do a stat check on the file.
    If it succeeds, returns True; otherwise False.

    Args:
      destination: Full path to the file (i.e. /bucket/object) with leading slash.
    Returns:
      True if the file is accessible, otherwise False.
    """
  try:
    cloudstorage.stat(destination)
    return True
  except cloud_errors.NotFoundError:
    return False
Example #9
 def render_gcs(self, path, fallback=None):
     try:
         content_type = gcs.stat(path).content_type
         gcs_file = gcs.open(path)
         self.response.headers.add_header("Content-Type", content_type)
         self.response.write(gcs_file.read())
         gcs_file.close()
     except gcs.NotFoundError:
         logging.warning(fallback)
         if fallback is not None:
             self.render_gcs(fallback)
         else:
             self.error(404)
Example #10
    def get(self):
        try:
            # get the requested file name and cache flag from the client
            dlFileName = self.request.get("action")
            cacheFlag = self.request.get("cacheFlag")
            # cacheFlag == "false": fetch the file from GCS
            if str(cacheFlag) == "false":
                filecontent = memcache.get(str(dlFileName))
                if filecontent is not None:
                    self.response.write("Memcache File Content:")
                    self.response.write("<br>"+filecontent+"")

                stat = gcs.stat(bucket_name + "/" + str(dlFileName))
                gcs_file = gcs.open(bucket_name + "/" + str(dlFileName), 'r')
                filecontent = gcs_file.read()
                fileName = str(dlFileName)
                self.response.write(filecontent)
                gcs_file.close()
                stat = None
            # cacheFlag == "true": fetch the file from memcache
            else:
                filecontent = memcache.get(str(dlFileName))
                print("printing memcache filecontent to response");
                self.response.write(filecontent);

        except Exception:
            print "server exception"
            self.response.write("")
Example #11
    def AuthenticatedGet(self, org, event):
        filename = all_event_timeless_filename(event)
        bucket_path = BUCKET_NAME + '/' + filename

        try:
            file_stat = cloudstorage.stat(bucket_path)
        except cloudstorage.NotFoundError:
            self.abort(404)

        # rewrite filename to include timestamp
        custom_timestamp = timestamp(
            datetime.datetime.utcfromtimestamp(file_stat.st_ctime))
        filename_to_serve = file_stat.filename.replace(
            '.csv',
            '-%s.csv' % custom_timestamp
        )

        # serve the file as an attachment, forcing download
        gcs_fd = cloudstorage.open(bucket_path)
        if file_stat.content_type:
            self.response.headers['Content-Type'] = file_stat.content_type
        self.response.headers['Content-Disposition'] = (
            str('attachment; filename="%s"' % filename_to_serve)
        )

        self.response.write(gcs_fd.read())
Example #12
    def AuthenticatedGet(self, org, event):
        filename = self.request.get('filename')
        if not filename:
            self.abort(404)

        # check we are allowed to get this file, by filename
        allowed_filename_substrs = [inc.filename_friendly_name for inc in org.incidents]
        allowed_to_access = (
            org.is_global_admin or
            any(s in filename for s in allowed_filename_substrs)
        )
        if not allowed_to_access:
            self.abort(403)

        # find file in GCS
        bucket_path = BUCKET_NAME + '/' + filename
        try:
            file_stat = cloudstorage.stat(bucket_path)
        except cloudstorage.NotFoundError:
            # say not ready yet (HTTP 202)
            self.response.set_status(202)
            return

        # send the file contents & force download
        gcs_fd = cloudstorage.open(bucket_path)
        if file_stat.content_type:
            self.response.headers['Content-Type'] = file_stat.content_type
        self.response.headers['Content-Disposition'] = (
            str('attachment; filename="%s"' % filename)
        )
        self.response.write(gcs_fd.read())
Example #13
	def get(self, *args, **kwargs):
		from urllib import unquote

		uri = self.request.path
		uri = unquote(uri.encode('ascii')).decode('utf-8')

		try:
			gcs_object = cloudstorage.open(
				uri,
				mode = "r",
			)
		except cloudstorage.NotFoundError:
			if uri == "/dres/directory":
				self.get_listing()
				return # awk :-/
			else:
				self.response.set_status(404)
		else:
			gcs_object_info = cloudstorage.stat(
				uri,
			)

			self.response.headers['Content-Type'] = gcs_object_info.content_type
			self.response.write(gcs_object.read())

			gcs_object.close()
Example #14
 def size(self, name):
     try:
         info = cloudstorage.stat(self._add_bucket(name))
     except cloudstorage.NotFoundError:
         return None
     else:
         return info.st_size
Example #15
  def update_response_headers(self, request_headers, response_headers):
    try:
      # cloudstorage.stat doesn't use "/gs" prefix.
      gs_object_name = self.gs_object_name[3:]
      stat = cloudstorage.stat(gs_object_name)
    except cloudstorage.errors.NotFoundError as e:
      raise FileNotFoundError(str(e))

    headers = {}
    time_obj = datetime.utcfromtimestamp(stat.st_ctime).timetuple()
    headers['Last-Modified'] = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time_obj)
    headers['ETag'] = '"{}"'.format(stat.etag)
    if stat.content_type:
      headers['Content-Type'] = stat.content_type

    # The presence of "X-AppEngine-BlobKey" tells App Engine that we want to
    # serve the GCS blob directly to the user. This avoids reading the blob data
    # into the App Engine application. If the user has the file cached already,
    # omit the X-AppEngine-BlobKey header since we want to serve an empty response
    # with a 304 status code.
    request_etag = request_headers.get('If-None-Match')
    if request_etag != headers['ETag']:
      key = blobstore.create_gs_key(self.gs_object_name)
      headers['X-AppEngine-BlobKey'] = key

    response_headers.update(headers)
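A hypothetical caller for the method above, assuming a webapp2-style handler exposing self.request and self.response; it turns a missing X-AppEngine-BlobKey header into the empty 304 response described in the comment:

def serve_gcs_blob(self):
    headers = {}
    try:
        self.update_response_headers(self.request.headers, headers)
    except FileNotFoundError:
        self.response.set_status(404)
        return
    self.response.headers.update(headers)
    if 'X-AppEngine-BlobKey' not in headers:
        # The client's If-None-Match matched our ETag; its cache is current.
        self.response.set_status(304)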
Example #16
    def post(self):
        """Copy uploaded files to provided destination

        Returns:
            string: path to the uploaded file
        """
        if not self.get_file_infos():
            self.abort(400, "No file has been uploaded")

        fileinfo = self.get_file_infos()[0]

        try:
            import cloudstorage as gcs
        except ImportError:
            self.abort(
                    500,
                    'GoogleAppEngineCloudStorageClient module is required')

        stat = gcs.stat(fileinfo.gs_object_name[3:])
        destpath = "/".join(stat.filename.split("/")[:-1])

        gcs.copy2(fileinfo.gs_object_name[3:], destpath)
        gcs.delete(fileinfo.gs_object_name[3:])

        if spiner.env.is_local_env():
            url = '/_ah/gcs{}'.format(destpath)
        else:
            url = 'https://storage.googleapis.com{}'.format(destpath)

        self.response.write(url)
Example #17
  def testRecoverSomethingWrittenInFailedInstance(self):
    mr_spec = self.mr_state.mapreduce_spec
    shard_state = self.create_shard_state(0)
    ctx = context.Context(mr_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mr_spec, 0, 0)
    writer.write("123")
    writer = self.WRITER_CLS.from_json(writer.to_json())
    writer.write("4")

    new_writer = writer._recover(mr_spec, 0, 0)
    # Old instance is finalized and valid offset saved.
    old_stat = cloudstorage.stat(writer._streaming_buffer.name)
    self.assertEqual(
        len("123"),
        int(old_stat.metadata[self.WRITER_CLS._VALID_LENGTH]))
    # New instance is created with an incremented seg index.
    self.assertEqual(writer._seg_index + 1, new_writer._seg_index)

    # Verify filenames.
    self.assertTrue(
        writer._streaming_buffer.name.endswith(str(writer._seg_index)))
    self.assertTrue(
        new_writer._streaming_buffer.name.endswith(str(new_writer._seg_index)))
Example #18
  def get(self, bucket, obj):
    gcs_path = '/%s/%s' % (bucket, urllib.quote(obj))
    logging.info('Opening %s using BLOCK_SIZE=%d', gcs_path, self.BLOCK_SIZE)
    try:
      gcs_file = gcs.open(gcs_path, retry_params=self.RETRY_PARAMS)
      gcs_stat = gcs.stat(gcs_path, retry_params=self.RETRY_PARAMS)
    except gcs.ForbiddenError:
      logging.exception("ForbiddenError accessing path %s", gcs_path)
      self.abort(httplib.FORBIDDEN)
    except gcs.AuthorizationError:
      logging.exception("AuthorizationError accessing path %s", gcs_path)
      self.abort(httplib.UNAUTHORIZED)

    self.response.headers["Content-Type"] = gcs_stat.content_type

    content_size = 0L
    block_num = 0
    while True:
      block = gcs_file.read(self.BLOCK_SIZE)
      if not block:
        break
      self.response.write(block)
      content_size += len(block)
      block_num += 1
    logging.info("Wrote content from [%s]: %s blocks, %s bytes",
                 gcs_path, block_num, content_size)
Example #19
def serve_file(request, blob_key_or_info, as_download=False, content_type=None, filename=None, offset=None, size=None):
    """
        Serves a file from the blobstore. Reads most of the data from the blob info
        by default, but you can override it by passing kwargs.

        You can also pass a Google Cloud Storage filename as `blob_key_or_info` to use Blobstore API to serve the file:
        https://cloud.google.com/appengine/docs/python/blobstore/#Python_Using_the_Blobstore_API_with_Google_Cloud_Storage
    """

    if isinstance(blob_key_or_info, BlobKey):
        info = BlobInfo.get(blob_key_or_info)
        blob_key = blob_key_or_info
    elif isinstance(blob_key_or_info, basestring):
        info = BlobInfo.get(BlobKey(blob_key_or_info))
        blob_key = BlobKey(blob_key_or_info)
    elif isinstance(blob_key_or_info, BlobInfo):
        info = blob_key_or_info
        blob_key = info.key()
    else:
        raise ValueError("Invalid type %s" % blob_key_or_info.__class__)

    if info is None:
        # Lack of blobstore_info means this is a Google Cloud Storage file
        if has_cloudstorage:
            info = cloudstorage.stat(blob_key_or_info)
            info.size = info.st_size
            blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
        else:
            raise ImportError("To serve a Cloud Storage file you need to install cloudstorage")

    response = HttpResponse(content_type=content_type or info.content_type)
    response[BLOB_KEY_HEADER] = str(blob_key)
    response['Accept-Ranges'] = 'bytes'
    http_range = request.META.get('HTTP_RANGE')

    if offset or size:
        # Looks a little bonkers, but basically we create the HTTP range string;
        # we cast to int first to make sure nothing funky gets into the headers.
        http_range = "{}-{}".format(
            str(int(offset)) if offset else "",
            str(int(offset or 0) + size) if size else ""
        )

    if http_range is not None:
        response[BLOB_RANGE_HEADER] = http_range

    if as_download:
        response['Content-Disposition'] = smart_str(
            u'attachment; filename="%s"' % (filename or info.filename)
        )
    elif filename:
        raise ValueError("You can't specify a filename without also specifying as_download")

    if info.size is not None:
        response['Content-Length'] = info.size
    return response
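A usage sketch for serve_file in a Django view; the URL parameter may be a BlobKey string or, with cloudstorage installed, a "/bucket/object" GCS path (the view name and download filename are illustrative):

def download_view(request, blob_key):
    return serve_file(request, blob_key, as_download=True,
                      filename="report.pdf")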
Example #20
    def testShuffleNoFile(self):
        bucket_name = "testbucket"
        p = shuffler.ShufflePipeline("testjob", {"bucket_name": bucket_name}, [])
        p.start()
        test_support.execute_until_empty(self.taskqueue)

        p = shuffler.ShufflePipeline.from_id(p.pipeline_id)
        for filename in p.outputs.default.value:
            self.assertEqual(0, cloudstorage.stat(filename).st_size)
        self.assertEquals(1, len(self.emails))
Example #21
  def testCopy2(self):
    with cloudstorage.open(TESTFILE, 'w',
                           'text/foo', {'x-goog-meta-foo': 'foo'}) as f:
      f.write('abcde')

    dst = TESTFILE + 'copy'
    self.assertRaises(cloudstorage.NotFoundError, cloudstorage.stat, dst)
    cloudstorage_api.copy2(TESTFILE, dst)

    src_stat = cloudstorage.stat(TESTFILE)
    dst_stat = cloudstorage.stat(dst)
    self.assertEqual(src_stat.st_ctime, dst_stat.st_ctime)
    self.assertEqual(src_stat.st_size, dst_stat.st_size)
    self.assertEqual(src_stat.etag, dst_stat.etag)
    self.assertEqual(src_stat.content_type, dst_stat.content_type)
    self.assertEqual(src_stat.metadata, dst_stat.metadata)

    with cloudstorage.open(dst) as f:
      self.assertEqual('abcde', f.read())
Example #22
def validate_gsc_filename(filename):
  if not filename:
    return None
  else:
    filename = _prepare_gcs_filename(filename)
    try:
      gcs.stat(filename)
    except Exception:
      filename = None
  return filename
Example #23
 def _statFile_(self, name):
     """
     Primitive, private method. Returns a Google Stat object, raises cloudstorage.NotFoundError
     """
     filename = self.location+"/"+name
     
     #if settings.GOOGLE_CLOUD_STORAGE_LOGGING:
     #    logging.info("GoogleCloudStorage-stat %s", filename)
         
     return cloudstorage.stat(filename)
Example #24
def _get_or_create_cached_blob_key_and_info(blob_key_or_info):
    cached_value = _get_from_cache(blob_key_or_info)
    if cached_value:
        blob_key, info = cached_value
    else:
        info = cloudstorage.stat(blob_key_or_info)
        info.size = info.st_size
        blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
        _add_to_cache(blob_key_or_info, blob_key, info)
    return (blob_key, info)
Example #25
 def testFilenameEscaping(self):
   name = BUCKET + '/a b/c d/*%$'
   with cloudstorage.open(name, 'w') as f:
     f.write('foo')
   with cloudstorage.open(name) as f:
     self.assertEqual('foo', f.read())
   self.assertEqual(name, cloudstorage.stat(name).filename)
   bucket = cloudstorage.listbucket(BUCKET)
   for stat in bucket:
     self.assertEqual(name, stat.filename)
   cloudstorage.delete(name)
Example #26
  def get(self):
    """
    GET

    'filename' is required.
    """
    filename = self.request.GET['filename'].strip()
    stat = cloudstorage.stat(filename)
    gcs_file = cloudstorage.open(filename)
    self.response.headers['Content-type'] = stat.content_type
    self.response.out.write(gcs_file.read())
    gcs_file.close()
Example #27
def stat(file_name, retry_params=None):
	try:
		gcs_stat = gcs.stat(file_name, retry_params=retry_params)
	except gcs.NotFoundError:
		return None
	except gcs.AuthorizationError:
		return None
	else:
		file_stat = __gcs_file_stat_conversion__( gcs_stat )
		return file_stat
Example #28
def view_video():
    for _file in cloudstorage.listbucket(BUCKET_NAME):
        if "microscope_video" in _file.filename:
            _file = cloudstorage.stat(_file.filename)
            logging.info(_file.filename)
            logging.info(_file.content_type)
            cloud_file = cloudstorage.open(_file.filename, mode='r')
            response = make_response(cloud_file.read())
            cloud_file.close()
            response.mimetype = _file.content_type
            return response
    return "No file found"
Example #29
  def testCopy2ReplacesMetadata(self):
    with cloudstorage.open(TESTFILE, 'w',
                           'text/foo', {'x-goog-meta-foo': 'foo'}) as f:
      f.write('abcde')
    src_stat = cloudstorage.stat(TESTFILE)

    cloudstorage_api.copy2(TESTFILE, TESTFILE,
                           metadata={'x-goog-meta-foo': 'bar',
                                     'content-type': 'text/bar'})

    dst_stat = cloudstorage.stat(TESTFILE)
    self.assertEqual(src_stat.st_size, dst_stat.st_size)
    self.assertEqual(src_stat.etag, dst_stat.etag)
    self.assertEqual(src_stat.st_ctime, dst_stat.st_ctime)
    self.assertEqual('text/foo', src_stat.content_type)
    self.assertEqual('text/bar', dst_stat.content_type)
    self.assertEqual('foo', src_stat.metadata['x-goog-meta-foo'])
    self.assertEqual('bar', dst_stat.metadata['x-goog-meta-foo'])

    with cloudstorage.open(TESTFILE) as f:
      self.assertEqual('abcde', f.read())
Example #30
  def StatObject(self, url=None, bucket=None, obj=None):
    """Reads some information about an object in Gcs.

    Args:
      url: Full URL of the object. Use either this or bucket and object
      bucket: Bucket name. Use either this and object or url.
      obj: Object name. Use either this and bucket or url.

    Returns:
      A dict with size, md5, contentType and metadata keys.
    """
    stat = None
    if url:
      stat = cloudstorage.stat(Gcs.UrlToBucketAndNamePath(url))
    else:
      stat = cloudstorage.stat(Gcs.MakeBucketAndNamePath(bucket, obj))
    return {
        'size': stat.st_size,
        'md5Hash': stat.etag,
        'contentType': stat.content_type,
        'metadata': stat.metadata
    }
Example #31
def get_file_info(bucket, filename):
    """Returns information about stored file.

  Arguments:
    bucket: a bucket that contains the file.
    filename: path to a file relative to bucket root.

  Returns:
    FileInfo object or None if no such file.
  """
    try:
        stat = cloudstorage.stat('/%s/%s' % (bucket, filename),
                                 retry_params=_make_retry_params())
        return FileInfo(size=stat.st_size)
    except cloudstorage.errors.NotFoundError:
        return None
Example #32
 def testStat(self):
     self.CreateFile(TESTFILE)
     filestat = cloudstorage.stat(TESTFILE)
     content = ''.join(DEFAULT_CONTENT)
     self.assertEqual(len(content), filestat.st_size)
     self.assertEqual('text/plain', filestat.content_type)
     self.assertEqual('foo', filestat.metadata['x-goog-meta-foo'])
     self.assertEqual('bar', filestat.metadata['x-goog-meta-bar'])
     self.assertEqual('public, max-age=6000',
                      filestat.metadata['cache-control'])
     self.assertEqual('attachment; filename=f.txt',
                      filestat.metadata['content-disposition'])
     self.assertEqual(TESTFILE, filestat.filename)
     self.assertEqual(hashlib.md5(content).hexdigest(), filestat.etag)
     self.assertTrue(math.floor(self.start_time) <= filestat.st_ctime)
     self.assertTrue(filestat.st_ctime <= time.time())
Example #33
    def isfile(self, filepath):
        """Checks if the file with the given filepath exists in the GCS.

        Args:
            filepath: str. The path to the relevant file within the entity's
                assets folder.

        Returns:
            bool. Whether the file exists in GCS.
        """
        try:
            return bool(
                cloudstorage.stat(self._get_gcs_file_url(filepath),
                                  retry_params=None))
        except cloudstorage.NotFoundError:
            return False
Example #34
def _stat_file(file_path):
    """

    :param filename: full path
    :return:
    """
    st = gcs.stat(file_path)
    return {
        'filename': st.filename,
        'is_dir': st.is_dir,
        'st_size': st.st_size,
        'st_ctime': st.st_ctime,
        'etag': st.etag,
        'content_type': st.content_type,
        'metadata': st.metadata,
    }
Example #35
 def testAppendAndFlush(self):
     self.pool.append("a")
     self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                       self.filename)
     self.pool.append("b")
     self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                       self.filename)
     self.pool.flush()
     self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                       self.filename)
     # File handle does need to be explicitly closed.
     self.filehandle.close()
     self.assertEquals(32 * 1024, cloudstorage.stat(self.filename).st_size)
     self.assertEquals(
         ["a", "b"],
         list(records.RecordsReader(cloudstorage.open(self.filename))))
Example #36
 def get(self, *args, **kwargs):
     bucket_name = os.environ.get(
         'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
     bucket = '/' + bucket_name
     # omit first '/'
     object_name = self.request.path[1:]
     try:
         stat = gcs.stat(bucket + '/' + object_name)
         logging.info(repr(stat))
     except gcs.errors.NotFoundError:
         self.abort(404)
     except gcs.errors.AuthorizationError:
         self.abort(404)
     except Exception as e:
         logging.exception(e)
         self.response.write('There was an error: {}'.format(e))
Example #37
  def testShuffleNoData(self):
    bucket_name = "testbucket"
    test_filename = "testfile"
    full_filename = "/%s/%s" % (bucket_name, test_filename)

    gcs_file = cloudstorage.open(full_filename, mode="w")
    gcs_file.close()

    p = shuffler.ShufflePipeline("testjob", {"bucket_name": bucket_name},
                                 [full_filename, full_filename, full_filename])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    p = shuffler.ShufflePipeline.from_id(p.pipeline_id)
    for filename in p.outputs.default.value:
      self.assertEqual(0, cloudstorage.stat(filename).st_size)
    self.assertEquals(1, len(self.emails))
Example #38
    def get(self):
        try:
            file_name = self.request.get('filename')
            assert file_name
            info = gcs.stat('/%s/%s' % (bucket_name, file_name))
            assert info is not None
            blob_key = blobstore.create_gs_key('/gs/%s/%s' %
                                               (bucket_name, file_name))
            assert blob_key is not None

        except Exception as e:
            self.response.set_status(400)
        else:
            self.response.headers['X-AppEngine-BlobKey'] = blob_key
            self.response.headers['Content-Type'] = info.content_type.encode()
            self.response.headers['Content-Disposition'] = (
                'attachment; filename="%s"' % file_name).encode()
Example #39
def get_database_file():
    bucket = '/' + constants.GEOLOCATION_MAXMIND_GCS_BUCKET
    bucket_path = bucket + '/' + constants.GEOLOCATION_MAXMIND_BUCKET_PATH
    filename = bucket_path + '/' + constants.GEOLOCATION_MAXMIND_CITY_FILE
    logging.info('MaxMind database GCS path is: %s', filename)
    try:
        file_stat = gcs.stat(filename)
        database_file = gcs.open(filename)
        logging.info(
            'MaxMind database file creation time is: %s',
            datetime.datetime.fromtimestamp(
                file_stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S'))
    except gcs.NotFoundError:
        logging.error('MaxMind database file not found in GCS: %s', filename)
        raise

    return database_file
Example #40
  def split_input(cls, mapper_spec):
    """Returns a list of shard_count input_spec_shards for input_spec.

    Args:
      mapper_spec: The mapper specification to split from. Must contain
          'file_paths' parameter with one or more file paths.

    Returns:
      A list of GoogleStorageLineInputReader corresponding to the
      specified shards.
    """
    params = _get_params(mapper_spec)
    file_paths = params[cls.FILE_PATHS_PARAM]

    if isinstance(file_paths, basestring):
      # This is a mechanism to allow multiple file paths (which do not contain
      # commas) in a single string. It may go away.
      file_paths = file_paths.split(",")

    file_sizes = {}

    for file_path in file_paths:
      fstat = cloudstorage.stat(file_path)
      file_sizes[file_path] = fstat.st_size

    shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count)
    shards_per_file = shard_count // len(file_paths)

    if shards_per_file == 0:
      shards_per_file = 1

    chunks = []

    for file_path, file_size in file_sizes.items():
      file_chunk_size = file_size // shards_per_file
      for i in xrange(shards_per_file - 1):
        chunks.append(GoogleStorageLineInputReader.from_json(
            {cls.FILE_PATH_PARAM: file_path,
             cls.INITIAL_POSITION_PARAM: file_chunk_size * i,
             cls.END_POSITION_PARAM: file_chunk_size * (i + 1)}))
      chunks.append(GoogleStorageLineInputReader.from_json(
          {cls.FILE_PATH_PARAM: file_path,
           cls.INITIAL_POSITION_PARAM: file_chunk_size * (shards_per_file - 1),
           cls.END_POSITION_PARAM: file_size}))

    return chunks
Example #41
def futuregcsfileshardedpagemap(pagemapf=None, gcspath=None, pagesize=100, onsuccessf=None, onfailuref=None, onprogressf=None, onallchildsuccessf=None, initialresult=None, oncombineresultsf=None, weight=None, parentkey=None, **taskkwargs):
    def MapOverRange(futurekey, startbyte, endbyte, weight, **kwargs):
        logdebug("Enter MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

        linitialresult = initialresult if initialresult is not None else 0
        loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b
    
        try:
            # open file at gcspath for read
            with gcs.open(gcspath) as gcsfile:
                page, ranges = hwalk(gcsfile, pagesize, 2, startbyte, endbyte) 

            if pagemapf:
                lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult, loncombineresultsf)
                taskkwargs["futurename"] = "pagemap %s of %s,%s" % (len(page), startbyte, endbyte)
                future(pagemapf, parentkey=futurekey, onallchildsuccessf=lonallchildsuccessf, weight = len(page), **taskkwargs)(page)
            else:
                setlocalprogress(futurekey, len(page))

            if ranges:
                newweight = (weight - len(page)) / len(ranges) if weight is not None else None
                for arange in ranges:
                    taskkwargs["futurename"] = "shard %s" % (arange)

                    lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult if pagemapf else len(page), loncombineresultsf)

                    future(MapOverRange, parentkey=futurekey, onallchildsuccessf=lonallchildsuccessf, weight = newweight, **taskkwargs)(arange[0], arange[1], weight = newweight)
                
            if ranges or pagemapf:
                raise FutureReadyForResult("still going")
            else:
                return len(page)
        finally:
            logdebug("Leave MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

    # get length of file in bytes
    filestat = gcs.stat(gcspath)

    filesizebytes = filestat.st_size    

    futurename = "top level 0 to %s" % (filesizebytes)

    taskkwargscopy = dict(taskkwargs)
    taskkwargscopy["futurename"] = taskkwargscopy.get("futurename", futurename)

    return future(MapOverRange, onsuccessf = onsuccessf, onfailuref = onfailuref, onprogressf = onprogressf, onallchildsuccessf=onallchildsuccessf, parentkey=parentkey, weight = weight, **taskkwargscopy)(0, filesizebytes, weight)
Example #42
    def detectBillFileType(self, request):
        accountId = int(request.accountId)
        accountKey = Key(Account, accountId)
        billId = request.billId
        billKey = Key(Bill, billId, parent=accountKey)

        billfileId = request.billfileToDetect
        billFile = Key(BillFile, billfileId, parent=billKey).get()

        # Build file path
        filepath = getFilepath(str(accountId), billId, billfileId,
                               billFile.name)
        filestat = gcs.stat('/' + settings.FILE_BUCKET + filepath)

        billFile.file_type = filestat.content_type
        billFile.put()

        return StringMessage(data='Detected:' + billFile.file_type)
Example #43
    def testGzip(self):
        with cloudstorage.open(TESTFILE, 'w', 'text/plain',
                               {'content-encoding': 'gzip'}) as f:
            gz = gzip.GzipFile('', 'wb', 9, f)
            gz.write('a' * 1024)
            gz.write('b' * 1024)
            gz.close()

        stat = cloudstorage.stat(TESTFILE)
        self.assertEqual('text/plain', stat.content_type)
        self.assertEqual('gzip', stat.metadata['content-encoding'])
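        # st_size reports the stored (gzip-compressed) object size,
        # not the 2048 bytes the decompression below yields.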
        self.assertEqual(37, stat.st_size)

        with cloudstorage.open(TESTFILE) as f:
            gz = gzip.GzipFile('', 'rb', 9, f)
            result = gz.read(10)
            self.assertEqual('a' * 10, result)
            self.assertEqual('a' * 1014 + 'b' * 1024, gz.read())
Example #44
def _copy_gcs_file(m):
    if not m.gcs_filename:
        return
    if m.gcs_filename.endswith(".jpeg") or m.gcs_filename.endswith(
            ".png") or m.gcs_filename.endswith(".gif"):
        return

    old_gcs_filename = m.gcs_filename
    old_gcs_stats = cloudstorage.stat(old_gcs_filename)
    content_type = old_gcs_stats.content_type

    filename = '%s.%s' % (old_gcs_filename,
                          get_extension_for_content_type(content_type))
    with cloudstorage.open(old_gcs_filename, 'r') as gcs_file:
        with cloudstorage.open(filename, 'w', content_type) as f:
            f.write(gcs_file.read())

    m.gcs_filename = filename
    m.put()
Example #45
    def isfile(self, filepath):
        """Checks if the file with the given filepath exists in the GCS.

        Args:
            filepath: str. The path to the relevant file within the exploration.

        Returns:
            bool. Whether the file exists in GCS.
        """
        bucket_name = app_identity_services.get_gcs_resource_bucket_name()

        # Upload to GCS bucket with filepath
        # "<bucket>/<exploration-id>/assets/<filepath>".
        gcs_file_url = ('/%s/%s/assets/%s' %
                        (bucket_name, self._exploration_id, filepath))
        try:
            return bool(
                cloudstorage.stat(gcs_file_url, retry_params=None))
        except cloudstorage.NotFoundError:
            return False
Example #46
    def post(self):

        if self.request.get('_delete'):
            self.delete()
            return

        tags = clean_tags(self.request.get('tags'))
        if not tags:
            api.write_error(self.response, 400,
                            'Missing required parameter: tags')
            return

        tags.append(api.get_geo_name(self.request))

        try:
            uploaded_file = self.request.POST['file']
            if not uploaded_file.type:
                api.write_error(self.response, 400, 'Missing content type')
                return
        except Exception:
            uploaded_file = None

        if uploaded_file is None:
            api.write_error(self.response, 400, 'Missing content')
            return

        recording_id = str(uuid.uuid4())
        filename = config.RECORDINGS_BUCKET + recording_id

        gcs_file = gcs.open(filename,
                            mode='w',
                            content_type=uploaded_file.type)
        gcs_file.write(uploaded_file.file.read())
        gcs_file.close()

        stat = gcs.stat(filename)
        recording = Recording(uuid=recording_id,
                              tags=tags,
                              duration=stat.st_size / config.SAMPLES_PER_SEC)
        recording.put_async()

        api.write_message(self.response, 'success')
Example #47
    def _next_seg(self):
        """Get next seg."""
        if self._seg:
            self._seg.close()
        self._seg_index += 1
        if self._seg_index > self._last_seg_index:
            self._seg = None
            return

        filename = self._seg_prefix + str(self._seg_index)
        stat = cloudstorage.stat(filename)
        writer = output_writers._GoogleCloudStorageOutputWriter
        if writer._VALID_LENGTH not in stat.metadata:
            raise ValueError("Expect %s in metadata for file %s." %
                             (writer._VALID_LENGTH, filename))
        self._seg_valid_length = int(stat.metadata[writer._VALID_LENGTH])
        if self._seg_valid_length > stat.st_size:
            raise ValueError(
                "Valid length %s is too big for file %s of length %s" %
                (self._seg_valid_length, filename, stat.st_size))
        self._seg = cloudstorage.open(filename)
Example #48
def read_photo_from_storage(photo, label, response):
    bucket_name = os.environ.get('BUCKET_NAME',
                                 app_identity.get_default_gcs_bucket_name())

    filename = format_photo_file_name(bucket_name, photo.created_by_user_id,
                                      photo.sha256, label)

    try:
        file_stat = gcs.stat(filename)
        gcs_file = gcs.open(filename)
        response.headers['Content-Type'] = file_stat.content_type
        response.headers[
            'Cache-Control'] = 'private, max-age=31536000'  # cache for up to 1 year
        response.headers['ETag'] = file_stat.etag
        response.write(gcs_file.read())
        gcs_file.close()

    except gcs.NotFoundError:
        logging.exception("Fail to read photo file")
        response.status = 404
        response.write('photo file not found')
Example #49
def gcsfileshardedpagemap(pagemapf=None, gcspath=None, initialshards = 10, pagesize = 100, **taskkwargs):
    @task(**taskkwargs)
    def MapOverRange(startpos, endpos, **kwargs):
        logdebug("Enter MapOverRange: %s, %s" % (startpos, endpos))

        # open file at gcspath for read
        with gcs.open(gcspath) as gcsfile:
            page, ranges = hwalk(gcsfile, pagesize, initialshards, startpos, endpos) 

        if ranges:
            for arange in ranges:
                MapOverRange(arange[0], arange[1])

        if pagemapf:
            pagemapf(page)

        logdebug("Leave MapOverRange: %s, %s" % (startpos, endpos))

    # get length of file in bytes
    filestat = gcs.stat(gcspath)
    
    MapOverRange(0, filestat.st_size)
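A usage sketch, with a hypothetical bucket path and page callback:

def log_page(page):
    logging.info("processing %d records", len(page))

gcsfileshardedpagemap(pagemapf=log_page,
                      gcspath="/mybucket/big-export.csv",
                      initialshards=10,
                      pagesize=100)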
Example #50
    def testWriterMetadata(self):
        test_acl = "test-acl"
        test_content_type = "test-mime"
        mapreduce_state = self.create_mapreduce_state(
            output_params={
                self.WRITER_CLS.BUCKET_NAME_PARAM: "test",
                self.WRITER_CLS.ACL_PARAM: test_acl,
                self.WRITER_CLS.CONTENT_TYPE_PARAM: test_content_type
            })
        shard_state = self.create_shard_state(0)
        ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
        context.Context._set(ctx)

        writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                        shard_state.shard_number, 0)
        writer = self.WRITER_CLS.from_json(writer.to_json())
        writer.finalize(ctx, shard_state)

        filename = self.WRITER_CLS._get_filename(shard_state)

        file_stat = cloudstorage.stat(filename)
        self.assertEqual(test_content_type, file_stat.content_type)
Example #51
    def _stat_file(self, bucketFile, ftype=None, cache=True):
        #log.info("_stat_file(%s,%s,%s)" % (bucketFile, ftype, cache))
        ret = None
        if cache:
            item = self.readCache(bucketFile,ftype)
            if item:
                ret = item.stat
                log.info("Got from readCache")
            
        if not ret:
            #log.info('Stating file {}'.format(bucketFile))
            try:
                ret = cloudstorage.stat(bucketFile)
            except cloudstorage.NotFoundError:
                log.info("File not found: %s" % bucketFile)
            except Exception as e:
                log.info("Stat error(%s): %s" % (bucketFile,e))

            if ret:
                log.info("Stat {}".format(ret))
                itm = bucketCacheItem(ret,None)
                self.writeCache(bucketFile, itm, ftype)
        return ret
Example #52
    def AuthenticatedGet(self, org, event):
        filename = all_event_timeless_filename(event)
        bucket_path = BUCKET_NAME + '/' + filename

        try:
            file_stat = cloudstorage.stat(bucket_path)
        except cloudstorage.NotFoundError:
            self.abort(404)

        # rewrite filename to include timestamp
        custom_timestamp = timestamp(
            datetime.datetime.utcfromtimestamp(file_stat.st_ctime))
        filename_to_serve = file_stat.filename.replace(
            '.csv', '-%s.csv' % custom_timestamp)

        # serve the file as an attachment, forcing download
        gcs_fd = cloudstorage.open(bucket_path)
        if file_stat.content_type:
            self.response.headers['Content-Type'] = file_stat.content_type
        self.response.headers['Content-Disposition'] = (str(
            'attachment; filename="%s"' % filename_to_serve))

        self.response.write(gcs_fd.read())
Example #53
def gcs_stat(filename, *args, **kwargs):
    return gcs.stat(gcs_filename(filename), *args, **kwargs)
Example #54
 def exists(filename):
     try:
         cloudstorage.stat(filename)
         return True
     except cloudstorage.NotFoundError:
         return False
Example #55
 def stat(filename):
     try:
         return cloudstorage.stat(filename)
     except cloudstorage.NotFoundError:
         raise IOError('File {} not found.'.format(filename))
Example #56
 def exists(self, name):
     try:
         cloudstorage.stat(self.path(name))
         return True
     except cloudstorage.NotFoundError:
         return False
Example #57
def StatGCS(fullurl):
    try:
        return gcs.stat(_remove_gcs_prefix(fullurl))
    except gcs.NotFoundError:
        return None
Example #58
def serve_file(request,
               blob_key_or_info,
               as_download=False,
               content_type=None,
               filename=None,
               offset=None,
               size=None):
    """
        Serves a file from the blobstore. Reads most of the data from the blob info
        by default, but you can override it by passing kwargs.

        You can also pass a Google Cloud Storage filename as `blob_key_or_info` to use Blobstore API to serve the file:
        https://cloud.google.com/appengine/docs/python/blobstore/#Python_Using_the_Blobstore_API_with_Google_Cloud_Storage
    """

    if isinstance(blob_key_or_info, BlobKey):
        info = BlobInfo.get(blob_key_or_info)
        blob_key = blob_key_or_info
    elif isinstance(blob_key_or_info, basestring):
        info = BlobInfo.get(BlobKey(blob_key_or_info))
        blob_key = BlobKey(blob_key_or_info)
    elif isinstance(blob_key_or_info, BlobInfo):
        info = blob_key_or_info
        blob_key = info.key()
    else:
        raise ValueError("Invalid type %s" % blob_key_or_info.__class__)

    if info is None:
        # Lack of blobstore_info means this is a Google Cloud Storage file
        if has_cloudstorage:
            cached_value = _get_from_cache(blob_key_or_info)
            if cached_value:
                blob_key, info = cached_value
            else:
                info = cloudstorage.stat(blob_key_or_info)
                info.size = info.st_size
                blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
                _add_to_cache(blob_key_or_info, blob_key, info)
        else:
            raise ImportError(
                "To serve a Cloud Storage file you need to install cloudstorage"
            )

    response = HttpResponse(content_type=content_type or info.content_type)
    response[BLOB_KEY_HEADER] = str(blob_key)
    response['Accept-Ranges'] = 'bytes'
    http_range = request.META.get('HTTP_RANGE')

    if offset or size:
        # Looks a little bonkers, but basically we create the HTTP range string;
        # we cast to int first to make sure nothing funky gets into the headers.
        http_range = "{}-{}".format(
            str(int(offset)) if offset else "",
            str(int(offset or 0) + size) if size else "")

    if http_range is not None:
        response[BLOB_RANGE_HEADER] = http_range

    if as_download:
        response['Content-Disposition'] = smart_str(
            u'attachment; filename="%s"' % (filename or info.filename))
    elif filename:
        raise ValueError(
            "You can't specify a filename without also specifying as_download")

    if info.size is not None:
        response['Content-Length'] = info.size
    return response
Example #59
 def size(self, name):
     filestat = cloudstorage.stat(self.path(name))
     return filestat.st_size
Example #60
 def created_time(self, name):
     filestat = cloudstorage.stat(self.path(name))
     creation_date = timezone.datetime.fromtimestamp(filestat.st_ctime)
     return timezone.make_aware(creation_date,
                                timezone.get_current_timezone())