Beispiel #1
0
def warmup():
  """Primes the configuration kept in local memory.

  Meant to be invoked from the warmup handler. The call is optional:
  everything works even if 'warmup' is never called.
  """
  for precache in (
      settings, utils.get_task_queue_host, utils.get_app_version):
    precache()
Beispiel #2
0
def warmup():
    """Primes the configuration kept in local memory.

    Meant to be invoked from the warmup handler. The call is optional:
    everything works even if 'warmup' is never called.
    """
    precache_calls = [
        settings,
        utils.get_task_queue_host,
        utils.get_app_version,
    ]
    for call in precache_calls:
        call()
  def storage_helper(self, request, uploaded_to_gs):
    """Implement shared logic between store_inline and finalize_gs.

    Validates the upload ticket, builds the content entry it describes and
    stores that entry: directly for inline content, or through
    store_and_enqueue_verify_task() for content uploaded to Google Storage.

    Arguments:
      request: message carrying 'upload_ticket' and, for inline uploads,
          'content'.
      uploaded_to_gs: bool, True when the content is expected to already be
          in Google Storage.

    Returns:
      PushPing(ok=True) once the entry has been stored.

    Raises:
      endpoints.BadRequestException: the ticket is missing or invalid, the
          file is not in Google Storage, or the inline content does not match
          the digest and size embedded in the ticket.
      endpoints.InternalServerErrorException: the entry could not be stored.
    """
    # validate token or error out
    if not request.upload_ticket:
      raise endpoints.BadRequestException(
          'Upload ticket was empty or not provided.')
    try:
      embedded = TokenSigner.validate(
          request.upload_ticket, UPLOAD_MESSAGES[uploaded_to_gs])
    except (auth.InvalidTokenError, ValueError) as error:
      raise endpoints.BadRequestException(
          'Ticket validation failed: %s' % error.message)

    # read data and convert types
    digest = embedded['d'].encode('utf-8')
    is_isolated = bool(int(embedded['i']))
    namespace = embedded['n']
    size = int(embedded['s'])

    # create a key
    key = entry_key_or_error(namespace, digest)

    # get content and compressed size
    if uploaded_to_gs:
      # ensure that file info is uploaded to GS first
      # TODO(cmassaro): address analogous TODO from handlers_api
      file_info = gcs.get_file_info(config.settings().gs_bucket, key.id())
      if not file_info:
        raise endpoints.BadRequestException(
            'File should be in Google Storage.\nFile: \'%s\' Size: %d.' % (
                key.id(), size))
      content = None
      compressed_size = file_info.size
    else:
      content = request.content
      compressed_size = len(content)

    # all is well; create an entry
    entry = model.new_content_entry(
        key=key,
        is_isolated=is_isolated,
        compressed_size=compressed_size,
        expanded_size=size,
        is_verified=not uploaded_to_gs,
        content=content,
    )

    if not uploaded_to_gs:
      # DB: assert that embedded content is the data sent by the request.
      # The content is hashed once and the result reused in the error message.
      actual = hash_content(content, namespace)
      if (digest, size) != actual:
        raise endpoints.BadRequestException(
            'Embedded digest does not match provided data: '
            '(digest, size): (%r, %r); expected: %r' % (digest, size, actual))
      # Report datastore failures the same way the GCS branch below does
      # instead of letting the raw exception escape.
      try:
        entry.put()
      except datastore_errors.Error as e:
        raise endpoints.InternalServerErrorException(
            'Unable to store the entity: %s.' % e.__class__.__name__)
    else:
      # GCS: enqueue verification task
      try:
        store_and_enqueue_verify_task(entry, utils.get_task_queue_host())
      except (
          datastore_errors.Error,
          runtime.apiproxy_errors.CancelledError,
          runtime.apiproxy_errors.DeadlineExceededError,
          runtime.apiproxy_errors.OverQuotaError,
          runtime.DeadlineExceededError,
          taskqueue.Error) as e:
        raise endpoints.InternalServerErrorException(
            'Unable to store the entity: %s.' % e.__class__.__name__)

    stats.add_entry(
        stats.STORE, entry.compressed_size,
        'GS; %s' % entry.key.id() if uploaded_to_gs else 'inline')
    return PushPing(ok=True)
Beispiel #4
0
    def storage_helper(request, uploaded_to_gs):
        """Run the steps common to store_inline and finalize_gs.

        Arguments:
          request: either StorageRequest or FinalizeRequest.
          uploaded_to_gs: bool.
        """
        if not request.upload_ticket:
            raise endpoints.BadRequestException(
                'Upload ticket was empty or not provided.')
        try:
            payload = TokenSigner.validate(
                request.upload_ticket, UPLOAD_MESSAGES[uploaded_to_gs])
        except (auth.InvalidTokenError, ValueError) as error:
            reason = 'Ticket validation failed: %s' % error.message
            raise endpoints.BadRequestException(reason)

        # Unpack the fields embedded in the ticket.
        digest = payload['d'].encode('utf-8')
        is_isolated = bool(int(payload['i']))
        namespace = payload['n']
        size = int(payload['s'])
        key = entry_key_or_error(namespace, digest)

        if not uploaded_to_gs:
            content = request.content
            compressed_size = len(content)
        else:
            # The file info must be available in GS before going further.
            gs_info = gcs.get_file_info(
                config.settings().gs_bucket, key.id())
            if not gs_info:
                logging.debug('%s', digest)
                raise endpoints.BadRequestException(
                    'File should be in Google Storage.\nFile: \'%s\' Size: %d.'
                    % (key.id(), size))
            content = None
            compressed_size = gs_info.size

        # An already stored entity only triggers a warning; it is not
        # treated as an error.
        if key.get():
            # TODO(maruel): Handle these more gracefully.
            logging.warning('Overwritting ContentEntry\n%s', digest)

        new_entry = model.new_content_entry(
            key=key,
            is_isolated=is_isolated,
            compressed_size=compressed_size,
            expanded_size=size,
            is_verified=not uploaded_to_gs,
            content=content,
        )

        if uploaded_to_gs:
            # The entity is stored together with its verification task.
            try:
                store_and_enqueue_verify_task(
                    new_entry, utils.get_task_queue_host())
            except (datastore_errors.Error,
                    runtime.apiproxy_errors.CancelledError,
                    runtime.apiproxy_errors.DeadlineExceededError,
                    runtime.apiproxy_errors.OverQuotaError,
                    runtime.DeadlineExceededError, taskqueue.Error) as e:
                raise endpoints.InternalServerErrorException(
                    'Unable to store the entity: %s.' % e.__class__.__name__)
        else:
            # The data sent with the request must match the ticket.
            logging.debug('%s', digest)
            if (digest, size) != hash_content(content, namespace):
                details = (digest, size, hash_content(content, namespace))
                raise endpoints.BadRequestException(
                    'Embedded digest does not match provided data: '
                    '(digest, size): (%r, %r); expected: %r' % details)
            try:
                new_entry.put()
            except datastore_errors.Error as e:
                raise endpoints.InternalServerErrorException(
                    'Unable to store the entity: %s.' % e.__class__.__name__)

        if uploaded_to_gs:
            where = 'GS; %s' % new_entry.key.id()
        else:
            where = 'inline'
        stats.add_entry(stats.STORE, new_entry.compressed_size, where)
        return PushPing(ok=True)
Beispiel #5
0
  def handle(self, namespace, hash_key):
    """Validates a signed store request and saves the content entry.

    Reads the expiration timestamp ('x'), item size ('s'), isolated flag
    ('i'), uploaded-to-GS flag ('g') and signature ('sig') from the request,
    checks the signature and its expiration, then stores the entry either
    inline or as a reference to the file in Google Storage.

    Arguments:
      namespace: namespace of the entry; part of the signed data and of the
          entry key.
      hash_key: advertised hex digest of the content; part of the signed data
          and, when the content is in the request body, checked against the
          actual data.

    Returns:
      The result of self.send_error() when the request is rejected; None
      otherwise, after the reply was sent with self.send_json().
    """
    # Extract relevant request parameters.
    expiration_ts = self.request.get('x')
    item_size = self.request.get('s')
    is_isolated = self.request.get('i')
    uploaded_to_gs = self.request.get('g')
    signature = self.request.get('sig')

    # Build correct signature.
    expected_sig = self.generate_signature(
        config.settings().global_secret, self.request.method, expiration_ts,
        namespace, hash_key, item_size, is_isolated, uploaded_to_gs)

    # Verify signature is correct.
    if not utils.constant_time_equals(signature, expected_sig):
      return self.send_error('Incorrect signature.')

    # Convert parameters from strings back to something useful.
    # It can't fail since matching signature means it was us who generated
    # these strings in the first place.
    expiration_ts = int(expiration_ts)
    item_size = int(item_size)
    is_isolated = bool(int(is_isolated))
    uploaded_to_gs = bool(int(uploaded_to_gs))

    # Verify signature is not yet expired.
    if time.time() > expiration_ts:
      return self.send_error('Expired signature.')

    if uploaded_to_gs:
      # GS upload finalization uses empty POST body.
      assert self.request.method == 'POST'
      if self.request.headers.get('content-length'):
        return self.send_error('Expecting empty POST.')
      content = None
    else:
      # Datastore upload uses PUT.
      assert self.request.method == 'PUT'
      if self.request.headers.get('content-length'):
        content = self.request.body
      else:
        content = ''

    # Info about corresponding GS entry (if it exists).
    gs_bucket = config.settings().gs_bucket
    key = model.entry_key(namespace, hash_key)

    # Verify the data while at it since it's already in memory but before
    # storing it in memcache and datastore.
    if content is not None:
      # Verify advertised hash matches the data.
      try:
        hex_digest, expanded_size = hash_content(content, namespace)
        if hex_digest != hash_key:
          raise ValueError(
              'Hash and data do not match, '
              '%d bytes (%d bytes expanded)' % (len(content), expanded_size))
        if expanded_size != item_size:
          raise ValueError(
              'Advertised data length (%d) and actual data length (%d) '
              'do not match' % (item_size, expanded_size))
      except ValueError as err:
        return self.send_error('Inline verification failed.\n%s' % err)
      # Successfully verified!
      compressed_size = len(content)
      needs_verification = False
    else:
      # Fetch size of the stored file.
      file_info = gcs.get_file_info(gs_bucket, key.id())
      if not file_info:
        # TODO(maruel): Do not fail yet. If the request got up to here, the file
        # is likely there but the service may have trouble fetching the metadata
        # from GS.
        return self.send_error(
            'File should be in Google Storage.\nFile: \'%s\' Size: %d.' %
            (key.id(), item_size))
      compressed_size = file_info.size
      needs_verification = True

    # Data is here and it's too large for DS, so put it in GS. It is likely
    # between MIN_SIZE_FOR_GS <= len(content) < MIN_SIZE_FOR_DIRECT_GS
    if content is not None and len(content) >= MIN_SIZE_FOR_GS:
      if not gcs.write_file(gs_bucket, key.id(), [content]):
        # Returns 503 so the client automatically retries.
        return self.send_error(
            'Unable to save the content to GS.', http_code=503)
      # It's now in GS.
      uploaded_to_gs = True

    # Can create entity now, everything appears to be legit.
    entry = model.new_content_entry(
        key=key,
        is_isolated=is_isolated,
        compressed_size=compressed_size,
        expanded_size=-1 if needs_verification else item_size,
        is_verified=not needs_verification)

    # If it's not in GS then put it inline.
    if not uploaded_to_gs:
      assert content is not None and len(content) < MIN_SIZE_FOR_GS
      entry.content = content

    # Start saving *.isolated into memcache iff its content is available and
    # it's not in Datastore: there's no point in saving inline blobs in memcache
    # because ndb already memcaches them.
    memcache_store_future = None
    if (content is not None and
        entry.content is None and
        entry.is_isolated and
        entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED):
      # 'async' is a reserved word on Python >= 3.7, so the keyword argument
      # is passed through a dict; the call itself is unchanged.
      memcache_store_future = model.save_in_memcache(
          namespace, hash_key, content, **{'async': True})

    try:
      # If entry was already verified above (i.e. it is a small inline entry),
      # store it right away, possibly overriding existing entity. Most of
      # the time it is a new entry anyway (since clients try to upload only
      # new entries).
      if not needs_verification:
        entry.put()
      else:
        # For large entries (that require expensive verification) be more
        # careful and check that it is indeed a new entity. No need to do it in
        # transaction: a race condition would lead to redundant verification
        # task enqueued, no big deal.
        existing = entry.key.get()
        if existing:
          if existing.is_verified:
            logging.info('Entity exists and already verified')
          else:
            logging.info('Entity exists, but not yet verified')
        else:
          # New entity. Store it and enqueue verification task, transactionally.
          task_queue_host = utils.get_task_queue_host()
          def run():
            entry.put()
            taskqueue.add(
                url='/internal/taskqueue/verify/%s' % entry.key.id(),
                queue_name='verify',
                headers={'Host': task_queue_host},
                transactional=True)
          datastore_utils.transaction(run)

      # TODO(vadimsh): Fill in details about the entry, such as expiration time.
      self.send_json({'entry': {}})

      # Log stats. A conditional expression binds looser than '+', so the
      # 'GS; ' prefix is attached to the key id in its own branch: inline
      # entries are reported as 'inline', the others as 'GS; <key id>'.
      if entry.content is not None:
        where = 'inline'
      else:
        where = 'GS; %s' % entry.key.id()
      stats.add_entry(stats.STORE, entry.compressed_size, where)

    finally:
      # Do not keep dangling futures. Note that error here is ignored,
      # memcache is just an optimization.
      if memcache_store_future:
        memcache_store_future.wait()
Beispiel #6
0
    def handle(self, namespace, hash_key):
        """Validates a signed store request and saves the content entry.

        Reads the expiration timestamp ('x'), item size ('s'), isolated flag
        ('i'), uploaded-to-GS flag ('g') and signature ('sig') from the
        request, checks the signature and its expiration, then stores the
        entry either inline or as a reference to the file in Google Storage.

        Arguments:
          namespace: namespace of the entry; part of the signed data and of
              the entry key.
          hash_key: advertised hex digest of the content; part of the signed
              data and, when the content is in the request body, checked
              against the actual data.

        Returns:
          The result of self.send_error() when the request is rejected; None
          otherwise, after the reply was sent with self.send_json().
        """
        # Extract relevant request parameters.
        expiration_ts = self.request.get('x')
        item_size = self.request.get('s')
        is_isolated = self.request.get('i')
        uploaded_to_gs = self.request.get('g')
        signature = self.request.get('sig')

        # Build correct signature.
        expected_sig = self.generate_signature(config.settings().global_secret,
                                               self.request.method,
                                               expiration_ts, namespace,
                                               hash_key, item_size,
                                               is_isolated, uploaded_to_gs)

        # Verify signature is correct.
        if not utils.constant_time_equals(signature, expected_sig):
            return self.send_error('Incorrect signature.')

        # Convert parameters from strings back to something useful.
        # It can't fail since matching signature means it was us who generated
        # these strings in the first place.
        expiration_ts = int(expiration_ts)
        item_size = int(item_size)
        is_isolated = bool(int(is_isolated))
        uploaded_to_gs = bool(int(uploaded_to_gs))

        # Verify signature is not yet expired.
        if time.time() > expiration_ts:
            return self.send_error('Expired signature.')

        if uploaded_to_gs:
            # GS upload finalization uses empty POST body.
            assert self.request.method == 'POST'
            if self.request.headers.get('content-length'):
                return self.send_error('Expecting empty POST.')
            content = None
        else:
            # Datastore upload uses PUT.
            assert self.request.method == 'PUT'
            if self.request.headers.get('content-length'):
                content = self.request.body
            else:
                content = ''

        # Info about corresponding GS entry (if it exists).
        gs_bucket = config.settings().gs_bucket
        key = model.entry_key(namespace, hash_key)

        # Verify the data while at it since it's already in memory but before
        # storing it in memcache and datastore.
        if content is not None:
            # Verify advertised hash matches the data.
            try:
                hex_digest, expanded_size = hash_content(content, namespace)
                if hex_digest != hash_key:
                    raise ValueError('Hash and data do not match, '
                                     '%d bytes (%d bytes expanded)' %
                                     (len(content), expanded_size))
                if expanded_size != item_size:
                    raise ValueError(
                        'Advertised data length (%d) and actual data length (%d) '
                        'do not match' % (item_size, expanded_size))
            except ValueError as err:
                return self.send_error('Inline verification failed.\n%s' % err)
            # Successfully verified!
            compressed_size = len(content)
            needs_verification = False
        else:
            # Fetch size of the stored file.
            file_info = gcs.get_file_info(gs_bucket, key.id())
            if not file_info:
                # TODO(maruel): Do not fail yet. If the request got up to here, the file
                # is likely there but the service may have trouble fetching the metadata
                # from GS.
                return self.send_error(
                    'File should be in Google Storage.\nFile: \'%s\' Size: %d.'
                    % (key.id(), item_size))
            compressed_size = file_info.size
            needs_verification = True

        # Data is here and it's too large for DS, so put it in GS. It is likely
        # between MIN_SIZE_FOR_GS <= len(content) < MIN_SIZE_FOR_DIRECT_GS
        if content is not None and len(content) >= MIN_SIZE_FOR_GS:
            if not gcs.write_file(gs_bucket, key.id(), [content]):
                # Returns 503 so the client automatically retries.
                return self.send_error('Unable to save the content to GS.',
                                       http_code=503)
            # It's now in GS.
            uploaded_to_gs = True

        # Can create entity now, everything appears to be legit.
        entry = model.new_content_entry(
            key=key,
            is_isolated=is_isolated,
            compressed_size=compressed_size,
            expanded_size=-1 if needs_verification else item_size,
            is_verified=not needs_verification)

        # If it's not in GS then put it inline.
        if not uploaded_to_gs:
            assert content is not None and len(content) < MIN_SIZE_FOR_GS
            entry.content = content

        # Start saving *.isolated into memcache iff its content is available and
        # it's not in Datastore: there's no point in saving inline blobs in memcache
        # because ndb already memcaches them.
        memcache_store_future = None
        if (content is not None and entry.content is None and entry.is_isolated
                and entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED):
            # 'async' is a reserved word on Python >= 3.7, so the keyword
            # argument is passed through a dict; the call itself is unchanged.
            memcache_store_future = model.save_in_memcache(
                namespace, hash_key, content, **{'async': True})

        try:
            # If entry was already verified above (i.e. it is a small inline entry),
            # store it right away, possibly overriding existing entity. Most of
            # the time it is a new entry anyway (since clients try to upload only
            # new entries).
            if not needs_verification:
                entry.put()
            else:
                # For large entries (that require expensive verification) be more
                # careful and check that it is indeed a new entity. No need to do it in
                # transaction: a race condition would lead to redundant verification
                # task enqueued, no big deal.
                existing = entry.key.get()
                if existing:
                    if existing.is_verified:
                        logging.info('Entity exists and already verified')
                    else:
                        logging.info('Entity exists, but not yet verified')
                else:
                    # New entity. Store it and enqueue verification task, transactionally.
                    task_queue_host = utils.get_task_queue_host()

                    def run():
                        entry.put()
                        taskqueue.add(url='/internal/taskqueue/verify/%s' %
                                      entry.key.id(),
                                      queue_name='verify',
                                      headers={'Host': task_queue_host},
                                      transactional=True)

                    datastore_utils.transaction(run)

            # TODO(vadimsh): Fill in details about the entry, such as expiration time.
            self.send_json({'entry': {}})

            # Log stats. A conditional expression binds looser than '+', so
            # the 'GS; ' prefix is attached to the key id in its own branch:
            # inline entries are reported as 'inline', the others as
            # 'GS; <key id>'.
            if entry.content is not None:
                where = 'inline'
            else:
                where = 'GS; %s' % entry.key.id()
            stats.add_entry(stats.STORE, entry.compressed_size, where)

        finally:
            # Do not keep dangling futures. Note that error here is ignored,
            # memcache is just an optimization.
            if memcache_store_future:
                memcache_store_future.wait()
  def storage_helper(request, uploaded_to_gs):
    """Implement shared logic between store_inline and finalize_gs.

    Validates the upload ticket, builds the content entry it describes and
    stores that entry: directly for inline content, or through
    store_and_enqueue_verify_task() for content uploaded to Google Storage.

    Arguments:
      request: either StorageRequest or FinalizeRequest.
      uploaded_to_gs: bool.

    Returns:
      PushPing(ok=True) once the entry has been stored.

    Raises:
      endpoints.BadRequestException: the ticket is missing or invalid, the
          file is not in Google Storage, or the inline content does not match
          the digest and size embedded in the ticket.
      endpoints.InternalServerErrorException: the entry could not be stored.
    """
    if not request.upload_ticket:
      raise endpoints.BadRequestException(
          'Upload ticket was empty or not provided.')
    try:
      embedded = TokenSigner.validate(
          request.upload_ticket, UPLOAD_MESSAGES[uploaded_to_gs])
    except (auth.InvalidTokenError, ValueError) as error:
      raise endpoints.BadRequestException(
          'Ticket validation failed: %s' % error.message)

    digest = embedded['d'].encode('utf-8')
    is_isolated = bool(int(embedded['i']))
    namespace = embedded['n']
    size = int(embedded['s'])
    key = entry_key_or_error(namespace, digest)

    if uploaded_to_gs:
      # Ensure that file info is uploaded to GS first.
      file_info = gcs.get_file_info(config.settings().gs_bucket, key.id())
      if not file_info:
        logging.debug('%s', digest)
        raise endpoints.BadRequestException(
            'File should be in Google Storage.\nFile: \'%s\' Size: %d.' % (
                key.id(), size))
      content = None
      compressed_size = file_info.size
    else:
      content = request.content
      compressed_size = len(content)

    # Look if the entity was already stored. Alert in that case but ignore it.
    if key.get():
      # TODO(maruel): Handle these more gracefully.
      logging.warning('Overwritting ContentEntry\n%s', digest)

    entry = model.new_content_entry(
        key=key,
        is_isolated=is_isolated,
        compressed_size=compressed_size,
        expanded_size=size,
        is_verified=not uploaded_to_gs,
        content=content,
    )

    if not uploaded_to_gs:
      # Assert that embedded content is the data sent by the request. The
      # content is hashed once and the result reused in the error message.
      logging.debug('%s', digest)
      actual = hash_content(content, namespace)
      if (digest, size) != actual:
        raise endpoints.BadRequestException(
            'Embedded digest does not match provided data: '
            '(digest, size): (%r, %r); expected: %r' % (digest, size, actual))
      # Report datastore failures the same way the GS branch below does
      # instead of letting the raw exception escape.
      try:
        entry.put()
      except datastore_errors.Error as e:
        raise endpoints.InternalServerErrorException(
            'Unable to store the entity: %s.' % e.__class__.__name__)
    else:
      # Enqueue verification task transactionally as the entity is stored.
      try:
        store_and_enqueue_verify_task(entry, utils.get_task_queue_host())
      except (
          datastore_errors.Error,
          runtime.apiproxy_errors.CancelledError,
          runtime.apiproxy_errors.DeadlineExceededError,
          runtime.apiproxy_errors.OverQuotaError,
          runtime.DeadlineExceededError,
          taskqueue.Error) as e:
        raise endpoints.InternalServerErrorException(
            'Unable to store the entity: %s.' % e.__class__.__name__)

    stats.add_entry(
        stats.STORE, entry.compressed_size,
        'GS; %s' % entry.key.id() if uploaded_to_gs else 'inline')
    return PushPing(ok=True)