# Example 1
    def test_validate_data_pass(self):
        """Valid user data passes validation when restricted fields are allowed."""
        # Call under test
        result = ds.validate_data(
            self.valid_user_data, True, self.user_kind)
        self.assertTrue(result)
# Example 2
    def test_validate_data_field_restricted_fail(self):
        """Valid user data fails validation when restricted fields are disallowed."""
        # Call under test
        result = ds.validate_data(
            self.valid_user_data, False, self.user_kind)
        self.assertFalse(result)
# Example 3
    def test_validate_data_field_not_allowed_fail(self):
        """Data containing fields outside the allowed set fails validation."""
        # Unknown fields first; valid data overwrites any colliding keys.
        payload = {'bad': 'data', 'evil': 'monsters'}
        payload.update(self.valid_user_data)

        # Call under test
        self.assertFalse(
            ds.validate_data(payload, True, self.user_kind))
# Example 4
    def _create_datastore_entry(self,
                                datastore_client,
                                filename,
                                original_filename,
                                user=None,
                                upload_session_id=None,
                                image_bucket=None,
                                cc0_agree=False,
                                public_agree=False):
        """
        Build and return a datastore entity describing an uploaded file.

        `filename` is the generated name (<uuid>.<ext>) that keys the
        entity; `original_filename` is the name supplied by the uploader.
        The `exif_json` property is excluded from indexing. Returns None
        (after logging) when the assembled entity fails validation.
        """
        # Key the entity by the generated filename.
        entity_key = datastore_client.key(self._datastore_kind, filename)
        entity = self.datastore.Entity(key=entity_key,
                                       exclude_from_indexes=["exif_json"])

        # Populate the entity; `user` is stored as a key reference into the
        # user kind.
        entity['user'] = datastore_client.key(self._user_datastore_kind, user)
        entity['upload_session_id'] = upload_session_id
        entity['confirmed_by_user'] = False
        entity['original_filename'] = original_filename
        entity['in_gcs'] = False
        entity['processed'] = False
        entity['uploaded_date'] = self.datetime.now()
        entity['image_bucket'] = image_bucket
        entity['cc0_agree'] = cc0_agree
        entity['public_agree'] = public_agree

        if ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
            return entity
        self.logger.error('Invalid entity: {0}'.format(entity))
        return None
# Example 5
 def test_validate_data_entity_not_allowed(self):
     """Validation must reject a datastore kind that is not allowed."""
     # Call under test
     self.assertFalse(
         ds.validate_data(self.valid_user_data, True, 'evilEntity'))
# Example 6
def _record_status_in_datastore(fpaths, success):
    """
    Records GCS upload status in datastore for each file in fpaths.

    `success` is a boolean corresponding to whether the files in fpaths were
    uploaded successfully to GCS or not: entities are marked with
    {'in_gcs': True} on success, {'gcs_upload_failed': True} otherwise.
    Entities missing from datastore are inserted first. Returns the list of
    files whose status could not be recorded (all of fpaths on any error,
    an empty list on success).
    """
    def _fail(error_msg):
        # Log one summary line and report every path as failed; this
        # function is all-or-nothing.
        msg = 'Failed to record {0} upload statuses in datastore: {1}'
        logging.error(msg.format(len(fpaths), error_msg))
        return fpaths

    try:
        client = _get_client('datastore')
    except CouldNotObtainCredentialsError as e:
        return _fail('Could not obtain datastore credentials: {0}'.format(e))

    keys = [_get_ds_key_for_file(p) for p in fpaths]

    try:
        entities = client.get_multi(keys)
    except GCloudError as e:
        return _fail(str(e))

    # Add new entities as necessary (get_multi silently drops missing keys).
    if len(entities) != len(fpaths):
        entities = _insert_missing_entities(entities, fpaths)

    # `is False` deliberately: only an explicit failure marks
    # gcs_upload_failed; any other value records the file as in GCS.
    if success is False:
        new_data = {'gcs_upload_failed': True}
    else:
        new_data = {'in_gcs': True}

    # We only want to validate the new data, as there may be restricted
    # fields in the entities we pulled from datastore. All new data must
    # be validated as follows before adding it to the entities that will be
    # pushed to datastore.
    if not ds.validate_data(new_data, allow_restricted_fields=False,
                            kind=ds.DATASTORE_PHOTO):
        return _fail('Invalid data: {0}'.format(new_data))

    # Update entities in place with the status fields.
    for entity in entities:
        entity.update(new_data)

    # Save to datastore
    try:
        client.put_multi(entities)
    except GCloudError as e:
        return _fail(str(e))

    return list()
# Example 7
def _upload_single(fpath):
    """
    Uploads single file to GCS. Returns a tuple containing
    (upload_success, fpath).

    A datastore entity keyed by the file's basename must already exist or
    the upload is aborted. TIFF and raw camera files additionally get a
    JPEG derivative uploaded via _upload_derived. Files flagged as adult
    content are recorded as such, deleted locally, and not uploaded.
    """
    try:
        bucket_name = config.GCS_BUCKET
        success = True
        # Obtain both clients up front; failure of either aborts this file.
        try:
            datastore_client = _get_client('datastore')
        except CouldNotObtainCredentialsError as e:
            error_msg = 'Could not obtain datastore credentials: {0}'.format(str(e))
            logging.error(error_msg)
            return False, fpath

        try:
            client = _get_client('storage')
        except CouldNotObtainCredentialsError as e:
            logging.error('Could not obtain GCS credentials: {0}'.format(str(e)))
            return False, fpath
        bucket = client.bucket(bucket_name)

        # Verify that filename already exists as key in database
        filename = os.path.basename(fpath)

        key = datastore_client.key('Photo', filename)
        entity = datastore_client.get(key)
        if entity is None:
            logging.error('Failed to find file: ' + filename)
            return False, fpath

        try:
            # PIL path: for TIFFs, also produce and upload a JPEG derivative.
            img = Image.open(fpath)
            format_ = img.format
            if format_  == 'TIFF':
                output_file = "/tmp/" + filename + ".jpg"
                img.save(output_file)
                _upload_derived(output_file, bucket)
                os.unlink(output_file)
        except IOError as e:
            # PIL could not read the file; fall back to rawkit for raw
            # camera formats, converting raw -> TIFF -> JPEG derivative.
            try:
                with Raw(filename=fpath) as raw:
                    tiff_output_file = "/tmp/" + filename + ".tiff"
                    raw.save(filename=tiff_output_file)
            except Exception as e:
                logging.error("Failed to parse file with PIL or rawkit: %s (error: %s)" % (fpath, str(e)))
                # move the file out of the pending tree so it won't be processed next loop
                try:
                    shutil.move(fpath, "/tmp/%s" % os.path.basename(fpath))
                except IOError as e:
                    logging.error("Unable to move bad file out of the way: %s (error: %s)" % (fpath, str(e)))
                return False, fpath
            # Reopen via the intermediate TIFF so `img` below refers to the
            # decoded raw image; clean up both temp files afterwards.
            jpg_output_file = "/tmp/" + filename + ".jpg"
            img = Image.open(tiff_output_file)
            img.save(jpg_output_file)
            _upload_derived(jpg_output_file, bucket)
            os.unlink(tiff_output_file)
            os.unlink(jpg_output_file)
            format_ = 'raw'

        # Adult-content images are flagged in datastore, removed locally,
        # and never uploaded to GCS.
        is_adult = _check_adult_content(img)
        if is_adult:
            entity.update({'is_adult_content': True})
            datastore_client.put(entity)
            os.unlink(fpath)
            return False, fpath
        else:
            entity.update({'is_adult_content': False})

        # Initialize review bookkeeping for the new photo.
        metadata = {}
        metadata['reviews'] = []
        metadata['num_reviews'] = 0
        entity.update(metadata)

        # Merge extracted image metadata, then persist only if the combined
        # entity still validates.
        width = img.width
        height = img.height
        metadata = _extract_image_metadata(filename, format_, width, height, bucket_name)
        entity.update(metadata)
        if not ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
            logging.error('Invalid entity: {0}'.format(entity))
            return False, fpath
        datastore_client.put(entity)

        # Upload the original file itself.
        blob = storage.Blob(os.path.basename(fpath), bucket)

        try:
            blob.upload_from_filename(fpath)
            msg = 'Successfully uploaded {0} to GCS'
            logging.debug(msg.format(fpath))

        except Exception as e:
            msg = '{0} failed to upload to GCS: {1}'
            logging.error(msg.format(fpath, e))
            success = False

        return success, fpath
    except Exception as e:
        # Catch-all so one bad file cannot crash the caller's upload loop.
        logging.error("Failed to upload file: %s" % fpath)
        traceback.print_exc(limit=50)
        logging.error("Returning false")
        return False, fpath
# Example 8
    def _upload_post(self):
        """
        Request handler for upload POST requests. Accepts files in the POST
        request body and saves them to the local file system as
        <self._dir>/<uuid>.<file extension as uploaded>
        Creates datastore record for uploaded files and indicates that they
        have yet to be uploaded to Cloud Storage.
        Returns constants.HTTP_ERROR status if an error occurs with a
            short message.
        Returns constants.HTTP_OK response on success with no message.
        Returns constants.HTTP_OOM status if the server is under too much
            load to handle the request.
        """

        # Fetch the user's identifier from the request, which
        # contains the oauth2 creds.
        try:
            token = flask.request.headers['X-IDTOKEN']
        except Exception as e:
            return flask.Response('Missing credential token header', 405)

        # Verify the Google OAuth2 ID token and require a Google issuer
        # before trusting the identity it carries.
        try:
            idinfo = client.verify_id_token(token, sk.GOOGLE_OAUTH2_CLIENT_ID)
            if idinfo['iss'] not in [
                    'accounts.google.com', 'https://accounts.google.com'
            ]:
                raise crypt.AppIdentityError("Wrong issuer.")
        except crypt.AppIdentityError:
            # Invalid token
            return flask.Response('Application identity error.', 405)
        user_id = idinfo['sub']
        # Hash of the token subject is used as the datastore user key.
        # NOTE(review): hashlib.sha256() requires bytes in Python 3 and
        # idinfo['sub'] is a str — this looks Python-2-era; confirm the
        # deployed runtime or encode first.
        hash_id = hashlib.sha256(user_id).hexdigest()

        # NOTE(review): content_type is read here but never used below.
        content_type = self.request.content_type

        # All datastore entities for this request are committed in one batch.
        datastore_client = self.datastore.Client(self.config['PROJECT_ID'])
        batch = datastore_client.batch()

        for file_ in self.request.files.getlist('datafile'):
            # In case an error occured and the filename was not sent
            # filename = self.request.headers.get(constants.HTTP_FILENAME_HEADER) or ''
            filename = file_.filename
            # Rename to <uuid>.<original extension> to avoid collisions.
            ext = self.os.path.splitext(filename)[1].strip('.')
            name = '.'.join((str(uuid4()), ext))

            entity = self._create_datastore_entry(datastore_client,
                                                  name,
                                                  user=hash_id)

            # A None entity means validation failed; the file is still saved
            # locally and the upload daemon creates an entity later.
            if entity:
                batch.put(entity)

            # Local file system file paths. Data is written under a temp
            # name and renamed once complete so readers never see a
            # partially-written file.
            local_file = self.os.path.join(self._dir, name)
            temp_file = local_file + self._file_not_ready_suffix

            try:
                self._write_data_to_file(temp_file, file_)

            except IOError as e:
                self.logger.error(
                    'Error occured writing to file: {0}'.format(e))
                return self.Response('Failed to save file.',
                                     status=constants.HTTP_ERROR)

            except ClientDisconnected:
                # This error will occur if Gunicorn/Flask fails to respond before
                # the load balancer times the request out. In this situation, the
                # load balancer responds to the client with a 502 error, however
                # this is not detected by Flask until it reads to the end of the
                # buffered request from nginx at which point this exception will be
                # thrown by the call to self.request.stream.read in
                # _write_data_to_file.
                try:
                    # Best-effort removal of the partial temp file.
                    self.util.retry_func(self.os.remove, self._retrys,
                                         (OSError, ), temp_file)
                except RuntimeError:
                    pass
                self.logger.error('Upload failed. Client disconnected.')
                return self.Response(status=constants.HTTP_ERROR)

            try:
                # Publish the finished file under its final name (retried).
                self.util.retry_func(self.os.rename, self._retrys, (OSError, ),
                                     temp_file, local_file)
            except RuntimeError:
                return self.Response('Failed to save file.',
                                     status=constants.HTTP_ERROR)

        try:
            batch.commit()
        except FailedToSaveToDatastoreError as e:
            self.logger.error(str(e))
            # Continue on for now. The upload daemon will create a datastore
            # entity if it doesn't find one, it will just be missing the user
            # information.

        return self.Response(status=constants.HTTP_OK)

        def _create_datastore_entry(self,
                                    datastore_client,
                                    filename,
                                    user=None):
            """
            Creates and returns a datastore entity for a file with name filename
            uploaded by user user.
            Filename should be the new name we have generated that is <uuid>.<ext>.
            Raises FailedToSaveToDatastoreError if unsuccessful.
            """

        # Create datastore entity
        key = datastore_client.key(self._datastore_kind, filename)
        entity = self.datastore.Entity(key=key)

        # Set datastore entity data
        entity['user'] = datastore_client.key(self._user_datastore_kind, user)
        entity['in_gcs'] = False
        entity['processed'] = False
        entity['uploaded_date'] = self.datetime.now()

        if not ds.validate_data(entity, True, ds.DATASTORE_PHOTO):
            msg = 'Invalid entity: {0}'.format(entity)
            return None

        return entity