def test_get_mime_type(filename, expected_mime_type):
    """Check that ``get_mime_type`` reports the expected type for a sample file.

    The sample is looked up by ``filename`` inside the ``sample_files``
    directory that sits in the parent of this file's directory, and is
    opened in binary mode.
    """
    sample_path = Path(__file__).parent.parent / 'sample_files' / filename
    with sample_path.open('rb') as sample:
        detected = get_mime_type(sample)
    assert detected == expected_mime_type
def upload_document(service_id):
    """Validate an uploaded document, store it and return its download URLs.

    The document is accepted in one of two forms:

    * a JSON body with the base64-encoded content under ``document`` and an
      optional boolean ``is_csv`` flag, or
    * a multipart upload with the file under ``document`` (``is_csv`` is
      then always ``False``).

    Returns a ``(response, status)`` tuple:

    * 201 with ``id``, ``direct_file_url``, ``url`` and ``mimetype`` on success
    * 400 when no document is supplied, the JSON document is not valid
      base64, ``is_csv`` is not a boolean, the detected MIME type is not in
      ``ALLOWED_FILE_TYPES``, or the virus scan rejects the file
    * 503 when the antivirus client raises ``AntivirusError``

    Aborts with 413 when the decoded JSON document is longer than
    ``MAX_CONTENT_LENGTH``.
    """
    no_document_error = jsonify(error='No document upload'), 400

    if request.is_json:
        payload = request.json
        # A JSON body that is not an object (null, list, string, ...) cannot
        # carry a document; reject it instead of crashing on the lookups below.
        if not isinstance(payload, dict) or 'document' not in payload:
            return no_document_error

        try:
            raw_content = b64decode(payload['document'])
        except (ValueError, TypeError):
            # binascii.Error (a ValueError) for malformed base64, TypeError
            # for non-string values: a client error, not a server error.
            return jsonify(error='Document is not base64 encoded'), 400

        if len(raw_content) > current_app.config['MAX_CONTENT_LENGTH']:
            abort(413)

        file_data = BytesIO(raw_content)
        is_csv = payload.get('is_csv', False)
    else:
        if 'document' not in request.files:
            return no_document_error

        file_data = request.files['document']
        is_csv = False

    if not isinstance(is_csv, bool):
        return jsonify(error='Value for is_csv must be a boolean'), 400

    mimetype = get_mime_type(file_data)
    if mimetype not in current_app.config['ALLOWED_FILE_TYPES']:
        allowed_file_types = ', '.join(
            sorted({
                f"'.{x}'"
                for x in current_app.config['ALLOWED_FILE_TYPES'].values()
            }))
        return jsonify(
            error=
            f"Unsupported file type '{mimetype}'. Supported types are: {allowed_file_types}"
        ), 400

    # Our mimetype auto-detection resolves CSV content as text/plain, so we use
    # an explicit POST body parameter `is_csv` from the caller to resolve it as text/csv
    if is_csv and mimetype == 'text/plain':
        mimetype = 'text/csv'

    if current_app.config['ANTIVIRUS_ENABLED']:
        try:
            virus_free = antivirus_client.scan(file_data)
        except AntivirusError:
            return jsonify(error='Antivirus API error'), 503

        if not virus_free:
            return jsonify(error="File did not pass the virus scan"), 400

    document = document_store.put(service_id, file_data, mimetype=mimetype)

    return jsonify(status='ok',
                   document={
                       'id': document['id'],
                       'direct_file_url': get_direct_file_url(
                           service_id=service_id,
                           document_id=document['id'],
                           key=document['encryption_key'],
                           mimetype=mimetype,
                       ),
                       'url': get_frontend_download_url(
                           service_id=service_id,
                           document_id=document['id'],
                           key=document['encryption_key'],
                       ),
                       'mimetype': mimetype,
                   }), 201
def test_get_mime_type(filename, expected_mime_type):
    """Check that ``get_mime_type`` reports the expected type for a sample file.

    The sample named ``filename`` is read from ``sample_files_path`` in
    binary mode. The file is opened in a ``with`` block so the handle is
    closed even when the assertion fails.
    """
    with open(sample_files_path / filename, 'rb') as file:
        assert get_mime_type(file) == expected_mime_type
def test_get_mime_type_zip_xml(filename, expected_mime_type, mocker):
    """Check ``get_mime_type`` when libmagic reports a sample as a plain ZIP.

    ``app.utils.magic.from_buffer`` is patched to always return
    ``'application/zip'``; the test then asserts that ``get_mime_type``
    still yields ``expected_mime_type`` for the sample file.
    """
    # libmagic on PaaS sometimes mistakes docx, xlsx, etc. files as ZIPs
    mocker.patch('app.utils.magic.from_buffer', return_value='application/zip')

    # Use a context manager so the sample file handle is always closed.
    with open(sample_files_path / filename, 'rb') as file:
        assert get_mime_type(file) == expected_mime_type
def upload_document(service_id):
    """Store a multipart-uploaded document and describe it in the response.

    Expects the file under ``document`` in ``request.files``; the optional
    form fields ``filename`` and ``sending_method`` are read from
    ``request.form``.

    Returns a ``(response, status)`` tuple:

    * 400 when no ``document`` file is present, or when
      ``mime_type_is_allowed`` rejects the detected MIME type
    * 201 with the document id, download URLs, MLWR sid, filename, sending
      method, MIME type, file size and file extension otherwise
    """
    if 'document' not in request.files:
        return jsonify(error='No document upload'), 400

    uploaded = request.files['document']
    mimetype = get_mime_type(uploaded)

    if not mime_type_is_allowed(mimetype, service_id):
        message = "Unsupported document type '{}'. Supported types are: {}".format(
            mimetype, current_app.config['ALLOWED_MIME_TYPES'])
        return jsonify(error=message), 400

    file_content = uploaded.read()
    filename = request.form.get('filename')

    # All suffixes of the lower-cased name, joined, without the leading dot;
    # None when there is no filename or it contains no dot.
    file_extension = (
        ''.join(pathlib.Path(filename.lower()).suffixes).lstrip('.')
        if filename and '.' in filename
        else None
    )

    # Our MIME type auto-detection resolves CSV content as text/plain,
    # so we fix that if possible
    named_as_csv = (filename or '').lower().endswith('.csv')
    if named_as_csv and mimetype == 'text/plain':
        mimetype = 'text/csv'

    sending_method = request.form.get('sending_method')

    # Only submit to MLWR when a host is configured; otherwise report False.
    sid = upload_to_mlwr(file_content) if current_app.config["MLWR_HOST"] else False

    document = document_store.put(service_id,
                                  file_content,
                                  sending_method=sending_method,
                                  mimetype=mimetype)
    document_id = document['id']
    encryption_key = document['encryption_key']

    payload = {
        'id': document_id,
        'direct_file_url': get_direct_file_url(
            service_id=service_id,
            document_id=document_id,
            key=encryption_key,
            sending_method=sending_method,
        ),
        'url': get_frontend_download_url(
            service_id=service_id,
            document_id=document_id,
            key=encryption_key,
            filename=filename,
        ),
        'mlwr_sid': sid,
        'filename': filename,
        'sending_method': sending_method,
        'mime_type': mimetype,
        'file_size': len(file_content),
        'file_extension': file_extension,
    }
    return jsonify(status='ok', document=payload), 201