def get(self, access_token):
    """
    ---
    summary: Download file
    description: |
        Returns file contents based on provided file download token.
    security:
        - bearerAuth: []
    tags:
        - download
    parameters:
        - in: path
          name: access_token
          schema:
            type: string
          required: true
          description: File download token
    responses:
        200:
            description: File contents
            content:
              application/octet-stream:
                schema:
                  type: string
                  format: binary
        403:
            description: When file download token is no longer valid
    """
    # Resolve the download token to a file; a falsy result is reported to
    # the client as an expired token.
    file_obj = File.get_by_download_token(access_token)
    if file_obj:
        # Serve the stored contents as an attachment named after the SHA256.
        return send_file(
            file_obj.get_path(),
            attachment_filename=file_obj.sha256,
            as_attachment=True,
        )
    raise Forbidden("Download token expired, please re-request download.")
def post(self, identifier):
    """
    ---
    summary: Generate file download token
    description: |
        Returns download token for given file.
    security:
        - bearerAuth: []
    tags:
        - file
    parameters:
        - in: path
          name: identifier
          description: Requested file identifier (SHA256/MD5/SHA1/SHA512)
          schema:
            type: string
    responses:
        200:
            description: File download token, valid for 60 seconds
            content:
              application/json:
                schema: FileDownloadTokenResponseSchema
        404:
            description: |
                When file doesn't exist, object is not a file or user
                doesn't have access to this object.
    """
    file_obj = File.access(identifier)
    if file_obj is None:
        raise NotFound("Object not found")

    # The token is produced as bytes and must be decoded before it is
    # placed in the JSON response.
    token = file_obj.generate_download_token()
    response_schema = FileDownloadTokenResponseSchema()
    return response_schema.dump({"token": token.decode()})
def post(self, identifier):
    """
    ---
    summary: Get file download URL
    description: |
        Returns download URL for given file.
    security:
        - bearerAuth: []
    tags:
        - download
    parameters:
        - in: path
          name: identifier
          description: Requested file identifier (SHA256/MD5/SHA1/SHA512)
          schema:
            type: string
    responses:
        200:
            description: Absolute download URL for the sample, valid for 60 seconds
            content:
              application/json:
                schema: DownloadURLResponseSchema
        404:
            description: When file doesn't exist, object is not a file or user
                doesn't have access to this object.
    """
    file_obj = File.access(identifier)
    if file_obj is None:
        raise NotFound("Object not found")

    token = file_obj.generate_download_token()
    response_schema = DownloadURLResponseSchema()
    # Build the URL of the token-based download endpoint for this token.
    download_url = api.relative_url_for(
        DownloadResource, access_token=token.decode()
    )
    return response_schema.dump({"url": download_url})
def post(self, remote_name, identifier):
    """
    ---
    summary: Push file from local to remote instance
    description: |
        Push file from the local instance to the remote instance
    security:
        - bearerAuth: []
    tags:
        - remotes
    parameters:
        - in: path
          name: remote_name
          description: Name of remote instance
          schema:
            type: string
        - in: path
          name: identifier
          description: Object identifier (SHA256/SHA512/SHA1/MD5)
          schema:
            type: string
    requestBody:
        required: false
        description: Additional options for object push
        content:
          application/json:
            schema: RemoteOptionsRequestSchema
    responses:
        200:
            description: Information about pushed file
        404:
            description: |
                When the name of the remote instance is not figured in the
                application config or object doesn't exist
    """
    db_object = File.access(identifier)
    if db_object is None:
        raise NotFound("Object not found")

    remote = RemoteAPI(remote_name)
    options = loads_schema(
        request.get_data(as_text=True), RemoteOptionsRequestSchema()
    )

    # The content stream has to be released even when the remote request
    # or the JSON decoding of its response fails; otherwise the handle
    # stays open for the lifetime of the worker.
    file_stream = db_object.open()
    try:
        response = remote.request(
            "POST",
            "file",
            files={
                "file": (db_object.file_name, file_stream),
                "options": (None, json.dumps(options)),
            },
        ).json()
    finally:
        File.close(file_stream)

    logger.info(
        f"{db_object.type} pushed remote",
        extra={"dhash": db_object.dhash, "remote_name": remote_name},
    )
    return response
def post(self, remote_name, identifier):
    """
    ---
    summary: Pulls file from remote to local instance
    description: |
        Pulls file from the remote instance to the local instance
    security:
        - bearerAuth: []
    tags:
        - remotes
    parameters:
        - in: path
          name: remote_name
          description: Name of remote instance
          schema:
            type: string
        - in: path
          name: identifier
          description: Object identifier (SHA256/SHA512/SHA1/MD5)
          schema:
            type: string
    responses:
        200:
            description: Information about pulled file
            content:
              application/json:
                schema: FileItemResponseSchema
        404:
            description: |
                When the name of the remote instance is not figured in the
                application config
        409:
            description: Object exists yet but has different type
    """
    remote = RemoteAPI(remote_name)

    # First request fetches the object's metadata (only the file name is
    # used), the second one streams the contents.
    metadata = remote.request("GET", f"file/{identifier}")
    file_name = metadata.json()["file_name"]
    download = remote.request("GET", f"file/{identifier}/download", stream=True)

    chunk_size = 2**16
    with SpooledTemporaryFile() as file_stream:
        for chunk in download.iter_content(chunk_size=chunk_size):
            file_stream.write(chunk)
        # Rewind so the contents are read from the beginning.
        file_stream.seek(0)

        # Share with every group of the requesting user except "public".
        non_public_groups = [
            group for group in g.auth_user.groups if group.name != "public"
        ]
        try:
            item, is_new = File.get_or_create(
                file_name=file_name,
                file_stream=file_stream,
                share_with=non_public_groups,
            )
        except ObjectTypeConflictError:
            raise Conflict("Object already exists locally and is not a file")

        return self.create_pulled_object(item, is_new)
def _create_object(self, spec, parent, share_with, metakeys):
    """
    Create a File object from the uploaded "file" form field.

    The upload's file name and stream are passed to ``File.get_or_create``
    together with ``parent``, ``share_with`` and ``metakeys``; its result
    is returned unchanged. ``spec`` is accepted but not used here.

    Raises ``Conflict`` when the object exists with a different type and
    ``BadRequest`` when the uploaded file is empty.
    """
    upload = request.files["file"]
    try:
        created = File.get_or_create(
            upload.filename,
            upload.stream,
            parent=parent,
            share_with=share_with,
            metakeys=metakeys,
        )
    except ObjectTypeConflictError:
        raise Conflict("Object already exists and is not a file")
    except EmptyFileError:
        raise BadRequest("File cannot be empty")
    return created
def _create_object(self, spec, parent, share_with, attributes, analysis_id, tags):
    """
    Create a File object from the uploaded "file" form field.

    The upload's file name and stream are passed to ``File.get_or_create``
    together with ``parent``, ``share_with``, ``attributes``,
    ``analysis_id`` and ``tags``; its result is returned unchanged.
    ``spec`` is accepted but not used here.

    Raises ``Conflict`` when the object exists with a different type and
    ``BadRequest`` when the uploaded file is empty.
    """
    upload = request.files["file"]
    try:
        created = File.get_or_create(
            upload.filename,
            upload.stream,
            parent=parent,
            share_with=share_with,
            attributes=attributes,
            analysis_id=analysis_id,
            tags=tags,
        )
    except ObjectTypeConflictError:
        raise Conflict("Object already exists and is not a file")
    except EmptyFileError:
        raise BadRequest("File cannot be empty")
    return created
def on_created_file(self, file: File):
    """
    Create Drakvuf Sandbox job for newly added file.

    Files whose type does not start with "PE32 executable" are skipped.
    For the others the contents are uploaded to the configured Drakvuf
    instance and the returned task identifier is stored on the file as
    the 'drakvuf' attribute.

    Raises whatever ``requests`` raises for transport errors and for
    non-success HTTP statuses (via ``raise_for_status``).
    """
    if not file.file_type.startswith("PE32 executable"):
        logger.debug("Not a PE executable, ignoring.")
        return

    # Get contents path from "uploads" directory
    contents_path = file.get_path()

    # Send request to Drakvuf Sandbox. The context manager guarantees the
    # sample handle is closed even if the upload fails.
    with open(contents_path, "rb") as sample:
        response = requests.post(
            f"{config.drakvuf.drakvuf_url}/upload",
            files={"file": (file.sha256 + ".exe", sample)},
            data={"timeout": config.drakvuf.timeout},
        )
    response.raise_for_status()

    # Get task identifier
    task_uid = response.json()["task_uid"]

    # Add it as attribute to the file
    file.add_metakey("drakvuf", task_uid, check_permissions=False)
    logger.info("File sent to Drakvuf. Analysis identifier: %s", task_uid)
def get(self, access_token):
    """
    ---
    summary: Download file
    description: |
        Returns file contents based on provided file download token.
    tags:
        - deprecated
    parameters:
        - in: path
          name: access_token
          schema:
            type: string
          required: true
          description: File download token
    responses:
        200:
            description: File contents
            content:
              application/octet-stream:
                schema:
                  type: string
                  format: binary
        403:
            description: When file download token is no longer valid
        503:
            description: |
                Request canceled due to database statement timeout.
    """
    file_obj = File.get_by_download_token(access_token)
    if file_obj:
        # Hand the iterator over the file contents to the response and
        # name the attachment after the file's SHA256.
        body = file_obj.iterate()
        disposition = f"attachment; filename={file_obj.sha256}"
        return Response(
            body,
            content_type="application/octet-stream",
            headers={"Content-disposition": disposition},
        )
    raise Forbidden("Download token expired, please re-request download.")
def send_file_to_karton(file: File) -> str:
    """
    Submit a file to Karton for analysis and record the analysis id.

    A "sample"/"raw" task is produced with the file as the ``sample``
    resource and its metakeys as ``attributes``. The task priority is
    NORMAL when the requesting user's feed quality is "high" and LOW
    otherwise. After the task is sent, its root UID is stored on the file
    as the "karton" metakey.

    :param file: File object to be analyzed
    :return: Root UID of the produced Karton task
    """
    tmpfile = None
    try:
        try:
            path = file.get_path()
        except Exception:
            # If get_path doesn't work: download content to NamedTemporaryFile
            # It won't work if we use S3 storage and try to reanalyze
            # existing file (not uploaded within the same request).
            tmpfile = tempfile.NamedTemporaryFile()
            file_stream = file.open()
            try:
                shutil.copyfileobj(file_stream, tmpfile)
            finally:
                # Release the source stream even when the copy fails.
                File.close(file_stream)
            # The resource is read back through its path, so buffered data
            # must reach the disk before the path is handed over.
            tmpfile.flush()
            path = tmpfile.name

        producer = Producer(
            identity="karton.mwdb",
            config=KartonConfig(config.karton.config_path),
        )

        feed_quality = g.auth_user.feed_quality
        task_priority = (
            TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW
        )
        task = Task(
            headers={"type": "sample", "kind": "raw", "quality": feed_quality},
            payload={
                "sample": Resource(file.file_name, path=path, sha256=file.sha256),
                "attributes": file.get_metakeys(
                    as_dict=True, check_permissions=False
                ),
            },
            priority=task_priority,
        )
        producer.send_task(task)
    finally:
        # Remove the temporary copy whether or not the task was sent.
        if tmpfile is not None:
            tmpfile.close()

    file.add_metakey("karton", task.root_uid, check_permissions=False)
    logger.info("File sent to karton with %s", task.root_uid)
    return task.root_uid
def get(self, identifier):
    """
    ---
    summary: Download file
    description: |
        Returns file contents.
        Optionally accepts file download token to get the file via
        direct link (without Authorization header)
    security:
        - bearerAuth: []
    tags:
        - file
    parameters:
        - in: path
          name: identifier
          schema:
            type: string
          description: File identifier (SHA256/SHA512/SHA1/MD5)
        - in: query
          name: token
          schema:
            type: string
          description: |
            File download token for direct link purpose
          required: false
    responses:
        200:
            description: File contents
            content:
              application/octet-stream:
                schema:
                  type: string
                  format: binary
        403:
            description: |
                When file download token is no longer valid
                or was generated for different object
        404:
            description: |
                When file doesn't exist, object is not a file or user
                doesn't have access to this object.
    """
    access_token = request.args.get("token")
    if not access_token:
        # No token supplied: an authenticated user with access is required.
        if not g.auth_user:
            raise Unauthorized("Not authenticated.")
        file_obj = File.access(identifier)
        if file_obj is None:
            raise NotFound("Object not found")
    else:
        # Token supplied: it must resolve to a file and that file must be
        # the one named by the identifier in the path.
        file_obj = File.get_by_download_token(access_token)
        if not file_obj:
            raise Forbidden("Download token expired, please re-request download.")
        file_hashes = (
            file_obj.sha1,
            file_obj.sha256,
            file_obj.sha512,
            file_obj.md5,
        )
        if identifier not in file_hashes:
            raise Forbidden(
                "Download token doesn't apply to the chosen object. "
                "Please re-request download."
            )

    body = file_obj.iterate()
    disposition = f"attachment; filename={file_obj.sha256}"
    return Response(
        body,
        content_type="application/octet-stream",
        headers={"Content-disposition": disposition},
    )
def on_created_file(self, file: File) -> None:
    """
    Send a newly created file to Karton unless it already has a "karton"
    metakey, in which case it is only logged and skipped.
    """
    metakeys = file.get_metakeys(as_dict=True, check_permissions=False)
    if "karton" not in metakeys:
        send_file_to_karton(file)
        return
    logger.info("Analyzed artifact - not sending to karton")
def post(self, remote_name, identifier):
    """
    ---
    summary: Pulls file from remote to local instance
    description: |
        Pulls file from the remote instance to the local instance
    security:
        - bearerAuth: []
    tags:
        - remotes
    parameters:
        - in: path
          name: remote_name
          description: Name of remote instance
          schema:
            type: string
        - in: path
          name: identifier
          description: Object identifier (SHA256/SHA512/SHA1/MD5)
          schema:
            type: string
    requestBody:
        required: false
        description: Additional options for object pull
        content:
          application/json:
            schema: RemoteOptionsRequestSchema
    responses:
        200:
            description: Information about pulled file
            content:
              application/json:
                schema: FileItemResponseSchema
        404:
            description: |
                When the name of the remote instance is not figured in the
                application config
        409:
            description: Object exists yet but has different type
        503:
            description: |
                Request canceled due to database statement timeout.
    """
    remote = RemoteAPI(remote_name)

    # First request fetches the object's metadata (only the file name is
    # used), the second one streams the contents.
    metadata = remote.request("GET", f"file/{identifier}")
    file_name = metadata.json()["file_name"]
    download = remote.request("GET", f"file/{identifier}/download", stream=True)

    # Request options decide which groups the pulled file is shared with.
    options = loads_schema(
        request.get_data(as_text=True), RemoteOptionsRequestSchema()
    )
    share_with = get_shares_for_upload(options["upload_as"])

    chunk_size = 2**16
    with SpooledTemporaryFile() as file_stream:
        for chunk in download.iter_content(chunk_size=chunk_size):
            file_stream.write(chunk)
        # Rewind so the contents are read from the beginning.
        file_stream.seek(0)

        try:
            item, is_new = File.get_or_create(
                file_name=file_name,
                file_stream=file_stream,
                share_with=share_with,
            )
        except ObjectTypeConflictError:
            raise Conflict("Object already exists locally and is not a file")

        return self.create_pulled_object(item, is_new)
def on_created_file(self, file: File):
    """
    Attach a "virustotal" metakey holding the file's MD5 to a new file.
    """
    metakey_name = "virustotal"
    file.add_metakey(metakey_name, file.md5)
def post(self, identifier: str) -> Tuple[Dict[str, Any], int]:
    # Look up the file by identifier and submit it to Karton; respond with
    # the root UID of the produced task and HTTP status 200.
    file_obj = File.access(identifier)
    if file_obj is None:
        raise NotFound("Object not found or is not a file")

    analysis_uid = send_file_to_karton(file_obj)
    body = {"uid": analysis_uid}
    return body, 200