def test_lifecycle(self, db_session, create_user):
    """Walk a dataset through the entire state lifecycle.

    At each step, advance to the first successor state from the
    transition table that hasn't been visited yet, then verify the
    complete path taken.
    """
    dataset = Dataset(owner=create_user.username, controller="frodo", name="fio")
    dataset.add()
    assert dataset.state == States.UPLOADING
    visited = [dataset.state]
    while dataset.state in Dataset.transitions:
        # Pick the first successor we haven't already been through; if
        # every successor is a repeat, stop (avoids the infinite
        # reindex loop).
        successor = next(
            (s for s in Dataset.transitions[dataset.state] if s not in visited),
            None,
        )
        if successor is None:
            break
        visited.append(successor)
        dataset.advance(successor)
        assert dataset.state == successor
    lifecycle = ",".join([s.name for s in visited])
    assert (
        lifecycle
        == "UPLOADING,UPLOADED,UNPACKING,UNPACKED,INDEXING,INDEXED,EXPIRING,EXPIRED"
    )
def test_advanced_bad_state(self, db_session, create_user):
    """advance() rejects a value that is not a States member."""
    dataset = Dataset(owner=create_user.username, controller="frodo", name="fio")
    dataset.add()
    with pytest.raises(DatasetBadParameterType):
        dataset.advance("notStates")
def test_construct_bad_state(self, db_session, create_user):
    """The constructor rejects a state value that is not a States member."""
    with pytest.raises(DatasetBadParameterType):
        Dataset(
            owner=create_user.username,
            controller="frodo",
            name="fio",
            state="notStates",
        )
def test_dataset_survives_user(self, db_session, create_user):
    """A Dataset is not automatically removed when its owning user is
    deleted; it can still be attached afterwards.
    """
    owner = create_user
    dataset = Dataset(owner=owner.username, controller="frodo", name="fio")
    dataset.add()
    User.delete(username=owner.username)
    found = Dataset.attach(controller="frodo", name="fio")
    assert found == dataset
def test_advanced_illegal(self, db_session, create_user):
    """advance() refuses a state that is not a successor of the
    dataset's current state.
    """
    dataset = Dataset(owner=create_user.username, controller="frodo", name="fio")
    dataset.add()
    with pytest.raises(DatasetBadStateTransition):
        dataset.advance(States.EXPIRED)
def test_advanced_good(self, db_session, create_user):
    """advance() moves the dataset to a legal successor state and
    stamps a transition time no earlier than creation.
    """
    dataset = Dataset(owner=create_user.username, controller="frodo", name="fio")
    dataset.add()
    dataset.advance(States.UPLOADED)
    assert dataset.state == States.UPLOADED
    assert dataset.created <= dataset.transition
def test_advanced_terminal(self, db_session, create_user):
    """advance() refuses to leave a terminal state."""
    dataset = Dataset(
        owner=create_user.username,
        controller="frodo",
        name="fio",
        state=States.EXPIRED,
    )
    dataset.add()
    with pytest.raises(DatasetTerminalStateViolation):
        dataset.advance(States.UPLOADING)
def test_construct(self, db_session, create_user):
    """The Dataset constructor fills in the expected attributes and
    defaults (initial UPLOADING state, no md5, assigned id).
    """
    owner = create_user
    dataset = Dataset(owner=owner.username, controller="frodo", name="fio")
    dataset.add()
    assert dataset.owner == owner
    assert dataset.controller == "frodo"
    assert dataset.name == "fio"
    assert dataset.state == States.UPLOADING
    assert dataset.md5 is None
    assert dataset.created <= dataset.transition
    assert dataset.id is not None
    assert "test(1)|frodo|fio" == str(dataset)
def attach_dataset(monkeypatch, pbench_token, create_user):
    """Mock a Dataset attach call to return an object.

    We mock the Dataset.attach method to avoid DB access here, however
    the user authentication mechanism is not yet mocked so we have to
    look up User data.

    BUGFIX: the User.query username literals had been corrupted to
    "******"; restored to "drb" and "test" based on the fixture
    comments and the dataset dict keys below.

    Args:
        monkeypatch: patching fixture
        pbench_token: create a "drb" user for testing
        create_user: create a "test" user
    """
    datasets = {}
    drb = User.query(username="drb")  # Created by pbench_token fixture
    test = User.query(username="test")  # Created by create_user fixture
    datasets["drb"] = Dataset(
        owner=drb,
        owner_id=drb.id,
        controller="node",
        name="drb",
        access="private",
        id=1,
    )
    datasets["test"] = Dataset(
        owner=test,
        owner_id=test.id,
        controller="node",
        name="test",
        access="private",
        id=2,
    )

    def attach_dataset(controller: str, name: str) -> Dataset:
        # Datasets are keyed by name in this mock; the controller is
        # ignored deliberately.
        return datasets[name]

    with monkeypatch.context() as m:
        m.setattr(Dataset, "attach", attach_dataset)
        yield
def test_attach_filename(self, db_session, create_user):
    """A dataset created from a full tarball file path can be attached
    by the controller/name derived from that path.
    """
    created = Dataset(
        owner="test", path="/foo/bilbo/rover.tar.xz", state=States.QUARANTINED
    )
    created.add()
    attached = Dataset.attach(controller="bilbo", name="rover")
    assert attached.owner == created.owner
    assert attached.controller == created.controller
    assert attached.name == created.name
    assert attached.state == States.QUARANTINED
    assert attached.md5 is created.md5
    assert attached.id is created.id
def test_attach_controller_path(self, db_session, create_user):
    """Attaching by controller and name finds a dataset that was
    created by file path.
    """
    created = Dataset(
        owner=create_user.username,
        path="/foo/frodo/fio.tar.xz",
        state=States.INDEXING,
    )
    created.add()
    attached = Dataset.attach(controller="frodo", name="fio")
    assert attached.owner == created.owner
    assert attached.controller == created.controller
    assert attached.name == created.name
    assert attached.state == States.INDEXING
    assert attached.md5 is created.md5
    assert attached.id is created.id
def test_attach_exists(self, db_session, create_user):
    """Attaching to an existing dataset works, and a state passed to
    attach() advances the dataset.
    """
    created = Dataset(
        owner=create_user.username,
        controller="frodo",
        name="fio",
        state=States.INDEXING,
    )
    created.add()
    attached = Dataset.attach(controller="frodo", name="fio", state=States.INDEXED)
    assert attached.owner == created.owner
    assert attached.controller == created.controller
    assert attached.name == created.name
    assert attached.state == States.INDEXED
    assert attached.md5 is created.md5
    assert attached.id is created.id
def put(self, filename: str):
    """Handle an authenticated tarball upload (HTTP PUT).

    Validates the file name, extension, and the controller /
    Content-MD5 / Content-Length headers; streams the request body to
    a temporary file while computing its MD5; verifies length and
    checksum; writes the companion ``.md5`` file; hard-links the final
    tarball name; and tracks progress with a Dataset record
    (UPLOADING -> UPLOADED).

    Args:
        filename: base name of the tarball being uploaded; must not
            contain any path components.

    Aborts with an appropriate HTTP status on any validation or
    internal failure; returns a JSON success (201) or
    already-exists (200) response otherwise.
    """
    try:
        username = Auth.token_auth.current_user().username
    except Exception as exc:
        # BUGFIX: the logger placeholder had been corrupted to
        # '******'; restored to the {}-style placeholder so `exc` is
        # actually rendered.
        self.logger.error("Error verifying the username: '{}'", exc)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    # The tarball name must be a bare file name, not a path.
    if os.path.basename(filename) != filename:
        msg = "File must not contain a path"
        self.logger.warning(
            "{} for user = {}, file = {!a}", msg, username, filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    if not self.supported_file_extension(filename):
        msg = f"File extension not supported, must be {self.ALLOWED_EXTENSION}"
        self.logger.warning(
            "{} for user = {}, file = {!a}", msg, username, filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    controller = request.headers.get("controller")
    if not controller:
        msg = "Missing required controller header"
        self.logger.warning("{} for user = {}, file = {!a}", msg, username, filename)
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    if validate_hostname(controller) != 0:
        msg = "Invalid controller header"
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    md5sum = request.headers.get("Content-MD5")
    if not md5sum:
        msg = "Missing required Content-MD5 header"
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    # Validate Content-Length; `status` stays OK only when the header
    # is present, numeric, and within the allowed range.
    status = HTTPStatus.OK
    try:
        content_length = int(request.headers["Content-Length"])
    except KeyError:
        msg = "Missing required Content-Length header"
        status = HTTPStatus.LENGTH_REQUIRED
    except ValueError:
        # BUGFIX: `content_length` is unbound when int() raises, so
        # referencing it here raised NameError instead of producing the
        # intended 400; report the raw header value instead.
        msg = (
            "Invalid Content-Length header, not an integer"
            f" ({request.headers['Content-Length']})"
        )
        status = HTTPStatus.BAD_REQUEST
    else:
        if not (0 < content_length <= self.max_content_length):
            msg = "Content-Length ({}) must be greater than 0 and no greater than {}".format(
                content_length, humanize.naturalsize(self.max_content_length)
            )
            # Too large gets 413; zero or negative gets 400.
            status = (
                HTTPStatus.REQUEST_ENTITY_TOO_LARGE
                if 0 < content_length
                else HTTPStatus.BAD_REQUEST
            )
    if status != HTTPStatus.OK:
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(status, message=msg)

    path = self.upload_directory / controller
    path.mkdir(exist_ok=True)
    tar_full_path = Path(path, filename)
    # BUGFIX: the companion .md5 file is named after the tarball; the
    # literal "(unknown)" was a corrupted f-string placeholder.
    md5_full_path = Path(path, f"{filename}.md5")
    bytes_received = 0

    # Create a tracking dataset object; it'll begin in UPLOADING state
    try:
        dataset = Dataset(
            owner=username, controller=controller, path=tar_full_path, md5=md5sum
        )
        dataset.add()
    except DatasetDuplicate:
        self.logger.info(
            "Dataset already exists, user = {}, ctrl = {!a}, file = {!a}",
            username,
            controller,
            filename,
        )
        response = jsonify(dict(message="Dataset already exists"))
        response.status_code = HTTPStatus.OK
        return response
    except Exception as exc:
        self.logger.error(
            "unable to create dataset, '{}', for user = {}, ctrl = {!a}, file = {!a}",
            exc,
            username,
            controller,
            filename,
        )
        abort(
            HTTPStatus.INTERNAL_SERVER_ERROR,
            message="INTERNAL ERROR",
        )

    # Stale on-disk artifacts mean an earlier upload left debris even
    # though no Dataset row exists: an internal inconsistency.
    if tar_full_path.is_file() or md5_full_path.is_file():
        self.logger.error(
            "Dataset, or corresponding md5 file, already present; tar {} ({}), md5 {} ({})",
            tar_full_path,
            "present" if tar_full_path.is_file() else "missing",
            md5_full_path,
            "present" if md5_full_path.is_file() else "missing",
        )
        abort(
            HTTPStatus.INTERNAL_SERVER_ERROR,
            message="INTERNAL ERROR",
        )

    self.logger.info(
        "Uploading file {!a} (user = {}, ctrl = {!a}) to {}",
        filename,
        username,
        controller,
        dataset,
    )

    with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
        hash_md5 = hashlib.md5()
        try:
            # Stream the body in fixed-size chunks, hashing as we go.
            # Stop early if the client sends more than it promised; the
            # length check below will then reject the upload.
            while True:
                chunk = request.stream.read(self.CHUNK_SIZE)
                bytes_received += len(chunk)
                if len(chunk) == 0 or bytes_received > content_length:
                    break
                ofp.write(chunk)
                hash_md5.update(chunk)
        except OSError as exc:
            if exc.errno == errno.ENOSPC:
                self.logger.error(
                    "Not enough space on volume, {}, for upload:"
                    " user = {}, ctrl = {!a}, file = {!a}",
                    path,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.INSUFFICIENT_STORAGE)
            else:
                msg = "Unexpected error encountered during file upload"
                self.logger.error(
                    "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                    msg,
                    exc,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")
        except Exception as exc:
            msg = "Unexpected error encountered during file upload"
            self.logger.error(
                "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                exc,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

        if bytes_received != content_length:
            msg = (
                "Bytes received do not match Content-Length header"
                f" (expected {content_length}; received {bytes_received})"
            )
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)
        elif hash_md5.hexdigest() != md5sum:
            msg = (
                "MD5 checksum does not match Content-MD5 header"
                f" ({hash_md5.hexdigest()} != {md5sum})"
            )
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        # First write the .md5
        # BUGFIX: record the tarball's name alongside the digest
        # (md5sum file format); the filename placeholder had been
        # corrupted to the literal "(unknown)".
        try:
            md5_full_path.write_text(f"{md5sum} {filename}\n")
        except Exception as exc:
            try:
                md5_full_path.unlink(missing_ok=True)
            except Exception as md5_exc:
                self.logger.error(
                    "Failed to remove .md5 {} when trying to clean up: '{}'",
                    md5_full_path,
                    md5_exc,
                )
            self.logger.error(
                "Failed to write .md5 file, '{}': '{}'", md5_full_path, exc
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

        # Then create the final filename link to the temporary file.
        # The link must be made before the `with` block closes, because
        # NamedTemporaryFile deletes its file on exit.
        try:
            os.link(ofp.name, tar_full_path)
        except Exception as exc:
            try:
                md5_full_path.unlink()
            except Exception as md5_exc:
                self.logger.error(
                    "Failed to remove .md5 {} when trying to clean up: {}",
                    md5_full_path,
                    md5_exc,
                )
            # BUGFIX: log the link destination (tar_full_path); the
            # original logged md5_full_path here by mistake.
            self.logger.error(
                "Failed to rename tar ball '{}' to '{}': '{}'",
                ofp.name,
                tar_full_path,
                exc,
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    try:
        dataset.advance(States.UPLOADED)
    except Exception as exc:
        self.logger.error("Unable to finalize {}, '{}'", dataset, exc)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    response = jsonify(dict(message="File successfully uploaded"))
    response.status_code = HTTPStatus.CREATED
    return response
def test_construct_bad_owner(self):
    """The constructor rejects an owner username that does not exist."""
    with pytest.raises(DatasetBadParameterType):
        Dataset(owner="notme", controller="frodo", name="fio")