def test_advanced_bad_state(self):
    """
    Test with a non-States state value
    """
    ds = Dataset(owner="test", controller="me", name="too")
    ds.add()
    with pytest.raises(DatasetBadParameterType):
        ds.advance("notStates")
def test_advanced_illegal(self):
    """
    Test that we can't advance to a state that's not a successor to the
    initial state.
    """
    ds = Dataset(owner="drb", controller="sam", name="fio")
    ds.add()
    with pytest.raises(DatasetBadStateTransition):
        ds.advance(States.EXPIRED)
def test_advanced_good(self):
    """
    Test advancing the state of a dataset
    """
    ds = Dataset(owner="drb", controller="frodo", name="precious")
    ds.add()
    ds.advance(States.UPLOADED)
    assert ds.state == States.UPLOADED
    assert ds.created <= ds.transition
def test_advanced_terminal(self):
    """
    Test that we can't advance from a terminal state
    """
    ds = Dataset(
        owner="drb", controller="gimli", name="fio", state=States.EXPIRED
    )
    ds.add()
    with pytest.raises(DatasetTerminalStateViolation):
        ds.advance(States.UPLOADING)
def test_construct(self):
    """
    Test dataset constructor
    """
    ds = Dataset(owner="drb", controller="frodo", name="fio")
    ds.add()
    assert ds.owner == "drb"
    assert ds.controller == "frodo"
    assert ds.name == "fio"
    assert ds.state == States.UPLOADING
    assert ds.md5 is None
    assert ds.created <= ds.transition
    assert ds.id is not None
    assert "drb|frodo|fio" == str(ds)
def test_attach_filename(self):
    """
    Test that we can create a dataset using the full tarball file path.
    """
    ds1 = Dataset(
        owner="webb", path="/foo/bilbo/rover.tar.xz", state=States.QUARANTINED
    )
    ds1.add()
    ds2 = Dataset.attach(controller="bilbo", name="rover")
    assert ds2.owner == ds1.owner
    assert ds2.controller == ds1.controller
    assert ds2.name == ds1.name
    assert ds2.state == States.QUARANTINED
    assert ds2.md5 is ds1.md5
    assert ds2.id is ds1.id
def test_attach_controller_path(self):
    """
    Test that we can attach using controller and name to a dataset
    created by file path.
    """
    ds1 = Dataset(
        owner="pete",
        path="/foo/frodo/rover.tar.xz",
        state=States.INDEXING,
    )
    ds1.add()
    ds2 = Dataset.attach(controller="frodo", name="rover")
    assert ds2.owner == ds1.owner
    assert ds2.controller == ds1.controller
    assert ds2.name == ds1.name
    assert ds2.state == States.INDEXING
    assert ds2.md5 is ds1.md5
    assert ds2.id is ds1.id
def test_attach_exists(self):
    """
    Test that we can attach to a dataset
    """
    ds1 = Dataset(
        owner="drb", controller="frodo", name="fido", state=States.INDEXING
    )
    ds1.add()
    ds2 = Dataset.attach(controller="frodo", name="fido", state=States.INDEXED)
    assert ds2.owner == ds1.owner
    assert ds2.controller == ds1.controller
    assert ds2.name == ds1.name
    assert ds2.state == States.INDEXED
    assert ds2.md5 is ds1.md5
    assert ds2.id is ds1.id
def test_lifecycle(self):
    """
    Advance a dataset through the entire lifecycle using the state
    transition dict.
    """
    ds = Dataset(owner="dave", controller="bilbo", name="Fred")
    ds.add()
    assert ds.state == States.UPLOADING
    beenthere = [ds.state]
    while ds.state in Dataset.transitions:
        advances = Dataset.transitions[ds.state]
        for n in advances:
            if n not in beenthere:
                next_state = n
                break
        else:
            break  # avoid infinite reindex loop!
        beenthere.append(next_state)
        ds.advance(next_state)
        assert ds.state == next_state
    lifecycle = ",".join([s.name for s in beenthere])
    assert (
        lifecycle
        == "UPLOADING,UPLOADED,UNPACKING,UNPACKED,INDEXING,INDEXED,EXPIRING,EXPIRED"
    )
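# A minimal, self-contained sketch of the successor table that test_lifecycle
# walks above. `LifecycleStates` and `TRANSITIONS` are illustrative stand-ins
# defined here only for this sketch; they are NOT the real States enum or
# Dataset.transitions. The real table may hold extra edges (e.g., re-indexing,
# which is why the test guards against revisiting a state).
from enum import Enum, auto


class LifecycleStates(Enum):
    UPLOADING = auto()
    UPLOADED = auto()
    UNPACKING = auto()
    UNPACKED = auto()
    INDEXING = auto()
    INDEXED = auto()
    EXPIRING = auto()
    EXPIRED = auto()


# Happy-path chain implied by the lifecycle string asserted in test_lifecycle:
# each state maps to a list of legal successors.
TRANSITIONS = {
    LifecycleStates.UPLOADING: [LifecycleStates.UPLOADED],
    LifecycleStates.UPLOADED: [LifecycleStates.UNPACKING],
    LifecycleStates.UNPACKING: [LifecycleStates.UNPACKED],
    LifecycleStates.UNPACKED: [LifecycleStates.INDEXING],
    LifecycleStates.INDEXING: [LifecycleStates.INDEXED],
    LifecycleStates.INDEXED: [LifecycleStates.EXPIRING],
    LifecycleStates.EXPIRING: [LifecycleStates.EXPIRED],
}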
def put(self, controller):
    # FIXME: This should be assigned from the decoded authorization token
    username = "******"

    if not request.headers.get("filename"):
        self.logger.debug(
            "Tarfile upload: PUT operation failed due to missing filename header"
        )
        abort(
            400,
            message="Missing filename header, "
            "PUT operation requires a filename header to name the uploaded file",
        )
    filename = secure_filename(request.headers.get("filename"))

    if not request.headers.get("Content-MD5"):
        self.logger.debug(
            f"Tarfile upload: PUT operation failed due to missing md5sum header for file {filename}"
        )
        abort(
            400,
            message="Missing md5sum header, "
            "PUT operation requires md5sum of an uploaded file in header",
        )
    md5sum = request.headers.get("Content-MD5")

    self.logger.debug("Receiving file: {}", filename)
    if not self.allowed_file(filename):
        self.logger.debug(
            f"Tarfile upload: Bad file extension received for file {filename}"
        )
        abort(400, message="File extension not supported. Only .xz")

    try:
        content_length = int(request.headers.get("Content-Length"))
    except ValueError:
        self.logger.debug(
            f"Tarfile upload: Invalid Content-Length header, not an integer for file {filename}"
        )
        abort(400, message="Invalid Content-Length header, not an integer")
    except Exception:
        self.logger.debug(
            f"Tarfile upload: No Content-Length header value found for file {filename}"
        )
        abort(400, message="Missing required Content-Length header")
    else:
        if content_length > self.max_content_length:
            self.logger.debug(
                f"Tarfile upload: Content-Length exceeded maximum upload size allowed. File: {filename}"
            )
            abort(
                400,
                message=f"Payload body too large, {content_length:d} bytes, "
                f"maximum size should be less than or equal to "
                f"{humanize.naturalsize(self.max_content_length)}",
            )
        elif content_length == 0:
            self.logger.debug(
                f"Tarfile upload: Content-Length header value is 0 for file {filename}"
            )
            abort(400, message="Upload failed, Content-Length received in header is 0")

    path = self.upload_directory / controller
    path.mkdir(exist_ok=True)
    tar_full_path = Path(path, filename)
    md5_full_path = Path(path, f"{filename}.md5")
    bytes_received = 0

    # TODO: Need real user from PUT!
    # Create a tracking dataset object; it'll begin in UPLOADING state
    try:
        dataset = Dataset(
            owner=username, controller=controller, path=tar_full_path, md5=md5sum
        )
        dataset.add()
    except Exception:
        self.logger.exception("unable to create dataset for {}", filename)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    self.logger.info("Uploading file {} to {}", filename, dataset)

    with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
        chunk_size = 4096
        self.logger.debug("Writing chunks")
        hash_md5 = hashlib.md5()

        try:
            while True:
                chunk = request.stream.read(chunk_size)
                bytes_received += len(chunk)
                if len(chunk) == 0 or bytes_received > content_length:
                    break
                ofp.write(chunk)
                hash_md5.update(chunk)
        except Exception:
            self.logger.exception(
                "Tarfile upload: There was something wrong uploading {}", filename
            )
            abort(500, message=f"There was something wrong uploading {filename}")

        if bytes_received != content_length:
            self.logger.debug(
                f"Tarfile upload: Bytes received does not match the Content-Length header value for file {filename}"
            )
            message = (
                f"Bytes received ({bytes_received}) does not match the Content-Length"
                f" header ({content_length}), upload failed"
            )
            abort(400, message=message)
        elif hash_md5.hexdigest() != md5sum:
            self.logger.debug(
                f"Tarfile upload: md5sum check failed for file {filename}"
            )
            message = f"md5sum check failed for {filename}, upload failed"
            abort(400, message=message)

        # First write the .md5
        try:
            with md5_full_path.open("w") as md5fp:
                md5fp.write(f"{md5sum} {filename}\n")
        except Exception:
            try:
                os.remove(md5_full_path)
            except FileNotFoundError:
                pass
            except Exception as exc:
                self.logger.warning(
                    "Failed to remove .md5 %s when trying to clean up: %s",
                    md5_full_path,
                    exc,
                )
            self.logger.exception("Failed to write .md5 file, '%s'", md5_full_path)
            raise

        # Then create the final filename link to the temporary file.
        try:
            os.link(ofp.name, tar_full_path)
        except Exception:
            try:
                os.remove(md5_full_path)
            except Exception as exc:
                self.logger.warning(
                    "Failed to remove .md5 %s when trying to clean up: %s",
                    md5_full_path,
                    exc,
                )
            self.logger.exception(
                "Failed to link tar ball '%s' to '%s'", ofp.name, tar_full_path
            )
            raise

    try:
        dataset.advance(States.UPLOADED)
    except Exception:
        self.logger.exception("Unable to finalize {}", dataset)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    response = jsonify(dict(message="File successfully uploaded"))
    response.status_code = 201
    return response
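# Hedged sketch of a client for this endpoint, showing the headers put()
# validates above: "filename", "Content-MD5", and "Content-Length". The URL
# path is hypothetical (the route registration is not shown in this file);
# substitute the real endpoint. `upload_tarball` is an illustrative helper,
# not part of the server code.
import hashlib
from pathlib import Path

import requests  # assumed available in the client environment


def upload_tarball(server: str, controller: str, tarball: Path) -> requests.Response:
    """Send a tarball to the PUT endpoint with the headers it requires."""
    data = tarball.read_bytes()
    headers = {
        "filename": tarball.name,
        # The server recomputes this digest over the streamed bytes and
        # rejects the upload on mismatch.
        "Content-MD5": hashlib.md5(data).hexdigest(),
        "Content-Length": str(len(data)),
    }
    # Hypothetical route; the real prefix depends on how the API is mounted.
    return requests.put(
        f"{server}/api/upload/{controller}", headers=headers, data=data
    )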