def publish_input_data(self, expected_input_id: str, metadata: InputMetadata, input_data_stream: BinaryIO) -> None:
    """Store the contents of ``input_data_stream`` under ``expected_input_id``.

    The stream is copied in ``READ_BUFFER_SIZE`` chunks into a temporary file
    created in ``self.temp_data_dir`` while its SHA-256 digest is computed.
    Only when the hex digest equals ``expected_input_id`` is the temporary
    file renamed to ``self.input_file(expected_input_id)``.

    If the destination file already exists, nothing is read: the stream is
    closed and the method returns immediately.

    Args:
        expected_input_id: SHA-256 hex digest the streamed data must have.
        metadata: When ``metadata.has_all_args()`` is true, it is associated
            with the input id through ``self._store_input_id`` after the data
            has been stored.
        input_data_stream: Binary stream to read the input data from.

    Raises:
        IncorrectInputIDException: The digest of the data read differs from
            ``expected_input_id``. The temporary file is removed.
    """
    input_file_path = self.input_file(expected_input_id)
    if os.path.exists(input_file_path):
        # Already stored: there is no need to consume the stream.
        input_data_stream.close()
        return

    file_hash = hashlib.sha256()
    fd, temp_file_path = tempfile.mkstemp(dir=self.temp_data_dir)
    try:
        with os.fdopen(fd, 'wb') as f:
            bytes_read = 0
            while True:
                data = input_data_stream.read(READ_BUFFER_SIZE)
                bytes_read += len(data)
                log.debug(f'{bytes_read} bytes of input read')
                if not data:
                    break
                f.write(data)
                file_hash.update(data)

        input_id = file_hash.hexdigest()
        if input_id != expected_input_id:
            raise IncorrectInputIDException()
        os.rename(temp_file_path, input_file_path)
    except BaseException:
        # Also clean up on KeyboardInterrupt/SystemExit so an interrupted
        # upload does not leave a stray temporary file behind. The error is
        # always re-raised, never swallowed.
        try:
            os.remove(temp_file_path)
        except FileNotFoundError:
            # A missing temp file must not mask the original error.
            pass
        raise

    # Done outside the try block on purpose: once the rename has succeeded
    # the temporary path no longer exists, so a failure here must not trigger
    # the temp-file cleanup (which would raise FileNotFoundError and hide the
    # real exception).
    if metadata.has_all_args():
        self._store_input_id(metadata, input_id)
def check_input_data(self, input_id: str, metadata: InputMetadata) -> bool:
    """Report whether the data for ``input_id`` is already stored.

    When the input file exists and ``metadata.has_all_args()`` is true, the
    metadata is (re)associated with ``input_id`` via ``self._store_input_id``
    before returning.

    Returns:
        ``True`` if ``self._input_file_exists(input_id)`` is true, ``False``
        otherwise.
    """
    if not self._input_file_exists(input_id):
        return False

    if metadata.has_all_args():
        # A blob can change its timestamp without changing its hash (after a
        # `touch`, for instance). Without refreshing the stored association
        # the timestamp check would keep returning false and the client would
        # rebuild the tarball every single time. This has happened before.
        self._store_input_id(metadata, input_id)
    return True