def storage(args):
    job_manager = JobManager()
    params = '-d {}'.format(args.d)
    for j in job_manager.get_running_job_list():
        if j.type == JobType.STORAGELOAD:
            logger.info("Cannot start storage load job for 'sar', a job of this type is already running.")
            print("-1")
            return
    print(job_manager.add_job(JobType.STORAGELOAD, params))
    logger.info("Started storage load job for 'sar'.")
    sys.exit(0)

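# A minimal sketch of how the `args` namespace consumed by storage() could be
# wired up with argparse. The sub-command layout and help strings are
# assumptions; only the '-d' flag is taken from the function above.
#
#   import argparse
#   parser = argparse.ArgumentParser(description='Node stress utilities.')
#   sub = parser.add_subparsers()
#   storage_parser = sub.add_parser('storage', help='Start a storage load job.')
#   storage_parser.add_argument('-d', type=int, required=True, help='Duration in seconds.')
#   storage_parser.set_defaults(func=storage)
#   args = parser.parse_args()
#   args.func(args)
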
def client(args):
    job_manager = JobManager()
    params = '-d {} -t {} -b {} -m {} -p {}'.format(args.d, args.t, args.b, args.m, args.p)
    for j in job_manager.get_running_job_list():
        if j.type == JobType.CLIENTSTRESS:
            logger.info("Cannot start client stress job for 'rados', a job of this type is already running.")
            print("-1")
            return
    print(job_manager.add_job(JobType.CLIENTSTRESS, params))
    logger.info("Started client stress job for 'rados'.")
    sys.exit(0)

def delete_cache(args):
    if not configuration().get_node_info().is_storage:
        print("-1")
        return
    job_manager = JobManager()
    params = '-disk_name {}'.format(args.disk_name)
    for j in job_manager.get_running_job_list():
        if j.type in (JobType.DELETEOSD, JobType.ADDDISK,
                      JobType.ADDJOURNAL, JobType.DELETEJOURNAL,
                      JobType.ADDCACHE, JobType.DELETECACHE):
            logger.info("Cannot start delete job for cache of disk {}. There are running jobs.".format(args.disk_name))
            print("-1")
            return
    print(job_manager.add_job(JobType.DELETECACHE, params))
    logger.info("Started delete job for cache of disk {}.".format(args.disk_name))
    sys.exit()

def start(self, type, duration_sec, threads, clients, pool, cleanup):
    job_manager = JobManager()
    clients = ",".join(clients)
    for j in job_manager.get_running_job_list():
        if j.type == JobType.BENCHMANAGER:
            logger.info("Cannot start benchmark manager, a job is already running.")
            return -1
    cleanup_val = 1 if cleanup else 0
    params = '-d {} -t {} -type {} -c {} -p {} --cleanup {}'.format(
        duration_sec, threads, type, clients, pool, cleanup_val)
    id = job_manager.add_job(JobType.BENCHMANAGER, params)
    return id

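# A minimal usage sketch for start(). The owning class name `Benchmark` and
# all argument values below are illustrative assumptions, not taken from the
# source:
#
#   bench = Benchmark()
#   job_id = bench.start(type='write', duration_sec=60, threads=16,
#                        clients=['node-1', 'node-2'], pool='rbd', cleanup=True)
#   if job_id == -1:
#       print('A benchmark manager job is already running.')
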
def add_journal(args):
    if not configuration().get_node_info().is_storage:
        print("-1")
        return
    job_manager = JobManager()
    params = '-disk_name {}'.format(args.disk_name)
    for j in job_manager.get_running_job_list():
        if j.type in (JobType.DELETEOSD, JobType.ADDDISK,
                      JobType.ADDJOURNAL, JobType.DELETEJOURNAL):
            logger.info("Cannot start add journal job for disk {}. There are running jobs.".format(args.disk_name))
            print("-1")
            return
    print(job_manager.add_job(JobType.ADDJOURNAL, params))
    logger.info("Started add journal job for disk {}.".format(args.disk_name))
    sys.exit()

def add_osd(args):
    if not configuration().get_node_info().is_storage:
        print("-1")
        return
    job_manager = JobManager()

    # If no journal and no cache:
    if str(args.journal) == "" and str(args.cache) == "":
        params = '-disk_name {}'.format(args.disk_name)
    # If journal but no cache:
    elif str(args.journal) != "" and str(args.cache) == "":
        params = '-disk_name {} -journal {}'.format(args.disk_name, args.journal)
    # If cache but no journal:
    elif str(args.journal) == "" and str(args.cache) != "" and str(args.cache_type) != "":
        params = '-disk_name {} -cache {} -cache_type {}'.format(
            args.disk_name, args.cache, args.cache_type)
    # If both journal and cache:
    else:
        params = '-disk_name {} -journal {} -cache {} -cache_type {}'.format(
            args.disk_name, args.journal, args.cache, args.cache_type)

    # Refuse to start if any conflicting disk job is already running:
    for j in job_manager.get_running_job_list():
        if j.type in (JobType.DELETEOSD, JobType.ADDDISK,
                      JobType.ADDJOURNAL, JobType.DELETEJOURNAL,
                      JobType.ADDCACHE, JobType.DELETECACHE):
            logger.info("Cannot start add job to create OSD for disk {}. There are running jobs.".format(args.disk_name))
            print("-1")
            return
    print(job_manager.add_job(JobType.ADDDISK, params))
    logger.info("Started add OSD job for disk {}.".format(args.disk_name))
    sys.exit()

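# Illustrative mapping from inputs to the params string built above. The disk,
# journal, and cache device names and the cache_type value are hypothetical:
#
#   journal=""     cache=""                             -> '-disk_name sdb'
#   journal="sdc"  cache=""                             -> '-disk_name sdb -journal sdc'
#   journal=""     cache="sdd"  cache_type="writecache" -> '-disk_name sdb -cache sdd -cache_type writecache'
#   journal="sdc"  cache="sdd"  cache_type="writecache" -> '-disk_name sdb -journal sdc -cache sdd -cache_type writecache'
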
def delete_disk(self, disk_id, pool):
    ceph_api = CephAPI()
    consul_api = ConsulAPI()
    ls = ceph_api.get_disks_meta_for_pool(pool)
    # Resolve the disk's display status; None means the disk was not found.
    disk_status = None
    try:
        for disk in ls:
            if disk_id == disk.id:
                if disk and hasattr(disk, "paths") and not disk.paths:
                    disk_status = DisplayDiskStatus.unattached
                elif disk and hasattr(disk, "paths") and disk.paths:
                    data = consul_api.find_disk(disk.id)
                    if data is not None:
                        disk_status = DisplayDiskStatus.started
                        if str(data.Flags) == "1":
                            disk_status = DisplayDiskStatus.stopping
                    else:
                        disk_status = DisplayDiskStatus.stopped
                break
    except Exception:
        return StopDiskStatus.error

    if disk_status in (DisplayDiskStatus.started, DisplayDiskStatus.stopping):
        return StopDiskStatus.working
    elif disk_status is None:
        return StopDiskStatus.error
    elif disk_status in (DisplayDiskStatus.stopped, DisplayDiskStatus.unattached):
        # return ceph_api.delete_disk(disk_id, pool)
        # start: delete disk as a job
        __image_name_prefix = ConfigAPI().get_image_name_prefix()
        # Set image_name from disk_id; PetaSAN disk ids are 5-digit strings
        # and get the image name prefix prepended:
        image_name = disk_id
        if disk_id.isdigit() and len(disk_id) == 5:
            image_name = __image_name_prefix + str(disk_id)
        jm = JobManager()
        try:
            id = jm.add_job(JobType.DELETE_DISK, image_name + ' ' + pool)
            print("Start Delete image: ", image_name)
            if id > 0:
                logger.info("Deleting disk: {} has been started as a job".format(image_name))
            return id
        except Exception:
            logger.error("Error Deleting disk: {}".format(image_name))
        # end: delete disk as a job
    return StopDiskStatus.error

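# A minimal usage sketch, assuming delete_disk() is a method on the disk
# management class (the class name `ManageDisk` is borrowed from clear_disk()
# below; the disk id and pool are illustrative):
#
#   result = ManageDisk().delete_disk('00001', 'rbd')
#   if result == StopDiskStatus.working:
#       print('Disk is started or stopping; stop it before deleting.')
#   elif result == StopDiskStatus.error:
#       print('Disk not found, or the delete job could not be started.')
#   else:
#       print('Delete job started, id = {}'.format(result))
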
def clear_disk(args):
    disk_id = args.disk_id
    image_name = "image-" + disk_id

    try:
        # Get which ceph user is using this function & get his keyring file path #
        # ---------------------------------------------------------------------- #
        ceph_auth = CephAuthenticator()

        config = configuration()
        cluster_name = config.get_cluster_name()

        # Get disk metadata :
        # -------------------
        ceph_api = CephAPI()
        disk_metadata = ceph_api.get_diskmeta(disk_id)

        # Get pool name :
        # ---------------
        pool_name = disk_metadata.pool
        data_pool = ""

        # Check if disk has been created on replicated pool or erasure pool :
        # -------------------------------------------------------------------
        if len(disk_metadata.data_pool) > 0:
            data_pool = disk_metadata.data_pool

        tmp_image_name = "tmp_disk_" + disk_metadata.id

        # (1.) Check if a previous tmp image for this disk still exists :
        # ===============================================================
        images_list = ceph_api.get_all_images(pool_name)
        for image in images_list:
            if tmp_image_name in image:
                # Delete image #
                cmd = "rbd rm {}/{} {} --cluster {}".format(
                    pool_name, image, ceph_auth.get_authentication_string(), cluster_name)
                if not call_cmd(cmd):
                    print("Error : clear_disk.py script : cannot remove tmp image ,\ncmd : " + cmd)
                    sys.exit(-1)

        print("Stage 1 :\n\tCheck if a previous tmp image for this disk still exists > (Completed)")
        logger.info("Stage 1 :\n\tCheck if a previous tmp image for this disk still exists > (Completed)")

        # (2.) Stop old disk :
        # ====================
        consul_api = ConsulAPI()
        kv = consul_api.find_disk(disk_id)
        if kv is not None:
            manage_disk = ManageDisk()
            status = manage_disk.stop(disk_id)
            if status != Status.done:
                print('Error : Cannot stop disk , id = ' + disk_id)
                sys.exit(-1)

            print("Stage 2 :\n\tStop old disk > (Completed)")
            logger.info("Stage 2 :\n\tStop old disk > (Completed)")
            time.sleep(3)

            # (3.) Check if old disk is stopped or not :
            # ==========================================
            if len(data_pool) > 0:
                pool_type = "erasure"
                _confirm_disk_stopped(data_pool, disk_id, pool_type)
            else:
                pool_type = "replicated"
                _confirm_disk_stopped(pool_name, disk_id, pool_type)

            print("Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)")
            logger.info("Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)")
        else:
            print("Stage 2 :\n\tStop old disk > (Completed)")
            logger.info("Stage 2 :\n\tStop old disk > (Completed)")
            print("Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)")
            logger.info("Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)")
            print('\tclear_disk.py script : disk {} is already stopped'.format(disk_id))

        # (4.) Create a tmp image (not a PetaSAN image) :
        # ===============================================
        # Generate a random suffix between 1 and 100000 #
        random_no = str(random.randint(1, 100000))
        tmp_image_name = tmp_image_name + "_" + str(random_no)
        image_size = disk_metadata.size * 1024

        if len(data_pool) > 0:
            cmd = "rbd create {}/{} --size {} --data-pool {} {} --cluster {}".format(
                pool_name, tmp_image_name, image_size, data_pool,
                ceph_auth.get_authentication_string(), cluster_name)
        else:
            cmd = "rbd create {}/{} --size {} {} --cluster {}".format(
                pool_name, tmp_image_name, image_size,
                ceph_auth.get_authentication_string(), cluster_name)

        if not call_cmd(cmd):
            print("Error : clear_disk.py script : cannot create new tmp image ,\ncmd : " + cmd)
            sys.exit(-1)

        print("Stage 4 :\n\tCreate a tmp image called ( " + tmp_image_name + " ) > (Completed)")
        logger.info("Stage 4 :\n\tCreate a tmp image called ( " + tmp_image_name + " ) > (Completed)")

        # (5.) Run script to copy "old disk" metadata to new "tmp_disk" :
        # ===============================================================
        metadata_script_file = ConfigAPI().get_disk_meta_script_path()

        # Function : read_disks_metadata :
        parser_key_1 = "read"
        arg_1 = "--image"
        arg_2 = "--pool"

        # Function : set_disk_metadata :
        parser_key_2 = "write"
        arg_3 = "--file"  # defined in the original but unused below

        cmd = metadata_script_file + " " + parser_key_1 + " " + arg_1 + " " + image_name + " " + arg_2 + " " + pool_name + \
              " | " + metadata_script_file + " " + parser_key_2 + " " + arg_1 + " " + tmp_image_name + " " + arg_2 + " " + pool_name

        if not call_cmd(cmd):
            print("Error : clear_disk.py script : cannot copy metadata from old disk to new tmp image ,\ncmd : " + cmd)
            sys.exit(-1)

        print("Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)")
        logger.info("Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)")

        time.sleep(3)

        # (6.) Remove metadata of old disk :
        # ==================================
        old_image_name = str(ceph_api.conf_api.get_image_name_prefix() + disk_metadata.id)
        confirm = ceph_api.remove_disk_metadata(old_image_name, disk_metadata.pool)
        if not confirm:
            print("Error : clear_disk.py script : cannot remove metadata of old disk")
            # sys.exit(-1)

        print("Stage 6 :\n\tRemove metadata of old disk > (Completed)")
        logger.info("Stage 6 :\n\tRemove metadata of old disk > (Completed)")

        # (7.) Rename old disk image name with "deleted-" + disk_id + random_no :
        # ========================================================================
        new_image_name = "deleted-" + disk_metadata.id + "-" + random_no
        cmd = "rbd mv {}/{} {} {} --cluster {}".format(
            pool_name, image_name, new_image_name,
            ceph_auth.get_authentication_string(), cluster_name)

        if not call_cmd(cmd):
            print("Error : clear_disk.py script : cannot rename old image from {} to {} ,\ncmd : {}".format(
                image_name, new_image_name, cmd))
            sys.exit(-1)

        print("Stage 7 :\n\tRename old disk image name with ( " + new_image_name + " ) > (Completed)")
        logger.info("Stage 7 :\n\tRename old disk image name with ( " + new_image_name + " ) > (Completed)")

        time.sleep(5)

        # (8.) Rename "tmp_disk" with old disk image name :
        # =================================================
        cmd = "rbd mv {}/{} {} {} --cluster {}".format(
            pool_name, tmp_image_name, image_name,
            ceph_auth.get_authentication_string(), cluster_name)

        if not call_cmd(cmd):
            print("Error : clear_disk.py script : cannot rename \"tmp_disk\" from {} to {} ,\ncmd : {}".format(
                tmp_image_name, image_name, cmd))
            sys.exit(-1)

        print("Stage 8 :\n\tRename 'tmp_disk' with old disk image name > (Completed)")
        logger.info("Stage 8 :\n\tRename 'tmp_disk' with old disk image name > (Completed)")

        time.sleep(5)

        # (9.) Start a job to remove the renamed old disk image :
        # ========================================================
        jm = JobManager()
        id = jm.add_job(JobType.DELETE_DISK, new_image_name + ' ' + pool_name)

        print("Stage 9 :\n\tStart a job to remove old disk image , job id = " + str(id))
        logger.info("Stage 9 :\n\tStart a job to remove old disk image , job id = " + str(id))

        sys.exit(0)

    except PoolException as e:
        print("Error : PoolException , {}".format(e.message))
        logger.error("Clear Disk Error : PoolException , {}".format(e.message))
        sys.exit(-1)
    except DiskListException as e:
        print("Error : DiskListException , {}".format(e.message))
        logger.error("Clear Disk Error : DiskListException , {}".format(e.message))
        sys.exit(-1)
    except CephException as e:
        if e.id == CephException.GENERAL_EXCEPTION:
            print("Error : CephException , {}".format(e.message))
        logger.error("Clear Disk Error : CephException , {}".format(e.message))
        sys.exit(-1)
    except MetadataException as e:
        print("Error : MetadataException , {}".format(e.message))
        logger.error("Clear Disk Error : MetadataException , {}".format(e.message))
        sys.exit(-1)
    except Exception as e:
        # Generic exceptions have no .message attribute in Python 3; use str(e).
        print("Error : Exception , {}".format(str(e)))
        logger.error("Clear Disk Error : Exception , {}".format(str(e)))
        sys.exit(-1)

def delete_pool(self, pool_name):
    jm = JobManager()
    id = jm.add_job(JobType.DELETE_POOL, pool_name + ' --yes-i-really-really-mean-it')
    return id
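
# A minimal usage sketch (the owning class and pool name are illustrative).
# '--yes-i-really-really-mean-it' is the confirmation flag Ceph requires for
# pool deletion; baking it into the job parameters keeps the job from stalling
# on the safety prompt:
#
#   job_id = some_manager.delete_pool('test-pool')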