def add_destination_cluster(self, destination_cluster):
    consul_api = ConsulAPI()
    # Check if a destination cluster with the same name already exists, and if so raise an exception:
    dest_cluster = consul_api.get_replication_destination_cluster(
        destination_cluster.cluster_name)
    if dest_cluster:
        raise ReplicationException(ReplicationException.DESTINATION_CLUSTER_EXIST,
                                   "This destination cluster already exists.")

    # Validate the destination cluster name:
    cluster_name = self.get_dest_cluster_name(destination_cluster)
    if cluster_name != destination_cluster.cluster_name:
        raise ReplicationException(ReplicationException.WRONG_CLUSTER_NAME,
                                   "Wrong destination cluster name.")

    # Add the cluster fsid to the cluster entity:
    destination_cluster.cluster_fsid = self.get_dest_cluster_fsid(destination_cluster)

    # Encrypt the private key using RSA before saving in Consul:
    private_key = destination_cluster.ssh_private_key
    rsa_encrypt = RSAEncryption()
    pub_key = rsa_encrypt.get_key(rsa_encrypt.pub_key_path)
    encrypted_key = rsa_encrypt.encrypt_public(private_key, pub_key)
    destination_cluster.ssh_private_key = encrypted_key

    # Save the destination cluster entity in Consul:
    consul_api.update_replication_destination_cluster(destination_cluster)

def _get_running_pool_disks(pool, pool_type):
    consul = ConsulAPI()
    running_pool_disks = []
    meta_disk = ManageDisk().get_disks_meta()
    pool_disks = set()

    # Collect the ids of all disks that belong to the given pool:
    if pool_type == "replicated":
        for meta in meta_disk:
            if meta.pool == pool:
                pool_disks.add(meta.id)
    elif pool_type == "erasure":
        for meta in meta_disk:
            if meta.data_pool == pool:
                pool_disks.add(meta.id)

    # Keep only the disks Consul reports as currently running:
    running_disks = consul.get_running_disks()
    for running_disk in running_disks:
        if running_disk in pool_disks:
            running_pool_disks.append(running_disk)
    return running_pool_disks

def get_consul_data():
    api = ConsulAPI()
    ob = api.find_disk("00001")
    if ob == ManageDiskStatus.error:
        print("error getting consul data")
    else:
        print(ob)

def get_disks_meta(self):
    ceph_api = CephAPI()
    consul_api = ConsulAPI()
    ls = ceph_api.get_disks_meta()
    for disk in ls:
        if disk and hasattr(disk, "paths") and not disk.paths:
            disk.status = DisplayDiskStatus.unattached
        elif disk and hasattr(disk, "paths") and disk.paths:
            data = consul_api.find_disk(disk.id)
            if data is not None:
                disk.status = DisplayDiskStatus.starting
                # A Consul Flags value of 1 marks the disk as stopping:
                if str(data.Flags) == "1":
                    disk.status = DisplayDiskStatus.stopping
                elif consul_api.is_path_locked(disk.id):
                    disk.status = DisplayDiskStatus.started
            else:
                disk.status = DisplayDiskStatus.stopped

            job_manager = JobManager()
            job_list = job_manager.get_running_job_list()
            for j in job_list:
                # Check if the job is running
                if j.is_running:
                    # Set disk status [deleting]
                    if j.type == JobType.DELETE_DISK and str(j.params).find(str(disk.id)) > -1:
                        disk.status = DisplayDiskStatus.deleting
    return ls

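# Summary of the status decision above (derived directly from the code, added
# here as orientation, not separate documentation):
#   no paths                                -> unattached
#   paths + Consul key, Flags == 1          -> stopping
#   paths + Consul key, path lock held      -> started
#   paths + Consul key, otherwise           -> starting
#   paths, no Consul key                    -> stopped
#   a running DELETE_DISK job for the disk  -> deleting (overrides the above)
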
def get_disk_paths(self, disk_id, pool):
    paths_list = CephAPI().get_disk_meta(disk_id, pool).paths
    paths_list_with_node = []
    sessions_dict = ConsulAPI().get_sessions_dict(
        ConfigAPI().get_iscsi_service_session_name())

    # In case Consul holds a lock on the disk:
    for kv in ConsulAPI().get_disk_paths(disk_id):
        path = Path()
        # Consul path keys are 1-based, so map the key suffix back to a list index:
        path_str = paths_list[int(str(kv.Key).split(disk_id + "/")[1]) - 1]
        path.load_json(json.dumps(path_str))
        if hasattr(kv, "Session") and kv.Session in sessions_dict:
            path.locked_by = sessions_dict.get(kv.Session).Node
        paths_list_with_node.append(path)

    # In case the disk is stopped:
    if not paths_list_with_node:
        for path_str in paths_list:
            path = Path()
            path.load_json(json.dumps(path_str))
            paths_list_with_node.append(path)
    return paths_list_with_node

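# A minimal, self-contained illustration of the key-to-index mapping used
# above: Consul stores path keys 1-based ("<disk_id>/1", "<disk_id>/2", ...),
# while paths_list is a 0-based Python list. The key prefix below is a
# hypothetical example, not the actual Consul layout.
def _demo_key_mapping():
    disk_id = "00001"
    paths_list = ["path-a", "path-b", "path-c"]
    key = "PetaSAN/Disks/00001/2"           # hypothetical Consul key
    index = int(key.split(disk_id + "/")[1]) - 1
    assert paths_list[index] == "path-b"    # key suffix 2 -> list index 1
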
def __init__(self):
    self.__app_conf = ConfigAPI()
    self.__context = AssignmentContext()
    self.__session_dict = ConsulAPI().get_sessions_dict(
        ConfigAPI().get_iscsi_service_session_name())
    self.__node_session_dict = dict()

def run(self):
    try:
        status = False
        consul = ConsulAPI()
        failed_jobs = consul.get_replication_failed_jobs()
        if len(failed_jobs) > 0:
            failed_jobs_str = ""
            for job_id, job_info in failed_jobs.iteritems():
                failed_jobs_str += "\n job id: " + job_id + " job name: " + job_info.job_name
                status = consul.delete_failed_job(job_id)
            result = Result()
            result.plugin_name = self.get_plugin_name()
            result.title = gettext("core_message_notify_failed_jobs_title")
            result.message = '\n'.join(
                gettext("core_message_notify_failed_jobs_body").split("\\n")).format(failed_jobs_str)
            self.__context.results.append(result)
            logger.info(result.message)
            logger.info("status of deleting failed jobs from consul is " + str(status))
    except Exception as e:
        logger.exception(e)
        logger.error("An error occurred while ReplicationNotificationPlugin was running.")

def get_replication_job_log(self, job_id):
    consul_api = ConsulAPI()
    logs_list = []
    logs_list_json = consul_api.get_replication_job_log(job_id)
    if len(logs_list_json) < 1:
        return logs_list
    logs_list = json.loads(logs_list_json)
    return logs_list

def stop(self, disk_id):
    try:
        consul_api = ConsulAPI()
        kv = consul_api.find_disk(disk_id)
        # Flags=1 marks the disk as stopping; pass ModifyIndex (not CreateIndex)
        # so the check-and-set validates against the key's current state,
        # matching the fix in _stop_disk below:
        return consul_api.add_disk_resource(disk_id, "disk", 1, kv.ModifyIndex)
    except Exception as ex:
        logger.error("stop disk exception :{}".format(ex.message))
        return ManageDiskStatus.error

def get_replication_user(self, user_name):
    user = ConsulAPI().get_replication_user(user_name)
    # Decrypt the user's SSH private key and Ceph keyring before returning the entity:
    rsa_decrypt = RSAEncryption()
    prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
    decrypted_prv_key = rsa_decrypt.decrypt_private(user.ssh_prv_key, prv_key)
    user.ssh_prv_key = decrypted_prv_key
    decrypted_ceph_key = rsa_decrypt.decrypt_private(user.ceph_keyring, prv_key)
    user.ceph_keyring = decrypted_ceph_key
    return user

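# RSAEncryption above is PetaSAN's own wrapper. A minimal sketch of the
# encrypt/decrypt round trip these helpers rely on, using the 'cryptography'
# package; this is an illustration under assumptions (the real wrapper loads
# keys from pub_key_path/prv_key_path and must handle payloads longer than a
# single RSA block, e.g. full SSH keys):
from cryptography.hazmat.primitives.asymmetric import rsa, padding
from cryptography.hazmat.primitives import hashes

def _demo_rsa_round_trip():
    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    public_key = private_key.public_key()
    oaep = padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA256()),
                        algorithm=hashes.SHA256(), label=None)
    secret = b"short secret"                       # raw RSA limits payload size
    encrypted = public_key.encrypt(secret, oaep)   # what gets stored in Consul
    decrypted = private_key.decrypt(encrypted, oaep)
    assert decrypted == secret
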
def _stop_disk(disk_id):
    try:
        consul_api = ConsulAPI()
        kv = consul_api.find_disk(disk_id)
        if not kv:
            return
        # Use ModifyIndex rather than CreateIndex so the check-and-set
        # validates against the key's current state:
        consul_api.add_disk_resource(disk_id, 'disk', 1, kv.ModifyIndex)
    except Exception as ex:
        logger.error('Error stopping disk:{} {}'.format(disk_id, ex.message))
        raise ConsulException(ConsulException.GENERAL_EXCEPTION, 'General Consul Error')

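# ConsulAPI is PetaSAN's wrapper. A sketch of the underlying check-and-set the
# two stop functions rely on, written against the python-consul client; the
# key name is hypothetical, and the assumption that add_disk_resource maps to
# a kv.put with cas=ModifyIndex and flags=1 is an inference from the code:
import consul

def _demo_cas_stop(key="PetaSAN/Disks/00001"):
    c = consul.Consul()
    index, data = c.kv.get(key)
    if data is None:
        return False
    value = data["Value"]  # str under Python 2, bytes under Python 3
    # The put succeeds only if the key has not changed since we read it:
    return c.kv.put(key, value, cas=data["ModifyIndex"], flags=1)
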
def update_auth_pools(self, user_name, auth_pools):
    user = Users()
    status = user.update_auth_pools(user_name, auth_pools)
    if status:
        replication_user = ConsulAPI().get_replication_user(user_name)
        replication_user.auth_pools = auth_pools
        ConsulAPI().update_replication_user(replication_user)
        # Sync users to all backup nodes:
        nodes_list = ManageNode().get_node_list()
        for node_info in nodes_list:
            if node_info.is_backup:
                stat = self.sync_users(node_info.name)
    return status

def edit_replication_job(self, job_entity, old_node):
    consul_api = ConsulAPI()
    # Update the job in Consul:
    consul_api.update_replication_job(job_entity)
    # Rebuild the crontab:
    self.start_node_service()
    system_date_time = str(datetime.datetime.now()).split('.')[0]
    log_text = "{} - Job {} has been updated.".format(system_date_time, job_entity.job_id)
    self.log_replication_job(job_entity.job_id, log_text)

def get_pool_by_disk(disk_id):
    consul_api = ConsulAPI()
    ceph_api = CephAPI()
    # Prefer the pool recorded in Consul, then fall back to Ceph:
    pool = consul_api.get_disk_pool(disk_id)
    if pool:
        return pool
    pool = ceph_api.get_pool_bydisk(disk_id)
    if pool:
        return pool
    return None

def __unlock_consul_path(self, path):
    try:
        logger.debug("Unlock {} path locked by session {}.".format(path, self.__session))
        consul_api = ConsulAPI()
        consul_api.release_disk_path(
            self.__app_conf.get_consul_disks_path() + path, self.__session, None)
        logger.info("Unlock path %s" % path)
    except Exception as e:
        logger.error("Could not unlock path %s" % path)
        raise e

def edit_destination_cluster(self, dest_cluster):
    consul_api = ConsulAPI()
    # Encrypt the private key using RSA before saving in Consul:
    key_text = dest_cluster.ssh_private_key
    rsa_encrypt = RSAEncryption()
    pub_key = rsa_encrypt.get_key(rsa_encrypt.pub_key_path)
    encrypted_key = rsa_encrypt.encrypt_public(key_text, pub_key)
    dest_cluster.ssh_private_key = encrypted_key
    # Save the destination cluster entity in Consul:
    consul_api.update_replication_destination_cluster(dest_cluster)

def delete_replication_user(self, user_name):
    user = Users()
    stat = user.delete_ceph_user(user_name)
    if stat:
        consul = ConsulAPI()
        consul.delete_replication_user(user_name)
        # Sync users to all backup nodes:
        nodes_list = ManageNode().get_node_list()
        for node_info in nodes_list:
            if node_info.is_backup:
                stat = self.sync_users(node_info.name)
        return True
    else:
        return False

def get_replication_dest_clusters(self):
    consul_api = ConsulAPI()
    dest_clusters_list = consul_api.get_replication_destination_clusters()
    for key, value in dest_clusters_list.iteritems():
        # ========== Decrypt ssh private key ========== #
        encrypted_key = value.ssh_private_key
        rsa_decrypt = RSAEncryption()
        prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
        decrypted_key = rsa_decrypt.decrypt_private(encrypted_key, prv_key)
        # ========== Update entity ========== #
        value.ssh_private_key = decrypted_key
    return dest_clusters_list

def _get_nodes(self):
    consul_api = ConsulAPI()
    # Get all PetaSAN nodes [management or storage]:
    node_list = consul_api.get_node_list()
    # Get online nodes from Consul:
    consul_members = consul_api.get_consul_members()
    petasan_node_list = []
    for i in node_list:
        if not i.is_iscsi:
            continue
        if i.name in consul_members:
            petasan_node_list.append(i.name)
    return petasan_node_list

def __get_down_node_list(self):
    down_node_list = []
    try:
        con_api = ConsulAPI()
        node_list = con_api.get_node_list()
        consul_members = con_api.get_consul_members()
        # A node that is not a Consul member is considered down:
        for i in node_list:
            if i.name not in consul_members:
                i.status = NodeStatus.down
                down_node_list.append(i.name)
        return down_node_list
    except Exception as e:
        logger.exception("error getting down node list")
        return down_node_list

def set_new_assignments(self, paths_assignment_info):
    logger.info("Set new assignment.")
    if self.get_current_reassignment() is not None:
        raise Exception("There is already a running assignment.")
    config_api = ConfigAPI()
    consul_api = ConsulAPI()
    logger.info("Delete old assignments.")
    consul_api.delete_assignments()
    session = consul_api.get_new_session_ID(
        config_api.get_assignment_session_name(),
        configuration().get_node_name(), True)
    if consul_api.lock_key(config_api.get_consul_assignment_path(), session, "root"):
        logger.info("Lock assignment root.")
        for path_assignment_info in paths_assignment_info:
            path_assignment_info.status = ReassignPathStatus.pending
            consul_api.set_path_assignment(
                path_assignment_info,
                self._get_node_session(path_assignment_info.target_node))
            logger.info(
                "New assignment for {}, disk {}, from node {} to node {} with status {}".format(
                    path_assignment_info.ip, path_assignment_info.disk_id,
                    path_assignment_info.node, path_assignment_info.target_node,
                    path_assignment_info.status))
    else:
        logger.error("Can't lock paths assignment key.")
        raise Exception("Can't lock paths assignment key.")

def start(self, disk_id, pool):
    try:
        ceph_api = CephAPI()
        consul_api = ConsulAPI()
        attr = ceph_api.read_image_metadata(
            ConfigAPI().get_image_name_prefix() + disk_id, pool)
        petasan_meta = attr.get(ConfigAPI().get_image_meta_key())
        disk_meta = DiskMeta()
        if petasan_meta:
            disk_meta.load_json(petasan_meta)
        else:
            return Status.error
        consul_api.add_disk_resource(disk_meta.id, "disk")
        consul_api.add_disk_pool(disk_meta.id, pool)
        # Register a 1-based Consul resource per path:
        i = 0
        for p in disk_meta.paths:
            i += 1
            consul_api.add_disk_resource("/".join(["", disk_meta.id, str(i)]), None)
    except Exception as e:
        logger.error("Cannot start disk %s" % disk_id)
        logger.exception(e.message)
        return Status.error
    return Status.done

def clean_source_node(self, ip, disk_id):
    if not self.update_path(ip, ReassignPathStatus.moving):
        return False
    pool = self._get_pool_by_disk(disk_id)
    if not pool:
        logger.error('Could not find pool for disk ' + disk_id)
        return False
    disk = CephAPI().get_disk_meta(disk_id, pool)
    paths_list = disk.paths
    disk_path = None
    path_index = -1
    # Find the path whose ip matches the one being moved:
    for i in xrange(0, len(paths_list)):
        path_str = paths_list[i]
        path = Path()
        path.load_json(json.dumps(path_str))
        if path.ip == ip:
            disk_path = path
            path_index = i
            break
    if disk_path:
        self._clean_iscsi_config(disk_id, path_index, disk.iqn)
        network = Network()
        NetworkAPI().delete_ip(disk_path.ip, disk_path.eth, disk_path.subnet_mask)
        if network.is_ip_configured(ip):
            logger.error("Move action: cannot clean network config for disk {} path {}.".format(
                disk_id, path_index))
            self.update_path(ip, ReassignPathStatus.failed)
            return False
        logger.info("Move action: cleaned network config for disk {} path {}.".format(
            disk_id, path_index))
        # Consul path keys are 1-based:
        key = self.__app_conf.get_consul_disks_path() + disk_id + "/" + str(path_index + 1)
        consul_api = ConsulAPI()
        session = self._get_node_session(configuration().get_node_name())
        if ConsulAPI().is_path_locked_by_session(key, session):
            consul_api.release_disk_path(key, session, None)
            logger.info("Move action: released disk {} path {}.".format(disk_id, path_index + 1))
        else:
            self.update_path(ip, ReassignPathStatus.failed)
            return False
    return True

def get_current_reassignment(self):
    paths = ConsulAPI().get_assignments()
    if paths is not None:
        for ip, path_assignment_info in paths.iteritems():
            if not hasattr(path_assignment_info, "session"):
                logger.info("Path {} not locked by node.".format(path_assignment_info.ip))
                # A path with no session that has not finished is considered failed:
                if path_assignment_info.status not in [ReassignPathStatus.succeeded,
                                                       ReassignPathStatus.failed]:
                    path_assignment_info.status = ReassignPathStatus.failed
    return paths

def get_node_list(self):
    consul_api = ConsulAPI()
    # Get all PetaSAN nodes [management or storage]:
    node_list = consul_api.get_node_list()
    # Get online nodes from Consul:
    consul_members = consul_api.get_consul_members()
    petasan_node_list = []
    for i in node_list:
        if i.name in consul_members:
            i.status = NodeStatus.up
        else:
            i.status = NodeStatus.down
        petasan_node_list.append(i)
    return petasan_node_list

def get_replication_dest_cluster(self, cluster_name):
    consul_api = ConsulAPI()
    destination_cluster = consul_api.get_replication_destination_cluster(cluster_name)
    # ========== Decrypt ssh private key ========== #
    encrypted_key = destination_cluster.ssh_private_key
    rsa_decrypt = RSAEncryption()
    prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
    decrypted_key = rsa_decrypt.decrypt_private(encrypted_key, prv_key)
    # ========== Update entity ========== #
    destination_cluster.ssh_private_key = decrypted_key
    return destination_cluster

def auto(self, type=1):
    logger.info("User started auto reassignment of paths.")
    assignments_stats = self.get_assignments_stats()
    if assignments_stats.is_reassign_busy:
        logger.error("There is already a reassignment running.")
        raise Exception("There is already a reassignment running.")
    ConsulAPI().drop_all_node_sessions(
        self.__app_conf.get_consul_assignment_path(),
        configuration().get_node_name())
    sleep(3)
    assignments_stats.paths = [
        path for path in assignments_stats.paths
        if len(path.node.strip()) > 0 and path.status == -1
    ]
    self.__context.paths = assignments_stats.paths
    self.__context.nodes = assignments_stats.nodes
    for plugin in self._get_new_plugins_instances(auto_plugins):
        if plugin.is_enable() and plugin.get_plugin_id() == type:
            paths_assignments = plugin.get_new_assignments()
            if len(paths_assignments) == 0:
                logger.info("There is no node under average.")
                return
            self.set_new_assignments(paths_assignments)
            break
    self.run()

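# The auto-reassignment flow above, end to end (a reading of the code, added
# for orientation):
#   1. Refuse to start if a reassignment is already running.
#   2. Drop this node's stale sessions under the assignment path, then settle (sleep 3).
#   3. Keep only unassigned paths (status == -1) that report a node.
#   4. Ask the enabled balancing plugin matching 'type' for new assignments;
#      if no node is under the average, there is nothing to move.
#   5. Persist the assignments (set_new_assignments) and execute them (run).
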
def __wait_before_lock(self, path=None):
    disk_id, path_index = str(path).split("/")
    wait_time = 0
    if path:
        # 1- Calculate wait time if the path has siblings on this node.
        wait_time = int(self.__app_conf.get_siblings_paths_delay()) * int(
            self.__paths_per_disk_local.get(disk_id, 0))
        logger.debug("Wait time for siblings is {}.".format(wait_time))
        total_nodes = len(ConsulAPI().get_consul_members())
        # 2- Calculate the average number of paths per node.
        average_node_paths = float(self.__total_cluster_paths) / float(total_nodes)
        # Calculate the percentage of local paths relative to the average.
        percent = float(self.__paths_per_session.get(self.__session, 0)) / average_node_paths
        # 3- Calculate the total wait time.
        if self.__last_acquire_succeeded:
            wait_time += int(self.__app_conf.get_average_delay_before_lock()) * percent
        else:
            logger.debug("Skipping wait time for average delay.")
        logger.debug("Wait time depending on average and siblings is {}.".format(
            math.ceil(wait_time)))
    sleep(math.ceil(wait_time))

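# A worked example of the wait-time formula above (all numbers hypothetical):
#   siblings_paths_delay = 2, local paths for this disk = 1 -> wait_time = 2
#   total_cluster_paths = 12, consul members = 3            -> average_node_paths = 4.0
#   paths held by this session = 5                          -> percent = 5 / 4.0 = 1.25
#   average_delay_before_lock = 4, last acquire succeeded   -> wait_time += 4 * 1.25 = 5
#   total: ceil(2 + 5) = 7 seconds before attempting the lock
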
def __fencing(self, node_name):
    maintenance = ManageMaintenance()
    if maintenance.get_maintenance_config().fencing == MaintenanceConfigState.off:
        logger.warning(
            "Fencing action will not fire: the admin has turned it off, the cluster is in maintenance mode.")
        return
    # Try each of the node's networks in turn and power it off via ssh:
    node_list = ConsulAPI().get_node_list()
    for node in node_list:
        if str(node.name) == node_name:
            if Network().ping(node.backend_2_ip):
                logger.info("This node will stop node {}/{}.".format(node_name, node.backend_2_ip))
                ssh().call_command(node.backend_2_ip, " poweroff ", 5)
                break
            elif Network().ping(node.management_ip):
                logger.info("This node will stop node {}/{}.".format(node_name, node.management_ip))
                ssh().call_command(node.management_ip, " poweroff ", 5)
                break
            elif Network().ping(node.backend_1_ip):
                logger.info("This node will stop node {}/{}.".format(node_name, node.backend_1_ip))
                ssh().call_command(node.backend_1_ip, " poweroff ", 5)
                break

def _get_pool_by_disk(self, disk_id):
    consul_api = ConsulAPI()
    ceph_api = CephAPI()
    # Prefer the pool recorded in Consul, then fall back to Ceph:
    pool = consul_api.get_disk_pool(disk_id)
    if pool:
        logger.info('Found pool:{} for disk:{} via consul'.format(pool, disk_id))
        return pool
    pool = ceph_api.get_pool_bydisk(disk_id)
    if pool:
        logger.info('Found pool:{} for disk:{} via ceph'.format(pool, disk_id))
        return pool
    logger.error('Could not find pool for disk ' + disk_id)
    return None