Example #1
    def add_destination_cluster(self, destination_cluster):
        consul_api = ConsulAPI()
        # check if a destination cluster with the same name already exists and if so raise an exception
        dest_cluster = consul_api.get_replication_destination_cluster(
            destination_cluster.cluster_name)
        if dest_cluster:
            raise ReplicationException(
                ReplicationException.DESTINATION_CLUSTER_EXIST,
                "This destination cluster is already exist.")

        # validate destination cluster name

        # from PetaSAN.backend.replication.manage_remote_replication import ManageRemoteReplication
        # manage_remote_replication = ManageRemoteReplication()

        cluster_name = self.get_dest_cluster_name(destination_cluster)
        if cluster_name != destination_cluster.cluster_name:
            raise ReplicationException(ReplicationException.WRONG_CLUSTER_NAME,
                                       "Wrong destination cluster name.")

        # add cluster fsid to the cluster entity
        # manage_remote_replication.cluster_name = destination_cluster.cluster_name

        destination_cluster.cluster_fsid = self.get_dest_cluster_fsid(
            destination_cluster)

        # encrypt the private key using RSA algorithm before saving in consul
        private_key = destination_cluster.ssh_private_key
        rsa_encrypt = RSAEncryption()
        pub_key = rsa_encrypt.get_key(rsa_encrypt.pub_key_path)
        encrypted_key = rsa_encrypt.encrypt_public(private_key, pub_key)
        destination_cluster.ssh_private_key = encrypted_key

        # save destination cluster entity in consul
        consul_api.update_replication_destination_cluster(destination_cluster)
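The encrypt-before-save step above has a matching decrypt-on-read step in later examples (get_replication_user, get_replication_dest_cluster). A minimal round-trip sketch of that recurring pattern, using only the RSAEncryption calls shown in these examples; the plaintext value is illustrative:

# Hedged sketch of the encrypt/decrypt round trip used throughout these examples.
rsa = RSAEncryption()
pub_key = rsa.get_key(rsa.pub_key_path)
encrypted = rsa.encrypt_public("example-private-key", pub_key)  # illustrative plaintext
prv_key = rsa.get_key(rsa.prv_key_path)
decrypted = rsa.decrypt_private(encrypted, prv_key)             # back to "example-private-key"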
def _get_running_pool_disks(pool, pool_type):
    consul = ConsulAPI()
    running_pool_disks = []
    meta_disk = ManageDisk().get_disks_meta()

    pool_disks = set()

    if pool_type == "replicated":
        if len(meta_disk) > 0:
            for meta in meta_disk:
                if meta.pool == pool:
                    pool_disks.add(meta.id)

    elif pool_type == "erasure":
        if len(meta_disk) > 0:
            for meta in meta_disk:
                if meta.data_pool == pool:
                    pool_disks.add(meta.id)

    running_disks = consul.get_running_disks()
    for running_disk in running_disks:
        if running_disk in pool_disks:
            running_pool_disks.append(running_disk)

    return running_pool_disks
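A hedged usage sketch for the helper above; the pool name is illustrative:

# Hypothetical call: list running disks backed by a replicated pool named "rbd".
running = _get_running_pool_disks("rbd", "replicated")
print("running disks in pool rbd: {}".format(running))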
def get_consul_data():
    api = ConsulAPI()
    ob = api.find_disk("00001")
    if ManageDiskStatus.error == ob:
        print("error get consul data")
    else:
        print ob
    def get_disks_meta(self):
        ceph_api = CephAPI()
        consul_api = ConsulAPI()
        ls = ceph_api.get_disks_meta()
        for disk in ls:
            if disk and hasattr(disk, "paths") and not disk.paths:
                disk.status = DisplayDiskStatus.unattached
            elif disk and hasattr(disk, "paths") and disk.paths:
                data = consul_api.find_disk(disk.id)
                if data is not None:
                    disk.status = DisplayDiskStatus.starting
                    if str(data.Flags) == "1":
                        disk.status = DisplayDiskStatus.stopping
                    elif consul_api.is_path_locked(disk.id):
                        disk.status = DisplayDiskStatus.started

                else:
                    disk.status = DisplayDiskStatus.stopped

                job_manager = JobManager()
                job_list = job_manager.get_running_job_list()

                for j in job_list:

                    # Check if the job is running
                    if j.is_running:
                        # Set disk status [deleting]
                        if j.type == JobType.DELETE_DISK and str(
                                j.params).find(str(disk.id)) > -1:
                            disk.status = DisplayDiskStatus.deleting

        return ls
    def get_disk_paths(self, disk_id, pool):
        paths_list = CephAPI().get_disk_meta(disk_id, pool).paths
        paths_list_with_node = []
        sessions_dict = ConsulAPI().get_sessions_dict(
            ConfigAPI().get_iscsi_service_session_name())

        # in case consul holds a lock on the disk paths
        for kv in ConsulAPI().get_disk_paths(disk_id):
            path = Path()
            path_str = paths_list[int(str(kv.Key).split(disk_id + "/")[1]) - 1]
            path.load_json(json.dumps(path_str))
            if hasattr(kv, "Session") and sessions_dict.has_key(kv.Session):

                path.locked_by = sessions_dict.get(kv.Session).Node
                paths_list_with_node.append(path)
            else:
                paths_list_with_node.append(path)

        # in case the disk is stopped
        if not paths_list_with_node:

            for path_str in paths_list:
                path = Path()
                path.load_json(json.dumps(path_str))
                paths_list_with_node.append(path)
        return paths_list_with_node
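A short usage sketch for the method above, assuming it is exposed by the same disk manager class as get_disks_meta (an assumption) and using an illustrative disk id and pool:

# Hedged sketch: print each path and, when consul holds a lock, the owning node.
for p in ManageDisk().get_disk_paths("00001", "rbd"):  # class and arguments are assumptions
    print("{} locked_by={}".format(p.ip, getattr(p, "locked_by", None)))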
Example #6
 def __init__(self):
     self.__app_conf = ConfigAPI()
     self.__context = AssignmentContext()
     self.__session_dict = ConsulAPI().get_sessions_dict(
         ConfigAPI().get_iscsi_service_session_name())
     self.__node_session_dict = dict()
     pass
 def run(self):
     try:
         status = False
         consul = ConsulAPI()
         failed_jobs = consul.get_replication_failed_jobs()
         if len(failed_jobs) > 0:
             failed_jobs_str = ""
             for job_id, job_info in failed_jobs.iteritems():
                 failed_jobs_str += "\n job id: " + job_id + " job name: " + job_info.job_name
                 status = consul.delete_failed_job(job_id)
             result = Result()
             result.plugin_name = self.get_plugin_name()
             result.title = gettext("core_message_notify_failed_jobs_title")
             result.message = '\n'.join(
                 gettext("core_message_notify_failed_jobs_body").split(
                     "\\n")).format(failed_jobs_str)
             self.__context.results.append(result)
             logger.info(result.message)
             logger.info("status of deleting failed jobs from consul is " +
                         str(status))
     except Exception as e:
         logger.exception(e)
         logger.error(
             "An error occurred while ReplicationNotificationPlugin was running."
         )
Example #8
 def get_replication_job_log(self, job_id):
     consul_api = ConsulAPI()
     logs_list = []
     logs_list_json = consul_api.get_replication_job_log(job_id)
     if len(logs_list_json) < 1:
         return logs_list
     logs_list = json.loads(logs_list_json)
     return logs_list
 def stop(self, disk_id):
     try:
         consul_api = ConsulAPI()
         kv = consul_api.find_disk(disk_id)
         return consul_api.add_disk_resource(disk_id, "disk", 1,
                                             kv.CreateIndex)
     except Exception as ex:
         logger.error("stop disk exception :{}".format(ex.message))
         return ManageDiskStatus.error
 def get_replication_user(self, user_name):
     user = ConsulAPI().get_replication_user(user_name)
     rsa_decrypt = RSAEncryption()
     prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
     decrypted_prv_key = rsa_decrypt.decrypt_private(
         user.ssh_prv_key, prv_key)
     user.ssh_prv_key = decrypted_prv_key
     decrypted_ceph_key = rsa_decrypt.decrypt_private(
         user.ceph_keyring, prv_key)
     user.ceph_keyring = decrypted_ceph_key
     return user
Example #11
def _stop_disk(disk_id):
    try:
        consul_api = ConsulAPI()
        kv = consul_api.find_disk(disk_id)
        if not kv:
            return
        #consul_api.add_disk_resource(disk_id, 'disk', 1, kv.CreateIndex)
        consul_api.add_disk_resource(disk_id, 'disk', 1, kv.ModifyIndex)
    except Exception as ex:
        logger.error('Error stopping disk:{} {}'.format(disk_id, ex.message))
        raise ConsulException(ConsulException.GENERAL_EXCEPTION,
                              'General Consul Error')
 def update_auth_pools(self, user_name, auth_pools):
     user = Users()
     status = user.update_auth_pools(user_name, auth_pools)
     if status:
         replication_user = ConsulAPI().get_replication_user(user_name)
         replication_user.auth_pools = auth_pools
         ConsulAPI().update_replication_user(replication_user)
         nodes_list = ManageNode().get_node_list()
         for node_info in nodes_list:
             if node_info.is_backup:
                 stat = self.sync_users(node_info.name)
     return status
Example #13
    def edit_replication_job(self, job_entity, old_node):
        consul_api = ConsulAPI()

        # update job in consul
        consul_api.update_replication_job(job_entity)

        # build crontab
        self.start_node_service()

        system_date_time = str(datetime.datetime.now()).split('.')[0]
        log_text = "{} - Job {} has been updated.".format(system_date_time, job_entity.job_id)
        self.log_replication_job(job_entity.job_id, log_text)
def get_pool_by_disk(disk_id):

    consul_api = ConsulAPI()
    ceph_api = CephAPI()
    pool = consul_api.get_disk_pool(disk_id)
    if pool:
        return pool
    pool = ceph_api.get_pool_bydisk(disk_id)
    if pool:
        return pool

    return None
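A hedged usage sketch; the disk id follows the "00001" format used in the get_consul_data example above:

pool = get_pool_by_disk("00001")  # illustrative disk id
if pool is None:
    print("disk 00001 has no pool mapping in consul or ceph")
else:
    print("disk 00001 belongs to pool {}".format(pool))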
Example #15
 def __unlock_consul_path(self, path):
     try:
         logger.debug("Unlock {} path locked by session {}.".format(
             path, self.__session))
         consul_api = ConsulAPI()
         consul_api.release_disk_path(
             self.__app_conf.get_consul_disks_path() + path, self.__session,
             None)
         logger.info("Unlock path %s" % path)
     except Exception as e:
         logger.error("Could not unlock path %s" % path)
         raise e
Example #16
    def edit_destination_cluster(self, dest_cluster):
        consul_api = ConsulAPI()

        # encrypt the private key using RSA algorithm before saving in consul
        key_text = dest_cluster.ssh_private_key
        rsa_encrypt = RSAEncryption()
        pub_key = rsa_encrypt.get_key(rsa_encrypt.pub_key_path)
        encrypted_key = rsa_encrypt.encrypt_public(key_text, pub_key)
        dest_cluster.ssh_private_key = encrypted_key

        # save destination cluster entity in consul
        consul_api.update_replication_destination_cluster(dest_cluster)
 def delete_replication_user(self, user_name):
     user = Users()
     stat = user.delete_ceph_user(user_name)
     if stat:
         consul = ConsulAPI()
         consul.delete_replication_user(user_name)
         nodes_list = ManageNode().get_node_list()
         for node_info in nodes_list:
             if node_info.is_backup:
                 stat = self.sync_users(node_info.name)
         return True
     else:
         return False
Example #18
    def get_replication_dest_clusters(self):
        consul_api = ConsulAPI()
        dest_clusters_list = consul_api.get_replication_destination_clusters()

        for key, value in dest_clusters_list.iteritems():
            # ==========  Decrypt ssh private key  ========== #
            encrypted_key = value.ssh_private_key
            rsa_decrypt = RSAEncryption()
            prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
            decrypted_key = rsa_decrypt.decrypt_private(encrypted_key, prv_key)

            # ==========  Update entity  ========== #
            value.ssh_private_key = decrypted_key
        return dest_clusters_list
Example #19
    def _get_nodes(self):
        consul_api = ConsulAPI()
        # Get all PetaSAN nodes (management or storage).
        node_list = consul_api.get_node_list()
        # Get online nodes from consul.
        consul_members = consul_api.get_consul_members()
        petasan_node_list = []
        for i in node_list:
            if not i.is_iscsi:
                continue
            if i.name in consul_members:
                petasan_node_list.append(i.name)

        return petasan_node_list
 def __get_down_node_list(self):
     down_node_list = []
     try:
         con_api = ConsulAPI()
         node_list = con_api.get_node_list()
         consul_members = con_api.get_consul_members()
         for i in node_list:
             if i.name not in consul_members:
                 i.status = NodeStatus.down
                 down_node_list.append(i.name)
         return down_node_list
     except Exception as e:
         logger.exception("error get down node list")
         return down_node_list
Example #21
    def set_new_assignments(self, paths_assignment_info):
        logger.info("Set new assignment.")
        if self.get_current_reassignment() is not None:
            raise Exception("There is already running assignment.")

        config_api = ConfigAPI()
        consul_api = ConsulAPI()
        logger.info("Delete old assignments.")
        consul_api.delete_assignments()
        session = consul_api.get_new_session_ID(
            config_api.get_assignment_session_name(),
            configuration().get_node_name(), True)
        if consul_api.lock_key(config_api.get_consul_assignment_path(),
                               session, "root"):
            logger.info("Lock assignment root.")
            for path_assignment_info in paths_assignment_info:
                path_assignment_info.status = ReassignPathStatus.pending
                consul_api.set_path_assignment(
                    path_assignment_info,
                    self._get_node_session(path_assignment_info.target_node))
                logger.info(
                    "New assignment for {} ,disk {}, from node {}  and to node {} with status {}"
                    .format(path_assignment_info.ip,
                            path_assignment_info.disk_id,
                            path_assignment_info.node,
                            path_assignment_info.target_node,
                            path_assignment_info.status))
        else:
            logger.error("Can't lock paths assignment key.")
            raise Exception("Can't lock paths assignment key.")
    def start(self, disk_id, pool):
        try:
            ceph_api = CephAPI()
            consul_api = ConsulAPI()

            attr = ceph_api.read_image_metadata(
                ConfigAPI().get_image_name_prefix() + disk_id, pool)
            petasan_meta = attr.get(ConfigAPI().get_image_meta_key())
            disk_meta = DiskMeta()
            if petasan_meta:
                disk_meta.load_json(petasan_meta)
            else:
                return Status.error

            consul_api.add_disk_resource(disk_meta.id, "disk")
            consul_api.add_disk_pool(disk_meta.id, pool)
            i = 0
            for p in disk_meta.paths:
                i += 1
                consul_api.add_disk_resource(
                    "/".join(["", disk_meta.id, str(i)]), None)

        except Exception as e:
            logger.error("Can not start disk %s" % disk_id)
            logger.exception(e.message)
            return Status.error
        return Status.done
Example #23
    def clean_source_node(self, ip, disk_id):
        if not self.update_path(ip, ReassignPathStatus.moving):
            return False

        # pool = CephAPI().get_pool_bydisk(disk_id)
        pool = self._get_pool_by_disk(disk_id)
        if not pool:
            logger.error('Could not find pool for disk ' + disk_id)
            return False

        disk = CephAPI().get_disk_meta(disk_id, pool)
        paths_list = disk.paths
        disk_path = None
        path_index = -1

        for i in xrange(0, len(paths_list)):
            path_str = paths_list[i]
            path = Path()
            path.load_json(json.dumps(path_str))
            if path.ip == ip:
                disk_path = path
                path_index = i
                break
        if disk_path:
            self._clean_iscsi_config(disk_id, path_index, disk.iqn)
            network = Network()
            NetworkAPI().delete_ip(path.ip, path.eth, path.subnet_mask)
            if network.is_ip_configured(ip):
                logger.error(
                    "Move action,cannot clean newtwork config for disk {} path {}."
                    .format(disk_id, path_index))
                self.update_path(ip, ReassignPathStatus.failed)
                return False
            logger.info(
                "Move action,clean newtwork config for disk {} path {}.".
                format(disk_id, path_index))
            key = self.__app_conf.get_consul_disks_path(
            ) + disk_id + "/" + str(path_index + 1)
            consul_api = ConsulAPI()
            session = self._get_node_session(configuration().get_node_name())
            if ConsulAPI().is_path_locked_by_session(key, session):
                consul_api.release_disk_path(key, session, None)
                logger.info("Move action,release disk {} path {}.".format(
                    disk_id, path_index + 1))
        else:
            self.update_path(ip, ReassignPathStatus.failed)
            return False

        return True
Example #24
 def get_current_reassignment(self):
     paths = ConsulAPI().get_assignments()
     if paths is not None:
         for ip, path_assignment_info in paths.iteritems():
             if not hasattr(path_assignment_info, "session"):
                 logger.info("Path {} not locked by node.".format(
                     path_assignment_info.ip))
             if not hasattr(
                     path_assignment_info,
                     "session") and path_assignment_info.status not in [
                         ReassignPathStatus.succeeded,
                         ReassignPathStatus.failed
                     ]:
                 path_assignment_info.status = ReassignPathStatus.failed
     return paths
    def get_node_list(self):
        consul_api = ConsulAPI()
        # Get all PetaSAN nodes (management or storage).
        node_list = consul_api.get_node_list()
        # Get online nodes from consul.
        consul_members = consul_api.get_consul_members()
        petasan_node_list = []
        for i in node_list:
            if i.name in consul_members:
                i.status = NodeStatus.up
            else:
                i.status = NodeStatus.down
            petasan_node_list.append(i)

        return petasan_node_list
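A brief usage sketch, reusing the ManageNode().get_node_list() call that already appears in the update_auth_pools and delete_replication_user examples:

# Report each node with the up/down status set by get_node_list().
for node in ManageNode().get_node_list():
    print("{} is {}".format(node.name, node.status))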
Example #26
    def get_replication_dest_cluster(self, cluster_name):
        consul_api = ConsulAPI()
        destination_cluster = consul_api.get_replication_destination_cluster(
            cluster_name)

        # ==========  Decrypt ssh private key  ========== #
        encrypted_key = destination_cluster.ssh_private_key
        rsa_decrypt = RSAEncryption()
        prv_key = rsa_decrypt.get_key(rsa_decrypt.prv_key_path)
        decrypted_key = rsa_decrypt.decrypt_private(encrypted_key, prv_key)

        # ==========  Update entity  ========== #
        destination_cluster.ssh_private_key = decrypted_key

        return destination_cluster
Example #27
    def auto(self, type=1):
        logger.info("User start auto reassignment paths.")
        assignments_stats = self.get_assignments_stats()
        if assignments_stats.is_reassign_busy:
            logger.error("There is already reassignment running.")
            raise Exception("There is already reassignment running.")

        ConsulAPI().drop_all_node_sessions(
            self.__app_conf.get_consul_assignment_path(),
            configuration().get_node_name())
        sleep(3)

        assignments_stats.paths = [
            path for path in assignments_stats.paths
            if len(path.node.strip()) > 0 and path.status == -1
        ]
        self.__context.paths = assignments_stats.paths
        self.__context.nodes = assignments_stats.nodes
        for plugin in self._get_new_plugins_instances(auto_plugins):
            if plugin.is_enable() and plugin.get_plugin_id() == type:
                paths_assignments = plugin.get_new_assignments()
                if len(paths_assignments) == 0:
                    logger.info("There is no node under average.")
                    return
                self.set_new_assignments(paths_assignments)
                break
        self.run()
Example #28
    def __wait_before_lock(self, path=None):

        disk_id, path_index = str(path).split("/")
        wait_time = 0
        if path:
            # 1- Calc wait time if path has siblings.
            wait_time = int(self.__app_conf.get_siblings_paths_delay()) * int(
                self.__paths_per_disk_local.get(disk_id, 0))

        logger.debug("Wait time for siblings is {}.".format(wait_time))
        total_nodes = len(ConsulAPI().get_consul_members())
        # 2- Calc average paths per node.
        average_node_paths = float(
            self.__total_cluster_paths) / float(total_nodes)
        # Calc the percent of local paths according to average paths.
        percent = float(self.__paths_per_session.get(self.__session,
                                                     0)) / average_node_paths
        # 3- Calc total wait time
        if self.__last_acquire_succeeded:
            wait_time += int(
                self.__app_conf.get_average_delay_before_lock()) * percent
        else:
            logger.debug("Skipping wait time for average delay.")
        logger.debug(
            "Wait time depending on average and siblings is {}.".format(
                math.ceil(wait_time)))
        sleep(math.ceil(wait_time))
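A worked numeric example of the wait-time calculation above, with illustrative configuration values:

# Illustrative numbers only (all values are assumptions):
#   siblings delay = 2s, 1 sibling path already held locally -> wait_time = 2 * 1 = 2s
#   12 total cluster paths across 3 consul members           -> average = 12 / 3 = 4 paths per node
#   this session already holds 2 paths                       -> percent = 2 / 4 = 0.5
#   average delay before lock = 10s, last acquire succeeded  -> wait_time += 10 * 0.5 = 5s
#   total sleep = ceil(2 + 5) = 7 seconds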
Example #29
    def __fencing(self, node_name):
        maintenance = ManageMaintenance()
        if maintenance.get_maintenance_config(
        ).fencing == MaintenanceConfigState.off:
            logger.warning(
                "Fencing action will not fire the admin stopped it,the cluster is in maintenance mode."
            )
            return

        node_list = ConsulAPI().get_node_list()
        for node in node_list:

            if str(node.name) == node_name:
                if Network().ping(node.backend_2_ip):
                    logger.info("This node will stop node {}/{}.".format(
                        node_name, node.backend_2_ip))
                    ssh().call_command(node.backend_2_ip, " poweroff ", 5)
                    break
                elif Network().ping(node.management_ip):
                    logger.info("This node will stop node {}/{}.".format(
                        node_name, node.management_ip))
                    ssh().call_command(node.management_ip, " poweroff ", 5)
                    break
                elif Network().ping(node.backend_1_ip):
                    logger.info("This node will stop node {}/{}.".format(
                        node_name, node.backend_1_ip))
                    ssh().call_command(node.backend_1_ip, " poweroff ", 5)
                    break
Example #30
    def _get_pool_by_disk(self, disk_id):
        consul_api = ConsulAPI()
        ceph_api = CephAPI()
        pool = consul_api.get_disk_pool(disk_id)
        if pool:
            logger.info('Found pool:{} for disk:{} via consul'.format(
                pool, disk_id))
            return pool
        pool = ceph_api.get_pool_bydisk(disk_id)
        if pool:
            logger.info('Found pool:{} for disk:{} via ceph'.format(
                pool, disk_id))
            return pool

        logger.error('Could not find pool for disk ' + disk_id)
        return None