def run(self):
        '''
        Executed when start() is called on a PoolCheckerThread instance.
        '''
        # Determine which Ceph user is running this function and get its keyring file path #
        # ================================================================================= #
        ceph_auth = CephAuthenticator()
        cmd = 'ceph pg ls-by-pool {} --format json-pretty {} --cluster {}'.format(
            self.pool, ceph_auth.get_authentication_string(),
            self.cluster_name)
        ret, stdout, stderr = exec_command_ex(cmd)

        if ret != 0:
            if stderr and ('Connection timed out' in stderr
                           or 'error connecting' in stderr):
                logger.error('Error in Ceph Connection cmd:' + cmd)
                raise CephException(CephException.CONNECTION_TIMEOUT,
                                    'ConnectionTimeError')

            logger.error('General error in Ceph cmd:' + cmd)
            raise CephException(CephException.GENERAL_EXCEPTION,
                                'GeneralCephException')

        output = stdout
        pgdp = PGDumpParser()
        pgdp.parse(output)

        self.active_pgs_num = pgdp.active_pgs
        self.active_osds_num = pgdp.active_osds

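
For context, a minimal sketch of the thread class that run() assumes. The attribute names are taken from run() itself and from the PoolCheckerThread(cluster_name, pool) call in get_active_pools() further down; the real PetaSAN class may carry more state:

import threading

class PoolCheckerThread(threading.Thread):
    def __init__(self, cluster_name, pool):
        super(PoolCheckerThread, self).__init__()
        self.cluster_name = cluster_name
        self.pool = pool
        self.active_pgs_num = 0   # populated by run()
        self.active_osds_num = 0  # populated by run()
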
    def do_connect(self):
        cluster = None
        try:
            conf_api = ConfigAPI()

            # Determine which Ceph user is running this function #
            # =================================================== #
            users = Users()
            user_name = users.get_current_system_user().strip()
            if user_name == "root":
                user_name = "admin"

            # Get ceph user's keyring file path #
            # ================================= #
            ceph_auth = CephAuthenticator()

            cluster_name = configuration().get_cluster_name()

            cluster = rados.Rados(conffile=conf_api.get_ceph_conf_path(cluster_name),
                                  conf=dict(keyring=ceph_auth.get_keyring_path()), rados_id=user_name)
            cluster.connect()

            return cluster

        except Exception as e:
            logger.error("do_connect() Cannot connect to ceph cluster.")
            logger.exception(e)

            if cluster is not None:
                try:
                    cluster.shutdown()
                except Exception:
                    pass

            return -1
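
A hedged usage sketch for do_connect(), assuming it lives on CephAPI (the -1 sentinel return is taken from the code above; get_fsid() is a standard librados call):

ceph_api = CephAPI()
cluster = ceph_api.do_connect()
if cluster == -1:
    logger.error('Could not open a rados connection.')
else:
    try:
        print(cluster.get_fsid())  # any librados call works here
    finally:
        cluster.shutdown()
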
Example #3
def set_disk_metadata(args):
    io_ctx = None
    ceph_api = CephAPI()
    cluster = None

    try:
        cluster = ceph_api.connect()
        io_ctx = cluster.open_ioctx(args.pool)

        # Determine which Ceph user is running this function and get its keyring file path #
        ceph_auth = CephAuthenticator()

        config = configuration()
        cluster_name = config.get_cluster_name()

        if args.file:
            with open(str(args.file), 'r') as f:  # avoid shadowing the builtin 'file'
                disk_metadata_str = f.read()

        else:
            disk_metadata = sys.stdin.readlines()
            disk_metadata_str = ''.join(disk_metadata)  # readlines() already yields strings

        # Read the object meta :
        cmd = 'rbd info {}/{} {} --cluster {} | grep rbd_data'.format(
            args.pool, str(args.image), ceph_auth.get_authentication_string(),
            cluster_name)
        ret, stdout, stderr = exec_command_ex(cmd)

        if ret != 0:
            cluster.shutdown()
            print("Cannot get image meta object from rbd header.")
            sys.exit(-1)

        rbd_data = stdout.rstrip().strip()
        dot_indx = rbd_data.rfind(".")

        image_id = rbd_data[(dot_indx + 1):]

        meta_object = "rbd_header." + image_id
        attr_object = meta_object

        io_ctx.set_xattr(str(attr_object),
                         str(ConfigAPI().get_image_meta_key()),
                         disk_metadata_str)
        io_ctx.close()
        cluster.shutdown()
        sys.exit(0)

    except Exception as e:
        print("Error in executing script function : set_disk_metadata , " +
              str(e))
        if io_ctx is not None:
            io_ctx.close()
        if cluster is not None:
            cluster.shutdown()
        sys.exit(-1)
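
A sketch of the subcommand wiring this function appears to expect. The 'write' parser key and the --image/--pool/--file flags are inferred from clear_disk() further down, so treat the exact names as assumptions:

import argparse

parser = argparse.ArgumentParser()
sub = parser.add_subparsers(dest='cmd')

write_parser = sub.add_parser('write')       # parser key inferred from clear_disk()
write_parser.add_argument('--image', required=True)
write_parser.add_argument('--pool', required=True)
write_parser.add_argument('--file')          # omit to read the metadata from stdin
write_parser.set_defaults(func=set_disk_metadata)

args = parser.parse_args()
args.func(args)
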
Example #4
def read_disks_metadata(args):
    io_ctx = None
    ceph_api = CephAPI()
    cluster = None

    try:
        cluster = ceph_api.connect()
        io_ctx = cluster.open_ioctx(args.pool)

        # Determine which Ceph user is running this function and get its keyring file path #
        ceph_auth = CephAuthenticator()

        config = configuration()
        cluster_name = config.get_cluster_name()

        cmd = "rbd info " + args.pool + "/" + str(
            args.image) + " " + ceph_auth.get_authentication_string(
            ) + " --cluster " + cluster_name + " | grep rbd_data"

        ret, stdout, stderr = exec_command_ex(cmd)

        if ret != 0:
            cluster.shutdown()
            print("Cannot get image meta object from rbd header.")
            sys.exit(-1)

        rbd_data = stdout.rstrip().strip()
        dot_indx = rbd_data.rfind(".")

        image_id = rbd_data[(dot_indx + 1):]

        rbd_header_object = "rbd_header." + image_id

        try:
            ret = io_ctx.get_xattr(rbd_header_object, meta_key)
        except Exception:
            # Fall back to the header object name without its last character.
            ret = io_ctx.get_xattr(rbd_header_object[:-1], meta_key)

        io_ctx.close()
        cluster.shutdown()

        if ret:
            print(ret)
            sys.stdout.flush()
            sys.exit(0)
        else:
            # Non-PetaSAN Disk :
            sys.exit(-1)

    except Exception as e:
        print("Error in executing script function : read_disks_metadata , " +
              str(e))
        if io_ctx is not None:
            io_ctx.close()
        if cluster is not None:
            cluster.shutdown()
        sys.exit(-1)
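
The matching 'read' subparser, under the same naming assumptions as the 'write' sketch above; the two subcommands chain through a shell pipe as clear_disk() below shows (script path and image names illustrative):

import argparse

parser = argparse.ArgumentParser()
sub = parser.add_subparsers(dest='cmd')

read_parser = sub.add_parser('read')
read_parser.add_argument('--image', required=True)
read_parser.add_argument('--pool', required=True)
read_parser.set_defaults(func=read_disks_metadata)

args = parser.parse_args()
args.func(args)

# e.g.:  disk_meta.py read --image image-00001 --pool rbd \
#            | disk_meta.py write --image tmp_disk_00001_42 --pool rbd
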
Example #5
    def rollback_to_snapshot(self, pool_name, image_name, snap_name):
        # Determine which Ceph user is running this function and get its keyring file path #
        ceph_auth = CephAuthenticator()

        config = configuration()
        cluster_name = config.get_cluster_name()
        cmd = 'rbd snap rollback {}/{}@{} {} --cluster {}'.format(
            pool_name, image_name, snap_name,
            ceph_auth.get_authentication_string(), cluster_name)

        ret, stdout, stderr = exec_command_ex(cmd)
        if ret != 0:
            logger.error('General error in Ceph cmd : ' + cmd)
            raise CephException(CephException.GENERAL_EXCEPTION, 'GeneralCephException')

        return True
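
A hedged call example; the pool, image, and snapshot names are illustrative, and the method is assumed to live on CephAPI like get_all_images() below:

try:
    CephAPI().rollback_to_snapshot('rbd', 'image-00001', 'before-upgrade')
except CephException as e:
    logger.error('Rollback failed: ' + e.message)
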
Example #6
    def get_all_images(self, pool_name):
        # Determine which Ceph user is running this function and get its keyring file path #
        ceph_auth = CephAuthenticator()

        images = []
        config = configuration()
        cluster_name = config.get_cluster_name()

        cmd = 'rbd ls {} {} --cluster {}'.format(
            pool_name, ceph_auth.get_authentication_string(), cluster_name)

        ret, stdout, stderr = exec_command_ex(cmd)
        if ret != 0:
            logger.error('General error in Ceph cmd : ' + cmd)
            raise CephException(CephException.GENERAL_EXCEPTION, 'GeneralCephException')

        for image in stdout.splitlines():
            images.append(image)

        return images
def readImageMetaData(ioctx, image, pool):
    ret = None

    # Determine which Ceph user is running this function and get its keyring file path #
    ceph_auth = CephAuthenticator()

    config = configuration()
    cluster_name = config.get_cluster_name()

    try:
        cmd = "rbd info " + pool + "/" + str(
            image) + " " + ceph_auth.get_authentication_string(
            ) + " --cluster " + cluster_name + " | grep rbd_data"
        ret, stdout, stderr = exec_command_ex(cmd)

        if ret != 0:
            logger.error("Cannot get image meta object from rbd header.")
            return None

        rbd_data = stdout.rstrip().strip()
        dot_indx = rbd_data.rfind(".")

        image_id = rbd_data[(dot_indx + 1):]

        rbd_header_object = "rbd_header." + image_id

        try:
            ret = ioctx.get_xattr(rbd_header_object, meta_key)
        except Exception:
            # Fall back to the header object name without its last character.
            ret = ioctx.get_xattr(rbd_header_object[:-1], meta_key)

    except Exception:
        return None

    return ret
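
A hedged usage sketch; the pool and image names are illustrative, and the cleanup mirrors the other helpers in this listing:

cluster = CephAPI().connect()
io_ctx = cluster.open_ioctx('rbd')
try:
    meta = readImageMetaData(io_ctx, 'image-00001', 'rbd')
    if meta is None:
        print('Non-PetaSAN disk or metadata unavailable.')
    else:
        print(meta)
finally:
    io_ctx.close()
    cluster.shutdown()
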
def clear_disk(args):
    disk_id = args.disk_id
    image_name = "image-" + disk_id

    try:
        # Determine which Ceph user is running this function and get its keyring file path #
        # --------------------------------------------------------------------------------- #
        ceph_auth = CephAuthenticator()

        config = configuration()
        cluster_name = config.get_cluster_name()

        # Get disk metadata :
        # -------------------
        ceph_api = CephAPI()
        disk_metadata = ceph_api.get_diskmeta(disk_id)

        # Get pool name :
        # ---------------
        pool_name = disk_metadata.pool
        data_pool = ""

        # Check if disk has been created on replicated pool or erasure pool :
        # -------------------------------------------------------------------
        if len(disk_metadata.data_pool) > 0:
            data_pool = disk_metadata.data_pool

        tmp_image_name = "tmp_disk_" + disk_metadata.id

        # (1.) Check if a previous tmp image for this disk still exists :
        # ================================================================
        images_list = ceph_api.get_all_images(pool_name)

        for image in images_list:
            if tmp_image_name in image:
                # Delete image #
                cmd = "rbd rm {}/{} {} --cluster {}".format(
                    pool_name, image, ceph_auth.get_authentication_string(),
                    cluster_name)
                if not call_cmd(cmd):
                    print(
                        "Error : clear_disk.py script : cannot remove tmp image ,\ncmd : "
                        + cmd)
                    sys.exit(-1)

        print(
            "Stage 1 :\n\tCheck if a previous tmp image for this disk still exists > (Completed)"
        )
        logger.info(
            "Stage 1 :\n\tCheck if a previous tmp image for this disk still exists > (Completed)"
        )

        # (2.) Stop old disk :
        # ====================
        consul_api = ConsulAPI()
        kv = consul_api.find_disk(disk_id)
        if kv is not None:
            manage_disk = ManageDisk()
            status = manage_disk.stop(disk_id)

            if status != Status.done:
                print('Error : Cannot stop disk , id = ' + disk_id)
                sys.exit(-1)

            print("Stage 2 :\n\tStop old disk > (Completed)")
            logger.info("Stage 2 :\n\tStop old disk > (Completed)")
            time.sleep(3)

            # (3.) Check if old disk is stopped or not :
            # ==========================================
            if len(data_pool) > 0:
                pool_type = "erasure"
                _confirm_disk_stopped(data_pool, disk_id, pool_type)
            else:
                pool_type = "replicated"
                _confirm_disk_stopped(pool_name, disk_id, pool_type)

            print(
                "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)"
            )
            logger.info(
                "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)"
            )

        else:
            print("Stage 2 :\n\tStop old disk > (Completed)")
            logger.info("Stage 2 :\n\tStop old disk > (Completed)")

            print(
                "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)"
            )
            logger.info(
                "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)"
            )
            print('\tclear_disk.py script : disk {} is already stopped'.format(
                disk_id))

        # (4.) Create a tmp image (not PetaSAN image) :
        # =============================================
        # Generate a random value between 1 and 100000 #
        random_no = str(random.randint(1, 100000))
        tmp_image_name = tmp_image_name + "_" + str(random_no)
        image_size = disk_metadata.size * 1024

        if len(data_pool) > 0:
            cmd = "rbd create {}/{} --size {} --data-pool {} {} --cluster {}".format(
                pool_name, tmp_image_name, image_size, data_pool,
                ceph_auth.get_authentication_string(), cluster_name)
        else:
            cmd = "rbd create {}/{} --size {} {} --cluster {}".format(
                pool_name, tmp_image_name, image_size,
                ceph_auth.get_authentication_string(), cluster_name)

        if not call_cmd(cmd):
            print(
                "Error : clear_disk.py script : cannot create new tmp image ,\ncmd : "
                + cmd)
            sys.exit(-1)

        print("Stage 4 :\n\tCreate a tmp image called ( " + tmp_image_name +
              " ) > (Completed)")
        logger.info("Stage 4 :\n\tCreate a tmp image called ( " +
                    tmp_image_name + " ) > (Completed)")

        # (5.) Run script to copy "old disk" metadata to new "tmp_disk" :
        # ===============================================================
        metadata_script_file = ConfigAPI().get_disk_meta_script_path()

        # Function : read_disks_metadata :
        parser_key_1 = "read"
        arg_1 = "--image"
        arg_2 = "--pool"

        # Function : set_disk_metadata :
        parser_key_2 = "write"
        arg_3 = "--file"

        cmd = metadata_script_file + " " + parser_key_1 + " " + arg_1 + " " + image_name + " " + arg_2 + " " + pool_name +\
              " | " + metadata_script_file + " " + parser_key_2 + " " + arg_1 + " " + tmp_image_name + " " + arg_2 + " " + pool_name

        if not call_cmd(cmd):
            print(
                "Error : clear_disk.py script : cannot copy metadata from old disk to new tmp image ,\ncmd : "
                + cmd)
            sys.exit(-1)

        print(
            "Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)"
        )
        logger.info(
            "Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)"
        )

        time.sleep(3)

        # (6.) Remove metadata of old disk :
        # ===========================================================
        old_image_name = str(ceph_api.conf_api.get_image_name_prefix() +
                             disk_metadata.id)
        confirm = ceph_api.remove_disk_metadata(old_image_name,
                                                disk_metadata.pool)

        if not confirm:
            print(
                "Error : clear_disk.py script : cannot remove metadata of old disk"
            )
            # sys.exit(-1)

        print("Stage 6 :\n\tRemove metadata of old disk > (Completed)")
        logger.info("Stage 6 :\n\tRemove metadata of old disk > (Completed)")

        # (7.) Rename old disk image to "deleted-" + disk_id + "-" + random_no :
        # =======================================================================
        new_image_name = "deleted-" + disk_metadata.id + "-" + random_no
        cmd = "rbd mv {}/{} {} {} --cluster {}".format(
            pool_name, image_name, new_image_name,
            ceph_auth.get_authentication_string(), cluster_name)
        if not call_cmd(cmd):
            print(
                "Error : clear_disk.py script : cannot rename old image from {} to {} ,\ncmd : {}"
                .format(image_name, new_image_name, cmd))
            sys.exit(-1)

        print("Stage 7 :\n\tRename old disk image name with ( " +
              new_image_name + " ) > (Completed)")
        logger.info("Stage 7 :\n\tRename old disk image name with ( " +
                    new_image_name + " ) > (Completed)")

        time.sleep(5)

        # (8.) Rename "tmp_disk" with old disk image name :
        # =================================================
        cmd = "rbd mv {}/{} {} {} --cluster {}".format(
            pool_name, tmp_image_name, image_name,
            ceph_auth.get_authentication_string(), cluster_name)
        if not call_cmd(cmd):
            print(
                "Error : clear_disk.py script : cannot rename \"tmp_disk\" from {} to {} ,\ncmd : {}"
                .format(tmp_image_name, image_name, cmd))
            sys.exit(-1)

        print(
            "Stage 8 :\n\tRename 'tmp_disk' to the old disk image name > (Completed)"
        )
        logger.info(
            "Stage 8 :\n\tRename 'tmp_disk' to the old disk image name > (Completed)"
        )

        time.sleep(5)

        jm = JobManager()
        job_id = jm.add_job(JobType.DELETE_DISK, new_image_name + ' ' + pool_name)

        print("Stage 9 :\n\tStart a job to remove old disk image , job id = " +
              str(job_id))
        logger.info(
            "Stage 9 :\n\tStart a job to remove old disk image , job id = " +
            str(job_id))

        sys.exit(0)

    except PoolException as e:
        print("Error : PoolException , {}".format(e.message))
        logger.error("Clear Disk Error : PoolException , {}".format(e.message))
        sys.exit(-1)

    except DiskListException as e:
        print("Error : DiskListException , {}".format(e.message))
        logger.error("Clear Disk Error : DiskListException , {}".format(
            e.message))
        sys.exit(-1)

    except CephException as e:
        if e.id == CephException.GENERAL_EXCEPTION:
            print("Error : CephException , {}".format(e.message))
        logger.error("Clear Disk Error : CephException , {}".format(e.message))
        sys.exit(-1)

    except MetadataException as e:
        print("Error : MetadataException , {}".format(e.message))
        logger.error("Clear Disk Error : MetadataException , {}".format(
            e.message))
        sys.exit(-1)

    except Exception as e:
        print("Error : Exception , {}".format(str(e)))
        logger.error("Clear Disk Error : Exception , {}".format(str(e)))
        sys.exit(-1)
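
A sketch of the entry point clear_disk() implies; the --disk_id flag name is inferred from args.disk_id and is an assumption:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Replace a PetaSAN disk image with a clean tmp image.')
    parser.add_argument('--disk_id', required=True)
    clear_disk(parser.parse_args())
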
Example #9
    def _read_file_lines(self, backup=False):
        # Determine which Ceph user is running this function and get its keyring file path #
        ceph_auth = CephAuthenticator()

        call_cmd('mkdir -p ' + self.CRUSH_SAVE_PATH)
        cluster_name = configuration().get_cluster_name()

        rand = self._get_rand_string(6)
        bin_file = self.CRUSH_SAVE_PATH + 'crushmap-tmp-' + rand + '.bin'
        txt_file = self.CRUSH_SAVE_PATH + 'crushmap-tmp-' + rand + '.txt'

        cmd = 'ceph osd getcrushmap -o {} {} --cluster {}'.format(
            bin_file, ceph_auth.get_authentication_string(), cluster_name)
        ret, stdout, stderr = exec_command_ex(cmd)
        if ret != 0:
            if stderr and ('Connection timed out' in stderr
                           or 'error connecting' in stderr):
                logger.error('Error in Ceph Connection cmd:' + cmd)
                raise CephException(CephException.CONNECTION_TIMEOUT,
                                    'Connection Timeout Error')

            logger.error('General error in Ceph cmd:' + cmd + ' error:' +
                         stderr)
            raise CephException(CephException.GENERAL_EXCEPTION,
                                'General Ceph Error')

        cmd = 'crushtool -d ' + bin_file + ' -o ' + txt_file
        if not call_cmd(cmd):
            raise CrushException(CrushException.DECOMPILE,
                                 'Crush Decompile Error')

        with open(txt_file, 'r') as f:
            lines = f.readlines()
        lines = [line.strip() for line in lines]

        section = 'start'
        # for section tags see src/crush/CrushCompiler.cc decompile

        for line in lines:
            if len(line) == 0:
                continue
            if line.startswith('# begin crush map'):
                section = 'tunables'
                continue
            elif line.startswith('# devices'):
                section = 'devices'
                continue
            elif line.startswith('# types'):
                section = 'types'
                continue
            elif line.startswith('# buckets'):
                section = 'buckets'
                continue
            elif line.startswith('# rules'):
                section = 'rules'
                continue

            elif line.startswith('# choose_args'):
                section = 'end'
                break
            elif line.startswith('# end crush map'):
                section = 'end'
                break

            if section == 'tunables':
                self.lines_tunables.append(line)
            elif section == 'devices':
                self.lines_devices.append(line)
            elif section == 'types':
                self.lines_types.append(line)
            elif section == 'buckets':
                self.lines_buckets.append(line)
            elif section == 'rules':
                self.lines_rules.append(line)

        if backup:
            self._backup(txt_file)

        call_cmd('rm ' + txt_file)
        call_cmd('rm ' + bin_file)
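
A hedged usage sketch; the owning class name is hypothetical, but the lines_* attributes are the ones _read_file_lines() fills above (assumed to be initialized as empty lists in the owner's __init__):

crush = CrushMapLoader()                # hypothetical owner class
crush._read_file_lines(backup=True)
print('\n'.join(crush.lines_buckets))   # e.g. inspect the bucket section
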
    def get_active_pools(self):
        active_pools = []
        ceph_api = CephAPI()
        cluster = None

        try:
            # Get which ceph user is using this function #
            # ========================================== #
            # users = Users()
            # user_name = users.get_current_system_user().strip()
            # if user_name == "root":
            #     user_name = "admin"
            # # Get ceph user's keyring file path #
            # # ================================= #
            # cluster = rados.Rados(conffile=ConfigAPI().get_ceph_conf_path(cluster_name), conf=dict(keyring=ceph_auth.get_keyring_path()), rados_id=user_name)
            # cluster.connect()

            cluster_name = configuration().get_cluster_name()
            ceph_auth = CephAuthenticator()
            cluster = ceph_api.connect()

            # Get the full list of pools:
            pools = cluster.list_pools()
            if not pools:
                cluster.shutdown()
                return active_pools

            # Create a list of threads:
            threads = []
            for pool in pools:
                thread = PoolCheckerThread(cluster_name, pool)
                thread.daemon = True  # setDaemon() is deprecated
                thread.start()  # Start running the threads!
                threads.append(thread)

            end_time = time() + self.timeout
            for thread in threads:
                wait = end_time - time()
                if wait < 0:
                    break
                thread.join(wait)  # wait up to the remaining timeout for this thread

            for thread in threads:
                # Get pg_num for current thread pool:
                cmd = 'ceph osd pool get {} pg_num {} --cluster {}'.format(
                    thread.pool, ceph_auth.get_authentication_string(),
                    thread.cluster_name)
                ret, stdout, stderr = exec_command_ex(cmd)

                if ret != 0:
                    if stderr and ('Connection timed out' in stderr
                                   or 'error connecting' in stderr):
                        logger.error('Error in Ceph Connection cmd:' + cmd)
                        cluster.shutdown()
                        raise CephException(CephException.CONNECTION_TIMEOUT,
                                            'ConnectionTimeError')

                    logger.error('General error in Ceph cmd:' + cmd)
                    cluster.shutdown()
                    raise CephException(CephException.GENERAL_EXCEPTION,
                                        'GeneralCephException')

                # stdout looks like "pg_num: 256"; take the value token.
                output_ls = stdout.split()
                pool_pg_num = output_ls[1]

                if not thread.is_alive() and thread.active_pgs_num > 0:
                    if thread.active_pgs_num == int(pool_pg_num):
                        active_pools.append(thread.pool)

            active_pools.sort()

        except Exception as e:
            logger.error("PoolChecker error : " + str(e))

        if cluster is not None:
            cluster.shutdown()

        return active_pools
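
A hedged usage sketch; PoolChecker is the hypothetical owner class implied by self.timeout and the log prefix above, and its constructor signature is an assumption:

checker = PoolChecker()
pools = checker.get_active_pools()
print('Pools with all PGs active: ' + ', '.join(pools))
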