Example #1
    def save_current_tunings(self, ceph, lio, post_script, storage_engine):
        config_api = ConfigAPI()
        path = config_api.get_current_tunings_path()

        if not os.path.exists(path):
            os.makedirs(path)
        with open(path + config_api.get_ceph_tunings_file_name(), 'w', ) as f:
            f.write(ceph)

        with open(path + config_api.get_lio_tunings_file_name(), 'w', ) as f:
            f.write(lio)

        with open(path + config_api.get_post_deploy_script_file_name(), 'w', ) as f:
            f.write(post_script)
        logger.info("Current tuning configurations saved.")

        # Save "storage_engine" in Cluster info #
        # ------------------------------------- #
        try:
            ci = self.get_cluster_info()
            ci.storage_engine = storage_engine
            self.set_cluster_network_info(ci)

        except Exception as ex:
            logger.error("Cannot add storage engine to cluster info , {}".format(ex.message))
Example #2
    def _clean_iscsi_config(self, disk_id, path_index, iqn):

        logger.debug("Move action ,start clean disk {} path {}.".format(
            disk_id, path_index))

        lio_api = LioAPI()

        try:

            # Get tpgs for iqn.
            tpgs = lio_api.get_iqns_with_enabled_tpgs().get(iqn, None)
            if not iqn or not tpgs or len(tpgs) == 0:
                logger.info("Move action ,could not find ips for %s " %
                            disk_id)
            # Remove the assigned ips from our interfaces
            elif tpgs and len(tpgs) > 0:
                # Get assigned ips for each path.
                for tpg, ips in tpgs.iteritems():
                    if tpg == str(path_index + 1):
                        lio_api.disable_path(iqn, tpg)
                        logger.info(
                            "Move action, cleaned disk {} path {}.".format(
                                disk_id, path_index))
                        break
        except Exception as e:
            logger.error("Move action,could not clean disk path for %s" %
                         disk_id)
            return False
        logger.debug("Move action end clean disk {} path {}.".format(
            disk_id, path_index))
        return True
Example #3
def path_host(args):
    logger.info("Reassignment paths script invoked to run clean action.")
    if MangePathAssignment().clean_source_node(args.ip, args.disk_id):
        print "0"
        return

    print "-1"
Example #4
    def run(self):
        try:
            status = False
            consul = ConsulAPI()
            failed_jobs = consul.get_replication_failed_jobs()
            if len(failed_jobs) > 0:
                failed_jobs_str = ""
                for job_id, job_info in failed_jobs.iteritems():
                    failed_jobs_str += "\n job id: " + job_id + " job name: " + job_info.job_name
                    status = consul.delete_failed_job(job_id)
                result = Result()
                result.plugin_name = self.get_plugin_name()
                result.title = gettext("core_message_notify_failed_jobs_title")
                result.message = '\n'.join(
                    gettext("core_message_notify_failed_jobs_body").split(
                        "\\n")).format(failed_jobs_str)
                self.__context.results.append(result)
                logger.info(result.message)
                logger.info("status of deleting failed jobs from consul is " +
                            str(status))
        except Exception as e:
            logger.exception(e)
            logger.error(
                "An error occurred while ReplicationNotificationPlugin was running."
            )
Example #5
    def delete_cache(self, node_name, disk_name):
        ssh_obj = ssh()
        cmd = "python {} -disk_name {}".format(
            ConfigAPI().get_admin_delete_cache_job_script(), disk_name)
        stdout, stderr = ssh_obj.exec_command(node_name, cmd)
        logger.info("Start delete cache job {} ".format(stdout))
        return stdout
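delete_cache above, like delete_osd and add_journal later in this list, follows one pattern: build a "python <script> <args>" command line and run it on a remote node through the project's ssh wrapper. A minimal stand-in for such a wrapper, sketched with the system ssh client via subprocess (run_remote, the node name and the script path in the usage line are hypothetical, not project code):

import subprocess

def run_remote(node, cmd):
    # Run cmd on node via the system ssh client; return (stdout, stderr).
    proc = subprocess.Popen(['ssh', node, cmd],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return out, err

# Usage sketch:
# out, err = run_remote("node-01", "python /opt/scripts/delete_cache.py -disk_name sdb")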
Example #6
def manager(args):
    try:

        logger.info("Benchmark manager cmd. ")
        clients = args.c.split(',')
        if len(clients) < 1:
            print "No clients set."
            sys.exit(-1)

        cleanup = True
        if args.cleanup == "0":
            cleanup = False

        result = Benchmark().manager(args.type, args.d, args.t, clients,
                                     args.p, cleanup)

        result = result.write_json()
        # Write job passed flag
        sys.stdout.write(Benchmark().output_split_text)
        # Write output
        sys.stdout.write(result)

    except Exception as ex:
        logger.exception(ex.message)
        sys.exit(-1)
    sys.exit(0)
Example #7
def delete_osd_from_crush_map(osd_id):
    cluster_name = configuration().get_cluster_name()
    logger.info("Start remove osd.{} from crush map".format(osd_id))
    is_executing_without_err = True

    if not call_cmd("ceph --cluster {} osd out osd.{}".format(cluster_name, osd_id)):
        logger.error("Error executing ceph osd out osd.{}".format(osd_id))
        is_executing_without_err = False

    if not call_cmd("ceph --cluster {} osd crush remove osd.{}".format(cluster_name, osd_id)):
        logger.error("Error executing ceph osd crush remove osd.{}".format(osd_id))
        is_executing_without_err = False

    if not call_cmd("ceph --cluster {} auth del osd.{}".format(cluster_name, osd_id)):
        logger.error("Error executing ceph auth del osd.{}".format(osd_id))
        is_executing_without_err = False

    # Try to delete the osd completely from ceph; if the osd is still up, the next command will not execute
    if not call_cmd("ceph --cluster {} osd rm osd.{}".format(cluster_name, osd_id)):
        logger.warning("The osd is still up; you need to stop the osd service of osd.{}".format(osd_id))

    if is_executing_without_err:
        logger.info("osd.{} is removed from crush map".format(osd_id))
    else:
        logger.warning("osd.{} was removed from crush map with errors".format(osd_id))
Example #8
def test_active_clean_old():
    cluster_name = configuration().get_cluster_name()
    sleeps = [10, 15, 20, 25, 30, 40]
    tries = 5

    while tries:
        status = False
        try:
            out, err = exec_command(
                "ceph --cluster {} -f json pg stat".format(cluster_name))
            ceph_pg_stat = str(out).replace("'", '')
            ceph_pg_stat = json.loads(ceph_pg_stat)
            logger.info("Ceph status is " +
                        ceph_pg_stat['num_pg_by_state'][0]['name'])

            if str(ceph_pg_stat['num_pg_by_state'][0]
                   ['name']) == 'active+clean':
                status = True
            else:
                status = False
        except Exception as e:
            logger.error("Get ceph status returned error.\n" + e.message)

        if not status:
            tries -= 1
            sleep_seconds = sleeps.pop()
            logger.warning(
                'waiting %s seconds before retrying to check active+clean status',
                sleep_seconds)
            time.sleep(sleep_seconds)
        else:
            # Nautilus: call pool init when active:
            call_cmd('rbd pool init rbd')
            break
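The polling loop above retries a cluster status check with a fixed list of sleep intervals and a retry counter. The same pattern can be pulled into a small helper; this is only an illustrative sketch (wait_until and the check in the usage line are hypothetical, not project code):

import time
import logging

logger = logging.getLogger(__name__)

def wait_until(check, sleeps):
    # Call check() repeatedly, sleeping between attempts, until it returns
    # True or the list of sleep intervals is exhausted.
    for sleep_seconds in sleeps:
        if check():
            return True
        logger.warning('waiting %s seconds before retrying', sleep_seconds)
        time.sleep(sleep_seconds)
    return check()

# Usage sketch:
# wait_until(lambda: cluster_is_active_clean(), [10, 15, 20, 25, 30, 40])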
Example #9
    def check_mount(self):

        while True:
            if 2 < len(configuration().get_cluster_info().management_nodes):
                break
            sleep(30)

        cluster_info = configuration().get_cluster_info()

        #ip1 = cluster_info.management_nodes[0]['management_ip']
        #ip2 = cluster_info.management_nodes[1]['management_ip']

        ip1 = cluster_info.management_nodes[0]['backend_1_ip']
        ip2 = cluster_info.management_nodes[1]['backend_1_ip']

        cmd_mount = 'mount -t glusterfs  -o backupvolfile-server=' + ip2
        cmd_mount += ' ' + ip1 + ':' + GFS_VOL_NAME
        cmd_mount += ' ' + GFS_MOUNT_PATH + ' >/dev/null 2>&1'

        cmd_mount_test = 'mount | grep ' + GFS_MOUNT_PATH + ' >/dev/null 2>&1'

        while True:
            if subprocess.call(cmd_mount_test, shell=True) != 0:
                logger.info("GlusterFS mount attempt ")
                subprocess.call(cmd_mount, shell=True)
            sleep(30)

        return
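Example #9 tests for an existing mount by piping mount through grep in a shell. On Linux the same check can be made without spawning a shell by reading /proc/mounts; a small illustrative sketch (is_mounted is a hypothetical helper, not the project's code):

def is_mounted(mount_path):
    # Return True if mount_path is listed as a mount point in /proc/mounts.
    with open('/proc/mounts') as f:
        return any(line.split()[1] == mount_path for line in f)

# Usage sketch:
# if not is_mounted(GFS_MOUNT_PATH):
#     subprocess.call(cmd_mount, shell=True)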
Example #10
def get_next_partition_index(dev):
    """
    Get the next free partition index on a given device.

    :return: Index number (> 1 if there is already a partition on the device)
    or 1 if there is no partition table.
    """
    try:
        output, err = exec_command('parted --machine -- {} print'.format(dev))
        lines = output
    except subprocess.CalledProcessError as e:
        logger.info('cannot read partition index; assume it '
                    'isn\'t present\n (Error: %s)' % e)
        return 1

    if not lines:
        logger.error('parted failed to output anything')
        raise Exception('parted failed to output anything')
    logger.debug('get_free_partition_index: analyzing ' + lines)
    if ('CHS;' not in lines and
            'CYL;' not in lines and
            'BYT;' not in lines):
        logger.error('parted output expected to contain one of '
                     'CHS; CYL; or BYT; : ' + lines)
        raise Exception('unexpected parted output: ' + lines)
    if os.path.realpath(dev) not in lines:
        logger.error('parted output expected to contain ' + dev + ': ' + lines)
        raise Exception('unexpected parted output: ' + lines)
    _, partitions = lines.split(os.path.realpath(dev))
    numbers_as_strings = re.findall('^\d+', partitions, re.MULTILINE)
    partition_numbers = map(int, numbers_as_strings)
    if partition_numbers:
        return max(partition_numbers) + 1
    else:
        return 1
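To see what the parsing above operates on, here is a standalone sketch with made-up parted --machine output (the device name and partition layout are illustrative; the real function splits on os.path.realpath(dev) rather than a literal):

import re

sample = ("BYT;\n"
          "/dev/sdb:128GB:scsi:512:512:gpt:Example Disk:;\n"
          "1:1049kB:1074MB:1073MB:ext4::;\n"
          "3:1074MB:2147MB:1073MB:xfs::;\n")

_, partitions = sample.split('/dev/sdb')
numbers_as_strings = re.findall(r'^\d+', partitions, re.MULTILINE)
partition_numbers = list(map(int, numbers_as_strings))
print(max(partition_numbers) + 1 if partition_numbers else 1)  # prints 4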
Example #11
def create_osds_remote(remote_mons_ips_ls):
    config_api = ConfigAPI()
    remote_status = StatusReport()
    for remot_mon in remote_mons_ips_ls:
        ssh_obj = ssh()
        status = StatusReport()

        out, err = ssh_obj.exec_command(
            remot_mon,
            " python {} ".format(config_api.get_node_create_osd_script_path()))

        logger.info(" ".join([remot_mon, out]))

        if "/report/" in out:  # To avoid -- IndexError: list index out of range
            status.load_json(str(out.split("/report/")[1]))
        else:
            if err:
                status.load_json("Status Report Error , error : {}".format(
                    str(err)))
            else:
                status.load_json("Connection Error.")

        remote_status.failed_tasks.extend(status.failed_tasks)

        if not status.success:
            logger.error(
                "Cannot create osd for remote node {}".format(remot_mon))
            remote_status.success = False
            return remote_status

    return remote_status
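The "/report/" check in Example #11 guards the split against the IndexError mentioned in the comment when the marker is missing from the remote output. A tiny illustrative sketch of that convention (the marker position and JSON payload here are made up):

marker = "/report/"
stdout = "remote log output...\n" + marker + '{"success": true, "failed_tasks": []}'

payload = stdout.split(marker)[1] if marker in stdout else None
print(payload)  # {"success": true, "failed_tasks": []}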
Example #12
    def auto(self, type=1):
        logger.info("User start auto reassignment paths.")
        assignments_stats = self.get_assignments_stats()
        if assignments_stats.is_reassign_busy:
            logger.error("There is already reassignment running.")
            raise Exception("There is already reassignment running.")

        ConsulAPI().drop_all_node_sessions(
            self.__app_conf.get_consul_assignment_path(),
            configuration().get_node_name())
        sleep(3)

        assignments_stats.paths = [
            path for path in assignments_stats.paths
            if len(path.node.strip()) > 0 and path.status == -1
        ]
        self.__context.paths = assignments_stats.paths
        self.__context.nodes = assignments_stats.nodes
        for plugin in self._get_new_plugins_instances(auto_plugins):
            if plugin.is_enable() and plugin.get_plugin_id() == type:
                paths_assignments = plugin.get_new_assignments()
                if len(paths_assignments) == 0:
                    logger.info("There is no node under average.")
                    return
                self.set_new_assignments(paths_assignments)
                break
        self.run()
Example #13
    def delete_osd(self, node_name, disk_name, osd_id):
        ssh_obj = ssh()
        cmd = "python {} -id {}  -disk_name {}".format(
            ConfigAPI().get_admin_delete_osd_job_script(), osd_id, disk_name)
        # stdout, stderr = exec_command(cmd)
        stdout, stderr = ssh_obj.exec_command(node_name, cmd)
        logger.info("Start delete osd job {} ".format(stdout))
        return stdout
Example #14
    def collect_local_node_state(self):
        script_path = ConfigAPI().get_collect_state_script()
        node_name = configuration().get_node_name()
        command = "python {}".format(script_path)
        if call_cmd(command):
            logger.info("execute collect script on {}".format(node_name))
            return True
        return False
Example #15
    def run_post_deploy_script(self):
        config_api = ConfigAPI()
        path = config_api.get_current_tunings_path() + config_api.get_post_deploy_script_file_name()
        if os.path.exists(path):
            call_cmd("chmod +x {}".format(path))
            logger.info("Run post deploy script.")
            call_cmd(path)
Example #16
    def add_journal(self, node_name, disk_name):
        ssh_obj = ssh()
        cmd = "python {} -disk_name {}".format(
            ConfigAPI().get_admin_add_journal_job_script(), disk_name)
        # stdout, stderr = exec_command(cmd)  # for local testing
        stdout, stderr = ssh_obj.exec_command(node_name, cmd)
        logger.info("Start add journal job {} ".format(stdout))
        return stdout
Example #17
def clean_ceph():
    cluster_conf = configuration()
    current_node_info = cluster_conf.get_node_info()
    current_node_name = current_node_info.name
    remote_mons_ips = cluster_conf.get_remote_ips(current_node_name)

    logger.info("Starting clean_ceph")
    clean_ceph_local()
    clean_ceph_remote(remote_mons_ips)
Example #18
def clean_consul_remote():
    conf = configuration()
    ssh_exec = ssh()
    for ip in conf.get_remote_ips(conf.get_node_name()):
        logger.info("Trying to clean Consul on {}".format(ip))
        ssh_exec.call_command(
            ip, 'python ' + ConfigAPI().get_consul_stop_script_path())
        ssh_exec.call_command(
            ip, 'python ' + ConfigAPI().get_consul_clean_script_path())
Example #19
def set_cluster_interface(bonds=[]):
    if bonds is None or len(bonds) == 0:
        return
    config = configuration()
    cluster_info = config.get_cluster_info()

    cluster_info.bonds = bonds
    config.set_cluster_network_info(cluster_info)
    logger.info("Updated cluster bonds to 1.3 successfully.")
Example #20
    def start_action(self):
        logger.info('ClusterLeader start action')
        SharedFS().block_till_mounted()
        logger.info('ClusterLeader starting services')
        subprocess.call('/opt/petasan/scripts/stats-setup.sh', shell=True)
        subprocess.call('/opt/petasan/scripts/stats-start.sh', shell=True)

        subprocess.call('systemctl start petasan-notification', shell=True)
        return
Example #21
def manage_disk_add_disk(name):
    manage_disk = ManageDisk()
    disk_meta = DiskMeta()
    disk_meta.disk_name = "sanatech" + str(name)
    disk_meta.size = 1
    #disk_meta.password="******"
    #disk_meta.user="******"
    status = manage_disk.add_disk(disk_meta, None, PathType.both, 2)
    logger.info(status)
Example #22
def clean(args):
    try:
        logger.info("Benchmark clean cmd. ")
        pool = args.p
        CephAPI().rados_benchmark_clean(pool)

    except Exception as ex:
        logger.exception(ex.message)
        sys.exit(-1)
    sys.exit(0)
Example #23
    def __get_wwn(self, disk_id):
        wwn = disk_id
        app_config = ConfigAPI().read_app_config()
        if app_config.wwn_fsid_tag:
            logger.info('include_wwn_fsid_tag() is true')
            fsid = ceph_disk.get_fsid(configuration().get_cluster_name())
            fsid_split = fsid[:8]
            wwn = fsid_split + disk_id
        logger.info('add disk wwn is ' + wwn)
        return wwn
Example #24
def get_list_status():
    ceph_manage = ManageDisk()
    ceph_manage.get_disks_meta()
    for i in ceph_manage.get_disks_meta():
        if "mostafa" == i.user:
            logger.info("disk found")
        try:
            print i.user, i.disk_name, i.ip, i.ip2, i.subnet1, i.subnet2, i.password, i.id, i.status, i.iqn, i.size
        except Exception as x:
            pass
Example #25
def server(args):

    try:
        logger.info("Reassignment paths script invoked to run process action.")
        MangePathAssignment().process()
    except Exception as ex:
        logger.error("error process reassignments actions.")
        logger.exception(ex.message)
        print(-1)
        sys.exit(-1)
Example #26
def check_process_id(process_id):
    if request.method == 'GET':
        try:
            benchmark = Benchmark()
            is_complete = benchmark.is_test_complete(process_id)
            if is_complete:
                logger.info("Benchmark Test Completed")
            json_data = json.dumps(is_complete)
            return json_data
        except Exception as e:
            return False
Example #27
    def write_maintenance_status(self, maintenance_status):

        try:
            j = maintenance_status.write_json()
            cons = BaseAPI()
            cons.write_value(CONSUL_MAINTENANCE_STATUS_PATH, j)
            logger.info("Success saving application config ")
        except Exception as e:
            logger.error("Error saving application config " + e.message)
            raise ConfigWriteException("Error saving application config")
        return
Example #28
    def _get_node_session(self, node_name):
        logger.info(self.__node_session_dict)
        if self.__session_dict:
            session = self.__node_session_dict.get(node_name)
            if session is not None:
                return session
            else:
                for sess, node in self.__session_dict.iteritems():
                    if node.Node == node_name:
                        # Cache by node name so the .get(node_name) lookup above can hit.
                        self.__node_session_dict[node_name] = sess
                        return sess
Example #29
def __test_leaders():
    sleeps = [15, 15, 10, 10, 5, 5]
    tries = 5

    leaders_in_cluster = []
    cluster_members = []

    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()

    current_node_info = cluster_conf.get_node_info()
    cluster_members.append(current_node_info.name)

    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        cluster_members.append(node_info.name)

    status_report = StatusReport()

    for host in cluster_members:
        while tries:
            status = None
            try:
                status = _leader_status_check_(host)
            except Exception as exc:
                logger.error("Error Connecting to consul for leader check.")
            # if not has_reached_quorum:
            if not status:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('waiting %s seconds before retrying',
                               sleep_seconds)
                # time.sleep(sleep_seconds)
                sleep(sleep_seconds)
                status_report.success = False
            else:
                leaders_in_cluster.append(host)
                logger.info('Cluster Node {} joined the cluster and is alive'.format(
                    host))
                status_report.success = True
                break
        if status_report.success is False:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_node_fail_join_cluster_not_alive' +
                "%" + str(host))
    if leaders_in_cluster == cluster_members:
        logger.info("Consul leaders are ready")
        status_report.success = True
        return status_report

    else:
        logger.error("Consul leaders are not ready")
        return status_report
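Because leaders_in_cluster is appended in the same order as cluster_members is iterated, the final list comparison in Example #29 works; if the two collections could ever be built in different orders, comparing them as sets is the order-insensitive alternative. A one-line sketch:

leaders_ready = set(leaders_in_cluster) == set(cluster_members)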
Example #30
def storage(args):
    job_manager = JobManager()
    params = '-d {} '.format(args.d)
    for j in job_manager.get_running_job_list():
        if j.type == JobType.STORAGELOAD:
            logger.info("Cannot start storage load job for 'sar'.")
            print("-1")
            return

    print(job_manager.add_job(JobType.STORAGELOAD, params))
    logger.info("Start storage load job for 'sar'")
    sys.exit(0)