def run():
    """Generate the Trafodion ``sqconfig`` node section and run ``sqgen``.

    The node list comes from the module-level ``dbcfgs_json`` string.
    A single-node setup gets a ``_virtualnodes 2`` entry; otherwise one
    ``node-id=...`` line is written per node, using the CPU and socket
    counts reported by ``lscpu`` on the local machine.

    Calls ``err`` when ``TRAF_HOME`` is unset or empty.
    """
    dbcfgs = json.loads(dbcfgs_json)
    nodes = dbcfgs['node_list'].split(',')

    # this script is run by the trafodion user, so get sqroot from env;
    # .get() lets an unset variable reach the err() below instead of
    # dying with a bare KeyError
    traf_home = os.environ.get('TRAF_HOME', '')
    if traf_home == '': err('TRAF_HOME var is empty')
    sqconfig_file = traf_home + '/sql/scripts/sqconfig'

    # first output line is the "CPU(s)" value, second the "Socket(s)" value
    core, processor = run_cmd(r"lscpu|grep -E '(^CPU\(s\)|^Socket\(s\))'|awk '{print $2}'").split('\n')[:2]
    # highest core index for the "cores=0-N" range, capped at 255
    core = min(int(core), 256) - 1

    lines = ['begin node\n']
    if len(nodes) == 1:
        lines.append('_virtualnodes 2\n')
    else:
        for node_id, node in enumerate(nodes):
            line = 'node-id=%s;node-name=%s;cores=0-%d;processors=%s;roles=connection,aggregation,storage\n' % (node_id, node, core, processor)
            lines.append(line)

    lines.append('end node\n')

    # write out the node section
    with open(sqconfig_file, 'w') as f:
        f.writelines(lines)

    print('sqconfig generated successfully!')

    run_cmd('sqgen')

    print('sqgen ran successfully!')
Beispiel #2
0
    def wait_for_discovered_blades(self):
        """Wait until every blade is discovered, resetting stragglers.

        Runs the discovery loop once, then up to BLADE_RESTART_TIMES more
        times, resetting the still-undiscovered blades before each retry.
        Calls ``err`` if blades remain undiscovered; otherwise stores the
        blade/node mapping under ``blade_node_map`` in the updated DEA file.
        """
        log('Wait for discovered blades')
        retries_left = BLADE_RESTART_TIMES
        macs_seen = []

        # start with every blade unmapped
        for blade_id in self.node_ids:
            self.blade_node_dict[blade_id] = None

        with self.ssh:
            done = self.discovery_waiting_loop(macs_seen)

        while retries_left != 0 and not done:
            retries_left -= 1
            for missing in self.get_not_discovered_blades():
                self.dha.node_reset(missing)
            with self.ssh:
                done = self.discovery_waiting_loop(macs_seen)

        if not done:
            summary = self.not_discovered_blades_summary()
            err('Not all blades have been discovered: %s' % summary)

        with io.open(self.updated_dea_file) as dea_in:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary objects; consider yaml.safe_load if the DEA file
            # may ever come from an untrusted source.
            dea = yaml.load(dea_in)
        dea['blade_node_map'] = self.blade_node_dict
        with io.open(self.updated_dea_file, 'w') as dea_out:
            yaml.dump(dea, dea_out, default_flow_style=False)
Beispiel #3
0
    def dumpToFile(self, msg=None):
        """Write a report about this benchmark to a log file.

        The file is ``unknown-benchmarks/<tool>-<started_at>/
        <category>-<basename of name>.log``; missing directories are
        created. ``msg``, when given, is written first as the reason.
        Calls ``err`` if the file cannot be opened.
        """
        # XXX make this a method of RunningTask
        d = '{0}/{1}-{2}'.format('unknown-benchmarks', configs['tool'],
                                 configs['started_at'])
        if not os.path.exists(d):
            os.makedirs(d)

        fname = '{0}/{1}-{2}.log'.format(d, self.category,
                                         os.path.basename(self.name))
        try:
            f = open(fname, 'w')
        except IOError as e:
            err('Failed dumping benchmark to file: {0}'.format(str(e)))
            # only reached if err() returns; there is no file to write to
            return

        # the context manager closes the file even when a write raises
        with f:
            if msg:
                f.write('Reason: {0}\n'.format(msg))
            f.write('category: {0}\n'.format(self.category))
            f.write('name: {0}\n\n'.format(self.name))
            f.write('cmd: {0}\n'.format(self.cmd))
            f.write('machine: {0}\n'.format(self.task.getMachine()))
            f.write('params: {0}\n'.format(configs['params']))
            f.write('versions: {0}\n'.format(self.versions))
            f.write('result: {0}\n'.format(self.result))
            f.write('witness: {0}\n\n'.format(self.witness))
            f.write('memUsage: {0}\n'.format(self.memory))
            f.write('cpuUsage: {0}s\n\n'.format(self.time))
            f.write('other output:\n{0}\n\n'.format(self.output))

            f.write(str(configs))
    def run_deploy(self):
        """Start the Fuel deployment and poll until it finishes.

        Polls ``fuel env`` up to WAIT_LOOP times, sleeping SLEEP_TIME
        seconds between polls, and relays the second-to-last line of the
        deployment log on each poll. Stops early on the statuses
        'operational', 'error' and 'stopped'. Anything other than
        'operational' collects error logs and calls ``err``.
        """
        WAIT_LOOP = 180
        SLEEP_TIME = 60
        LOG_FILE = 'cloud.log'

        log('Starting deployment of environment %s' % self.env_id)
        run_proc('fuel --env %s deploy-changes | strings | tee %s' %
                 (self.env_id, LOG_FILE))

        ready = False
        for _attempt in range(WAIT_LOOP):
            env = parse(exec_cmd('fuel env --env %s' % self.env_id))
            log('Environment status: %s' % env[0][E['status']])
            progress, _rc = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False)
            if progress:
                log(progress)
            status = env[0][E['status']]
            if status == 'operational':
                ready = True
                break
            if status == 'error' or status == 'stopped':
                break
            time.sleep(SLEEP_TIME)
        delete(LOG_FILE)

        if not ready:
            self.collect_error_logs()
            err('Deployment failed, environment %s is not operational' %
                self.env_id)
        else:
            log('Environment %s successfully deployed' % self.env_id)
Beispiel #5
0
    def _node_power_cmd(self, node_id, cmd):
        """Drive the chassis power of ``node_id`` into state ``cmd``.

        Returns at once when the node already reports the wanted state.
        Otherwise issues the IPMI power command and polls the state up to
        ``self.attempts`` times, ``self.delay`` seconds apart, re-sending
        the command between polls. Calls ``err`` if the state never
        matches.
        """
        wanted = 'Chassis Power is %s' % cmd
        if self.node_get_state(node_id) == wanted:
            return

        power_cmd = '%s chassis power %s' % (self.ipmi_cmd(node_id), cmd)
        exec_cmd(power_cmd,
                 attempts=self.attempts,
                 delay=self.delay,
                 verbose=True,
                 mask_args=[8, 10])

        remaining = self.attempts
        while remaining:
            time.sleep(self.delay)
            current = self.node_get_state(node_id)
            remaining -= 1
            if current == wanted:
                return
            if remaining != 0:
                # reinforce our will, but allow the command to fail,
                # we know our message got across once already...
                exec_cmd(power_cmd, check=False, mask_args=[8, 10])

        err('Could not set chassis %s for node %s' % (cmd, node_id))
Beispiel #6
0
    def wait_for_discovered_blades(self):
        """Block until all blades are discovered, then record the mapping.

        The discovery loop is run once and then repeated (at most
        BLADE_RESTART_TIMES times) after resetting each blade that is
        still missing. A final failure is reported through ``err``; on
        success ``blade_node_map`` is written into the updated DEA file.
        """
        log('Wait for discovered blades')
        known_macs = []
        resets_allowed = BLADE_RESTART_TIMES

        # every blade starts out without an assigned node
        for node_id in self.node_ids:
            self.blade_node_dict[node_id] = None

        with self.ssh:
            everything_found = self.discovery_waiting_loop(known_macs)

        while not everything_found and resets_allowed != 0:
            resets_allowed -= 1
            for lost_blade in self.get_not_discovered_blades():
                self.dha.node_reset(lost_blade)
            with self.ssh:
                everything_found = self.discovery_waiting_loop(known_macs)

        if not everything_found:
            err('Not all blades have been discovered: %s'
                % self.not_discovered_blades_summary())

        with io.open(self.updated_dea_file) as source:
            # NOTE(review): yaml.load is called without a Loader; if this
            # file can be attacker-controlled, switch to yaml.safe_load.
            dea_content = yaml.load(source)
        dea_content['blade_node_map'] = self.blade_node_dict
        with io.open(self.updated_dea_file, 'w') as target:
            yaml.dump(dea_content, target, default_flow_style=False)
Beispiel #7
0
 def health_check(self):
     """Run Fuel's sanity and smoke checks for this environment.

     Logs the command output and calls ``err`` when it contains the
     word 'failure'.
     """
     log('Now running sanity and smoke health checks')
     health_cmd = ('fuel health --env %s --check sanity,smoke --force'
                   % self.env_id)
     output = exec_cmd(health_cmd)
     log(output)
     if 'failure' in output:
         err('Healthcheck failed!')
Beispiel #8
0
def run(user, pwd):
    """Create a fresh SSH key pair locally and push files to all nodes.

    The key pair and the Trafodion package named in the configuration
    are copied to ``/tmp`` on every host of ``node_list``, one thread
    per host. Calls ``err`` for each host whose copy returned non-zero.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    # drop any old key files, then generate a new passphrase-less pair
    sudo_prefix = get_sudo_prefix()
    run_cmd('%s rm -rf %s*' % (sudo_prefix, SSHKEY_FILE))
    run_cmd('%s echo -e "y" | ssh-keygen -t rsa -N "" -f %s' %
            (sudo_prefix, SSHKEY_FILE))

    files = [SSHKEY_FILE, SSHKEY_FILE + '.pub', traf_package]

    remotes = []
    for host in hosts:
        remotes.append(Remote(host, user=user, pwd=pwd))

    workers = []
    for remote in remotes:
        workers.append(Thread(target=remote.copy, args=(files, '/tmp')))

    # copy to all hosts in parallel and wait for every copy to finish
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for remote in remotes:
        if remote.rc != 0:
            err('Failed to copy files to %s' % remote.host)
def run(pwd):
    """Generate an SSH key locally and copy setup files to all nodes.

    Saves the configuration, with all passwords removed, to
    ``/tmp/dbcfgs``. Then copies the key pair, the Trafodion package and
    that configuration file to ``/tmp`` on every host in ``node_list``,
    one thread per host. Calls ``err`` for each host whose copy returned
    a non-zero code.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    # save db configs to a tmp file and copy to all trafodion nodes
    dbcfgs_file = '/tmp/dbcfgs'
    p = ParseJson(dbcfgs_file)
    # remove passwords from the config file; each key is popped on its
    # own so that one missing key cannot leave the others in the file
    for pwd_key in ('mgr_pwd', 'traf_pwd', 'kdcadmin_pwd'):
        dbcfgs.pop(pwd_key, None)
    p.save(dbcfgs)

    key_file = '/tmp/id_rsa'
    run_cmd('sudo -n rm -rf %s*' % key_file)
    run_cmd('sudo -n echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % key_file)

    files = [key_file, key_file+'.pub', traf_package, dbcfgs_file]

    remote_insts = [Remote(h, pwd=pwd) for h in hosts]
    threads = [Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts]
    for thread in threads: thread.start()
    for thread in threads: thread.join()
    for r in remote_insts:
        if r.rc != 0: err('Failed to copy files to %s' % r.host)
Beispiel #10
0
    def get_interface_from_network(self, interfaces, network):
        """Return the first NIC in ``self.base[interfaces]`` that lists
        ``network``; call ``err`` when no NIC does."""
        for nic, nic_networks in self.base[interfaces].items():
            if network in nic_networks:
                return nic

        err('Network not found: %s' % network)
Beispiel #11
0
 def health_check(self):
     """Execute the Fuel 'sanity,smoke' health checks and log the result.

     ``err`` is called if the output mentions 'failure'.
     """
     log('Now running sanity and smoke health checks')
     result = exec_cmd(
         'fuel health --env %s --check sanity,smoke --force' % self.env_id)
     log(result)
     if 'failure' in result:
         err('Healthcheck failed!')
Beispiel #12
0
def params_from_string(pars, pard = None):
    """Parse a ``key:value,key:value`` string into a dictionary.

    pars -- params string; items are separated by ',' and each item is
            split on its first ':'. An empty key stands for '*'.
    pard -- params dictionary to update in place; a new one with the
            default entry ``{'*': ''}`` is created when omitted.

    Returns the updated (or new) dictionary. An item without ':' is
    reported through ``common.err``.
    """
    # default value for all benchmarks is empty string
    # this way we avoid exceptions without explicit checks
    if pard is None:
        pard = {'*':''}

    for item in pars.split(','):
        try:
            k, v = item.split(':', 1)
        except ValueError:
            from common import err
            err('Wrong item in params key: {0}'.format(item))

        # allow omit *
        k = k.strip() or '*'
        pard[k] = v.strip()

    return pard
Beispiel #13
0
    def get_role_interfaces(self, role):
        """Return the 'interfaces' entry of the first node in
        ``self.base['nodes']`` whose 'role' value contains ``role``;
        call ``err`` when there is none."""
        for entry in self.base['nodes']:
            if role not in entry['role']:
                continue
            return entry['interfaces']

        err('Role not found: %s' % role)
Beispiel #14
0
def params_from_string(pars, pard=None):
    """Build or update a params dictionary from a params string.

    ``pars`` is a comma-separated list of ``key:value`` items (split on
    the first ':' only). Keys and values are stripped of whitespace and
    an empty key is stored as '*'. ``pard`` is updated in place when
    given; otherwise a new dictionary starting as ``{'*': ''}`` is used.

    Returns the dictionary. Items lacking ':' go to ``common.err``.
    """
    # default value for all benchmarks is empty string
    # this way we avoid exceptions without explicit checks
    if pard is None:
        pard = {'*': ''}

    for chunk in pars.split(','):
        try:
            k, v = chunk.split(':', 1)
        except ValueError:
            from common import err
            err('Wrong item in params key: {0}'.format(chunk))

        k = k.strip()
        # allow omit *
        pard[k if k else '*'] = v.strip()

    return pard
Beispiel #15
0
    def _parse_string(self, info, string):
        try:
            info = info.split('\n')
            string_line = [line for line in info if string in line][0]
        except IndexError:
            err('Cannot get %s info' % string)

        return string_line
Beispiel #16
0
Datei: reap.py Projekt: rski/fuel
 def get_env(self):
     """Store the single deployed Fuel environment on the instance.

     Sets ``self.env`` and ``self.env_id`` from the output of
     ``fuel env``. Calls ``err`` when there is no environment or more
     than one.
     """
     env_list = parse(exec_cmd('fuel env'))
     env_count = len(env_list)
     if env_count == 0:
         err('No environment deployed')
     elif env_count > 1:
         err('More than 1 environment deployed')
     self.env = env_list[0]
     self.env_id = self.env[E['id']]
Beispiel #17
0
 def translate(self, boot_order_list):
     """Map each entry of ``boot_order_list`` through the DEV table.

     Returns the translated list in the same order; an entry missing
     from DEV is reported through ``err``.
     """
     result = []
     for device in boot_order_list:
         if device not in DEV:
             err('Boot device %s not recognized' % device)
             continue
         result.append(DEV[device])
     return result
Beispiel #18
0
 def translate(self, boot_order_list):
     """Translate boot device names using the DEV lookup table.

     Unknown names are passed to ``err``; known ones are collected, in
     order, into the returned list.
     """
     translated = []
     for name in boot_order_list:
         known = name in DEV
         if not known:
             err('Boot device %s not recognized' % name)
         else:
             translated.append(DEV[name])
     return translated
Beispiel #19
0
 def scp_put(self, local, remote='.', dir=False):
     """Upload ``local`` to ``remote`` over this client's transport.

     ``dir`` is forwarded as the third positional argument of
     ``SCPClient.put``. Any exception is handed to ``err``.
     """
     try:
         transport = self.client.get_transport()
         uploader = scp.SCPClient(transport,
                                  sanitize=lambda x: x,
                                  socket_timeout=TIMEOUT)
         with uploader as _scp:
             _scp.put(local, remote, dir)
     except Exception as e:
         err(e)
Beispiel #20
0
 def node_power_off(self, node_id):
     """Power down ``node_id`` through AMT if it reports Powerstate S0.

     Nothing is done when the node is not in S0. Calls ``err`` when the
     powerdown reply lacks 'pt_status: success'.
     """
     log('Power OFF Node %s' % node_id)
     amt = self.amt_cmd(node_id)
     info, _rc = exec_cmd('{0} info'.format(amt), check=False)
     if "Powerstate:   S0" not in info:
         return
     reply, _rc = exec_cmd('{0} powerdown'.format(amt), check=False)
     if 'pt_status: success' not in reply:
         err('Could Not Power OFF Node %s' % node_id)
Beispiel #21
0
    def parse_include_tag(self, tag):
        """Extract the argument from an ``include(<filename>)`` tag.

        The first ``len('include(')`` characters are cut off, along with
        a single closing ')' if present. Returns the remaining filename;
        calls ``err`` when it is empty.
        """
        # Remove 'include(' prefix
        filename = tag[len('include('):]
        # Remove only the tag's own closing ')'. rstrip(')') would also
        # eat parentheses that belong to the filename itself.
        if filename.endswith(')'):
            filename = filename[:-1]

        if not filename:
            err('No argument for include().')

        return filename
    def _parse_string(self, info, string):
        try:
            info = info.split('\n')
            string_line = [line for line in info if string in line][0]
        except IndexError:
            err('Cannot get %s info' % string)

        return string_line
Beispiel #23
0
def run():
    """Install Trafodion dependencies with yum.

    In offline mode (``offline_mode == 'Y'``) a temporary repo file
    pointing at ``repo_ip``/``repo_port`` is written, all installs use
    only the ``traflocal`` repo, and the repo file is removed at the end.
    Otherwise pdsh is installed from an EPEL mirror URL chosen by the
    EL release (6 or 7) found in the kernel release string. ``err`` is
    called for any other release.
    """
    dbcfgs = json.loads(dbcfgs_json)
    offline = dbcfgs['offline_mode'] == 'Y'

    if offline:
        print('Installing pdsh in offline mode ...')

        # setup temp local repo
        repo_content = LOCAL_REPO_PTR % (dbcfgs['repo_ip'],
                                         dbcfgs['repo_port'])
        with open(REPO_FILE, 'w') as f:
            f.write(repo_content)

        run_cmd(
            r'yum install -y --disablerepo=\* --enablerepo=traflocal pdsh-rcmd-ssh pdsh'
        )
    else:
        pdsh_installed = cmd_output('rpm -qa|grep -c pdsh')
        if pdsh_installed == '0':
            release = platform.release()
            # a kernel release without an "el<N>.<arch>" part used to
            # crash with AttributeError on None; report it properly
            matched = re.search(r'el(\d).(\w+)', release)
            if matched is None:
                err('Unsupported Linux version')
            releasever, arch = matched.groups()

            if releasever == '7':
                pdsh_pkg = 'http://mirrors.neusoft.edu.cn/epel/7/%s/p/pdsh-2.31-1.el7.%s.rpm' % (
                    arch, arch)
            elif releasever == '6':
                pdsh_pkg = 'http://mirrors.neusoft.edu.cn/epel/6/%s/pdsh-2.26-4.el6.%s.rpm' % (
                    arch, arch)
            else:
                err('Unsupported Linux version')

            print('Installing pdsh ...')
            run_cmd('yum install -y %s' % pdsh_pkg)

    package_list = [
        'apr', 'apr-util', 'expect', 'gzip', 'libiodbc-devel', 'lzo', 'lzop',
        'openldap-clients', 'perl-DBD-SQLite', 'perl-Params-Validate',
        'perl-Time-HiRes', 'sqlite', 'snappy', 'unixODBC-devel', 'unzip'
    ]

    # NOTE(review): this is a substring test against the whole
    # "rpm -qa" output, so e.g. 'apr' also matches an installed
    # 'apr-util' -- confirm whether an exact name match is wanted.
    all_pkg_list = run_cmd('rpm -qa')
    for pkg in package_list:
        if pkg in all_pkg_list:
            print('Package %s had already been installed' % pkg)
        else:
            print('Installing %s ...' % pkg)
            if offline:
                run_cmd(
                    r'yum install -y --disablerepo=\* --enablerepo=traflocal %s'
                    % pkg)
            else:
                run_cmd('yum install -y %s' % pkg)

    # remove temp repo file
    if offline:
        os.remove(REPO_FILE)
Beispiel #24
0
 def node_power_off(self, node_id):
     """Switch off ``node_id`` via its AMT command prefix.

     The powerdown is only sent when the node's info output shows
     Powerstate S0; a reply without 'pt_status: success' triggers
     ``err``.
     """
     log('Power OFF Node %s' % node_id)
     prefix = self.amt_cmd(node_id)
     state_out, _ = exec_cmd('{0} info'.format(prefix), check=False)
     powered_on = "Powerstate:   S0" in state_out
     if powered_on:
         down_out, _ = exec_cmd('{0} powerdown'.format(prefix),
                                check=False)
         if not 'pt_status: success' in down_out:
             err('Could Not Power OFF Node %s' % node_id)
Beispiel #25
0
 def node_power_on(self, node_id):
     """Power up ``node_id`` through AMT unless it already reports S0.

     The node's boot device is passed to the powerup command. Calls
     ``err`` when the reply lacks 'pt_status: success'.
     """
     log('Power ON Node %s' % node_id)
     amt = self.amt_cmd(node_id)
     info, _rc = exec_cmd('{0} info'.format(amt), check=False)
     if 'Powerstate:   S0' in info:
         return
     boot_dev = self.node_get_boot_dev(node_id)
     reply, _rc = exec_cmd('{0} powerup {1}'.format(amt, boot_dev),
                           check=False)
     if 'pt_status: success' not in reply:
         err('Could Not Power ON Node %s' % node_id)
Beispiel #26
0
def run():
    """Prepare the HDFS directories, ownership and ACLs for Trafodion.

    Picks the hdfs binary according to the configured distro, waits for
    HDFS to leave safe mode, then creates and configures the Trafodion
    directories and /hbase/archive as the HDFS user. With
    ``secure_hadoop == 'Y'`` it also grants RWXC to the Trafodion user
    in the HBase shell and calls ``err`` if the shell output contains
    ERROR.
    """
    hdfs_bin = DEF_HDFS_BIN

    dbcfgs = json.loads(dbcfgs_json)
    distro = dbcfgs['distro']

    if 'CDH' in distro:
        # use the parcel binary only when the parcel HBase lib exists
        if os.path.exists(PARCEL_HBASE_LIB):
            hdfs_bin = PARCEL_HDFS_BIN
    elif 'APACHE' in distro:
        hdfs_bin = dbcfgs['hadoop_home'] + '/bin/hdfs'

    traf_loc = '/user/trafodion'
    traf_user = dbcfgs['traf_user']
    hdfs_user = dbcfgs['hdfs_user']
    hbase_user = dbcfgs['hbase_user']
    hbase_group = cmd_output('%s groups %s | cut -d" " -f3' %
                             (hdfs_bin, hbase_user))

    # executed strictly in this order, all as the HDFS user
    hdfs_cmds = [
        '%s dfsadmin -safemode wait' % hdfs_bin,
        '%s dfs -mkdir -p %s/{trafodion_backups,bulkload,lobs} /hbase/archive'
        % (hdfs_bin, traf_loc),
        '%s dfs -chown -R %s:%s /hbase/archive' %
        (hdfs_bin, hbase_user, hbase_user),
        '%s dfs -chown -R %s:%s %s %s/{trafodion_backups,bulkload,lobs}' %
        (hdfs_bin, traf_user, traf_user, traf_loc, traf_loc),
        '%s dfs -chmod 0755 %s' % (hdfs_bin, traf_loc),
        '%s dfs -chmod 0750 %s/{trafodion_backups,bulkload,lobs}' %
        (hdfs_bin, traf_loc),
        '%s dfs -chgrp %s %s/bulkload' % (hdfs_bin, hbase_group, traf_loc),
        '%s dfs -setfacl -R -m user:%s:rwx /hbase/archive' %
        (hdfs_bin, traf_user),
        '%s dfs -setfacl -R -m default:user:%s:rwx /hbase/archive' %
        (hdfs_bin, traf_user),
        '%s dfs -setfacl -R -m mask::rwx /hbase/archive' % hdfs_bin,
    ]
    for hdfs_cmd in hdfs_cmds:
        run_cmd_as_user(hdfs_user, hdfs_cmd)

    # Grant all privileges to the Trafodion principal in HBase
    if dbcfgs['secure_hadoop'] == 'Y':
        grant_cmd = (
            'echo "grant \'%s\', \'RWXC\'" | %s su - %s -s /bin/bash -c "hbase shell" > /tmp/hbase_shell.out'
            % (traf_user, get_sudo_prefix(), hbase_user))
        run_cmd(grant_cmd)
        error_count = cmd_output('grep -c ERROR /tmp/hbase_shell.out')
        if int(error_count):
            err('Failed to grant HBase privileges to %s' % traf_user)
        run_cmd('rm /tmp/hbase_shell.out')
Beispiel #27
0
 def patch_iso(self, new_iso):
     """Copy the original ISO tree and patch the copy into ``new_iso``.

     Works in the 'origiso' and 'newiso' subdirectories of
     ``self.tmp_dir``. On any exception the original directory is
     unmounted with fusermount, the temp dir is deleted and the
     exception is passed to ``err``.
     """
     orig_dir, new_dir = ['%s/%s' % (self.tmp_dir, sub)
                          for sub in ('origiso', 'newiso')]
     try:
         self.copy(orig_dir, new_dir)
         self.patch(new_dir, new_iso)
     except Exception as failure:
         exec_cmd('fusermount -u %s' % orig_dir, False)
         delete(self.tmp_dir)
         err(failure)
Beispiel #28
0
 def load_yaml(self, filespec):
     """Load and return the YAML document at ``filespec``.

     ``filespec`` is fetched with urllib2 when ``self.is_url`` says it
     is a URL, otherwise opened as a local file. Any exception is
     reported through ``err``.
     """
     from contextlib import closing
     try:
         if (self.is_url(filespec)):
             # closing() releases the HTTP response once parsing is done;
             # it was previously never closed
             with closing(urllib2.urlopen(filespec)) as response:
                 # NOTE(review): yaml.load without a Loader can build
                 # arbitrary objects from remote content; consider
                 # yaml.safe_load here.
                 return yaml.load(response)
         else:
             with io.open(filespec) as f:
                 return yaml.load(f)
     except Exception as error:
         err('Error opening YAML file: %s' % error)
Beispiel #29
0
 def node_power_on(self, node_id):
     """Switch on ``node_id`` via AMT when it is not in Powerstate S0.

     Sends 'powerup' with the node's boot device; a reply missing
     'pt_status: success' is reported through ``err``.
     """
     log('Power ON Node %s' % node_id)
     prefix = self.amt_cmd(node_id)
     state_out, _ = exec_cmd('{0} info'.format(prefix), check=False)
     already_on = 'Powerstate:   S0' in state_out
     if not already_on:
         dev = self.node_get_boot_dev(node_id)
         powerup_cmd = '{0} powerup {1}'.format(prefix, dev)
         up_out, _ = exec_cmd(powerup_cmd, check=False)
         if not 'pt_status: success' in up_out:
             err('Could Not Power ON Node %s' % node_id)
Beispiel #30
0
def check_bridge(pxe_bridge, dha_path):
    """Verify that the PXE Linux bridge is not down.

    Skipped when the DHA file at ``dha_path`` names the 'libvirt'
    adapter. Otherwise ``err`` is called if ``ip link show`` lists the
    bridge with 'state DOWN'.
    """
    with io.open(dha_path) as dha_file:
        # NOTE(review): yaml.load without an explicit Loader; consider
        # yaml.safe_load if the DHA file may be untrusted.
        dha = yaml.load(dha_file)
    if dha['adapter'] == 'libvirt':
        return

    log('Using Linux Bridge %s for booting up the Fuel Master VM'
        % pxe_bridge)
    link_info = exec_cmd('ip link show %s' % pxe_bridge)
    bridge_down = pxe_bridge in link_info and 'state DOWN' in link_info
    if bridge_down:
        err('Linux Bridge {0} is not Active, bring'
            ' it UP first: [ip link set dev {0} up]'.format(pxe_bridge))
Beispiel #31
0
 def load_template(self, filespec):
     """Return the content of the template at ``filespec``.

     A URL (per ``self.is_url``) is fetched with urllib2 and returned
     as the result of ``read()``; a local file is returned as the list
     from ``readlines()``. Any exception is reported through ``err``.
     """
     # NOTE(review): the URL branch returns one string while the file
     # branch returns a list of lines -- confirm callers handle both.
     from contextlib import closing
     try:
         if (self.is_url(filespec)):
             # closing() releases the HTTP response after reading; it
             # was previously never closed
             with closing(urllib2.urlopen(filespec)) as response:
                 return response.read()
         else:
             with io.open(filespec) as f:
                 return f.readlines()
     except Exception as error:
         err('Error opening template file: %s' % error)
Beispiel #32
0
def check_bridge(pxe_bridge, dha_path):
    """Check the state of the Linux bridge used to boot the Fuel Master.

    The DHA file at ``dha_path`` is read first; nothing is checked for
    the 'libvirt' adapter. For other adapters the bridge is queried
    with ``ip link show`` and ``err`` is called when it is reported as
    'state DOWN'.
    """
    with io.open(dha_path) as stream:
        # NOTE(review): yaml.load is used without a Loader argument;
        # yaml.safe_load would be the safer choice for untrusted files.
        dha_struct = yaml.load(stream)
    uses_libvirt = dha_struct['adapter'] == 'libvirt'
    if not uses_libvirt:
        log('Using Linux Bridge %s for booting up the Fuel Master VM'
            % pxe_bridge)
        output = exec_cmd('ip link show %s' % pxe_bridge)
        if pxe_bridge in output:
            if 'state DOWN' in output:
                err('Linux Bridge {0} is not Active, bring'
                    ' it UP first: [ip link set dev {0} up]'.format(pxe_bridge))
Beispiel #33
0
def satt_log_init(lfile):
    """Open ``lfile`` for writing as the global log file.

    Must be called while the global ``log_file`` is still None. The
    file is closed automatically at interpreter exit. A failure to open
    it is reported through ``common.err``.
    """
    global log_file
    assert log_file is None

    try:
        log_file = open(lfile, 'w')
    # open() raises IOError on Python 2, which is not an OSError there;
    # catch both so the failure is reported on either Python version
    except (IOError, OSError) as e:
        from common import err
        err('Failed creating log: {0}'.format(str(e)))

    atexit.register(lambda: log_file.close())
Beispiel #34
0
 def patch_iso(self, new_iso):
     """Produce ``new_iso`` by copying and patching the original ISO tree.

     Uses ``<tmp_dir>/origiso`` and ``<tmp_dir>/newiso``. If copying or
     patching raises, the original directory is unmounted, the
     MOUNT_STATE_VAR environment variable is dropped, the temp dir is
     deleted and the exception is handed to ``err``.
     """
     source_dir = '%s/origiso' % self.tmp_dir
     target_dir = '%s/newiso' % self.tmp_dir
     try:
         self.copy(source_dir, target_dir)
         self.patch(target_dir, new_iso)
     except Exception as problem:
         unmount_cmd = 'fusermount -u %s' % source_dir
         exec_cmd(unmount_cmd, False)
         os.environ.pop(MOUNT_STATE_VAR, None)
         delete(self.tmp_dir)
         err(problem)
Beispiel #35
0
 def check_supported_release(self):
     """Ensure ``self.wanted_release`` is among the releases Fuel lists.

     Calls ``err`` when no release from ``fuel release -l`` has that
     name.
     """
     log('Check supported release: %s' % self.wanted_release)
     releases = parse(self.ssh.exec_cmd('fuel release -l'))
     supported = any(entry[R['name']] == self.wanted_release
                     for entry in releases)
     if not supported:
         err('This Fuel does not contain the following release: %s' %
             self.wanted_release)
Beispiel #36
0
Datei: log.py Projekt: ufwt/satt
def satt_log_init(lfile):
    """Create the global log file at path ``lfile``.

    Asserts that no log file has been opened yet, opens ``lfile`` for
    writing and registers an exit handler that closes it. An open
    failure is reported through ``common.err``.
    """
    global log_file
    assert log_file is None

    try:
        log_file = open(lfile, 'w')
    # Python 2's open() raises IOError, which "except OSError" alone
    # does not catch; on Python 3 the two names are the same class
    except (IOError, OSError) as e:
        from common import err
        err('Failed creating log: {0}'.format(str(e)))

    atexit.register(lambda: log_file.close())
Beispiel #37
0
 def check_supported_release(self):
     """Check that Fuel offers the release named ``self.wanted_release``.

     The names from ``fuel release -l`` are scanned; ``err`` is called
     when none of them matches.
     """
     log('Check supported release: %s' % self.wanted_release)
     wanted = self.wanted_release
     for candidate in parse(self.ssh.exec_cmd('fuel release -l')):
         if candidate[R['name']] == wanted:
             return
     err('This Fuel does not contain the following release: %s'
         % self.wanted_release)
Beispiel #38
0
    def verify_node_status(self):
        """Fail the deployment if any clustered node is not 'ready'.

        Nodes whose cluster column is 'None' are ignored. All offending
        nodes are listed in a single ``err`` call.
        """
        nodes = parse(exec_cmd('fuel node list'))
        not_ready = [(entry[N['id']], entry[N['status']])
                     for entry in nodes
                     if entry[N['status']] != 'ready'
                     and entry[N['cluster']] != 'None']

        if not_ready:
            summary = ''.join('[node %s, status %s]\n' % pair
                              for pair in not_ready)
            err('Deployment failed: %s' % summary)
Beispiel #39
0
 def node_reset(self, node_id):
     """Reset ``node_id`` through AMT, booting from its boot device.

     The reset is only sent when the node reports Powerstate S0;
     otherwise, or when the reset reply lacks 'pt_status: success',
     ``err`` is called.
     """
     log('RESET Node %s' % node_id)
     amt = self.amt_cmd(node_id)
     boot_dev = self.node_get_boot_dev(node_id)
     resp, _rc = exec_cmd('{0} info'.format(amt), check=False)
     if 'Powerstate:   S0' not in resp:
         err('Cannot RESET Node %s because it\'s not Active, state: %s'
             % (node_id, resp))
     else:
         resp, _rc = exec_cmd('{0} reset {1}'.format(amt, boot_dev),
                              check=False)
         if 'pt_status: success' not in resp:
             err('Could Not RESET Node %s' % node_id)
Beispiel #40
0
    def verify_node_status(self):
        """Fail the deployment if a node of this environment is not
        'ready'.

        The failing nodes are summarised in one ``err`` call, which also
        receives ``self.collect_logs`` as its second argument.
        """
        nodes = parse(exec_cmd('fuel --env %s node' % self.env_id))
        not_ready = [(entry[N['id']], entry[N['status']])
                     for entry in nodes
                     if entry[N['status']] != 'ready']

        if not_ready:
            summary = ''.join('[node %s, status %s]\n' % pair
                              for pair in not_ready)
            err('Deployment failed: %s' % summary, self.collect_logs)
Beispiel #41
0
    def verify_node_status(self):
        """Report every node that belongs to a cluster but is not
        'ready'.

        Nothing happens when all such nodes are ready; otherwise ``err``
        is called with one '[node <id>, status <status>]' line per node.
        """
        bad_lines = []
        for row in parse(exec_cmd('fuel node list')):
            status = row[N['status']]
            if status == 'ready' or row[N['cluster']] == 'None':
                continue
            bad_lines.append('[node %s, status %s]\n' % (row[N['id']], status))

        if bad_lines:
            err('Deployment failed: %s' % ''.join(bad_lines))
Beispiel #42
0
 def node_reset(self, node_id):
     """Issue an AMT reset for ``node_id`` if it reports Powerstate S0.

     A node that is not in S0, or a reset reply without
     'pt_status: success', is reported through ``err``.
     """
     log('RESET Node %s' % node_id)
     prefix = self.amt_cmd(node_id)
     dev = self.node_get_boot_dev(node_id)
     resp, ret = exec_cmd('{0} info'.format(prefix), check=False)
     active = 'Powerstate:   S0' in resp
     if active:
         reset_cmd = '{0} reset {1}'.format(prefix, dev)
         resp, ret = exec_cmd(reset_cmd, check=False)
         if not 'pt_status: success' in resp:
             err('Could Not RESET Node %s' % node_id)
     else:
         err('Cannot RESET Node %s because it\'s not Active, state: %s' %
             (node_id, resp))
Beispiel #43
0
def run():
    """Run every discovery method and print the results as JSON.

    The db config is the JSON string in ``sys.argv[1]``. Each method of
    ``Discover`` whose name starts with PREFIX is called without
    arguments and must return a ``(key, value)`` pair.
    """
    try:
        dbcfgs_json = sys.argv[1]
    except IndexError:
        err('No db config found')
    discover = Discover(json.loads(dbcfgs_json))

    result = {}
    for name in dir(discover):
        if not name.startswith(PREFIX):
            continue
        key, value = getattr(discover, name)()  # call method
        result[key] = value

    print(json.dumps(result))
    def restart(self):
        """Restart the HDP services HBASE, ZOOKEEPER and HDFS.

        Each service is first asked to go to state 'INSTALLED' (stopped)
        and polled until it reports that state. Then all services are
        asked to go to state 'STARTED' and the returned request is polled
        until its status is 'COMPLETED'. Calls ``err`` when a stop or the
        start does not finish within the retry limit.
        """
        srv_baseurl = CLUSTER_URL_PTR % (self.url, self.cluster_name) + '/services/'
        srvs = ['HBASE', 'ZOOKEEPER', 'HDFS']

        # Stop
        print 'Restarting HDP services ...'
        for srv in srvs:
            srv_url = srv_baseurl + srv
            config = {'RequestInfo': {'context' :'Stop %s services' % srv}, 'ServiceInfo': {'state' : 'INSTALLED'}}
            rc = self.p.put(srv_url, config)

            # check stop status
            # a falsy put() result is treated as "already stopped" (else branch)
            if rc:
                stat = self.p.get(srv_url)

                # poll every `interval` seconds, at most `maxcnt` times
                retry_cnt, maxcnt, interval = 0, 30, 5
                while stat['ServiceInfo']['state'] != 'INSTALLED':
                    retry_cnt += 1
                    # one more dot per poll; '\r' and the trailing comma
                    # redraw the same console line in place
                    flush_str = '.' * retry_cnt
                    print '\rCheck HDP service %s stop status (timeout: %dmin) %s' % (srv, maxcnt*interval/60, flush_str),
                    sys.stdout.flush()
                    time.sleep(interval)
                    stat = self.p.get(srv_url)
                    if retry_cnt == maxcnt: err('Failed to stop HDP service %s, timeout' % srv)
                # wrap line
                print
            else:
                print 'HDP service %s had already been stopped' % srv

        time.sleep(5)
        # Start
        # a single request against the services base URL starts everything
        config = {'RequestInfo': {'context' :'Start All services'}, 'ServiceInfo': {'state' : 'STARTED'}}
        rc = self.p.put(srv_baseurl, config)

        # check start status
        if rc:
            # the put() result carries the URL of the request to poll
            result_url = rc['href']
            stat = self.p.get(result_url)
            retry_cnt, maxcnt, interval = 0, 120, 5
            while stat['Requests']['request_status'] != 'COMPLETED':
                retry_cnt += 1
                flush_str = '.' * retry_cnt
                print '\rCheck HDP services start status (timeout: %dmin) %s' % (maxcnt*interval/60, flush_str),
                sys.stdout.flush()
                time.sleep(interval)
                stat = self.p.get(result_url)
                if retry_cnt == maxcnt: err('Failed to start all HDP services')
            print 'HDP services started successfully!'
        else:
            print 'HDP services had already been started'
def run():
    """Run every discovery probe and print the collected results as JSON.

    The db config is taken as a JSON string from the first command line
    argument.  Every attribute of the Discover instance whose name starts
    with PREFIX is called without arguments and must return a
    ``(key, value)`` pair; the pairs are merged into one dict which is
    dumped to stdout.
    """
    try:
        raw_cfg = sys.argv[1]
    except IndexError:
        err('No db config found')

    probe = Discover(json.loads(raw_cfg))

    report = {}
    for name in dir(probe):
        if not name.startswith(PREFIX):
            continue
        # each probe method hands back the result key and its value
        key, value = getattr(probe, name)()
        report[key] = value

    print(json.dumps(report))
Beispiel #46
0
    def _monitorTasks(self):
        """Poll the running benchmarks until none is left running.

        Each round iterates the (fd, flags) pairs produced by
        self._poll_wait() and reacts to the poll flags:

        * POLLERR - kill all tasks and report a fatal error via err().
        * POLLIN  - read the benchmark's output until readOutput() returns
                    a false value and pass every chunk to the report.
        * POLLHUP - unregister the fd, let the report finish the benchmark
                    (re-adding it to its task when the report does not
                    accept it), update the progress and start another
                    benchmark on the same task.
        """
        assert self._is_running()

        while self._is_running():
            for fd, flags in self._poll_wait():
                if flags & select.POLLERR:
                    self._killTasks()
                    err('Waiting for benchmark failed')

                if flags & select.POLLIN:
                    bench = self._getBenchmark(fd)
                    try:
                        data = bench.readOutput()
                        while data:
                            self._report.report(data, bench)
                            data = bench.readOutput()
                    # The loop can be too fast and raise EBUSY.
                    # NOTE: 'continue' moves on to the next fd, so a POLLHUP
                    # flagged for this fd is not handled in this round.
                    except IOError:
                        continue

                # is benchmark done?
                if flags & select.POLLHUP:
                    # remove the old benchmark
                    bench = self._unregisterFd(fd)
                    if not self._report.done(bench):
                        # something went wrong - queue this one again
                        satt_log('Running benchmark again');
                        # compensate for the unconditional increment below,
                        # so a benchmark that is run again is not counted
                        self._benchmarks_done -= 1

                        # XXX we do not have a mechanism to track how many
                        # times the benchmark ran, so it may happen that it
                        # will run indefinitely many times.
                        # It seems we don't need to care about that atm, so
                        # let's ignore it for now.
                        #
                        # P.S. message for future me: if you read this, we
                        # probably hit this error and you hate me and my
                        # wickedness - just sorry.
                        bench.task.readd(bench)

                    self._benchmarks_done += 1
                    # set progress (percentage of finished benchmarks)
                    if self._benchmarks_done != 0:
                        prgs = float(self._benchmarks_done) / self._benchmarks_count
                        self._report.progress(int(prgs * 100))

                    # run new benchmark
                    self._runBenchmark(bench.task)
Beispiel #47
0
    def __init__(self, query_func):
        """Load the rating values of the configured year.

        query_func is called with one SQL string and must return a
        sequence of rows; the first row supplies, in order, the values
        for unknown, false_correct, false_incorrect, true_correct and
        true_incorrect.  An empty result is reported through err().
        """
        query = ('SELECT unknown, false_correct, false_incorrect,'
                 'true_correct, true_incorrect '
                 'FROM rating_methods INNER JOIN years '
                 'ON rating_methods.year_id = years.id '
                 'WHERE year = \'{0}\';')
        rows = query_func(query.format(configs.configs['year']))
        if not rows:
            err('Failed getting rating methods')

        row = rows[0]

        # column order of the SELECT above
        columns = ('unknown', 'false_correct', 'false_incorrect',
                   'true_correct', 'true_incorrect')
        for idx, attr in enumerate(columns):
            setattr(self, attr, row[idx])
    def restart(self):
        """Restart the CDH cluster, then deploy its client configuration.

        Both requests are POSTed through self.p and the 'id' of each reply
        is handed to the retry checker; a false result from the checker is
        reported through err().
        """
        cluster = (self.url, self.cluster_name)
        restart_url = RESTART_URL_PTR % cluster
        deploy_cfg_url = DEPLOY_CFG_URL_PTR % cluster

        print('Restarting CDH services ...')
        restart_reply = self.p.post(restart_url)
        # NOTE(review): 40/15 look like max retry count and interval - confirm
        restarted = self.__retry_check(restart_reply['id'], 40, 15, 'restart')
        if not restarted:
            err('Failed to restart CDH, max retry count reached')
        else:
            print('Restart CDH successfully!')

        deploy_reply = self.p.post(deploy_cfg_url)
        deployed = self.__retry_check(deploy_reply['id'], 30, 10, 'deploy')
        if not deployed:
            err('Failed to deploy CDH client config, max retry count reached')
        else:
            print('Deploy client config successfully!')
    def configure_environment(self):
        """Create the Fuel environment described by the DEA and configure it.

        The yaml config directory is recreated empty, the environment is
        created with the fuel CLI, and once it is confirmed to exist the
        settings, network and nodes are configured in that order.
        """
        log('Configure environment')

        # start from an empty yaml config directory
        delete(self.yaml_config_dir)
        create_dir_if_not_exists(self.yaml_config_dir)

        env_name = self.dea.get_env_name()
        segment_type = self.dea.get_env_net_segment_type()
        env_details = (env_name, self.release_id, segment_type)

        log('Creating environment %s release %s net-segment-type %s'
            % env_details)
        exec_cmd('fuel env create --name "%s" --release %s --net-segment-type %s'
                 % env_details)

        if not self.env_exists(env_name):
            err('Failed to create environment %s' % env_name)

        for configure in (self.config_settings,
                          self.config_network,
                          self.config_nodes):
            configure()
Beispiel #50
0
    def reap_nodes_interfaces_transformations(self):
        """Collect node, interface and transformation data from Fuel.

        Every node listed by ``fuel node`` is renumbered starting at 1
        (relative to the smallest listed id) and gets one entry in the DEA
        node list (id, role, interfaces, transformations) and one in the
        DHA node list (id, pxeMac and empty IPMI/libvirt fields).  The
        results are written to the DEA and DHA yaml files.
        """
        fuel_nodes = parse(exec_cmd('fuel node'))
        real_node_ids = sorted(entry[N['id']] for entry in fuel_nodes)
        lowest_id = real_node_ids[0]

        interfaces = {}
        transformations = {}
        dea_nodes = []
        dha_nodes = []

        for real_node_id in real_node_ids:
            # ids in the generated files are relative to the lowest Fuel id
            node_id = int(real_node_id) - int(lowest_id) + 1
            self.last_node = node_id

            fuel_node = self.get_node_by_id(fuel_nodes, real_node_id)
            roles = commafy(fuel_node[N['roles']])
            if not roles:
                err('Fuel Node %s has no role' % real_node_id)

            if_name, mac = self.reap_interface(real_node_id, interfaces)
            log('reap transformation for node %s' % real_node_id)
            tr_name = self.reap_transformation(real_node_id, roles,
                                               transformations)

            dea_nodes.append({'id': node_id,
                              'role': roles,
                              'interfaces': if_name,
                              'transformations': tr_name})
            dha_nodes.append({'id': node_id,
                              'pxeMac': mac or None,
                              'ipmiIp': None,
                              'ipmiUser': None,
                              'ipmiPass': None,
                              'libvirtName': None,
                              'libvirtTemplate': None})

        self.write_yaml(self.dha_file, {'nodes': dha_nodes}, False)
        for section in ({'nodes': dea_nodes}, interfaces, transformations):
            self.write_yaml(self.dea_file, section)
        self.reap_fuel_node_info()
        self.write_yaml(self.dha_file, {'disks': DISKS})
Beispiel #51
0
 def cleanup_fuel_environments(self, env_list):
     """Force-delete the given Fuel environments and wait until none is left.

     After issuing the deletions, ``fuel env list`` is polled up to 60
     times with a 10 second pause between polls.  If environments are
     still listed after the last poll, their ids and statuses are
     reported through err().
     """
     max_polls = 60
     poll_delay = 10

     for env in env_list:
         env_id = env[E['id']]
         log('Deleting environment %s' % env_id)
         self.ssh.exec_cmd('fuel env --env %s --delete --force'
                           % env_id)

     for _ in range(max_polls):
         env_list = parse(self.ssh.exec_cmd('fuel env list'))
         if not env_list:
             break
         time.sleep(poll_delay)
     else:
         # never saw an empty listing: report what the last poll returned
         leftovers = [(env[E['id']], env[E['status']]) for env in env_list]
         err('Could not erase these environments %s'
             % leftovers)
Beispiel #52
0
    def _updateDb(self, rb):
        """Make sure the configured year and tool exist in the database.

        The year from the configuration must already be present, otherwise
        err() is called.  The tool (name, version, params, year) is
        inserted when the database does not know it yet.

        rb -- object whose ``versions`` string (stripped) is used as the
              tool version.

        Returns the tuple (tool_id, year_id).
        """
        def choose_tag():
            # prefer an explicit 'tool-tag', fall back to the tool name;
            # 'in' replaces dict.has_key(), which no longer exists in Python 3
            if 'tool-tag' in configs.configs:
                return configs.configs['tool-tag']
            return configs.configs['tool']

        ver = rb.versions.strip()

        # NOTE(review): the queries below are built by string formatting
        # from configuration values; if the db wrapper supports
        # parameterized queries they should be used instead.
        q = """
        SELECT id FROM years WHERE year = '{0}';
        """.format(configs.configs['year'])
        res = self._db.query(q)
        if not res:
            err('Do not have year {0}. If this is not typo, '
                'update the database and benchmarks'.format(configs.configs['year']))

        year_id = res[0][0]

        # If tool that runs in this run is not known to database, add it
        q = """
        SELECT id FROM tools
        WHERE name = '{0}' and version = '{1}'
              and params = '{2}' and year_id = '{3}';
        """.format(configs.configs['tool'], ver, self.tool_params, year_id)
        res = self._db.query(q)
        if not res:
            q2 = """
            INSERT INTO tools
            (name, year_id, version, params, tag, note)
            VALUES('{0}', '{1}', '{2}', '{3}', '{4}', {5});
            """.format(configs.configs['tool'], year_id,
                       ver, self.tool_params, choose_tag(),
                       Empty2Null(configs.configs['note']))
            self._db.query(q2)

            # get new tool_id by repeating the lookup
            res = self._db.query(q)
            assert len(res) == 1

        tool_id = res[0][0]

        return tool_id, year_id
Beispiel #53
0
 def node_power_off(self, node_id):
     """Power off a node through IPMI and wait until it reports 'off'.

     Nothing is done unless the chassis currently reports power on.
     After the power-off command the status is polled up to 200 times
     with a 3 second pause; if 'off' is never reported, err() is called.
     """
     max_polls = 200
     poll_delay = 3

     log('Power OFF Node %s' % node_id)
     ipmi = self.ipmi_cmd(node_id)
     status_cmd = '%s chassis power status' % ipmi

     if exec_cmd(status_cmd) != 'Chassis Power is on':
         return

     exec_cmd('%s chassis power off' % ipmi)
     for _attempt in range(max_polls):
         # second argument False: exec_cmd returns a pair here, of which
         # only the first element (the reported state) is used
         state, _ = exec_cmd(status_cmd,
                             False)
         if state == 'Chassis Power is off':
             break
         time.sleep(poll_delay)
     else:
         err('Could Not Power OFF Node %s' % node_id)
Beispiel #54
0
def parse_configs(path = 'symbiotic/config'):
    """Read ``key = value`` settings from *path* into the module-level configs.

    Blank lines and lines starting with '#' are skipped.  A line ending
    with a backslash is joined with the next non-skipped line.  Only keys
    listed in allowed_keys are accepted (anything else is reported through
    err()); the value of 'params' is converted with params_from_string().

    Returns the configs mapping; it is returned unchanged when *path*
    does not exist.
    """
    from common import err, dbg

    if not os.path.exists(path):
        return configs

    try:
        f = open(path, 'r')
    except IOError as e:
        err("Failed opening configuration file ({0}): {1}"
            .format(path, e.strerror))

    accline = None

    # the context manager closes the file on every exit path, including
    # an exception raised while parsing
    with f:
        for line in f:
            line = line.strip()

            if not line or line[0] == '#':
                continue

            if accline is not None:
                line = accline + line
                accline = None

            # if \ is on the end of line, append next line
            if line[-1] == '\\':
                accline = line[:-1]
                continue

            key, val = line.split('=', 1)
            key = key.strip()
            val = val.strip()

            if key in allowed_keys:
                if key == 'params':
                    configs[key] = params_from_string(val)
                else:
                    configs[key] = val
            else:
                err('Unknown config key: {0}'.format(key))

    return configs
def run():
    """Generate the sqconfig file for the cluster and run sqgen.

    The node list and scratch locations come from the db config JSON.
    A single-node setup gets two virtual nodes; otherwise one line per
    node is written using the core and socket counts reported by lscpu
    on the local host.  The scratch locations form the overflow section.
    The file is written to $TRAF_HOME/sql/scripts/sqconfig.
    """
    dbcfgs = json.loads(dbcfgs_json)

    nodes = dbcfgs['node_list'].split(',')
    scratch_locs = dbcfgs['scratch_locs'].split(',')

    # this script is run by the trafodion user, so get sqroot from env;
    # .get() makes an unset variable reach the check below instead of
    # failing with a KeyError
    traf_home = os.environ.get('TRAF_HOME', '')
    if traf_home == '': err('TRAF_HOME var is empty')
    sqconfig_file = traf_home + '/sql/scripts/sqconfig'

    # first two values printed: the 'CPU(s)' and 'Socket(s)' lines of lscpu
    core, processor = run_cmd(r"lscpu|grep -E '(^CPU\(s\)|^Socket\(s\))'|awk '{print $2}'").split('\n')[:2]
    # highest core index, capped at 255
    core = int(core)-1 if int(core) <= 256 else 255

    lines = ['begin node\n']
    if len(nodes) == 1:
        lines.append('_virtualnodes 2\n')
    else:
        for node_id, node in enumerate(nodes):
            line = 'node-id=%s;node-name=%s;cores=0-%d;processors=%s;roles=connection,aggregation,storage\n' % (node_id, node, core, processor)
            lines.append(line)

    lines.append('end node\n')
    lines.append('\n')
    lines.append('begin overflow\n')

    for scratch_loc in scratch_locs:
        line = 'hdd %s\n' % scratch_loc
        lines.append(line)

    lines.append('end overflow\n')

    # write out the node and overflow sections
    with open(sqconfig_file, 'w') as f:
        f.writelines(lines)

    print('sqconfig generated successfully!')

    run_cmd('sqgen')

    print('sqgen ran successfully!')
def run(pwd):
    """Create a fresh ssh key pair locally and distribute files to all nodes.

    The key pair (/tmp/id_rsa, /tmp/id_rsa.pub) and the Trafodion package
    named in the db config are copied to /tmp on every host of the node
    list, one copy thread per host.  A host whose copy ends with a
    non-zero rc is reported through err().
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    key_file = '/tmp/id_rsa'
    # drop any previous key files before generating a new pair
    run_cmd('sudo -n rm -rf %s*' % key_file)
    run_cmd('sudo -n echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % key_file)

    files = [key_file, key_file+'.pub', traf_package]

    remotes = [Remote(host, pwd=pwd) for host in hosts]
    workers = [Thread(target=remote.copy, args=(files, '/tmp'))
               for remote in remotes]

    # copy to all hosts in parallel, then wait for every copy to finish
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for remote in remotes:
        if remote.rc != 0:
            err('Failed to copy files to %s' % remote.host)
def run(user, pwd):
    """Create a fresh ssh key pair locally and distribute files to all nodes.

    The key pair (SSHKEY_FILE and its .pub file) and the Trafodion package
    named in the db config are copied to /tmp on every host of the node
    list, one copy thread per host, connecting with the given user and
    password.  A host whose copy ends with a non-zero rc is reported
    through err().
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    sudo_prefix = get_sudo_prefix()
    # drop any previous key files before generating a new pair
    run_cmd('%s rm -rf %s*' % (sudo_prefix, SSHKEY_FILE))
    run_cmd('%s echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % (sudo_prefix, SSHKEY_FILE))

    files = [SSHKEY_FILE, SSHKEY_FILE+'.pub', traf_package]

    remotes = [Remote(host, user=user, pwd=pwd) for host in hosts]
    workers = [Thread(target=remote.copy, args=(files, '/tmp'))
               for remote in remotes]

    # copy to all hosts in parallel, then wait for every copy to finish
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for remote in remotes:
        if remote.rc != 0:
            err('Failed to copy files to %s' % remote.host)
Beispiel #58
0
        def _exception_handler(args, data):
            """Handle a failed database query.

            args -- exception arguments; args[1] is the error message text
            data -- tuple (query, tool_id, task_id) describing the query

            A 'Duplicate entry' failure is only logged when the
            'ignore-duplicates' config is 'yes'; every other failure is
            reported through err().
            """
            q, tool_id, task_id = data

            if args[1].startswith('Duplicate entry'):

                if configs.configs['ignore-duplicates'] == 'yes':
                    satt_log('Already has this result for this tool, ignoring.')
                else:
                    # the '(tool + version + params)' clarification belongs
                    # to 'particular tool', so it comes before the hints
                    err('Already has result of this benchmark for this tool.\n'
                        'It is only supported to have one result for each '
                        'benchmark and particular tool\n'
                        '(tool + version + params).\n'
                        'If you want to ignore this behaviour use '
                        '--ignore-duplicates.\n'
                        'You can delete the old result:\n'
                        '  $ ./db-cli \'DELETE from task_results WHERE tool_id={0}'
                        ' and task_id={1}\'\n'
                        'or you can delete all results for this tool:\n'
                        '  $ ./db-cli \'DELETE from tools WHERE id={0}\'\n'
                        .format(tool_id, task_id))
            else:
                err('Failed querying db: {0}\n\n{1}'.format(args[1], q))
def run():
    """Prepare the HDFS directories and permissions Trafodion needs.

    Waits for HDFS to leave safe mode, creates the Trafodion and HBase
    staging/archive directories, sets their owners and ACLs, and on a
    secure Hadoop setup grants the Trafodion user RWXC privileges in
    HBase.  The hdfs binary is chosen according to the distro named in
    the db config.
    """
    hdfs_bin = "/usr/bin/hdfs"

    dbcfgs = json.loads(dbcfgs_json)
    DISTRO = dbcfgs["distro"]

    if "CDH" in DISTRO:
        # parcel installs ship their own hdfs binary
        parcel_lib = "/opt/cloudera/parcels/CDH/lib/hbase/lib"
        if os.path.exists(parcel_lib):
            hdfs_bin = "/opt/cloudera/parcels/CDH/bin/hdfs"
    elif "APACHE" in DISTRO:
        hdfs_bin = dbcfgs["hadoop_home"] + "/bin/hdfs"

    traf_loc = "/user/trafodion"
    traf_user = dbcfgs["traf_user"]
    hdfs_user = dbcfgs["hdfs_user"]
    hbase_user = dbcfgs["hbase_user"]

    run_cmd_as_user(hdfs_user, "%s dfsadmin -safemode wait" % hdfs_bin)
    run_cmd_as_user(
        hdfs_user,
        "%s dfs -mkdir -p %s/{trafodion_backups,bulkload,lobs} /hbase/archive /hbase-staging" % (hdfs_bin, traf_loc),
    )
    run_cmd_as_user(
        hdfs_user, "%s dfs -chown -R %s:%s /hbase/archive /hbase-staging" % (hdfs_bin, hbase_user, hbase_user)
    )
    run_cmd_as_user(
        hdfs_user,
        "%s dfs -chown -R %s:%s %s/{trafodion_backups,bulkload,lobs}" % (hdfs_bin, traf_user, traf_user, traf_loc),
    )
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m user:%s:rwx /hbase/archive" % (hdfs_bin, traf_user))
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m default:user:%s:rwx /hbase/archive" % (hdfs_bin, traf_user))
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m mask::rwx /hbase/archive" % hdfs_bin)

    # Grant all privileges to the Trafodion principal in HBase
    if dbcfgs["secure_hadoop"] == "Y":
        # The grant statement has to be echoed into the HBase shell:
        # 'grant' is not a shell command, so running it directly would
        # leave the shell's stdin empty and no grant would be applied.
        grant_stmt = 'grant "%s", "RWXC"' % traf_user
        run_cmd("echo '%s' | sudo -u %s hbase shell > /tmp/hbase_shell.out" % (grant_stmt, hbase_user))
        has_err = cmd_output("grep -c ERROR /tmp/hbase_shell.out")
        if int(has_err):
            err("Failed to grant HBase privileges to %s" % traf_user)
        run_cmd("rm /tmp/hbase_shell.out")