def main():
    """ discover main loop

    Runs the wrapper in 'discover' mode, using either the settings loaded
    from a config file or a node list typed in by the user, prints the
    formatted result and writes a dated copy of it to the file
    'discover_result' in the current working directory.
    """
    options = get_options()

    cfgs = defaultdict(str)

    if options.cfgfile:
        if not os.path.exists(options.cfgfile):
            err_m('Cannot find config file \'%s\'' % options.cfgfile)
        config_file = options.cfgfile
    else:
        config_file = DBCFG_FILE

    # pwd must always be bound because it is handed to wrapper.run below;
    # the user is only prompted when the password option was given
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    if os.path.exists(config_file):
        cfgs = ParseInI(config_file, 'dbconfigs').load()
    else:
        # no config file available: ask for the node list interactively
        # (raw_input: this script is written for Python 2)
        node_lists = expNumRe(
            raw_input(
                'Enter list of Nodes separated by comma, support numeric RE, i.e. n[01-12]: '
            ))

        # a '[' left in any node name means the numeric RE was not expanded
        if any('[' in node for node in node_lists):
            err('Failed to expand node list, please check your input.')

        cfgs['node_list'] = ','.join(node_lists)

    results = wrapper.run(cfgs, options, mode='discover', pwd=pwd)

    format_output('Discover results')

    # more than 4 results are rendered with output_row, fewer with
    # output_column
    if len(results) > 4:
        output = output_row(results)
    else:
        output = output_column(results)

    # single-argument print(...) behaves the same on Python 2 and 3
    print(output)
    with open('discover_result', 'w') as f:
        f.write('Discover Date: %s\n' % time.strftime('%Y-%m-%d %H:%M'))
        f.write(output)
def main():
    """ db_installer main loop

    Collects the installation settings into the module-level ``cfgs``
    (interactively via user_input, or from an existing config file), then
    either stops after the dry run (--build) or runs the installation
    through wrapper.run.  After the installation the default config file
    is renamed to a timestamped backup, and the temp config file is
    removed in every case.
    """
    global cfgs
    format_output('Trafodion Installation ToolKit')

    # handle parser option
    options = get_options()

    if options.build and options.cfgfile:
        log_err(
            'Wrong parameter, cannot specify both --build and --config-file')

    if options.build and options.offline:
        log_err('Wrong parameter, cannot specify both --build and --offline')

    if options.cfgfile:
        if not os.path.exists(options.cfgfile):
            log_err('Cannot find config file \'%s\'' % options.cfgfile)
        config_file = options.cfgfile
    else:
        config_file = DBCFG_FILE

    # pwd must always be bound because it is handed to user_input and
    # wrapper.run below; only prompt when the password option was given
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    # not specified config file and default config file doesn't exist either
    p = ParseInI(config_file, 'dbconfigs')
    if options.build or (not os.path.exists(config_file)):
        if options.build:
            format_output('DryRun Start')
        user_input(options, prompt_mode=True, pwd=pwd)

        # persist the collected settings through the ParseInI handle
        # single-argument print(...) behaves the same on Python 2 and 3
        print('\n** Generating config file to save configs ... \n')
        p.save(cfgs)
    # config file exists
    else:
        print('\n** Loading configs from config file ... \n')
        cfgs = p.load()
        if options.offline and cfgs['offline_mode'] != 'Y':
            log_err(
                'To enable offline mode, must set "offline_mode = Y" in config file'
            )
        user_input(options, prompt_mode=False, pwd=pwd)

    if options.reinstall:
        cfgs['reinstall'] = 'Y'

    if options.offline:
        http_start(cfgs['local_repo_dir'], cfgs['repo_http_port'])
    else:
        cfgs['offline_mode'] = 'N'

    if not options.build:
        format_output('Installation Start')

        ### perform actual installation ###
        wrapper.run(cfgs, options, pwd=pwd)

        format_output('Installation Complete')

        if options.offline:
            http_stop()

        # rename default config file when successfully installed
        # so next time user can input new variables for a new install
        # or specify the backup config file to install again
        try:
            # only rename default config file
            ts = time.strftime('%y%m%d_%H%M')
            if config_file == DBCFG_FILE and os.path.exists(config_file):
                os.rename(config_file, config_file + '.bak' + ts)
        except OSError:
            log_err('Cannot rename config file')
    else:
        format_output('DryRun Complete')

    # remove temp config file
    if os.path.exists(DBCFG_TMP_FILE):
        os.remove(DBCFG_TMP_FILE)
def user_input(options, prompt_mode=True, pwd=''):
    """ get user's input and check input value

    Populates the module-level ``cfgs`` mapping: obtains each setting
    through UserInput.get_input, queries the Hadoop management URL unless
    the 'apache' option is set, runs the wrapper in 'discover' mode and
    derives further settings from the discover results.

    options     -- parsed command line options; the attributes 'apache',
                   'offline' and 'silent' are optional and count as false
                   when missing
    prompt_mode -- forwarded to UserInput.get_input; when true,
                   DBCFG_TMP_FILE (if it exists) is loaded into cfgs first
    pwd         -- SSH password handed to UserInput and wrapper.run

    Every problem found is reported through log_err.
    """
    global cfgs

    apache = bool(getattr(options, 'apache', False))
    offline = bool(getattr(options, 'offline', False))
    silent = bool(getattr(options, 'silent', False))

    # load from temp config file if in prompt mode
    if prompt_mode and os.path.exists(DBCFG_TMP_FILE):
        tp = ParseInI(DBCFG_TMP_FILE, 'dbconfigs')
        cfgs = tp.load()
        if not cfgs:
            # set cfgs to defaultdict again
            cfgs = defaultdict(str)

    u = UserInput(options, pwd)
    # g(name) obtains one setting, offering the current cfgs value
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=prompt_mode)

    ### begin user input ###
    if apache:
        g('node_list')
        node_lists = expNumRe(cfgs['node_list'])

        # a '[' left in any node name means the numeric RE was not expanded
        if any('[' in node for node in node_lists):
            log_err('Failed to expand node list, please check your input.')
        cfgs['node_list'] = ','.join(node_lists)
        g('hadoop_home')
        g('hbase_home')
        g('hive_home')
        g('hdfs_user')
        g('hbase_user')
        g('first_rsnode')
        cfgs['distro'] = 'APACHE'
    else:
        g('mgr_url')
        if not ('http:' in cfgs['mgr_url'] or 'https:' in cfgs['mgr_url']):
            cfgs['mgr_url'] = 'http://' + cfgs['mgr_url']

        # set cloudera default port 7180 if not provided by user
        if not re.search(r':\d+', cfgs['mgr_url']):
            cfgs['mgr_url'] += ':7180'

        g('mgr_user')
        g('mgr_pwd')

        validate_url_v1 = '%s/api/v1/clusters' % cfgs['mgr_url']
        content = ParseHttp(cfgs['mgr_user'],
                            cfgs['mgr_pwd']).get(validate_url_v1)

        # currently only CDH support multiple clusters
        # so if condition is true, it must be CDH cluster
        if len(content['items']) > 1:
            # list all managed clusters and let the user pick one
            cluster_names = [cluster['name'] for cluster in content['items']]

            for index, name in enumerate(cluster_names):
                print(str(index + 1) + '. ' + name)
            g('cluster_no')
            try:
                c_index = int(cfgs['cluster_no']) - 1
            except (TypeError, ValueError):
                # non-numeric answer: force the range check below to fail
                c_index = -1
            if c_index < 0 or c_index >= len(cluster_names):
                log_err('Incorrect number')
            cluster_name = cluster_names[c_index]
        else:
            # the cluster name is looked up under two different layouts
            try:
                cluster_name = content['items'][0]['name']
            except (IndexError, KeyError):
                try:
                    cluster_name = content['items'][0]['Clusters'][
                        'cluster_name']
                except (IndexError, KeyError):
                    log_err('Failed to get cluster info from management url')

        hadoop_discover = HadoopDiscover(cfgs['mgr_user'], cfgs['mgr_pwd'],
                                         cfgs['mgr_url'], cluster_name)
        rsnodes = hadoop_discover.get_rsnodes()
        hadoop_users = hadoop_discover.get_hadoop_users()

        cfgs['distro'] = hadoop_discover.distro
        cfgs['hbase_lib_path'] = hadoop_discover.get_hbase_lib_path()
        cfgs['hbase_service_name'] = hadoop_discover.get_hbase_srvname()
        cfgs['hdfs_service_name'] = hadoop_discover.get_hdfs_srvname()
        cfgs['zookeeper_service_name'] = (
            hadoop_discover.get_zookeeper_srvname())

        # spaces are stored URL-encoded
        cfgs['cluster_name'] = cluster_name.replace(' ', '%20')
        cfgs['hdfs_user'] = hadoop_users['hdfs_user']
        cfgs['hbase_user'] = hadoop_users['hbase_user']
        cfgs['node_list'] = ','.join(rsnodes)
        cfgs['first_rsnode'] = rsnodes[0]  # first regionserver node

    # check node connection
    for node in cfgs['node_list'].split(','):
        rc = os.system('ping -c 1 %s >/dev/null 2>&1' % node)
        if rc:
            log_err(
                'Cannot ping %s, please check network connection and /etc/hosts'
                % node)

    # set some system default configs
    cfgs['config_created_date'] = time.strftime('%Y/%m/%d %H:%M %Z')
    cfgs['traf_user'] = TRAF_USER
    if apache:
        cfgs['hbase_xml_file'] = cfgs['hbase_home'] + '/conf/hbase-site.xml'
        cfgs['hdfs_xml_file'] = cfgs[
            'hadoop_home'] + '/etc/hadoop/hdfs-site.xml'
    else:
        cfgs['hbase_xml_file'] = DEF_HBASE_XML_FILE

    ### discover system settings; iterated below as a sequence of
    ### single-entry {host: json_string} dicts
    system_discover = wrapper.run(cfgs, options, mode='discover', pwd=pwd)

    # check discover results, return error if fails on any single node
    need_java_home = 0
    has_home_dir = 0
    # stays empty when nothing was discovered, so the fallback at the end
    # of this function never hits an unbound name
    java_home = ''
    for result in system_discover:
        # list(...) keeps the indexing valid on Python 2 and 3
        _, content = list(result.items())[0]
        content_dict = json.loads(content)

        java_home = content_dict['default_java']
        if java_home == 'N/A':
            need_java_home += 1
        if content_dict['linux'] == 'N/A':
            log_err('Unsupported Linux version')
        if content_dict['firewall_status'] == 'Running':
            info(
                'Firewall is running, please make sure the ports used by Trafodion are open'
            )
        if content_dict['traf_status'] == 'Running':
            log_err('Trafodion process is found, please stop it first')
        # only a usable value is recorded as the HBase version
        if content_dict['hbase'] == 'N/A':
            log_err('HBase is not found')
        elif content_dict['hbase'] == 'N/S':
            log_err('HBase version is not supported')
        else:
            cfgs['hbase_ver'] = content_dict['hbase']
        if content_dict['home_dir']:  # trafodion user exists
            has_home_dir += 1
            cfgs['home_dir'] = content_dict['home_dir']
        if content_dict['hadoop_authentication'] == 'kerberos':
            cfgs['secure_hadoop'] = 'Y'
        else:
            cfgs['secure_hadoop'] = 'N'

    if offline:
        g('local_repo_dir')
        if not glob('%s/repodata' % cfgs['local_repo_dir']):
            log_err(
                'repodata directory not found, this is not a valid repository directory'
            )
        cfgs['offline_mode'] = 'Y'
        cfgs['repo_ip'] = socket.gethostbyname(socket.gethostname())
        ports = ParseInI(DEF_PORT_FILE, 'ports').load()
        cfgs['repo_http_port'] = ports['repo_http_port']

    pkg_list = ['apache-trafodion']
    # find tar in installer folder, if more than one found, use the first one
    for pkg in pkg_list:
        tar_loc = glob('%s/*%s*.tar.gz' % (INSTALLER_LOC, pkg))
        if tar_loc:
            cfgs['traf_package'] = tar_loc[0]
            break

    g('traf_package')
    cfgs['req_java8'] = 'N'

    # get basename and version from tar filename
    try:
        pattern = '|'.join(pkg_list)
        # \d+ so that multi-digit components such as 2.1.10 are kept whole
        cfgs['traf_basename'], cfgs['traf_version'] = re.search(
            r'.*(%s).*-(\d+\.\d+\.\d+).*' % pattern,
            cfgs['traf_package']).groups()
    except (AttributeError, TypeError):
        # re.search returned None (no match) or the package is not a string
        log_err('Invalid package tar file')

    if not cfgs['traf_dirname']:
        cfgs['traf_dirname'] = '%s-%s' % (cfgs['traf_basename'],
                                          cfgs['traf_version'])
    g('traf_dirname')
    if not has_home_dir:
        # no node reported a home dir, so a password is requested
        g('traf_pwd')
    g('dcs_cnt_per_node')
    g('scratch_locs')
    g('traf_start')

    # kerberos
    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    # ldap security
    g('ldap_security')
    if cfgs['ldap_security'].upper() == 'Y':
        g('db_root_user')
        g('ldap_hosts')
        g('ldap_port')
        g('ldap_identifiers')
        g('ldap_encrypt')
        # a cert path is requested for levels '1' and '2' only
        if cfgs['ldap_encrypt'] in ('1', '2'):
            g('ldap_certpath')
        elif cfgs['ldap_encrypt'] == '0':
            cfgs['ldap_certpath'] = ''
        else:
            log_err('Invalid ldap encryption level')

        g('ldap_userinfo')
        if cfgs['ldap_userinfo'] == 'Y':
            g('ldap_user')
            g('ldap_pwd')
        else:
            cfgs['ldap_user'] = ''
            cfgs['ldap_pwd'] = ''

    # DCS HA
    g('dcs_ha')
    cfgs['enable_ha'] = 'false'
    if cfgs['dcs_ha'].upper() == 'Y':
        g('dcs_floating_ip')
        g('dcs_interface')
        g('dcs_backup_nodes')
        # check dcs backup nodes should exist in node list
        backup_nodes = set(cfgs['dcs_backup_nodes'].split(','))
        if not backup_nodes.issubset(cfgs['node_list'].split(',')):
            log_err('Invalid DCS backup nodes, please pick up from node list')
        cfgs['enable_ha'] = 'true'

    if need_java_home:
        g('java_home')
    elif not cfgs['java_home']:
        # don't overwrite user input java home; otherwise fall back to the
        # default java reported by the last discovered node
        cfgs['java_home'] = java_home

    if not silent:
        u.notify_user()
# Example #4
# 0
def main():
    """ add_nodes main loop

    Reads the settings of the running instance from TRAF_CFG_FILE, copies
    the config file, a tar of the trafodion folder, the trafodion user's
    .ssh folder and two jar files to the new nodes, runs the 'addnodes_new'
    wrapper mode on the new nodes and the 'addnodes_all' mode on all nodes,
    and finally either adds the nodes through sqshell (instance running) or
    regenerates the configuration with sqgen (instance down).
    """
    cfgs = defaultdict(str)

    # handle parser option
    options = get_options()
    if not options.nodes:
        err_m('Must specifiy the node names using \'--nodes\' option')

    # get node list from user input
    new_nodes = expNumRe(options.nodes)
    if not new_nodes:
        err_m('Incorrect format')

    # pwd must always be bound because it is used by UserInput, Remote and
    # wrapper.run below; only prompt when the password option was given
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    u = UserInput(options, pwd)
    # NOTE(review): no prompt_mode variable exists in this function, so the
    # name would raise NameError on the first g() call; True is assumed
    # here (ask the user for the missing values) -- confirm
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=True)

    format_output('Trafodion Elastic Add Nodes Script')

    ### read configs from current trafodion_config and save it to cfgs
    if os.path.exists(TRAF_CFG_FILE):
        with open(TRAF_CFG_FILE, 'r') as f:
            traf_cfgs = f.readlines()
        for traf_cfg in traf_cfgs:
            # skip blank lines and lines that are not KEY=VALUE pairs
            if not traf_cfg.strip() or '=' not in traf_cfg:
                continue
            # split on the first '=' only, so a value may contain '='
            key, value = traf_cfg.replace('export ', '').split('=', 1)
            value = value.replace('"', '').replace('\n', '')
            cfgs[key.lower()] = value
    else:
        err_m(
            'Cannot find %s, be sure to run this script on one of trafodion nodes'
            % TRAF_CFG_FILE)

    ### config check
    if not cfgs['hbase_lib_path'] or not cfgs['traf_version']:
        err_m('Missing parameters in Trafodion config file')

    if not cfgs['traf_home'] or not cmd_output(
            '%s ls %s' % (get_sudo_prefix(), cfgs['traf_home'])):
        err_m('Cannot find trafodion binary folder')
    # get trafodion user from traf_home path (second to last path component)
    cfgs['traf_user'] = cfgs['traf_home'].split('/')[-2]
    if not cfgs['traf_user']:
        err_m('Cannot detect trafodion user')

    ### parse trafodion user's password (second field of /etc/shadow)
    cfgs['traf_shadow'] = cmd_output(
        "%s grep %s /etc/shadow |awk -F: '{print $2}'" %
        (get_sudo_prefix(), cfgs['traf_user']))

    def copy_files():
        """ copy the files needed by the new nodes into their /tmp """
        # package trafodion binary into a tar file
        if not os.path.exists(TRAF_PKG_FILE):
            info(
                'Creating trafodion packages of %s, this will take a while ...'
                % cfgs['traf_home'])
            run_cmd_as_user(
                cfgs['traf_user'],
                'cd %s; tar czf %s ./* --exclude logs/* --exclude core.* --exclude tmp/*'
                % (cfgs['traf_home'], TRAF_PKG_FILE))
        else:
            info('Using existing trafodion package %s' % TRAF_PKG_FILE)

        info(
            'Copying trafodion files to new nodes, this will take a while ...')
        # stage a copy of the .ssh folder under /tmp with mode 755
        run_cmd('%s cp -rf %s/../.ssh /tmp' %
                (get_sudo_prefix(), cfgs['traf_home']))
        run_cmd('%s chmod -R 755 /tmp/.ssh' % get_sudo_prefix())
        traf_ssh_folder = '/tmp/.ssh'

        hbase_trx_file = cmd_output('ls %s/hbase-trx-*' %
                                    cfgs['hbase_lib_path'])
        trafodion_utility_file = cmd_output('ls %s/trafodion-utility-*' %
                                            cfgs['hbase_lib_path'])

        files = [
            TRAF_CFG_FILE, TRAF_PKG_FILE, traf_ssh_folder, hbase_trx_file,
            trafodion_utility_file
        ]

        # one copy thread per new node, all started before any is joined
        remote_insts = [Remote(h, pwd=pwd) for h in new_nodes]
        threads = [
            Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        for r in remote_insts:
            if r.rc != 0:
                err_m('Failed to copy files to %s' % r.host)

    ### copy trafodion_config/trafodion-package/hbase-trx to the new nodes
    copy_files()

    ### set parameters
    # compare in lower case: an upper-cased value can never equal 'true'
    if cfgs['enable_ha'].lower() == 'true':
        g('dcs_backup_nodes')
        cfgs['dcs_ha'] = 'Y'
    else:
        cfgs['dcs_ha'] = 'N'

    if cfgs['trafodion_enable_authentication'] == 'YES':
        cfgs['ldap_security'] = 'Y'
    else:
        cfgs['ldap_security'] = 'N'

    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    #TODO: offline support
    cfgs['offline_mode'] = 'N'

    format_output('AddNode sub scripts Start')

    ### run addNode script on new nodes ###
    cfgs['node_list'] = ','.join(new_nodes)
    info('Running add node setup on new node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_new', pwd=pwd)

    ### run dcs setup script on all nodes ###
    # get current trafodion node list
    current_nodes = cmd_output('%s su - %s -c "trafconf -name 2>/dev/null"' %
                               (get_sudo_prefix(), cfgs['traf_user'])).split()
    # set() drops nodes that appear in both lists
    all_nodes = list(set(new_nodes + current_nodes))
    cfgs['node_list'] = ','.join(all_nodes)
    info('Running dcs setup on all node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_all', pwd=pwd)

    ### do sqshell node add/up, sqregen
    # check if trafodion is running
    mon_process = cmd_output('ps -ef|grep -v grep|grep -c "monitor COLD"')
    if int(mon_process) > 0:
        info('Trafodion instance is up, adding node in sqshell ...')

        # cores=0-1;processors=2;roles=connection,aggregation,storage
        sqconfig_ptr = cmd_output(
            '%s su - %s -c "trafconf -node|sed -n 2p|cut -d\\\";\\\" -f3-5"' %
            (get_sudo_prefix(), cfgs['traf_user']))
        for node in new_nodes:
            info('adding node [%s] in sqshell ...' % node)
            run_cmd_as_user(
                cfgs['traf_user'],
                'echo "node add {node-name %s,%s}" | sqshell -a' %
                (node, sqconfig_ptr))
            run_cmd_as_user(cfgs['traf_user'],
                            'echo "node up %s" | sqshell -a' % node)
            ok('Node [%s] added!' % node)

        info('Starting DCS on new nodes ...')
        run_cmd_as_user(cfgs['traf_user'], 'dcsstart')
    else:
        info('Trafodion instance is not up, do sqgen ...')
        run_cmd_as_user(cfgs['traf_user'],
                        'rm %s/sqconfig.db' % cfgs['traf_var'])
        run_cmd_as_user(cfgs['traf_user'], 'sqgen')
        ok('Setup completed. You need to start trafodion manually')

    ### clean up
    run_cmd('%s rm -rf /tmp/.ssh' % get_sudo_prefix())
    run_cmd('%s rm -rf %s' % (get_sudo_prefix(), TRAF_PKG_FILE))

    format_output('AddNode Complete')
    info(
        'NOTICE: You need to manually restart RegionServer on newly added nodes to take effect'
    )
def main():
    """ db_installer main loop

    Collects the installation settings into the module-level ``cfgs``
    (interactively via user_input, or from an existing config file), then
    either stops after the dry run (--build) or runs the installation
    through wrapper.run.  After the installation the default config file
    is renamed to a timestamped backup, and the temp config file is
    removed in every case.
    """
    global cfgs
    format_output('Trafodion Installation ToolKit')

    # handle parser option
    options = get_options()

    if options.build and options.cfgfile:
        log_err('Wrong parameter, cannot specify both --build and --config-file')

    if options.build and options.offline:
        log_err('Wrong parameter, cannot specify both --build and --offline')

    if options.cfgfile:
        if not os.path.exists(options.cfgfile):
            log_err('Cannot find config file \'%s\'' % options.cfgfile)
        config_file = options.cfgfile
    else:
        config_file = DBCFG_FILE

    # pwd must always be bound because it is handed to user_input and
    # wrapper.run below; only prompt when the password option was given
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    # not specified config file and default config file doesn't exist either
    p = ParseInI(config_file, 'dbconfigs')
    if options.build or (not os.path.exists(config_file)):
        if options.build:
            format_output('DryRun Start')
        user_input(options, prompt_mode=True, pwd=pwd)

        # persist the collected settings through the ParseInI handle
        # single-argument print(...) behaves the same on Python 2 and 3
        print('\n** Generating config file to save configs ... \n')
        p.save(cfgs)
    # config file exists
    else:
        print('\n** Loading configs from config file ... \n')
        cfgs = p.load()
        if options.offline and cfgs['offline_mode'] != 'Y':
            log_err('To enable offline mode, must set "offline_mode = Y" in config file')
        user_input(options, prompt_mode=False, pwd=pwd)

    if options.reinstall:
        cfgs['reinstall'] = 'Y'

    if options.offline:
        http_start(cfgs['local_repo_dir'], cfgs['repo_http_port'])
    else:
        cfgs['offline_mode'] = 'N'

    if not options.build:
        format_output('Installation Start')

        ### perform actual installation ###
        wrapper.run(cfgs, options, pwd=pwd)

        format_output('Installation Complete')

        if options.offline:
            http_stop()

        # rename default config file when successfully installed
        # so next time user can input new variables for a new install
        # or specify the backup config file to install again
        try:
            # only rename default config file
            ts = time.strftime('%y%m%d_%H%M')
            if config_file == DBCFG_FILE and os.path.exists(config_file):
                os.rename(config_file, config_file + '.bak' + ts)
        except OSError:
            log_err('Cannot rename config file')
    else:
        format_output('DryRun Complete')

    # remove temp config file
    if os.path.exists(DBCFG_TMP_FILE):
        os.remove(DBCFG_TMP_FILE)
def user_input(options, prompt_mode=True, pwd=''):
    """ get user's input and check input value

    Populates the module-level ``cfgs`` mapping: obtains each setting
    through UserInput.get_input, queries the Hadoop management URL unless
    the 'apache' option is set, runs the wrapper in 'discover' mode and
    derives further settings from the discover results.

    options     -- parsed command line options; the attributes 'apache',
                   'offline' and 'silent' are optional and count as false
                   when missing
    prompt_mode -- forwarded to UserInput.get_input; when true,
                   DBCFG_TMP_FILE (if it exists) is loaded into cfgs first
    pwd         -- SSH password handed to UserInput and wrapper.run

    Every problem found is reported through log_err.
    """
    global cfgs

    apache = bool(getattr(options, 'apache', False))
    offline = bool(getattr(options, 'offline', False))
    silent = bool(getattr(options, 'silent', False))

    # load from temp config file if in prompt mode
    if prompt_mode and os.path.exists(DBCFG_TMP_FILE):
        tp = ParseInI(DBCFG_TMP_FILE, 'dbconfigs')
        cfgs = tp.load()
        if not cfgs:
            # set cfgs to defaultdict again
            cfgs = defaultdict(str)

    u = UserInput(options, pwd)
    # g(name) obtains one setting, offering the current cfgs value
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=prompt_mode)

    ### begin user input ###
    if apache:
        g('node_list')
        node_lists = expNumRe(cfgs['node_list'])

        # a '[' left in any node name means the numeric RE was not expanded
        if any('[' in node for node in node_lists):
            log_err('Failed to expand node list, please check your input.')
        cfgs['node_list'] = ','.join(node_lists)
        g('hadoop_home')
        g('hbase_home')
        g('hive_home')
        g('hdfs_user')
        g('hbase_user')
        g('first_rsnode')
        cfgs['distro'] = 'APACHE'
    else:
        g('mgr_url')
        if not ('http:' in cfgs['mgr_url'] or 'https:' in cfgs['mgr_url']):
            cfgs['mgr_url'] = 'http://' + cfgs['mgr_url']

        # set cloudera default port 7180 if not provided by user
        if not re.search(r':\d+', cfgs['mgr_url']):
            cfgs['mgr_url'] += ':7180'

        g('mgr_user')
        g('mgr_pwd')

        validate_url_v1 = '%s/api/v1/clusters' % cfgs['mgr_url']
        content = ParseHttp(cfgs['mgr_user'], cfgs['mgr_pwd']).get(validate_url_v1)

        # currently only CDH support multiple clusters
        # so if condition is true, it must be CDH cluster
        if len(content['items']) > 1:
            # list all managed clusters and let the user pick one
            cluster_names = [cluster['name'] for cluster in content['items']]

            for index, name in enumerate(cluster_names):
                print(str(index + 1) + '. ' + name)
            g('cluster_no')
            try:
                c_index = int(cfgs['cluster_no']) - 1
            except (TypeError, ValueError):
                # non-numeric answer: force the range check below to fail
                c_index = -1
            if c_index < 0 or c_index >= len(cluster_names):
                log_err('Incorrect number')
            cluster_name = cluster_names[c_index]
        else:
            # the cluster name is looked up under two different layouts
            try:
                cluster_name = content['items'][0]['name']
            except (IndexError, KeyError):
                try:
                    cluster_name = content['items'][0]['Clusters']['cluster_name']
                except (IndexError, KeyError):
                    log_err('Failed to get cluster info from management url')

        hadoop_discover = HadoopDiscover(cfgs['mgr_user'], cfgs['mgr_pwd'], cfgs['mgr_url'], cluster_name)
        rsnodes = hadoop_discover.get_rsnodes()
        hadoop_users = hadoop_discover.get_hadoop_users()

        cfgs['distro'] = hadoop_discover.distro
        cfgs['hbase_lib_path'] = hadoop_discover.get_hbase_lib_path()
        cfgs['hbase_service_name'] = hadoop_discover.get_hbase_srvname()
        cfgs['hdfs_service_name'] = hadoop_discover.get_hdfs_srvname()
        cfgs['zookeeper_service_name'] = hadoop_discover.get_zookeeper_srvname()

        # spaces are stored URL-encoded
        cfgs['cluster_name'] = cluster_name.replace(' ', '%20')
        cfgs['hdfs_user'] = hadoop_users['hdfs_user']
        cfgs['hbase_user'] = hadoop_users['hbase_user']
        cfgs['node_list'] = ','.join(rsnodes)
        cfgs['first_rsnode'] = rsnodes[0] # first regionserver node

    # check node connection
    for node in cfgs['node_list'].split(','):
        rc = os.system('ping -c 1 %s >/dev/null 2>&1' % node)
        if rc:
            log_err('Cannot ping %s, please check network connection and /etc/hosts' % node)

    # set some system default configs
    cfgs['config_created_date'] = time.strftime('%Y/%m/%d %H:%M %Z')
    cfgs['traf_user'] = TRAF_USER
    if apache:
        cfgs['hbase_xml_file'] = cfgs['hbase_home'] + '/conf/hbase-site.xml'
        cfgs['hdfs_xml_file'] = cfgs['hadoop_home'] + '/etc/hadoop/hdfs-site.xml'
    else:
        cfgs['hbase_xml_file'] = DEF_HBASE_XML_FILE

    ### discover system settings; iterated below as a sequence of
    ### single-entry {host: json_string} dicts
    system_discover = wrapper.run(cfgs, options, mode='discover', pwd=pwd)

    # check discover results, return error if fails on any single node
    need_java_home = 0
    has_home_dir = 0
    # stays empty when nothing was discovered, so the fallback at the end
    # of this function never hits an unbound name
    java_home = ''
    for result in system_discover:
        # list(...) keeps the indexing valid on Python 2 and 3
        _, content = list(result.items())[0]
        content_dict = json.loads(content)

        java_home = content_dict['default_java']
        if java_home == 'N/A':
            need_java_home += 1
        if content_dict['linux'] == 'N/A':
            log_err('Unsupported Linux version')
        if content_dict['firewall_status'] == 'Running':
            info('Firewall is running, please make sure the ports used by Trafodion are open')
        if content_dict['traf_status'] == 'Running':
            log_err('Trafodion process is found, please stop it first')
        # only a usable value is recorded as the HBase version
        if content_dict['hbase'] == 'N/A':
            log_err('HBase is not found')
        elif content_dict['hbase'] == 'N/S':
            log_err('HBase version is not supported')
        else:
            cfgs['hbase_ver'] = content_dict['hbase']
        if content_dict['home_dir']: # trafodion user exists
            has_home_dir += 1
            cfgs['home_dir'] = content_dict['home_dir']
        if content_dict['hadoop_authentication'] == 'kerberos':
            cfgs['secure_hadoop'] = 'Y'
        else:
            cfgs['secure_hadoop'] = 'N'

    if offline:
        g('local_repo_dir')
        if not glob('%s/repodata' % cfgs['local_repo_dir']):
            log_err('repodata directory not found, this is not a valid repository directory')
        cfgs['offline_mode'] = 'Y'
        cfgs['repo_ip'] = socket.gethostbyname(socket.gethostname())
        ports = ParseInI(DEF_PORT_FILE, 'ports').load()
        cfgs['repo_http_port'] = ports['repo_http_port']

    pkg_list = ['apache-trafodion']
    # find tar in installer folder, if more than one found, use the first one
    for pkg in pkg_list:
        tar_loc = glob('%s/*%s*.tar.gz' % (INSTALLER_LOC, pkg))
        if tar_loc:
            cfgs['traf_package'] = tar_loc[0]
            break

    g('traf_package')
    cfgs['req_java8'] = 'N'

    # get basename and version from tar filename
    try:
        pattern = '|'.join(pkg_list)
        # \d+ so that multi-digit components such as 2.1.10 are kept whole
        cfgs['traf_basename'], cfgs['traf_version'] = re.search(
            r'.*(%s).*-(\d+\.\d+\.\d+).*' % pattern, cfgs['traf_package']).groups()
    except (AttributeError, TypeError):
        # re.search returned None (no match) or the package is not a string
        log_err('Invalid package tar file')

    if not cfgs['traf_dirname']:
        cfgs['traf_dirname'] = '%s-%s' % (cfgs['traf_basename'], cfgs['traf_version'])
    g('traf_dirname')
    if not has_home_dir:
        # no node reported a home dir, so a password is requested
        g('traf_pwd')
    g('dcs_cnt_per_node')
    g('scratch_locs')
    g('traf_start')

    # kerberos
    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    # ldap security
    g('ldap_security')
    if cfgs['ldap_security'].upper() == 'Y':
        g('db_root_user')
        g('ldap_hosts')
        g('ldap_port')
        g('ldap_identifiers')
        g('ldap_encrypt')
        # a cert path is requested for levels '1' and '2' only
        if cfgs['ldap_encrypt'] in ('1', '2'):
            g('ldap_certpath')
        elif cfgs['ldap_encrypt'] == '0':
            cfgs['ldap_certpath'] = ''
        else:
            log_err('Invalid ldap encryption level')

        g('ldap_userinfo')
        if cfgs['ldap_userinfo'] == 'Y':
            g('ldap_user')
            g('ldap_pwd')
        else:
            cfgs['ldap_user'] = ''
            cfgs['ldap_pwd'] = ''

    # DCS HA
    g('dcs_ha')
    cfgs['enable_ha'] = 'false'
    if cfgs['dcs_ha'].upper() == 'Y':
        g('dcs_floating_ip')
        g('dcs_interface')
        g('dcs_backup_nodes')
        # check dcs backup nodes should exist in node list
        backup_nodes = set(cfgs['dcs_backup_nodes'].split(','))
        if not backup_nodes.issubset(cfgs['node_list'].split(',')):
            log_err('Invalid DCS backup nodes, please pick up from node list')
        cfgs['enable_ha'] = 'true'

    if need_java_home:
        g('java_home')
    elif not cfgs['java_home']:
        # don't overwrite user input java home; otherwise fall back to the
        # default java reported by the last discovered node
        cfgs['java_home'] = java_home

    if not silent:
        u.notify_user()
def _parse_traf_config(lines):
    """ Parse shell style `export KEY="value"` lines into a {key: value} dict.

    Keys are lower-cased, a leading `export ` is dropped and double quotes
    are removed from the value. Blank lines, `#` comment lines and lines
    without `=` are ignored. Only the first `=` separates key and value,
    so a value may itself contain `=`.
    """
    parsed = {}
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#') or '=' not in line:
            continue
        if line.startswith('export '):
            line = line[len('export '):]
        key, value = line.split('=', 1)
        parsed[key.strip().lower()] = value.replace('"', '')
    return parsed


def main():
    """ add_nodes main loop

    Steps, in order:
      1. read the node names from the '--nodes' option and, if requested,
         prompt for the remote SSH password
      2. load TRAF_CFG_FILE into cfgs and validate the required settings
      3. package the trafodion folder and copy it, together with the config
         file, the .ssh folder and the hbase-trx / trafodion-utility files,
         to /tmp on every new node
      4. run wrapper in mode 'addnodes_new' on the new nodes and in mode
         'addnodes_all' on the new plus current nodes
      5. if a trafodion monitor process is running, add the nodes through
         sqshell and start DCS, otherwise remove sqconfig.db and run sqgen
      6. remove the temporary .ssh copy and the package file
    """
    cfgs = defaultdict(str)

    # handle parser option
    options = get_options()
    if not options.nodes:
        err_m('Must specifiy the node names using \'--nodes\' option')

    # get node list from user input
    new_nodes = expNumRe(options.nodes)
    if not new_nodes:
        err_m('Incorrect format')

    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    u = UserInput(options, pwd)
    # NOTE(review): prompt_mode is not defined in this function, presumably
    # it is expected at module level -- confirm, otherwise g() raises NameError
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=prompt_mode)

    format_output('Trafodion Elastic Add Nodes Script')

    ### read configs from current trafodion_config and save it to cfgs
    if os.path.exists(TRAF_CFG_FILE):
        with open(TRAF_CFG_FILE, 'r') as f:
            cfgs.update(_parse_traf_config(f.readlines()))
    else:
        err_m('Cannot find %s, be sure to run this script on one of trafodion nodes' % TRAF_CFG_FILE)

    ### config check
    if not cfgs['hbase_lib_path'] or not cfgs['traf_version']:
        err_m('Missing parameters in Trafodion config file')

    if not cfgs['traf_home'] or not cmd_output('%s ls %s' % (get_sudo_prefix(), cfgs['traf_home'])):
        err_m('Cannot find trafodion binary folder')
    # get trafodion user from traf_home path (second to last path component)
    cfgs['traf_user'] = cfgs['traf_home'].split('/')[-2]
    if not cfgs['traf_user']:
        err_m('Cannot detect trafodion user')

    ### parse trafodion user's password
    cfgs['traf_shadow'] = cmd_output("%s grep %s /etc/shadow |awk -F: '{print $2}'" % (get_sudo_prefix(), cfgs['traf_user']))

    def copy_files():
        """ package trafodion and copy all needed files to /tmp on the new nodes """
        # package trafodion binary into a tar file
        if not os.path.exists(TRAF_PKG_FILE):
            info('Creating trafodion packages of %s, this will take a while ...' % cfgs['traf_home'])
            run_cmd_as_user(cfgs['traf_user'], 'cd %s; tar czf %s ./* --exclude logs/* --exclude core.* --exclude tmp/*' % (cfgs['traf_home'], TRAF_PKG_FILE))
        else:
            info('Using existing trafodion package %s' % TRAF_PKG_FILE)

        info('Copying trafodion files to new nodes, this will take a while ...')
        # stage a copy of the .ssh folder next to traf_home under /tmp
        run_cmd('%s cp -rf %s/../.ssh /tmp' % (get_sudo_prefix(), cfgs['traf_home']))
        run_cmd('%s chmod -R 755 /tmp/.ssh' % get_sudo_prefix())
        traf_ssh_folder = '/tmp/.ssh'

        hbase_trx_file = cmd_output('ls %s/hbase-trx-*' % cfgs['hbase_lib_path'])
        trafodion_utility_file = cmd_output('ls %s/trafodion-utility-*' % cfgs['hbase_lib_path'])

        files = [TRAF_CFG_FILE, TRAF_PKG_FILE, traf_ssh_folder, hbase_trx_file, trafodion_utility_file]

        # one copy thread per new node, wait for all of them to finish
        remote_insts = [Remote(h, pwd=pwd) for h in new_nodes]
        threads = [Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        for r in remote_insts:
            if r.rc != 0:
                err_m('Failed to copy files to %s' % r.host)

    ### copy trafodion_config/trafodion-package/hbase-trx to the new nodes
    copy_files()

    ### set parameters
    # enable_ha holds 'true' or 'false', so compare case-insensitively
    # against the lowercase literal
    if cfgs['enable_ha'].lower() == 'true':
        g('dcs_backup_nodes')
        cfgs['dcs_ha'] = 'Y'
    else:
        cfgs['dcs_ha'] = 'N'

    if cfgs['trafodion_enable_authentication'] == 'YES':
        cfgs['ldap_security'] = 'Y'
    else:
        cfgs['ldap_security'] = 'N'

    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    #TODO: offline support
    cfgs['offline_mode'] = 'N'

    format_output('AddNode sub scripts Start')

    ### run addNode script on new nodes ###
    cfgs['node_list'] = ','.join(new_nodes)
    info('Running add node setup on new node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_new', pwd=pwd)

    ### run dcs setup script on all nodes ###
    # get current trafodion node list
    current_nodes = cmd_output('%s su - %s -c "trafconf -name 2>/dev/null"' % (get_sudo_prefix(), cfgs['traf_user'])).split()
    all_nodes = list(set(new_nodes + current_nodes))
    cfgs['node_list'] = ','.join(all_nodes)
    info('Running dcs setup on all node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_all', pwd=pwd)

    ### do sqshell node add/up, sqregen
    # check if trafodion is running
    mon_process = cmd_output('ps -ef|grep -v grep|grep -c "monitor COLD"')
    if int(mon_process) > 0:
        info('Trafodion instance is up, adding node in sqshell ...')

        # cores=0-1;processors=2;roles=connection,aggregation,storage
        sqconfig_ptr = cmd_output('%s su - %s -c "trafconf -node|sed -n 2p|cut -d\\\";\\\" -f3-5"' % (get_sudo_prefix(), cfgs['traf_user']))
        for node in new_nodes:
            info('adding node [%s] in sqshell ...' % node)
            run_cmd_as_user(cfgs['traf_user'], 'echo "node add {node-name %s,%s}" | sqshell -a' % (node, sqconfig_ptr))
            run_cmd_as_user(cfgs['traf_user'], 'echo "node up %s" | sqshell -a' % node)
            ok('Node [%s] added!' % node)

        info('Starting DCS on new nodes ...')
        run_cmd_as_user(cfgs['traf_user'], 'dcsstart')
    else:
        info('Trafodion instance is not up, do sqgen ...')
        run_cmd_as_user(cfgs['traf_user'], 'rm %s/sql/scripts/sqconfig.db' % cfgs['traf_home'])
        run_cmd_as_user(cfgs['traf_user'], 'sqgen')
        ok('Setup completed. You need to start trafodion manually')

    ### clean up
    run_cmd('%s rm -rf /tmp/.ssh' % get_sudo_prefix())
    run_cmd('%s rm -rf %s' % (get_sudo_prefix(), TRAF_PKG_FILE))

    format_output('AddNode Complete')
    info('NOTICE: You need to manually restart RegionServer on newly added nodes to take effect')