def main(): options = get_options() cfgs = defaultdict(str) if options.cfgfile: if not os.path.exists(options.cfgfile): err_m('Cannot find config file \'%s\'' % options.cfgfile) config_file = options.cfgfile else: config_file = DBCFG_FILE if options.pwd: pwd = getpass.getpass('Input remote host SSH Password: '******'' if os.path.exists(config_file): cfgs = ParseInI(config_file, 'dbconfigs').load() else: node_lists = expNumRe( raw_input( 'Enter list of Nodes separated by comma, support numeric RE, i.e. n[01-12]: ' )) # check if node list is expanded successfully if len([1 for node in node_lists if '[' in node]): err('Failed to expand node list, please check your input.') cfgs['node_list'] = ','.join(node_lists) results = wrapper.run(cfgs, options, mode='discover', pwd=pwd) format_output('Discover results') if len(results) > 4: output = output_row(results) else: output = output_column(results) print output with open('discover_result', 'w') as f: f.write('Discover Date: %s\n' % time.strftime('%Y-%m-%d %H:%M')) f.write(output)
def main(): """ db_installer main loop """ global cfgs format_output('Trafodion Installation ToolKit') # handle parser option options = get_options() if options.build and options.cfgfile: log_err( 'Wrong parameter, cannot specify both --build and --config-file') if options.build and options.offline: log_err('Wrong parameter, cannot specify both --build and --offline') if options.cfgfile: if not os.path.exists(options.cfgfile): log_err('Cannot find config file \'%s\'' % options.cfgfile) config_file = options.cfgfile else: config_file = DBCFG_FILE if options.pwd: pwd = getpass.getpass('Input remote host SSH Password: '******'' # not specified config file and default config file doesn't exist either p = ParseInI(config_file, 'dbconfigs') if options.build or (not os.path.exists(config_file)): if options.build: format_output('DryRun Start') user_input(options, prompt_mode=True, pwd=pwd) # save config file as json format print '\n** Generating config file to save configs ... \n' p.save(cfgs) # config file exists else: print '\n** Loading configs from config file ... 
\n' cfgs = p.load() if options.offline and cfgs['offline_mode'] != 'Y': log_err( 'To enable offline mode, must set "offline_mode = Y" in config file' ) user_input(options, prompt_mode=False, pwd=pwd) if options.reinstall: cfgs['reinstall'] = 'Y' if options.offline: http_start(cfgs['local_repo_dir'], cfgs['repo_http_port']) else: cfgs['offline_mode'] = 'N' if not options.build: format_output('Installation Start') ### perform actual installation ### wrapper.run(cfgs, options, pwd=pwd) format_output('Installation Complete') if options.offline: http_stop() # rename default config file when successfully installed # so next time user can input new variables for a new install # or specify the backup config file to install again try: # only rename default config file ts = time.strftime('%y%m%d_%H%M') if config_file == DBCFG_FILE and os.path.exists(config_file): os.rename(config_file, config_file + '.bak' + ts) except OSError: log_err('Cannot rename config file') else: format_output('DryRun Complete') # remove temp config file if os.path.exists(DBCFG_TMP_FILE): os.remove(DBCFG_TMP_FILE)
def user_input(options, prompt_mode=True, pwd=''): """ get user's input and check input value """ global cfgs apache = True if hasattr(options, 'apache') and options.apache else False offline = True if hasattr(options, 'offline') and options.offline else False silent = True if hasattr(options, 'silent') and options.silent else False # load from temp config file if in prompt mode if os.path.exists(DBCFG_TMP_FILE) and prompt_mode == True: tp = ParseInI(DBCFG_TMP_FILE, 'dbconfigs') cfgs = tp.load() if not cfgs: # set cfgs to defaultdict again cfgs = defaultdict(str) u = UserInput(options, pwd) g = lambda n: u.get_input(n, cfgs[n], prompt_mode=prompt_mode) ### begin user input ### if apache: g('node_list') node_lists = expNumRe(cfgs['node_list']) # check if node list is expanded successfully if len([1 for node in node_lists if '[' in node]): log_err('Failed to expand node list, please check your input.') cfgs['node_list'] = ','.join(node_lists) g('hadoop_home') g('hbase_home') g('hive_home') g('hdfs_user') g('hbase_user') g('first_rsnode') cfgs['distro'] = 'APACHE' else: g('mgr_url') if not ('http:' in cfgs['mgr_url'] or 'https:' in cfgs['mgr_url']): cfgs['mgr_url'] = 'http://' + cfgs['mgr_url'] # set cloudera default port 7180 if not provided by user if not re.search(r':\d+', cfgs['mgr_url']): cfgs['mgr_url'] += ':7180' g('mgr_user') g('mgr_pwd') validate_url_v1 = '%s/api/v1/clusters' % cfgs['mgr_url'] content = ParseHttp(cfgs['mgr_user'], cfgs['mgr_pwd']).get(validate_url_v1) # currently only CDH support multiple clusters # so if condition is true, it must be CDH cluster if len(content['items']) > 1: cluster_names = [] # loop all managed clusters for cluster in content['items']: cluster_names.append(cluster['name']) for index, name in enumerate(cluster_names): print str(index + 1) + '. 
' + name g('cluster_no') c_index = int(cfgs['cluster_no']) - 1 if c_index < 0 or c_index >= len(cluster_names): log_err('Incorrect number') cluster_name = cluster_names[int(c_index)] else: try: cluster_name = content['items'][0]['name'] except (IndexError, KeyError): try: cluster_name = content['items'][0]['Clusters'][ 'cluster_name'] except (IndexError, KeyError): log_err('Failed to get cluster info from management url') hadoop_discover = HadoopDiscover(cfgs['mgr_user'], cfgs['mgr_pwd'], cfgs['mgr_url'], cluster_name) rsnodes = hadoop_discover.get_rsnodes() hadoop_users = hadoop_discover.get_hadoop_users() cfgs['distro'] = hadoop_discover.distro cfgs['hbase_lib_path'] = hadoop_discover.get_hbase_lib_path() cfgs['hbase_service_name'] = hadoop_discover.get_hbase_srvname() cfgs['hdfs_service_name'] = hadoop_discover.get_hdfs_srvname() cfgs['zookeeper_service_name'] = hadoop_discover.get_zookeeper_srvname( ) cfgs['cluster_name'] = cluster_name.replace(' ', '%20') cfgs['hdfs_user'] = hadoop_users['hdfs_user'] cfgs['hbase_user'] = hadoop_users['hbase_user'] cfgs['node_list'] = ','.join(rsnodes) cfgs['first_rsnode'] = rsnodes[0] # first regionserver node # check node connection for node in cfgs['node_list'].split(','): rc = os.system('ping -c 1 %s >/dev/null 2>&1' % node) if rc: log_err( 'Cannot ping %s, please check network connection and /etc/hosts' % node) # set some system default configs cfgs['config_created_date'] = time.strftime('%Y/%m/%d %H:%M %Z') cfgs['traf_user'] = TRAF_USER if apache: cfgs['hbase_xml_file'] = cfgs['hbase_home'] + '/conf/hbase-site.xml' cfgs['hdfs_xml_file'] = cfgs[ 'hadoop_home'] + '/etc/hadoop/hdfs-site.xml' else: cfgs['hbase_xml_file'] = DEF_HBASE_XML_FILE ### discover system settings, return a dict system_discover = wrapper.run(cfgs, options, mode='discover', pwd=pwd) # check discover results, return error if fails on any sinlge node need_java_home = 0 has_home_dir = 0 for result in system_discover: host, content = result.items()[0] 
content_dict = json.loads(content) java_home = content_dict['default_java'] if java_home == 'N/A': need_java_home += 1 if content_dict['linux'] == 'N/A': log_err('Unsupported Linux version') if content_dict['firewall_status'] == 'Running': info( 'Firewall is running, please make sure the ports used by Trafodion are open' ) if content_dict['traf_status'] == 'Running': log_err('Trafodion process is found, please stop it first') if content_dict['hbase'] == 'N/A': log_err('HBase is not found') if content_dict['hbase'] == 'N/S': log_err('HBase version is not supported') else: cfgs['hbase_ver'] = content_dict['hbase'] if content_dict['home_dir']: # trafodion user exists has_home_dir += 1 cfgs['home_dir'] = content_dict['home_dir'] if content_dict['hadoop_authentication'] == 'kerberos': cfgs['secure_hadoop'] = 'Y' else: cfgs['secure_hadoop'] = 'N' if offline: g('local_repo_dir') if not glob('%s/repodata' % cfgs['local_repo_dir']): log_err( 'repodata directory not found, this is not a valid repository directory' ) cfgs['offline_mode'] = 'Y' cfgs['repo_ip'] = socket.gethostbyname(socket.gethostname()) ports = ParseInI(DEF_PORT_FILE, 'ports').load() cfgs['repo_http_port'] = ports['repo_http_port'] pkg_list = ['apache-trafodion'] # find tar in installer folder, if more than one found, use the first one for pkg in pkg_list: tar_loc = glob('%s/*%s*.tar.gz' % (INSTALLER_LOC, pkg)) if tar_loc: cfgs['traf_package'] = tar_loc[0] break g('traf_package') cfgs['req_java8'] = 'N' # get basename and version from tar filename try: pattern = '|'.join(pkg_list) cfgs['traf_basename'], cfgs['traf_version'] = re.search( r'.*(%s).*-(\d\.\d\.\d).*' % pattern, cfgs['traf_package']).groups() except: log_err('Invalid package tar file') if not cfgs['traf_dirname']: cfgs['traf_dirname'] = '%s-%s' % (cfgs['traf_basename'], cfgs['traf_version']) g('traf_dirname') if not has_home_dir: g('traf_pwd') g('dcs_cnt_per_node') g('scratch_locs') g('traf_start') # kerberos if cfgs['secure_hadoop'].upper() == 
'Y': g('kdc_server') g('admin_principal') g('kdcadmin_pwd') # ldap security g('ldap_security') if cfgs['ldap_security'].upper() == 'Y': g('db_root_user') g('ldap_hosts') g('ldap_port') g('ldap_identifiers') g('ldap_encrypt') if cfgs['ldap_encrypt'] == '1' or cfgs['ldap_encrypt'] == '2': g('ldap_certpath') elif cfgs['ldap_encrypt'] == '0': cfgs['ldap_certpath'] = '' else: log_err('Invalid ldap encryption level') g('ldap_userinfo') if cfgs['ldap_userinfo'] == 'Y': g('ldap_user') g('ldap_pwd') else: cfgs['ldap_user'] = '' cfgs['ldap_pwd'] = '' # DCS HA g('dcs_ha') cfgs['enable_ha'] = 'false' if cfgs['dcs_ha'].upper() == 'Y': g('dcs_floating_ip') g('dcs_interface') g('dcs_backup_nodes') # check dcs backup nodes should exist in node list if sorted( list( set((cfgs['dcs_backup_nodes'] + ',' + cfgs['node_list']).split(',')))) != sorted( cfgs['node_list'].split(',')): log_err('Invalid DCS backup nodes, please pick up from node list') cfgs['enable_ha'] = 'true' if need_java_home: g('java_home') else: # don't overwrite user input java home if not cfgs['java_home']: cfgs['java_home'] = java_home if not silent: u.notify_user()
def main():
    """ add_nodes main loop

    Adds new nodes to a running Trafodion cluster: reads the current
    trafodion_config, copies the trafodion package/config/ssh keys to the
    new nodes, runs the addnodes wrapper modes, then either registers the
    nodes via sqshell (instance up) or regenerates sqconfig (instance down).
    """
    cfgs = defaultdict(str)

    # handle parser option
    options = get_options()

    if not options.nodes:
        err_m('Must specifiy the node names using \'--nodes\' option')

    # get node list from user input
    new_nodes = expNumRe(options.nodes)
    if not new_nodes:
        err_m('Incorrect format')

    # fix: the prompt/else pair had been collapsed into an invalid
    # one-liner; restore the standard getpass if/else
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    u = UserInput(options, pwd)
    # fix: 'prompt_mode' was an undefined name in this scope (NameError
    # the first time g() ran); this script always prompts interactively
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=True)

    format_output('Trafodion Elastic Add Nodes Script')

    ### read configs from current trafodion_config and save it to cfgs
    if os.path.exists(TRAF_CFG_FILE):
        with open(TRAF_CFG_FILE, 'r') as f:
            traf_cfgs = f.readlines()
        for traf_cfg in traf_cfgs:
            if not traf_cfg.strip():
                continue
            key, value = traf_cfg.replace('export ', '').split('=')
            value = value.replace('"', '')
            value = value.replace('\n', '')
            cfgs[key.lower()] = value
    else:
        err_m('Cannot find %s, be sure to run this script on one of trafodion nodes' % TRAF_CFG_FILE)

    ### config check
    if not cfgs['hbase_lib_path'] or not cfgs['traf_version']:
        err_m('Missing parameters in Trafodion config file')

    if not cfgs['traf_home'] or not cmd_output('%s ls %s' % (get_sudo_prefix(), cfgs['traf_home'])):
        err_m('Cannot find trafodion binary folder')

    # get trafodion user from traf_home path
    cfgs['traf_user'] = cfgs['traf_home'].split('/')[-2]
    if not cfgs['traf_user']:
        err_m('Cannot detect trafodion user')

    ### parse trafodion user's password
    cfgs['traf_shadow'] = cmd_output("%s grep %s /etc/shadow |awk -F: '{print $2}'" % (get_sudo_prefix(), cfgs['traf_user']))

    def copy_files():
        # package trafodion binary into a tar file
        if not os.path.exists(TRAF_PKG_FILE):
            info('Creating trafodion packages of %s, this will take a while ...' % cfgs['traf_home'])
            run_cmd_as_user(cfgs['traf_user'], 'cd %s; tar czf %s ./* --exclude logs/* --exclude core.* --exclude tmp/*' % (cfgs['traf_home'], TRAF_PKG_FILE))
        else:
            info('Using existing trafodion package %s' % TRAF_PKG_FILE)

        info('Copying trafodion files to new nodes, this will take a while ...')
        run_cmd('%s cp -rf %s/../.ssh /tmp' % (get_sudo_prefix(), cfgs['traf_home']))
        run_cmd('%s chmod -R 755 /tmp/.ssh' % get_sudo_prefix())
        traf_ssh_folder = '/tmp/.ssh'

        hbase_trx_file = cmd_output('ls %s/hbase-trx-*' % cfgs['hbase_lib_path'])
        trafodion_utility_file = cmd_output('ls %s/trafodion-utility-*' % cfgs['hbase_lib_path'])

        files = [TRAF_CFG_FILE, TRAF_PKG_FILE, traf_ssh_folder,
                 hbase_trx_file, trafodion_utility_file]

        # copy to all new nodes in parallel, one thread per node
        remote_insts = [Remote(h, pwd=pwd) for h in new_nodes]
        threads = [Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        for r in remote_insts:
            if r.rc != 0:
                err_m('Failed to copy files to %s' % r.host)

    ### copy trafodion_config/trafodion-package/hbase-trx to the new nodes
    copy_files()

    ### set parameters
    # fix: '.upper() == "true"' could never match (upper-case result vs
    # lower-case literal); compare case-insensitively against 'TRUE'
    if cfgs['enable_ha'].upper() == 'TRUE':
        g('dcs_backup_nodes')
        cfgs['dcs_ha'] = 'Y'
    else:
        cfgs['dcs_ha'] = 'N'

    if cfgs['trafodion_enable_authentication'] == 'YES':
        cfgs['ldap_security'] = 'Y'
    else:
        cfgs['ldap_security'] = 'N'

    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    #TODO: offline support
    cfgs['offline_mode'] = 'N'

    format_output('AddNode sub scripts Start')

    ### run addNode script on new nodes ###
    cfgs['node_list'] = ','.join(new_nodes)
    info('Running add node setup on new node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_new', pwd=pwd)

    ### run dcs setup script on all nodes ###
    # get current trafodion node list
    current_nodes = cmd_output('%s su - %s -c "trafconf -name 2>/dev/null"' % (get_sudo_prefix(), cfgs['traf_user'])).split()
    all_nodes = list(set(new_nodes + current_nodes))
    cfgs['node_list'] = ','.join(all_nodes)
    info('Running dcs setup on all node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_all', pwd=pwd)

    ### do sqshell node add/up, sqregen
    # check if trafodion is running
    mon_process = cmd_output('ps -ef|grep -v grep|grep -c "monitor COLD"')
    if int(mon_process) > 0:
        info('Trafodion instance is up, adding node in sqshell ...')
        # cores=0-1;processors=2;roles=connection,aggregation,storage
        sqconfig_ptr = cmd_output('%s su - %s -c "trafconf -node|sed -n 2p|cut -d\\\";\\\" -f3-5"' % (get_sudo_prefix(), cfgs['traf_user']))
        for node in new_nodes:
            info('adding node [%s] in sqshell ...' % node)
            run_cmd_as_user(cfgs['traf_user'], 'echo "node add {node-name %s,%s}" | sqshell -a' % (node, sqconfig_ptr))
            run_cmd_as_user(cfgs['traf_user'], 'echo "node up %s" | sqshell -a' % node)
            ok('Node [%s] added!' % node)

        info('Starting DCS on new nodes ...')
        run_cmd_as_user(cfgs['traf_user'], 'dcsstart')
    else:
        info('Trafodion instance is not up, do sqgen ...')
        run_cmd_as_user(cfgs['traf_user'], 'rm %s/sqconfig.db' % cfgs['traf_var'])
        run_cmd_as_user(cfgs['traf_user'], 'sqgen')
        ok('Setup completed. You need to start trafodion manually')

    ### clean up
    run_cmd('%s rm -rf /tmp/.ssh' % get_sudo_prefix())
    run_cmd('%s rm -rf %s' % (get_sudo_prefix(), TRAF_PKG_FILE))

    format_output('AddNode Complete')
    info('NOTICE: You need to manually restart RegionServer on newly added nodes to take effect')
def main(): """ db_installer main loop """ global cfgs format_output('Trafodion Installation ToolKit') # handle parser option options = get_options() if options.build and options.cfgfile: log_err('Wrong parameter, cannot specify both --build and --config-file') if options.build and options.offline: log_err('Wrong parameter, cannot specify both --build and --offline') if options.cfgfile: if not os.path.exists(options.cfgfile): log_err('Cannot find config file \'%s\'' % options.cfgfile) config_file = options.cfgfile else: config_file = DBCFG_FILE if options.pwd: pwd = getpass.getpass('Input remote host SSH Password: '******'' # not specified config file and default config file doesn't exist either p = ParseInI(config_file, 'dbconfigs') if options.build or (not os.path.exists(config_file)): if options.build: format_output('DryRun Start') user_input(options, prompt_mode=True, pwd=pwd) # save config file as json format print '\n** Generating config file to save configs ... \n' p.save(cfgs) # config file exists else: print '\n** Loading configs from config file ... 
\n' cfgs = p.load() if options.offline and cfgs['offline_mode'] != 'Y': log_err('To enable offline mode, must set "offline_mode = Y" in config file') user_input(options, prompt_mode=False, pwd=pwd) if options.reinstall: cfgs['reinstall'] = 'Y' if options.offline: http_start(cfgs['local_repo_dir'], cfgs['repo_http_port']) else: cfgs['offline_mode'] = 'N' if not options.build: format_output('Installation Start') ### perform actual installation ### wrapper.run(cfgs, options, pwd=pwd) format_output('Installation Complete') if options.offline: http_stop() # rename default config file when successfully installed # so next time user can input new variables for a new install # or specify the backup config file to install again try: # only rename default config file ts = time.strftime('%y%m%d_%H%M') if config_file == DBCFG_FILE and os.path.exists(config_file): os.rename(config_file, config_file + '.bak' + ts) except OSError: log_err('Cannot rename config file') else: format_output('DryRun Complete') # remove temp config file if os.path.exists(DBCFG_TMP_FILE): os.remove(DBCFG_TMP_FILE)
def user_input(options, prompt_mode=True, pwd=''): """ get user's input and check input value """ global cfgs apache = True if hasattr(options, 'apache') and options.apache else False offline = True if hasattr(options, 'offline') and options.offline else False silent = True if hasattr(options, 'silent') and options.silent else False # load from temp config file if in prompt mode if os.path.exists(DBCFG_TMP_FILE) and prompt_mode == True: tp = ParseInI(DBCFG_TMP_FILE, 'dbconfigs') cfgs = tp.load() if not cfgs: # set cfgs to defaultdict again cfgs = defaultdict(str) u = UserInput(options, pwd) g = lambda n: u.get_input(n, cfgs[n], prompt_mode=prompt_mode) ### begin user input ### if apache: g('node_list') node_lists = expNumRe(cfgs['node_list']) # check if node list is expanded successfully if len([1 for node in node_lists if '[' in node]): log_err('Failed to expand node list, please check your input.') cfgs['node_list'] = ','.join(node_lists) g('hadoop_home') g('hbase_home') g('hive_home') g('hdfs_user') g('hbase_user') g('first_rsnode') cfgs['distro'] = 'APACHE' else: g('mgr_url') if not ('http:' in cfgs['mgr_url'] or 'https:' in cfgs['mgr_url']): cfgs['mgr_url'] = 'http://' + cfgs['mgr_url'] # set cloudera default port 7180 if not provided by user if not re.search(r':\d+', cfgs['mgr_url']): cfgs['mgr_url'] += ':7180' g('mgr_user') g('mgr_pwd') validate_url_v1 = '%s/api/v1/clusters' % cfgs['mgr_url'] content = ParseHttp(cfgs['mgr_user'], cfgs['mgr_pwd']).get(validate_url_v1) # currently only CDH support multiple clusters # so if condition is true, it must be CDH cluster if len(content['items']) > 1: cluster_names = [] # loop all managed clusters for cluster in content['items']: cluster_names.append(cluster['name']) for index, name in enumerate(cluster_names): print str(index + 1) + '. 
' + name g('cluster_no') c_index = int(cfgs['cluster_no']) - 1 if c_index < 0 or c_index >= len(cluster_names): log_err('Incorrect number') cluster_name = cluster_names[int(c_index)] else: try: cluster_name = content['items'][0]['name'] except (IndexError, KeyError): try: cluster_name = content['items'][0]['Clusters']['cluster_name'] except (IndexError, KeyError): log_err('Failed to get cluster info from management url') hadoop_discover = HadoopDiscover(cfgs['mgr_user'], cfgs['mgr_pwd'], cfgs['mgr_url'], cluster_name) rsnodes = hadoop_discover.get_rsnodes() hadoop_users = hadoop_discover.get_hadoop_users() cfgs['distro'] = hadoop_discover.distro cfgs['hbase_lib_path'] = hadoop_discover.get_hbase_lib_path() cfgs['hbase_service_name'] = hadoop_discover.get_hbase_srvname() cfgs['hdfs_service_name'] = hadoop_discover.get_hdfs_srvname() cfgs['zookeeper_service_name'] = hadoop_discover.get_zookeeper_srvname() cfgs['cluster_name'] = cluster_name.replace(' ', '%20') cfgs['hdfs_user'] = hadoop_users['hdfs_user'] cfgs['hbase_user'] = hadoop_users['hbase_user'] cfgs['node_list'] = ','.join(rsnodes) cfgs['first_rsnode'] = rsnodes[0] # first regionserver node # check node connection for node in cfgs['node_list'].split(','): rc = os.system('ping -c 1 %s >/dev/null 2>&1' % node) if rc: log_err('Cannot ping %s, please check network connection and /etc/hosts' % node) # set some system default configs cfgs['config_created_date'] = time.strftime('%Y/%m/%d %H:%M %Z') cfgs['traf_user'] = TRAF_USER if apache: cfgs['hbase_xml_file'] = cfgs['hbase_home'] + '/conf/hbase-site.xml' cfgs['hdfs_xml_file'] = cfgs['hadoop_home'] + '/etc/hadoop/hdfs-site.xml' else: cfgs['hbase_xml_file'] = DEF_HBASE_XML_FILE ### discover system settings, return a dict system_discover = wrapper.run(cfgs, options, mode='discover', pwd=pwd) # check discover results, return error if fails on any sinlge node need_java_home = 0 has_home_dir = 0 for result in system_discover: host, content = result.items()[0] 
content_dict = json.loads(content) java_home = content_dict['default_java'] if java_home == 'N/A': need_java_home += 1 if content_dict['linux'] == 'N/A': log_err('Unsupported Linux version') if content_dict['firewall_status'] == 'Running': info('Firewall is running, please make sure the ports used by Trafodion are open') if content_dict['traf_status'] == 'Running': log_err('Trafodion process is found, please stop it first') if content_dict['hbase'] == 'N/A': log_err('HBase is not found') if content_dict['hbase'] == 'N/S': log_err('HBase version is not supported') else: cfgs['hbase_ver'] = content_dict['hbase'] if content_dict['home_dir']: # trafodion user exists has_home_dir += 1 cfgs['home_dir'] = content_dict['home_dir'] if content_dict['hadoop_authentication'] == 'kerberos': cfgs['secure_hadoop'] = 'Y' else: cfgs['secure_hadoop'] = 'N' if offline: g('local_repo_dir') if not glob('%s/repodata' % cfgs['local_repo_dir']): log_err('repodata directory not found, this is not a valid repository directory') cfgs['offline_mode'] = 'Y' cfgs['repo_ip'] = socket.gethostbyname(socket.gethostname()) ports = ParseInI(DEF_PORT_FILE, 'ports').load() cfgs['repo_http_port'] = ports['repo_http_port'] pkg_list = ['apache-trafodion'] # find tar in installer folder, if more than one found, use the first one for pkg in pkg_list: tar_loc = glob('%s/*%s*.tar.gz' % (INSTALLER_LOC, pkg)) if tar_loc: cfgs['traf_package'] = tar_loc[0] break g('traf_package') cfgs['req_java8'] = 'N' # get basename and version from tar filename try: pattern = '|'.join(pkg_list) cfgs['traf_basename'], cfgs['traf_version'] = re.search(r'.*(%s).*-(\d\.\d\.\d).*' % pattern, cfgs['traf_package']).groups() except: log_err('Invalid package tar file') if not cfgs['traf_dirname']: cfgs['traf_dirname'] = '%s-%s' % (cfgs['traf_basename'], cfgs['traf_version']) g('traf_dirname') if not has_home_dir: g('traf_pwd') g('dcs_cnt_per_node') g('scratch_locs') g('traf_start') # kerberos if cfgs['secure_hadoop'].upper() == 'Y': 
g('kdc_server') g('admin_principal') g('kdcadmin_pwd') # ldap security g('ldap_security') if cfgs['ldap_security'].upper() == 'Y': g('db_root_user') g('ldap_hosts') g('ldap_port') g('ldap_identifiers') g('ldap_encrypt') if cfgs['ldap_encrypt'] == '1' or cfgs['ldap_encrypt'] == '2': g('ldap_certpath') elif cfgs['ldap_encrypt'] == '0': cfgs['ldap_certpath'] = '' else: log_err('Invalid ldap encryption level') g('ldap_userinfo') if cfgs['ldap_userinfo'] == 'Y': g('ldap_user') g('ldap_pwd') else: cfgs['ldap_user'] = '' cfgs['ldap_pwd'] = '' # DCS HA g('dcs_ha') cfgs['enable_ha'] = 'false' if cfgs['dcs_ha'].upper() == 'Y': g('dcs_floating_ip') g('dcs_interface') g('dcs_backup_nodes') # check dcs backup nodes should exist in node list if sorted(list(set((cfgs['dcs_backup_nodes'] + ',' + cfgs['node_list']).split(',')))) != sorted(cfgs['node_list'].split(',')): log_err('Invalid DCS backup nodes, please pick up from node list') cfgs['enable_ha'] = 'true' if need_java_home: g('java_home') else: # don't overwrite user input java home if not cfgs['java_home']: cfgs['java_home'] = java_home if not silent: u.notify_user()
def main():
    """ add_nodes main loop

    Adds new nodes to a running Trafodion cluster: reads the current
    trafodion_config, copies the trafodion package/config/ssh keys to the
    new nodes, runs the addnodes wrapper modes, then either registers the
    nodes via sqshell (instance up) or regenerates sqconfig (instance down).
    """
    cfgs = defaultdict(str)

    # handle parser option
    options = get_options()

    if not options.nodes:
        err_m('Must specifiy the node names using \'--nodes\' option')

    # get node list from user input
    new_nodes = expNumRe(options.nodes)
    if not new_nodes:
        err_m('Incorrect format')

    # fix: the prompt/else pair had been collapsed into an invalid
    # one-liner; restore the standard getpass if/else
    if options.pwd:
        pwd = getpass.getpass('Input remote host SSH Password: ')
    else:
        pwd = ''

    u = UserInput(options, pwd)
    # fix: 'prompt_mode' was an undefined name in this scope (NameError
    # the first time g() ran); this script always prompts interactively
    g = lambda n: u.get_input(n, cfgs[n], prompt_mode=True)

    format_output('Trafodion Elastic Add Nodes Script')

    ### read configs from current trafodion_config and save it to cfgs
    if os.path.exists(TRAF_CFG_FILE):
        with open(TRAF_CFG_FILE, 'r') as f:
            traf_cfgs = f.readlines()
        for traf_cfg in traf_cfgs:
            if not traf_cfg.strip():
                continue
            key, value = traf_cfg.replace('export ', '').split('=')
            value = value.replace('"', '')
            value = value.replace('\n', '')
            cfgs[key.lower()] = value
    else:
        err_m('Cannot find %s, be sure to run this script on one of trafodion nodes' % TRAF_CFG_FILE)

    ### config check
    if not cfgs['hbase_lib_path'] or not cfgs['traf_version']:
        err_m('Missing parameters in Trafodion config file')

    if not cfgs['traf_home'] or not cmd_output('%s ls %s' % (get_sudo_prefix(), cfgs['traf_home'])):
        err_m('Cannot find trafodion binary folder')

    # get trafodion user from traf_home path
    cfgs['traf_user'] = cfgs['traf_home'].split('/')[-2]
    if not cfgs['traf_user']:
        err_m('Cannot detect trafodion user')

    ### parse trafodion user's password
    cfgs['traf_shadow'] = cmd_output("%s grep %s /etc/shadow |awk -F: '{print $2}'" % (get_sudo_prefix(), cfgs['traf_user']))

    def copy_files():
        # package trafodion binary into a tar file
        if not os.path.exists(TRAF_PKG_FILE):
            info('Creating trafodion packages of %s, this will take a while ...' % cfgs['traf_home'])
            run_cmd_as_user(cfgs['traf_user'], 'cd %s; tar czf %s ./* --exclude logs/* --exclude core.* --exclude tmp/*' % (cfgs['traf_home'], TRAF_PKG_FILE))
        else:
            info('Using existing trafodion package %s' % TRAF_PKG_FILE)

        info('Copying trafodion files to new nodes, this will take a while ...')
        run_cmd('%s cp -rf %s/../.ssh /tmp' % (get_sudo_prefix(), cfgs['traf_home']))
        run_cmd('%s chmod -R 755 /tmp/.ssh' % get_sudo_prefix())
        traf_ssh_folder = '/tmp/.ssh'

        hbase_trx_file = cmd_output('ls %s/hbase-trx-*' % cfgs['hbase_lib_path'])
        trafodion_utility_file = cmd_output('ls %s/trafodion-utility-*' % cfgs['hbase_lib_path'])

        files = [TRAF_CFG_FILE, TRAF_PKG_FILE, traf_ssh_folder,
                 hbase_trx_file, trafodion_utility_file]

        # copy to all new nodes in parallel, one thread per node
        remote_insts = [Remote(h, pwd=pwd) for h in new_nodes]
        threads = [Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        for r in remote_insts:
            if r.rc != 0:
                err_m('Failed to copy files to %s' % r.host)

    ### copy trafodion_config/trafodion-package/hbase-trx to the new nodes
    copy_files()

    ### set parameters
    # fix: '.upper() == "true"' could never match (upper-case result vs
    # lower-case literal); compare case-insensitively against 'TRUE'
    if cfgs['enable_ha'].upper() == 'TRUE':
        g('dcs_backup_nodes')
        cfgs['dcs_ha'] = 'Y'
    else:
        cfgs['dcs_ha'] = 'N'

    if cfgs['trafodion_enable_authentication'] == 'YES':
        cfgs['ldap_security'] = 'Y'
    else:
        cfgs['ldap_security'] = 'N'

    if cfgs['secure_hadoop'].upper() == 'Y':
        g('kdc_server')
        g('admin_principal')
        g('kdcadmin_pwd')

    #TODO: offline support
    cfgs['offline_mode'] = 'N'

    format_output('AddNode sub scripts Start')

    ### run addNode script on new nodes ###
    cfgs['node_list'] = ','.join(new_nodes)
    info('Running add node setup on new node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_new', pwd=pwd)

    ### run dcs setup script on all nodes ###
    # get current trafodion node list
    current_nodes = cmd_output('%s su - %s -c "trafconf -name 2>/dev/null"' % (get_sudo_prefix(), cfgs['traf_user'])).split()
    all_nodes = list(set(new_nodes + current_nodes))
    cfgs['node_list'] = ','.join(all_nodes)
    info('Running dcs setup on all node(s) [%s] ...' % cfgs['node_list'])
    wrapper.run(cfgs, options, mode='addnodes_all', pwd=pwd)

    ### do sqshell node add/up, sqregen
    # check if trafodion is running
    mon_process = cmd_output('ps -ef|grep -v grep|grep -c "monitor COLD"')
    if int(mon_process) > 0:
        info('Trafodion instance is up, adding node in sqshell ...')
        # cores=0-1;processors=2;roles=connection,aggregation,storage
        sqconfig_ptr = cmd_output('%s su - %s -c "trafconf -node|sed -n 2p|cut -d\\\";\\\" -f3-5"' % (get_sudo_prefix(), cfgs['traf_user']))
        for node in new_nodes:
            info('adding node [%s] in sqshell ...' % node)
            run_cmd_as_user(cfgs['traf_user'], 'echo "node add {node-name %s,%s}" | sqshell -a' % (node, sqconfig_ptr))
            run_cmd_as_user(cfgs['traf_user'], 'echo "node up %s" | sqshell -a' % node)
            ok('Node [%s] added!' % node)

        info('Starting DCS on new nodes ...')
        run_cmd_as_user(cfgs['traf_user'], 'dcsstart')
    else:
        info('Trafodion instance is not up, do sqgen ...')
        run_cmd_as_user(cfgs['traf_user'], 'rm %s/sql/scripts/sqconfig.db' % cfgs['traf_home'])
        run_cmd_as_user(cfgs['traf_user'], 'sqgen')
        ok('Setup completed. You need to start trafodion manually')

    ### clean up
    run_cmd('%s rm -rf /tmp/.ssh' % get_sudo_prefix())
    run_cmd('%s rm -rf %s' % (get_sudo_prefix(), TRAF_PKG_FILE))

    format_output('AddNode Complete')
    info('NOTICE: You need to manually restart RegionServer on newly added nodes to take effect')