def terminate(constellation_name):
    """
    Terminate every machine of a constellation and record the progress.

    The constellation and each of its machines are first flagged as
    'terminating', the cloud-provider specific teardown is invoked, and
    finally everything is flagged as 'terminated'.

    constellation_name: name used to look up the ConstellationState.
    """
    constellation = ConstellationState(constellation_name)
    machines = constellation.get_value('machines')
    credentials_fname = os.path.join(
        constellation.get_value("constellation_directory"),
        'credentials.txt')

    # The three per-machine status keys always move together.
    machine_key_templates = ("%s_state", "%s_launch_msg", "%s_aws_state")

    def _flag_machines(status):
        for name in machines:
            for template in machine_key_templates:
                constellation.set_value(template % name, status)

    constellation.set_value('constellation_state', 'terminating')
    constellation.set_value('sim_glx_state', "not running")
    constellation.set_value('gazebo', "not running")
    constellation.set_value('gz_web', "")
    _flag_machines('terminating')
    constellation.set_value("launch_stage", "nothing")

    cloud_provider = constellation.get_value('cloud_provider')
    if cloud_provider == "aws":
        terminate_aws_constellation(constellation_name, credentials_fname)
    if cloud_provider == 'softlayer':
        # The prefix is whatever follows the last underscore of the name.
        prefix = constellation_name.split('_')[-1]
        terminate_softlayer_constellation(constellation_name,
                                          prefix,
                                          False,
                                          credentials_fname)

    _flag_machines('terminated')
    constellation.set_value('constellation_state', 'terminated')
def terminate(constellation_name):
    """
    Terminate a single-machine constellation and record the progress.

    The constellation and its machine are flagged as 'terminating', the AWS
    server is terminated when the cloud provider is "aws", and everything is
    then flagged as 'terminated'.

    constellation_name: name used to look up the ConstellationState.
    """
    constellation = ConstellationState(constellation_name)
    constellation.set_value('constellation_state', 'terminating')

    machine_name = constellation.get_value('machine_name')
    credentials_fname = os.path.join(
        constellation.get_value("constellation_directory"),
        'credentials.txt')

    # Status keys of the machine, updated together at each step.
    machine_keys = ("%s_state" % machine_name,
                    "%s_launch_msg" % machine_name,
                    "%s_aws_state" % machine_name)

    constellation.set_value('sim_glx_state', "not running")
    constellation.set_value('gazebo', "not running")
    constellation.set_value('gz_web', "")
    for key in machine_keys:
        constellation.set_value(key, 'terminating')
    constellation.set_value("launch_stage", "nothing")

    if constellation.get_value('cloud_provider') == "aws":
        terminate_aws_server(constellation_name, credentials_fname)

    for key in machine_keys:
        constellation.set_value(key, 'terminated')
    constellation.set_value('constellation_state', 'terminated')
def _get_ssh_router(constellation_name):
    """
    Build an SshClient for the router of the given constellation.

    The client is created from the constellation directory, the "key-router"
    key name, the 'ubuntu' user and the stored "router_public_ip" value.
    """
    state = ConstellationState(constellation_name)
    directory = state.get_value('constellation_directory')
    public_ip = state.get_value("router_public_ip")
    return SshClient(directory, "key-router", 'ubuntu', public_ip)
def _get_ssh_client(constellation_name):
    """
    Build an SshClient for the simulator of the given constellation.

    The client is created from the constellation directory, the "key-sim"
    key name, the 'ubuntu' user and the stored "sim_public_ip" value.
    """
    state = ConstellationState(constellation_name)
    directory = state.get_value('constellation_directory')
    public_ip = state.get_value("sim_public_ip")
    return SshClient(directory, "key-sim", 'ubuntu', public_ip)
def monitor(constellation_name, counter):
    """
    Run one monitoring pass over a constellation.

    Returns True when the constellation is terminated (the caller should
    stop monitoring) and False otherwise (the caller should call again).

    Nothing is monitored until the launch stage has reached 'launch'. After
    that, the launch of every machine is checked in-process, then the
    simulator, task, gzweb, notebook and per-machine latency checks each run
    in their own process; this call waits for all of them to finish.
    """
    time.sleep(1)
    if constellation_is_terminated(constellation_name):
        log("monitor done for %s" % (constellation_name))
        return True  # stop the monitoring loop

    constellation = ConstellationState(constellation_name)
    current_stage = launch_sequence.index(
        constellation.get_value("launch_stage"))
    if current_stage < launch_sequence.index('launch'):
        return False  # do it again later

    machines = constellation.get_value('machines')
    for machine_name in machines:
        monitor_launch(constellation_name, machine_name, counter)

    # Constellation-wide checks, one process each.
    workers = [
        multiprocessing.Process(target=check,
                                args=(constellation_name, counter))
        for check in (monitor_simulator_proc,
                      monitor_task_proc,
                      monitor_gzweb_proc,
                      monitor_notebook_proc)
    ]

    # One latency check per machine; the router is pinged on the OpenVPN
    # client address rather than on the address stored in its data.
    for machine_name, data in machines.iteritems():
        stored_ip = data['ip']
        target_ip = stored_ip
        if machine_name == "router":
            target_ip = OPENVPN_CLIENT_IP
        workers.append(
            multiprocessing.Process(target=ssh_ping_proc,
                                    args=(constellation_name,
                                          target_ip,
                                          '%s_latency' % machine_name,
                                          counter)))

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    return False
def __wait_for_find_file(constellation_name,
                         constellation_directory,
                         machine_names,
                         ls_cmd,
                         end_state,
                         set_cloud_state=False):
    """
    Wait until a file can be found on each of the given machines.

    For every machine, a "cloudsim/find_file_<machine>.bash <ls_cmd>"
    command is queued to run through the router's ssh connection, and the
    queue is then drained with empty_ssh_queue().

    constellation_name: name used to look up the ConstellationState.
    constellation_directory: unused here; kept for the callers' signature.
    machine_names: machines whose find_file script is run.
    ls_cmd: path handed to the find_file script.
    end_state: value handed to get_ssh_cmd_generator together with the
        state key (presumably written there once the file is found --
        confirm against get_ssh_cmd_generator).
    set_cloud_state: when True the "<machine>_aws_state" key is used
        instead of "<machine>_state".

    Returns immediately, without waiting, when the constellation's launch
    stage is already at or past 'running'.
    """
    constellation = ConstellationState(constellation_name)
    launch_stage = constellation.get_value("launch_stage")
    # Compare stage positions. The previous code compared the index with
    # the string 'running', which is never true on Python 2 (and a
    # TypeError on Python 3), so the early exit could not trigger.
    if (launch_sequence.index(launch_stage) >=
            launch_sequence.index('running')):
        return

    ssh_router = _get_ssh_router(constellation_name)
    q = []
    for machine_name in machine_names:
        key_name = "%s_state" % machine_name
        if set_cloud_state:
            key_name = "%s_aws_state" % machine_name
        q.append(
            get_ssh_cmd_generator(
                ssh_router,
                "timeout -k 1 10 "
                "cloudsim/find_file_%s.bash %s" % (machine_name, ls_cmd),
                ls_cmd,
                constellation,
                key_name,
                end_state,
                max_retries=500))
    empty_ssh_queue(q, sleep=2)
def deploy_constellation(constellation_name, cloud_provider, machines,
                         openvpn_fname):
    """
    Upload and run the deployment package on the simulator machine.

    The deploy zip (including the OpenVPN key file) is created, then this
    waits for the simulator to expose its launch log and its deploy_ready
    marker, uploads and unzips the package, runs deploy.bash and finally
    waits for the setup "done" marker.

    cloud_provider is accepted but not used by this implementation.
    """
    constellation = ConstellationState(constellation_name)
    directory = constellation.get_value('constellation_directory')
    deploy_fname = _create_deploy_zip_files(constellation_name,
                                            directory,
                                            machines,
                                            [openvpn_fname])

    def _wait_on_sim(marker, end_state):
        # Block until `marker` shows up on the simulator machine.
        __wait_for_find_file(constellation_name,
                             directory,
                             ["sim"],
                             marker,
                             end_state)

    constellation.set_value('sim_launch_msg',
                            "waiting for machine to be online")
    _wait_on_sim("launch_stdout_stderr.log", "packages_setup")
    _wait_on_sim("cloudsim/setup/deploy_ready", "packages_setup")

    constellation.set_value('sim_launch_msg', "deploying keys")
    sim_ssh = _get_ssh_client(constellation_name)
    sim_ssh.upload_file(deploy_fname, "cloudsim/deploy.zip")
    sim_ssh.cmd('cd cloudsim; unzip -o deploy.zip')
    sim_ssh.cmd('bash cloudsim/deploy/deploy.bash')

    _wait_on_sim("cloudsim/setup/done", "running")
def monitor_launch(constellation_name, machine_name, counter):
    """
    Report the launch progress of one machine.

    Reads the machine's "<machine>_state" value and hands it, together with
    the machine's dpkg log script and launch message key, to
    monitor_launch_state() through the router's ssh connection.

    counter is only used in the closing log message.
    """
    ssh_router = _get_ssh_router(constellation_name)
    state_key = '%s_state' % machine_name
    machine_state = ConstellationState(constellation_name).get_value(
        state_key)

    dpkg_script = "cloudsim/dpkg_log_%s.bash" % machine_name
    launch_msg_key = '%s_launch_msg' % machine_name
    monitor_launch_state(constellation_name,
                         ssh_router,
                         machine_state,
                         dpkg_script,
                         launch_msg_key)
    log("monitor_launch() ENDS %s %s" % (machine_name, counter))
def _create_zip_files(constellation_name,
                      constellation_directory,
                      machines):
    """
    Create the zip files of each machine in the constellation directory.

    Does nothing when the launch stage is already at or past 'zip'.
    For every machine except the router, a private machine zip is created
    and copied to "<machine>.zip"; it is additionally copied to
    "user_<machine>.zip", or to "simulator.zip" when the machine is "sim".
    Every machine, router included, gets its "<machine>_zip_file" key set to
    'ready'. The launch stage is set to "zip" at the end.
    """
    constellation = ConstellationState(constellation_name)
    stage_index = launch_sequence.index(
        constellation.get_value("launch_stage"))
    if stage_index >= launch_sequence.index('zip'):
        return

    for machine_name in machines:
        key_prefix = 'key-%s' % (machine_name)
        msg_key = '%s_launch_msg' % machine_name
        ready_key = "%s_zip_file" % machine_name
        machine_zip = os.path.join(constellation_directory,
                                   "%s.zip" % machine_name)
        user_zip = os.path.join(constellation_directory,
                                "user_%s.zip" % machine_name)

        constellation.set_value(msg_key, 'creating zip file')
        # No zip is built for the router; it is only marked as ready below.
        if machine_name != "router":
            constellation.set_value(msg_key, 'creating zip files')
            ip = constellation.get_value('sim_public_ip')
            built_zip = create_private_machine_zip(machine_name,
                                                   ip,
                                                   constellation_name,
                                                   constellation_directory,
                                                   key_prefix)
            shutil.copy(built_zip, machine_zip)
            if machine_name == "sim":
                # The simulator zip is also published as simulator.zip.
                simulator_zip = os.path.join(constellation_directory,
                                             "simulator.zip")
                shutil.copy(built_zip, simulator_zip)
            else:
                shutil.copy(built_zip, user_zip)
        constellation.set_value(ready_key, 'ready')

    constellation.set_value("launch_stage", "zip")
def atest_launch(self):
    """
    End-to-end scenario: launch a constellation, then start a task on it.

    After launching and 50 monitoring rounds gazebo must be "not running";
    after the task is started and 3 more rounds it must be "running".
    The name does not start with "test" -- presumably to keep the test
    runner from collecting this long-running scenario (confirm).
    """
    def _monitor_rounds(rounds):
        # One monitor() call every 5 seconds, `rounds` times.
        for i in range(rounds):
            print('monitor %s' % i)
            monitor(self.constellation_name, i)
            time.sleep(5)

    print("test_launch")
    tags = {}
    boto_path = get_boto_path()
    launch(self.username,
           self.config,
           self.constellation_name,
           tags,
           self.constellation_directory,
           credentials_override=boto_path)
    print("launched")

    _monitor_rounds(50)
    constellation = ConstellationState(self.constellation_name)
    self.assert_(constellation.get_value('gazebo') == "not running", "!")

    task = {
        'latency': 0,
        'uplink_data_cap': 0,
        'downlink_data_cap': 0,
        'ros_package': "drcsim_gazebo",
        'ros_launch': "vrc_task_1.launch",
        'ros_args': "",
        'timeout': 60,
    }
    start_task(self.constellation_name, task)

    _monitor_rounds(3)
    self.assert_(constellation.get_value('gazebo') == "running", "!")

    _monitor_rounds(20)
def update(constellation_name):
    """
    Update the constellation software on the servers.

    This function is a plugin function that should be implemented by each
    constellation type.

    Every machine is flagged as "packages_setup" while the router runs
    cloudsim/update_constellation.bash, and flagged back to "running" /
    "complete" afterwards, whether or not the update command raised.
    """
    constellation = ConstellationState(constellation_name)
    ssh_router = _get_ssh_router(constellation_name)
    machines = constellation.get_value('machines')

    def _flag_all(state, launch_msg):
        for machine in machines:
            constellation.set_value("%s_state" % machine, state)
            constellation.set_value("%s_launch_msg" % machine, launch_msg)

    _flag_all("packages_setup", "updating software")
    try:
        output = ssh_router.cmd("cloudsim/update_constellation.bash")
        log("UPDATE: %s" % output, "toto")
    finally:
        # Always restore the machine states, even if the update failed.
        _flag_all("running", "complete")
        log("UPDATE DONE", "toto")
def _reboot_machines(constellation_name,
                     ssh_router,
                     machine_names,
                     constellation_directory):
    """
    Reboot the given machines once their software setup is done.

    Does nothing when the launch stage is already at or past 'reboot'.
    Otherwise waits for the setup "done" marker on every machine, flags the
    machines as rebooting, runs each machine's reboot script through the
    router, sleeps 20 seconds and sets the launch stage to "reboot".
    """
    constellation = ConstellationState(constellation_name)
    reached = launch_sequence.index(constellation.get_value("launch_stage"))
    if reached >= launch_sequence.index('reboot'):
        return

    __wait_for_find_file(constellation_name,
                         constellation_directory,
                         machine_names,
                         "cloudsim/setup/done",
                         "running")
    constellation.set_value('router_launch_msg',
                            "Waiting for constellation reboot")

    # Flag every machine first, then issue the reboots.
    for name in machine_names:
        constellation.set_value('%s_launch_msg' % name,
                                "Rebooting after software installation")
        constellation.set_value('%s_aws_state' % name, "rebooting")
    for name in machine_names:
        ssh_router.cmd("cloudsim/reboot_%s.bash" % name)

    log("waiting before connecting after reboot")
    time.sleep(20)
    constellation.set_value("launch_stage", "reboot")
def deploy_constellation(constellation_name, cloud_provider, machines,
                         openvpn_fname):
    """
    Upload and run the deployment package on the router machine.

    The deploy zip (including the OpenVPN key file) is created, then this
    waits for the router to expose its launch log and its deploy_ready
    marker, uploads and unzips the package, runs deploy.bash and finally
    waits for the setup "done" marker. Launch messages are updated along
    the way.

    cloud_provider is accepted but not used by this implementation.
    """
    constellation = ConstellationState(constellation_name)
    directory = constellation.get_value('constellation_directory')
    deploy_fname = _create_deploy_zip_files(constellation_name,
                                            directory,
                                            machines,
                                            [openvpn_fname])

    def _router_msg(text):
        constellation.set_value('router_launch_msg', text)

    def _wait_on_router(marker, end_state):
        # Block until `marker` shows up on the router machine.
        __wait_for_find_file(constellation_name,
                             directory,
                             ["router"],
                             marker,
                             end_state)

    _router_msg("waiting for machine to be online")
    constellation.set_value('sim_launch_msg', "waiting for router access")
    _wait_on_router("launch_stdout_stderr.log", "packages_setup")

    _router_msg("waiting for network connectivity")
    _wait_on_router("cloudsim/setup/deploy_ready", "packages_setup")

    _router_msg("deploying keys")
    router_ssh = _get_ssh_router(constellation_name)
    router_ssh.upload_file(deploy_fname, "cloudsim/deploy.zip")
    router_ssh.cmd('cd cloudsim; unzip -o deploy.zip')
    router_ssh.cmd('bash cloudsim/deploy/deploy.bash')

    _wait_on_router("cloudsim/setup/done", "running")
def launch(configuration, constellation_name, tags):
    """
    Called by cloudsimd when it receives a launch message.

    Acquires the machines from the cloud provider, sets up the VPN key,
    creates and deploys the zip files, reboots the non-router machines when
    the latest (non "stable") software is installed, waits for every
    machine to finish its setup and finally marks the constellation as
    running.

    configuration: dict; its 'machines' entry maps machine names to data.
    constellation_name: name used to look up the ConstellationState.
    tags: dict providing 'cloud_provider', 'configuration' and
        'constellation_directory'.
    """
    constellation = ConstellationState(constellation_name)
    stable = constellation.get_value('configuration').find('stable') >= 0
    use_latest_version = stable is False
    has_fc1 = False
    has_fc2 = False
    scripts = {}
    scripts['router'] = ''
    scripts['sim'] = ''
    scripts['fc1'] = ''
    scripts['fc2'] = ''
    # The softlayer acquisition below always receives these two scripts, so
    # they must exist even when no startup scripts are generated (stable
    # configurations). They used to be unbound in that case.
    fc1_script = ''
    fc2_script = ''

    cloud_provider = tags['cloud_provider']
    #username = tags['username']
    config = tags['configuration']
    constellation_directory = tags['constellation_directory']
    credentials_fname = os.path.join(constellation_directory,
                                     'credentials.txt')
    router_public_network_itf = "eth0"
    router_private_network_itf = None
    if cloud_provider == "softlayer":
        router_public_network_itf = "bond1"
        router_private_network_itf = "bond0"

    log("launch constellation name: %s" % constellation_name)
    constellation.set_value("launch_stage", "launch")
    machines = configuration['machines']
    _init_computer_data(constellation_name, machines)
    ros_master_ip = SIM_IP

    # Not required with any custom AMI
    if use_latest_version:
        drcsim_package_name = "drcsim"
        ppa_list = []  # ['ubuntu-x-swat/x-updates']
        gpu_driver_list = [
            'nvidia-current',
            'nvidia-settings',
            'nvidia-current-dev',
            'nvidia-cg-toolkit'
        ]
        log("DRC package %s" % drcsim_package_name)
        log("ppas: %s" % ppa_list)
        log("gpu packages %s" % gpu_driver_list)
        ubuntu_sources_repo = "http://us.archive.ubuntu.com/ubuntu/"
        if cloud_provider == "aws":
            ubuntu_sources_repo = \
                get_aws_ubuntu_sources_repo(credentials_fname)
        scripts['router'] = get_router_script(
            ubuntu_sources_repo,
            router_public_network_itf,
            router_private_network_itf,
            ROUTER_IP,
            SIM_IP,
            drcsim_package_name,
            OPENVPN_SERVER_IP,
            OPENVPN_CLIENT_IP)
        scripts['sim'] = get_drc_script(ubuntu_sources_repo,
                                        drcsim_package_name,
                                        SIM_IP,
                                        ros_master_ip,
                                        gpu_driver_list,
                                        ppa_list,
                                        OPENVPN_CLIENT_IP,
                                        ROUTER_IP)
        fc1_script = get_drc_script(ubuntu_sources_repo,
                                    drcsim_package_name,
                                    FC1_IP,
                                    ros_master_ip,
                                    gpu_driver_list,
                                    ppa_list,
                                    OPENVPN_CLIENT_IP,
                                    ROUTER_IP)
        fc2_script = get_drc_script(ubuntu_sources_repo,
                                    drcsim_package_name,
                                    FC2_IP,
                                    ros_master_ip,
                                    gpu_driver_list,
                                    ppa_list,
                                    OPENVPN_CLIENT_IP,
                                    ROUTER_IP)
        # has_fc1 / has_fc2 are hard-coded to False above, so the scripts
        # dict keeps empty field computer scripts.
        if has_fc1:
            scripts['fc1'] = fc1_script
        if has_fc2:
            scripts['fc2'] = fc2_script

    if cloud_provider == "softlayer":
        log("softlayer %s" % credentials_fname)
        constellation_prefix = config.split()[-1]
        log("constellation_prefix %s" % constellation_prefix)
        partial_deploy = False
        # NOTE(review): "> 0" ignores a configuration that *starts* with
        # "partial" -- confirm whether ">= 0" was intended.
        if config.find("partial") > 0:
            partial_deploy = True
        log("partial deploy: %s (only sim and router)" % partial_deploy)
        acquire_softlayer_constellation(constellation_name,
                                        constellation_directory,
                                        partial_deploy,
                                        constellation_prefix,
                                        credentials_fname,
                                        tags,
                                        scripts['router'],
                                        scripts['sim'],
                                        fc1_script,
                                        fc2_script)
    if cloud_provider == "aws":
        log("credentials_ec2 %s" % credentials_fname)
        acquire_aws_constellation(constellation_name,
                                  credentials_fname,
                                  machines,
                                  scripts,
                                  tags)

    # Setup the VPN
    ssh_router = _get_ssh_router(constellation_name)
    openvpn_fname = os.path.join(constellation_directory, 'openvpn.key')
    create_openvpn_key(openvpn_fname)
    _create_zip_files(constellation_name, constellation_directory, machines)
    deploy_constellation(constellation_name, cloud_provider, machines,
                         openvpn_fname)

    # Not required with any custom AMI
    if use_latest_version:
        # reboot all machines but not router
        machines_to_reboot = [name for name in machines
                              if name != 'router']
        _reboot_machines(constellation_name,
                         ssh_router,
                         machines_to_reboot,
                         constellation_directory)

    # Waiting for machines to be ready
    machine_names = list(machines)
    log("_run_machines wait for machines %s : %s" % (constellation_name,
                                                     machine_names))
    __wait_for_find_file(constellation_name,
                         constellation_directory,
                         machine_names,
                         "cloudsim/setup/done",
                         "running")
    log("_run_machines machines %s : %s rebooted!" % (constellation_name,
                                                      machine_names))
    # make sure the monitoring of package setup is complete
    time.sleep(10)

    constellation = ConstellationState(constellation_name)
    for machine_name in machine_names:
        constellation.set_value('%s_aws_state' % machine_name, "running")
        # NOTE(review): other code in this file writes "<machine>_state";
        # confirm that "<machine>_launch_state" is the intended key.
        constellation.set_value('%s_launch_state' % machine_name, "running")

    ssh_router = _get_ssh_router(constellation_name)
    log("_run_machines %s: simulator check" % (constellation_name))
    if "sim" in machine_names:
        _check_opengl_and_x(constellation, ssh_router)
        # Install gazebo models locally
        # using a utility script from cloudsim-client-tools
        # careful, we are running as root here?
        # Not required with any custom AMI
        if constellation.get_value('configuration') == 'DRC' or \
                constellation.get_value('configuration') == 'DRC with FC':
            constellation.set_value('router_launch_msg', 'Final setup')
            constellation.set_value('sim_launch_msg',
                                    'Loading Gazebo models')
            ssh_router.cmd("cloudsim/ssh-sim.bash "
                           "cloudsim/load_gazebo_models.bash")
        constellation.set_value('sim_launch_msg', "complete")

    log("_run_machines %s: wrap up" % (constellation_name))
    constellation.set_value('router_launch_msg', "complete")
    for machine_name in machine_names:
        constellation.set_value('%s_launch_msg' % machine_name, "complete")
        constellation.set_value('%s_aws_state' % machine_name, "running")
        constellation.set_value('%s_launch_state' % machine_name, "running")
    constellation.set_value("launch_stage", "running")
def notify_portal(constellation, task):
    """
    Collect the logs of a task, package them and upload them to the portal.

    Steps: read the portal JSON description, fetch the logs with
    get_logs.bash, read the score and runtime from score.log (best effort),
    write end_task.json next to the logs, tar the log directory, scp the
    tar file to the portal's /tmp and move it to its final destination.
    The task's 'task_message' is updated as the steps progress.

    constellation: constellation name.
    task: dict providing 'vrc_num', 'vrc_id', 'start_time', 'ros_launch',
        'task_message' and 'task_id'.

    Any exception is logged and re-raised.
    """
    try:
        root_log_dir = '/tmp/cloudsim_logs'

        # Get metadata (team, competition, ...)
        config = get_cloudsim_config()
        portal_info_fname = config['cloudsim_portal_json_path']
        log("** Portal JSON path: %s ***" % portal_info_fname)
        with open(portal_info_fname, 'r') as f:
            portal_info = json.loads(f.read())
        log("** Portal JSON file opened ***")
        team = portal_info['team']
        comp = portal_info['event']
        task_num = task['vrc_num']
        log("** Team: %s, Event: %s ***" % (team, comp))
        # Explicit membership: a lexicographic range check would accept
        # strings such as '10' or '25'.
        if task_num not in ('1', '2', '3'):
            task_num = '1'
        task_run = task['vrc_id']
        if task_run not in ('1', '2', '3', '4', '5'):
            task_run = '1'

        start_stamp = dateutil.parser.parse(task['start_time'])
        start_stamp = start_stamp.strftime("%d/%m/%y %H:%M:%S")

        const = ConstellationState(constellation)
        constellation_dict = get_constellation_data(constellation)
        constellation_directory = \
            constellation_dict['constellation_directory']
        router_ip = const.get_value("router_public_ip")
        task_dirname = task['ros_launch'].split('.')[0]
        task_log_dir = os.path.join(root_log_dir, task_dirname)

        # Store in this cloudsim the network and sim logs
        router_key = os.path.join(constellation_directory, 'key-router.pem')
        new_msg = task['task_message'] + '<B> Getting logs</B>'
        const.update_task_value(task['task_id'], 'task_message', new_msg)
        # Commands are passed as argument lists so that no value can be
        # split or merged by whitespace handling.
        subprocess.check_call(['bash', '/var/www/bin/get_logs.bash',
                               task_dirname, str(router_ip), router_key])
        log("** Log directory created***")

        # Get the score and the time when the task stopped
        score = '0'
        runtime = 'N/A'
        try:
            with open(os.path.join(task_log_dir, 'score.log')) as f:
                log("** score.log found **")
                data = f.read()
            log("** Reading score.log file **")
            # The file ends with a newline, so the last record is the
            # second to last element of the split.
            last_line = data.split('\n')[-2]
            log("** Last line: %s **" % last_line)
            fields = last_line.split(',')
            score = fields[4]
            runtime = fields[1]
            log("** All sim score fields parsed **")
        except Exception as score_error:
            # Best effort: keep the defaults, but leave a trace of why.
            log("** score.log not usable: %s **" % repr(score_error))

        # Create JSON file with the task metadata
        data = json.dumps(
            {
                'team': team,
                'event': comp,
                'task': task_num,
                'start_time': start_stamp,
                'result': 'Terminated',
                'runtime': runtime,
                'score': score
            },
            sort_keys=True, indent=4, separators=(',', ': '))
        log("** JSON data created **")
        with open(os.path.join(task_log_dir, 'end_task.json'), 'w') as f:
            f.write(str(data))
        log("** JSON file created ***")
        new_msg = new_msg.replace('Getting logs', 'Creating tar file')
        const.update_task_value(task['task_id'], 'task_message', new_msg)

        # Tar all the log content
        tar_name = team + '_' + comp + '_' + str(task_num)
        tar_name += '_' + str(task_run) + '.tar'
        subprocess.check_call(['tar', 'cf', '/tmp/' + tar_name,
                               '-C', task_log_dir, '.'])
        log("** Log directory stored in a tar file ***")
        new_msg = new_msg.replace('Creating tar file',
                                  'Uploading logs to the portal')
        const.update_task_value(task['task_id'], 'task_message', new_msg)

        # Send the log to the portal (config and portal_info were loaded
        # at the top of this function)
        ssh_portal = SshClient('xxx', 'xxx', portal_info['user'],
                               portal_info['hostname'])
        # this is a hack
        ssh_portal.key_fname = config['cloudsim_portal_key_path']

        # Upload the file to the Portal temp dir. The two -o options used
        # to be concatenated without a separating space, which corrupted
        # both of them.
        dest = os.path.join('/tmp', tar_name)
        scp_cmd = ['scp',
                   '-o', 'UserKnownHostsFile=/dev/null',
                   '-o', 'StrictHostKeyChecking=no',
                   '-i', ssh_portal.key_fname,
                   dest,
                   'ubuntu@' + portal_info['hostname'] + ':/tmp']
        log('cmd: %s' % ' '.join(scp_cmd))
        subprocess.check_call(scp_cmd)

        # Move the file to the final destination into the Portal
        final_dest = os.path.join(portal_info['final_destination_dir'],
                                  tar_name)
        ssh_portal.cmd('sudo mv %s %s' % (dest, final_dest))
        new_msg = new_msg.replace('Uploading logs to the portal',
                                  'Logs uploaded to the portal')
        const.update_task_value(task['task_id'], 'task_message', new_msg)
    except Exception as excep:
        log('notify_portal() Exception: %s' % (repr(excep)))
        raise