コード例 #1
0
 def checkpoint_network(self):
     """Snapshot the network interface state of the central mongo node.

     Queries the Marathon API for the task whose appId is the central
     mongo pipeline, runs ``/sbin/ifconfig`` on the host executing it,
     and writes the output to a timestamped file under ``.netcheck/``.
     Used to check how much traffic the central node receives with a
     centralised approach.
     """
     netcheck_directory = ".netcheck"
     curl_node = list(self.masters)[0]
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)
     # List comprehension instead of filter(): keeps an indexable list on
     # both Python 2 and 3, and tolerates a missing 'tasks' key.
     mongo_tasks = [
         task for task in d.get('tasks', [])
         if task['appId'] ==
         u'/fmoncentralpipecentral/mongoccentral/mongocentralcentral'
     ]
     mongo_host = mongo_tasks[0]['host']
     p = general_util.SshProcess('/sbin/ifconfig',
                                 host=mongo_host,
                                 shell=True,
                                 pty=True).run()
     now = strftime("%d_%b_%Y_%H:%M")
     if not exists(netcheck_directory):
         makedirs(netcheck_directory)
     # Reuse netcheck_directory instead of repeating the literal path, so
     # the directory name lives in exactly one place.
     with open(netcheck_directory + '/net_checkpoint' + now, 'w') as f:
         f.write(p.stdout)
コード例 #2
0
 def save_results(self):
     """Collect docker container logs from every private agent, then fetch results.

     For each image of interest, every container created from it gets a
     ``/home/vagrant/<image>_<container>_logs`` directory on its agent
     holding the docker stdout/stderr, plus Kafka/Spark-specific log
     directories where applicable.  The parent class then saves its own
     results, leftover jars and scrap files are removed, and everything
     under ``/home/vagrant/`` is copied into ``self.results_directory``.
     """
     logs_from_images = [
         'ches/kafka', 'alvarobrandon/spark-worker',
         'alvarobrandon/spark-master', 'uhopper/hadoop-datanode:2.8.1',
         'uhopper/hadoop-namenode:2.8.1', 'zookeeper',
         'mesosphere/marathon-lb:v1.11.1', 'alvarobrandon/spark-bench',
         'alvarobrandon/ycsb', 'cassandra'
     ]
     # Extract here from the marathon API all the Mesos TaskIDs for the different applications
     for agent in self.private_agents:
         for image in logs_from_images:
             # -a includes stopped containers; -q prints only the ids.
             p = general_util.SshProcess(
                 'sudo docker ps -f "ancestor={0}" -q -a'.format(image),
                 host=agent).run()
             image_dir = image.replace('/', '_')
             # Output is \r\n separated with a trailing empty element,
             # hence the [:-1].
             for containerid in p.stdout.split('\r\n')[:-1]:
                 if image == 'alvarobrandon/spark-worker':
                     # Debug trace (prints a tuple under Python 2).
                     print(containerid, image_dir)
                 p = general_util.SshProcess(
                     'mkdir /home/vagrant/{0}_{1}_logs'.format(
                         image_dir, containerid),
                     host=agent).run()
                 p = general_util.SshProcess(
                     'sudo docker logs {1} >> /home/vagrant/{0}_{1}_logs/stdout_{0}_{1}.out 2>&1'
                     .format(image_dir, containerid),
                     host=agent).run()
                 if image == 'ches/kafka':  # if image_dir is kafka then copy some extra logs
                     p = general_util.SshProcess(
                         'sudo docker cp {1}:/kafka/logs /home/vagrant/{0}_{1}_logs/'
                         .format(image_dir, containerid),
                         host=agent).run()
                 if image == 'alvarobrandon/spark-worker':  # if image_dir is spark copy the extra logs
                     p = general_util.SshProcess(
                         'sudo docker cp {1}:/spark/work/ /home/vagrant/{0}_{1}_logs/'
                         .format(image_dir, containerid),
                         host=agent).run()
     RcaVagrantExperiment.save_results(self)
     # clean the jars first since we don't want them
     general_util.Remote(
         hosts=self.private_agents,
         cmd=
         "sudo rm -f /home/vagrant/*/work/*/*/spark-bench-2.1.1_0.3.0-RELEASE.jar"
     ).run()
     # -f so the command succeeds even when no scrap files exist
     # (consistent with the jar cleanup above).
     general_util.Remote(hosts=self.private_agents,
                         cmd="sudo rm -f /home/vagrant/*.scrap.gz").run()
     general_util.Get(hosts=self.private_agents,
                      remote_files=["/home/vagrant/"],
                      local_location=self.results_directory).run()
コード例 #3
0
 def start_kafka_queue(self):
     """Deploy a Kafka group through the Marathon API.

     Renders ``fmone-resources/kafka.json`` with one broker per node in
     the first region, uploads the rendered file to the first master and
     POSTs it to the marathon-user groups endpoint.
     """
     curl_node = list(self.masters)[0]
     # len() instead of calling __len__() directly; one broker per node
     # in the first region.
     nbrokers = len(list(self.regions[0]))
     general_util.replace_infile("fmone-resources/kafka.json",
                                 "fmone-resources/exec.json",
                                 {"@nbrokers@": str(nbrokers)})
     general_util.Put(hosts=curl_node,
                      local_files=["fmone-resources/exec.json"],
                      remote_location="/home/vagrant/exec.json").run()
     p = general_util.SshProcess(
         'curl -X POST "http://leader.mesos/service/marathon-user/v2/groups" -H "content-type: application/json" -d@/home/vagrant/exec.json',
         host=curl_node).run()
コード例 #4
0
 def start_dummy_containers(self):
     """Launch dummy containers through the Marathon API.

     Renders ``fmone-resources/dummy.json`` with five instances per
     private agent, uploads the rendered file to the first master and
     POSTs it to the marathon-user apps endpoint.
     """
     curl_node = list(self.masters)[0]
     # len() instead of calling __len__() directly; five instances per
     # private agent.
     ninstances = len(list(self.private_agents)) * 5
     general_util.replace_infile("fmone-resources/dummy.json",
                                 "fmone-resources/exec.json",
                                 {"@ninstances@": str(ninstances)})
     general_util.Put(hosts=curl_node,
                      local_files=["fmone-resources/exec.json"],
                      remote_location="/home/vagrant/exec.json").run()
     p = general_util.SshProcess(
         'curl -X POST "http://leader.mesos/service/marathon-user/v2/apps" -H "content-type: application/json" -d@/home/vagrant/exec.json',
         host=curl_node).run()
コード例 #5
0
 def check_elasticity(self, nslaves, force_pull, region):
     curl_node = list(self.masters)[0]
     general_util.replace_infile("fmone-resources/basic.json",
                                 "fmone-resources/exec.json", {
                                     "@nslaves@": nslaves,
                                     "@region@": region
                                 })
     general_util.Put(hosts=curl_node,
                      local_files=["fmone-resources/exec.json"],
                      remote_location="/home/vagrant/exec.json").run()
     p = general_util.SshProcess(
         'curl -X POST "http://leader.mesos/service/marathon-user/v2/apps" -H "content-type: application/json" -d@/home/vagrant/exec.json',
         host=curl_node).run()
     print p.stdout
     print p.stderr
     print "Sleeping for a while"
     sleep(60)
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)  # use the basic.json from fmone-resources
     fmone_tasks = filter(lambda task: task['appId'] == u'/fmone/fmones',
                          d.get('tasks'))
     start_end = [(task.get('stagedAt'), task.get('startedAt'))
                  for task in fmone_tasks]
     time_differences = map(
         lambda pair: mktime(strptime(pair[1][:-5], '%Y-%m-%dT%H:%M:%S')) -
         mktime(strptime(pair[0][:-5], '%Y-%m-%dT%H:%M:%S')), start_end)
     print "The mean time to start {0} nslaves instances with pulled {1} is: {2} and its variance {3}"\
                 .format(nslaves, force_pull, mean(time_differences), std(time_differences))
     p = general_util.SshProcess(
         'curl -X DELETE "http://leader.mesos/service/marathon-user/v2/groups/fmone" -H "content-type: application/json"',
         host=curl_node).run()
     sleep(20)
     return (nslaves, force_pull, mean(time_differences),
             std(time_differences))
コード例 #6
0
 def clean_marathon_groups(self):
     """Delete every Marathon group via the marathon-user API."""
     # Issue the DELETE from the first master node.
     master_node = list(self.masters)[0]
     delete_cmd = ('curl -X DELETE '
                   '"http://leader.mesos/service/marathon-user/v2/groups/" '
                   '-H "content-type: application/json"')
     general_util.SshProcess(delete_cmd, host=master_node).run()
コード例 #7
0
 def check_resilience(
         self):  # Would be possible to add the region here as a parameter?
     """Measure recovery times after three induced failures.

     Scenario 1: kill every container on the host running an Fmone
     agent task and time how long its tasks take to be restarted.
     Scenario 2: kill every container on the host running the regional
     mongo task and time until the last Fmone agent task is started.
     Scenario 3: kill every container on all private agents (general
     failure) and time until the last Fmone agent task is started.

     Returns a list with one recovery time (in seconds) per scenario.
     """
     results = []  ## here we are going to include all of the results
     curl_node = list(self.masters)[0]
     # Ask the Marathon API for the currently running tasks.
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)
     fmone_tasks = filter(
         lambda task: task['appId'] ==
         u'/fmonmongorpipe2/fmondocker2/fmoneagentdockerregion2',
         d.get('tasks'))
     kill_host = fmone_tasks[0].get('host')
     # Scenario 1: wipe every container on that agent's host;
     # nolog_exit_code because docker rm may legitimately fail.
     general_util.Remote('sudo docker rm -f $(sudo docker ps -a -q)',
                         hosts=kill_host,
                         process_args={
                             "nolog_exit_code": True
                         }).run()
     time1 = time()
     sleep(20)  ## We leave some time till the fmone agent runs again
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)
     killed_host = filter(lambda task: (task['host'] == kill_host),
                          d.get('tasks'))
     start_end = [(task.get('stagedAt'), task.get('startedAt'))
                  for task in killed_host]
     # Recovery time per task = its (parsed) startedAt minus the moment we
     # killed the containers.  NOTE(review): the 7200 looks like a
     # hard-coded UTC+2 offset to reconcile Marathon's UTC timestamps with
     # the local time() -- confirm before running elsewhere.
     time_differences = map(
         lambda pair:
         (mktime(strptime(pair[1][:-5], '%Y-%m-%dT%H:%M:%S'))) -
         (time1 - 7200), start_end)
     print "The mean time to recover for a Fmone agent is: {0} and its variance {1}"\
                 .format(mean(time_differences), std(time_differences))
     results.append(mean(time_differences))
     # Scenario 2: kill the containers on the regional mongo host.
     mongo_tasks = filter(
         lambda task: task['appId'] ==
         u'/fmonmongorpipe2/mongor2/mongoregion2', d.get('tasks'))
     kill_host = mongo_tasks[0].get('host')
     general_util.Remote('sudo docker rm -f $(sudo docker ps -a -q)',
                         hosts=kill_host,
                         process_args={
                             "nolog_exit_code": True
                         }).run()
     time1 = time()
     sleep(
         60
     )  ## we leave some time until all the fmone agents are up and running again
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)
     fmone_tasks = filter(
         lambda task: task['appId'] ==
         u'/fmonmongorpipe2/fmondocker2/fmoneagentdockerregion2',
         d.get('tasks'))
     # Recovery time = start time of the task that started last.  The
     # sorted datetime64 value is nanoseconds as uint64, hence the / 1e9
     # to get epoch seconds.
     df = pd.DataFrame(fmone_tasks)
     df['startedAt'] = pd.to_datetime(df['startedAt'])
     last_started = (df.sort_values(
         'startedAt',
         ascending=False).head(1)['startedAt'].values[0].astype('uint64') /
                     1e9)
     print "The mean time to recover a Fmone pipeline is: {0}".format(
         last_started - time1)
     results.append(last_started - time1)
     # Scenario 3: general failure -- kill every container on every
     # private agent and wait for the whole deployment to come back.
     general_util.Remote('sudo docker rm -f $(sudo docker ps -a -q)',
                         hosts=self.private_agents,
                         process_args={
                             "nolog_exit_code": True
                         }).run()
     time1 = time()
     sleep(260)
     p = general_util.SshProcess(
         'curl "http://leader.mesos/service/marathon-user/v2/tasks"',
         host=curl_node).run()
     d = json.loads(p.stdout)
     fmone_tasks = filter(
         lambda task: task['appId'] ==
         u'/fmonmongorpipe2/fmondocker2/fmoneagentdockerregion2',
         d.get('tasks'))
     # Same "last task to start" computation as scenario 2.
     df = pd.DataFrame(fmone_tasks)
     df['startedAt'] = pd.to_datetime(df['startedAt'])
     last_started = (df.sort_values(
         'startedAt',
         ascending=False).head(1)['startedAt'].values[0].astype('uint64') /
                     1e9)
     print "The mean time to recover from a general failure is: {0}".format(
         last_started - time1)
     results.append(last_started - time1)
     return results