コード例 #1
0
def test_list_tasks_without_app_id(m):
    """Check that ``list_tasks()`` without an app id returns both mocked tasks.

    ``m`` is the HTTP mocker fixture; it serves a canned ``/v2/tasks``
    response containing one task for ``/anapp`` and one for ``/anotherapp``.
    """
    fake_response = (
        '{ "tasks": [ { "appId": "/anapp", "healthCheckResults": '
        '[ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", "lastFailure": null, '
        '"lastSuccess": "2014-10-03T22:57:41.643Z", "taskId": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799" } ],'
        ' "host": "10.141.141.10", "id": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799", "ports": [ 31000 ], '
        '"servicePorts": [ 9000 ], "stagedAt": "2014-10-03T22:16:27.811Z", "startedAt": "2014-10-03T22:57:41.587Z", '
        '"version": "2014-10-03T22:16:23.634Z" }, { "appId": "/anotherapp", '
        '"healthCheckResults": [ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", '
        '"lastFailure": null, "lastSuccess": "2014-10-03T22:57:41.649Z", "taskId": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799" } ], '
        '"host": "10.141.141.10", "id": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799", "ports": [ 31001 ], "servicePorts": [ 9000 ], '
        '"stagedAt": "2014-10-03T22:16:33.814Z", "startedAt": "2014-10-03T22:57:41.593Z", "version": "2014-10-03T22:16:23.634Z" } ] }'
    )
    m.get('http://fake_server/v2/tasks', text=fake_response)
    client = MarathonClient(servers='http://fake_server')
    returned_tasks = client.list_tasks()

    # Expected model for the "/anapp" task in the canned response.
    first_health = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.643Z",
        task_id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
    )
    first_task = models.task.MarathonTask(
        app_id="/anapp",
        health_check_results=[first_health],
        host="10.141.141.10",
        id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
        ports=[31000],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:27.811Z",
        started_at="2014-10-03T22:57:41.587Z",
        version="2014-10-03T22:16:23.634Z",
    )

    # Expected model for the "/anotherapp" task in the canned response.
    second_health = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.649Z",
        task_id="bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799",
    )
    second_task = models.task.MarathonTask(
        app_id="/anotherapp",
        health_check_results=[second_health],
        host="10.141.141.10",
        id="bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799",
        ports=[31001],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:33.814Z",
        started_at="2014-10-03T22:57:41.593Z",
        version="2014-10-03T22:16:23.634Z",
    )

    assert returned_tasks == [first_task, second_task]
コード例 #2
0
def test_list_tasks_with_app_id(m):
    """Check that ``list_tasks(app_id='/anapp')`` returns only the ``/anapp`` task.

    ``m`` is the HTTP mocker fixture; the canned ``/v2/tasks`` response
    contains tasks for two different apps.
    """
    fake_response = '{ "tasks": [ { "appId": "/anapp", "healthCheckResults": [ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", "lastFailure": null, "lastSuccess": "2014-10-03T22:57:41.643Z", "taskId": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799" } ], "host": "10.141.141.10", "id": "bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799", "ports": [ 31000 ], "servicePorts": [ 9000 ], "stagedAt": "2014-10-03T22:16:27.811Z", "startedAt": "2014-10-03T22:57:41.587Z", "version": "2014-10-03T22:16:23.634Z" }, { "appId": "/anotherapp", "healthCheckResults": [ { "alive": true, "consecutiveFailures": 0, "firstSuccess": "2014-10-03T22:57:02.246Z", "lastFailure": null, "lastSuccess": "2014-10-03T22:57:41.649Z", "taskId": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799" } ], "host": "10.141.141.10", "id": "bridged-webapp.ef0b5d91-4b4a-11e4-ae49-56847afe9799", "ports": [ 31001 ], "servicePorts": [ 9000 ], "stagedAt": "2014-10-03T22:16:33.814Z", "startedAt": "2014-10-03T22:57:41.593Z", "version": "2014-10-03T22:16:23.634Z" } ] }'
    m.get('http://fake_server/v2/tasks', text=fake_response)
    client = MarathonClient(servers='http://fake_server')
    returned_tasks = client.list_tasks(app_id='/anapp')

    # Only the "/anapp" entry of the canned response is expected back.
    health_result = models.task.MarathonHealthCheckResult(
        alive=True,
        consecutive_failures=0,
        first_success="2014-10-03T22:57:02.246Z",
        last_failure=None,
        last_success="2014-10-03T22:57:41.643Z",
        task_id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
    )
    anapp_task = models.task.MarathonTask(
        app_id="/anapp",
        health_check_results=[health_result],
        host="10.141.141.10",
        id="bridged-webapp.eb76c51f-4b4a-11e4-ae49-56847afe9799",
        ports=[31000],
        service_ports=[9000],
        staged_at="2014-10-03T22:16:27.811Z",
        started_at="2014-10-03T22:57:41.587Z",
        version="2014-10-03T22:16:23.634Z",
    )

    assert returned_tasks == [anapp_task]
コード例 #3
0
def enable_logstash():
    """Point the logstash-forwarder configuration at the ELK host found via Marathon.

    Reads ``MARATHON_ENDPOINT``, ``MARATHON_HTTP_USER`` and
    ``MARATHON_HTTP_PASSWORD`` from the environment, asks Marathon for the
    tasks of the ``yroblaelk`` app and uses the host of the first task that
    has a ``started_at`` value. When a host is found, every ``ELK_HOST``
    occurrence in ``LOGSTASH_CONF_FILE`` is replaced in place and the
    ``logstash-forwarder`` service is restarted.

    Discovery is best-effort: if it fails, the reason is printed and the
    configuration file is left untouched.
    """
    print("Checking ELK entries\n")
    endpoint = os.getenv('MARATHON_ENDPOINT')
    username = os.getenv('MARATHON_HTTP_USER')
    password = os.getenv('MARATHON_HTTP_PASSWORD')
    elk_host = None
    if endpoint:
        try:
            print('Discovering configuration from %s\n' % endpoint)
            c = MarathonClient('https://%s' % endpoint, username=username, password=password)
            for task in c.list_tasks('yroblaelk'):
                if task.started_at:
                    elk_host = task.host
                    break
        except Exception as exc:
            # Stay best-effort, but report why discovery failed instead of
            # silently swallowing the error.
            print('ELK discovery failed: %s\n' % exc)

    # Replace the ELK_HOST placeholder in the logstash configuration.
    if elk_host:
        print('Found ELK address %s\n' % elk_host)
        # With inplace=True, fileinput redirects stdout into the file being
        # rewritten, so each (possibly modified) line is written back to it.
        for line in fileinput.input(LOGSTASH_CONF_FILE, inplace=True):
            sys.stdout.write(line.replace("ELK_HOST", elk_host))
        # Restart logstash-forwarder so it picks up the new configuration.
        subprocess.call(["service", "logstash-forwarder", "restart"])
コード例 #4
0
ファイル: scale_manager.py プロジェクト: mrz001/magnum-1
    def _get_hosts_with_container(self, context, cluster):
        """Return the set of hosts on which Marathon reports at least one task.

        The Marathon endpoint is built from ``cluster.api_address``;
        ``context`` is not used by this method.
        """
        marathon_url = 'http://' + cluster.api_address + '/marathon/'
        client = MarathonClient(marathon_url)
        return {task.host for task in client.list_tasks()}
コード例 #5
0
ファイル: drain.py プロジェクト: chuckwired/ber-kit
def main(args):
  """Migrate Marathon tasks away from the hosts listed in ``args.hosts``.

  ``args.hosts`` is a comma-separated host list (quotes are ignored),
  ``args.url`` is the Marathon endpoint and ``args.force`` is passed through
  to ``migrate_tasks``. The running instance counts per application and the
  applications that have tasks on the selected hosts are printed as JSON
  before the migration starts.
  """
  # Strip quotes, surrounding whitespace and empty items so that inputs such
  # as '"host1, host2"' still match the host names reported by Marathon.
  raw_hosts = args.hosts.replace('"', '').replace('\'', '')
  migration_hosts = [host.strip() for host in raw_hosts.split(',') if host.strip()]
  marathon_client = MarathonClient(args.url)

  # Get the running marathon application dictionary
  running_instances = utils.dict_by_key_and_value(lambda x: x.id, lambda y: y.instances, marathon_client.list_apps())
  print(">>> Total Running Applications: ")
  print(json.dumps(running_instances, sort_keys=True, indent=4, separators=(',', ': ')))

  # Get the running marathon applications for all hosts which are going for maintenance
  all_tasks = marathon_client.list_tasks()
  host_lookup = set(migration_hosts)
  filtered_tasks = [task for task in all_tasks if task.host in host_lookup]
  dicted_tasks = utils.dict_by_key(lambda x: x.app_id, filtered_tasks)

  print(">>> Total Running Application: ")
  # A dict keys view is not JSON serializable on Python 3; dump a sorted list.
  print(json.dumps(sorted(dicted_tasks.keys()), indent=4, separators=(',', ': ')))

  # Tasks migration
  migrate_tasks(marathon_client, dicted_tasks, migration_hosts, args.force)
コード例 #6
0
ファイル: views.py プロジェクト: huanpc/mesos-admin
def ports_used(request):
    """Render the ports used by Marathon tasks, grouped by host.

    The template receives ``used_ports``: a list of ``[host, sorted_ports]``
    pairs ordered by host.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    used_ports = {}
    for app in mc.list_apps():
        for task in mc.list_tasks(app.id):
            # Accumulate into a list owned by this view. Storing task.ports
            # directly would alias the task's own list, and later extend()
            # calls would then mutate the task object.
            used_ports.setdefault(task.host, []).extend(task.ports or [])

    list_host_ports = [[host, sorted(used_ports[host])] for host in sorted(used_ports)]

    data = {'used_ports': list_host_ports}
    return render(request, 'marathon_mgmt/ports_used.html', data)
コード例 #7
0
ファイル: views.py プロジェクト: cuongtransc/mesos-admin
def ports_used(request):
    """Render the ports used by Marathon tasks, grouped by host.

    The template receives ``used_ports``: a list of ``[host, sorted_ports]``
    pairs ordered by host.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'],
                                              settings.MARATHON['port']))
    used_ports = {}
    for app in mc.list_apps():
        for task in mc.list_tasks(app.id):
            # Accumulate into a list owned by this view. Storing task.ports
            # directly would alias the task's own list, and later extend()
            # calls would then mutate the task object.
            used_ports.setdefault(task.host, []).extend(task.ports or [])

    list_host_ports = [[host, sorted(used_ports[host])]
                       for host in sorted(used_ports)]

    data = {'used_ports': list_host_ports}
    return render(request, 'marathon_mgmt/ports_used.html', data)
コード例 #8
0
class Scaler:
    """Class for Scaling

    Scales one Marathon application up or down by comparing the average
    CPU and memory usage of its containers (read from InfluxDB) with the
    thresholds of the application's policies.
    """

    # Largest number of instances removed by one scale-down decision.
    MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the application description and policies and connect to the backends.

        @param string app_name name of the application to scale
        @param dict config settings; the sections MARIA_RESTFUL, INFLUXDB,
            MARATHON and TIME are read by this class
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)

        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'],
                                          config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/" + app_name + "/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the JSON parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        self.influx_client = InfluxDBClient(config["INFLUXDB"]["host"],
                                            config["INFLUXDB"]["port"],
                                            config["INFLUXDB"]["username"],
                                            config["INFLUXDB"]["password"],
                                            config["INFLUXDB"]["db_name"])
        self.marathon_client = MarathonClient(self._marathon_url())

        # Fetch the app once instead of issuing one request per attribute.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reconfigure_haproxy()

    def _marathon_url(self):
        """Return the Marathon base URL built from the MARATHON config section."""
        return "http://{}:{}".format(self.config["MARATHON"]["host"],
                                     self.config["MARATHON"]["port"])

    def _reconfigure_haproxy(self):
        """Run servicerouter.py to regenerate /etc/haproxy/haproxy.cfg."""
        os.system("sudo ./servicerouter.py --marathon " +
                  self._marathon_url() +
                  " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self,
                      log_file="autoscaling.log",
                      level=logging.INFO,
                      formatter=None):
        """Attach a file handler to the scaler's logger.

        @param string log_file path of the log file
        @param int level minimum level written to the file
        @param logging.Formatter formatter record format; a timestamped
            default is used when None
        """
        if formatter is None:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return cpu usage of container_name

        The value is taken from the first point returned by InfluxDB,
        divided by the app's cpus and multiplied by 100.

        @param string container_name container name
        """
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage from stats where container_name = '" + container_name + "' and time > now()-5m group by time(2s) "
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1] / 1000000000 / self.app["cpus"]) * 100

    def get_container_name(self, mesos_task_id):
        """Return container name mapping with mesos_task_id in mesos

        @param string mesos_task_id
        """
        query = "select container_name from " + self.config["INFLUXDB"][
            "ts_mapping"] + " where time>now() - 5m and mesos_task_id = '" + mesos_task_id + "' limit 1"
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all Marathon tasks of this application

        @return list container names, one per task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return avg memory usage of all containers in list containers_name

        Raises ZeroDivisionError when containers_name is empty.

        @param list containers_name list containers name
        @return float avg mem usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = "select memory_usage,container_name from stats where  time > now()-5m and  container_name in (" + quoted_names + ")  limit " + str(
            number_container * 2)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                sum_memory_usage += point[3] / (self.app["mem"] *
                                                1048576) * 100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return avg cpu usage of all containers in list containers_name

        Raises ZeroDivisionError when containers_name is empty.

        @param list containers_name list containers name
        @return float avg cpu usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'" + x + "'" for x in containers_name)
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage,container_name from stats where  time > now()-5m and  container_name in (" + quoted_names + ") group by time(10s),container_name limit " + str(
            number_container)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1] / 1000000000 / self.app["cpus"] * 100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Scale the application by delta instances (add or remove)

        The target is clamped to the app's min_instances/max_instances; no
        request is sent when the clamped target equals the current count.

        @param int delta number of instances to add (positive) or remove
            (negative)
        """
        new_instance = self.app["instance"] + delta
        if new_instance > self.app['max_instances']:
            new_instance = self.app['max_instances']
        if new_instance < self.app['min_instances']:
            new_instance = self.app['min_instances']
        if new_instance == self.app["instance"]:
            return

        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling " + self.app["name"] + " to: " +
                          str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reconfigure_haproxy()
        self.app["instance"] = self.marathon_client.get_app(
            self.app["name"]).instances
        self.logger.debug("Sleep " +
                          str(self.config["TIME"]['after_scale']) + "s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Check rule and return number of instances to scale

        @param dict policie policy with metric_type, upper_threshold,
            lower_threshold, instances_in and instances_out
        @param tuple value metric values, indexed by policie["metric_type"]
        @return dict {"up": n, "down": m}; 0 means the threshold was not crossed
        """
        delta = {"up": 0, "down": 0}
        metric = value[policie["metric_type"]]
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]

        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies into one signed instance delta.

        Scaling up wins and uses the largest requested step. Scaling down
        only happens when every policy asks for it and uses the smallest
        requested step, capped at MAX_SCALE_DOWN_STEP. Without policies the
        result is 0.
        """
        deltas = [self.check_rule(policie, (avg_cpu, avg_mem))
                  for policie in self.app["policies"]]
        if not deltas:
            # No policy means no decision; the scale-down cap is only a
            # starting value for the minimum and must not act as a request.
            return 0
        up = max(delta['up'] for delta in deltas)
        if up > 0:
            return up
        down = min([self.MAX_SCALE_DOWN_STEP] +
                   [delta['down'] for delta in deltas])
        if down > 0:
            return 0 - down
        return 0

    def autoscaling(self):
        """Monitor the application forever and scale it according to its policies.

        Each cycle reads the average cpu/memory usage, applies the policies
        and sleeps for config["TIME"]["monitor"] seconds. Errors are logged
        and the loop continues.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    # The averages divide by the container count.
                    self.logger.warning(
                        "No containers found for %s, skipping this cycle",
                        self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info(
                    "Avg cpu usage, avg memmory usage, current instance: %f %f %d",
                    avg_cpu, avg_mem, self.app["instance"])
                delta = self._decide_delta(avg_cpu, avg_mem)
                if delta != 0:
                    self.scale(delta)
            except Exception:
                # Log at ERROR with the traceback; DEBUG records are dropped
                # by the default INFO file handler from setup_logging.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
コード例 #9
0
# Append this node's name and ports to the RabbitMQ environment file.
with open('/etc/rabbitmq/rabbitmq-env.conf', 'a') as file:
    file.write('NODENAME=rabbit@%s\n' % current_host)
    file.write('NODE_PORT=31672\n')
    file.write('DIST_PORT=31673\n')

# start rabbit
print("Starting cluster")
endpoint = os.getenv('MARATHON_ENDPOINT')
username = os.getenv('MARATHON_HTTP_USER')
password = os.getenv('MARATHON_HTTP_PASSWORD')
peers = []
if endpoint:
    try:
        print('Discovering configuration from %s' % endpoint)
        c = MarathonClient('http://%s' % endpoint, username=username, password=password)
        tasks = c.list_tasks(APP_ID)
        for task in tasks:
            # Started tasks of the same app on other hosts are cluster peers.
            if task.started_at and task.host != host:
                peers.append(task.host)
    except Exception as exc:
        # Discovery stays best-effort (the node then starts on its own), but
        # the reason is reported instead of being silently swallowed.
        print('Peer discovery failed: %s' % exc)

cluster = None
if peers:
    cluster = peers[0]
    print('Found cluster %s' % cluster)

if not cluster:
    # No peer was found: start the local RabbitMQ server.
    # NOTE(review): the original comment here read "set ha policy", but the
    # visible code only starts the service and waits - confirm what follows.
    subprocess.call(['sudo', '-E', 'service', 'rabbitmq-server', 'start'])
    time.sleep(10)
コード例 #10
0
class HealthCheckBencher(object):
    """Benchmark Marathon health checks.

    Creates test applications in Marathon, tells their tasks to collect
    health-check timestamps over HTTP, and writes the collected timestamps
    to a CSV report.
    """

    def __init__(self, marathon_url, image, tasks):
        """Set up the benchmark.

        :param marathon_url: Marathon endpoint passed to ``MarathonClient``
        :param image: Docker image used for the test applications
        :param tasks: total number of tasks to run (int or numeric string)
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        self.instances_per_app = 50
        # Compare the converted value: ``tasks`` may arrive as a string.
        if self.total_tasks_cout < self.instances_per_app:
            self.instances_per_app = self.total_tasks_cout
            self.app_count = 1
        else:
            # Floor division keeps app_count an int, as range() requires.
            self.app_count = self.total_tasks_cout // self.instances_per_app
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; 'sleep=N' strings pause for N minutes.
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    def remove_apps(self):
        """Delete every benchmark application and wait until Marathon lists none."""
        app_prefix = "/" + self.app_base_name
        for app in self.marathon_cluster.list_apps():
            if app.id.startswith(app_prefix):
                self.marathon_cluster.delete_app(app.id)
        # Poll with the same prefix used for deletion and recount on every
        # pass, so the loop ends exactly when no benchmark app is left.
        while True:
            remaining = [app for app in self.marathon_cluster.list_apps()
                         if app.id.startswith(app_prefix)]
            if not remaining:
                break

    def create_app(self, id):
        """Create one benchmark application with a random name suffix.

        The created application's name is put on ``app_list_queue``.
        ``id`` is the work item handed over by ``repeat`` and is not used.
        """
        port_mapping = MarathonContainerPortMapping(container_port=80,
                                                    protocol="tcp")
        app_docker = MarathonDockerContainer(
            image=self.docker_image,
            network="BRIDGE",
            force_pull_image=True,
            port_mappings=[port_mapping])
        app_container = MarathonContainer(docker=app_docker)
        http_health_check = MarathonHealthCheck(
            protocol="HTTP",
            path="/status",
            grace_period_seconds=300,
            interval_seconds=self.heath_check_interval,
            timeout_seconds=20,
            max_consecutive_failures=0
        )

        # md5 requires bytes on Python 3; encoding also works on Python 2.
        app_suffix = str(md5(str(random()).encode('utf-8')).hexdigest())
        app_name = self.app_base_name + app_suffix
        new_app = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                              container=app_container,
                              health_checks=[http_health_check],
                              instances=self.instances_per_app,
                              max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=new_app)
        self.app_list_queue.put(app_name)
        return None

    def wait_instances(self, app_name):
        """Block until ``instances_per_app`` tasks of ``app_name`` have health check results."""
        health_ok = 0
        while health_ok < self.instances_per_app:
            tasks = self.marathon_cluster.list_tasks(app_name)
            health_ok = sum(1 for task in tasks if task.health_check_results)

    @staticmethod
    def _task_url(task, endpoint):
        """Return the URL of ``endpoint`` on the given task dict."""
        return 'http://' + task['host'] + ':' + str(task['port']) + '/' + endpoint

    def _notify_task(self, task, endpoint, ok_message, fail_message):
        """GET ``endpoint`` on the task and print a message depending on the status code."""
        res = urlopen(self._task_url(task, endpoint))
        if res.getcode() == 200:
            print(task['id'] + ok_message)
        else:
            print(task['id'] + fail_message)

    def start_collect(self, task):
        """Ask the task to start collecting health-check timestamps."""
        self._notify_task(task, 'start_collect',
                          ': collecter was started',
                          ': failed to start collecter')

    def stop_collect(self, task):
        """Ask the task to stop collecting health-check timestamps."""
        self._notify_task(task, 'stop_collect',
                          ': collecter was stopped',
                          ': failed to stop collecter')

    def clear_stats(self, task):
        """Ask the task to drop its collected timestamps."""
        # The failure branch previously printed the success message.
        self._notify_task(task, 'clear_stats',
                          ': stats was dropped',
                          ': failed to drop stats')

    def get_stats(self, task):
        """Fetch the task's collected timestamps and put the outcome on ``result_queue``.

        Each result is a dict with ``id``, ``status`` and ``data`` (a list of
        timestamp strings, empty on failure).
        """
        url = self._task_url(task, 'get_timestamps')
        try:
            res = urlopen(url)
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        status_code = res.getcode()
        if status_code == 200:
            data = res.read()
            # The response body is bytes on Python 3; decode before splitting.
            if isinstance(data, bytes):
                data = data.decode('utf-8')
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif status_code == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        """Run ``action`` on items from ``work_queue`` until the queue is empty.

        A failing action is reported and does not stop the worker. Every
        item taken is marked done, so ``work_queue.join()`` cannot hang on a
        failed item.
        """
        while self.work_queue.empty() is False:
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            try:
                action(iteration)
            except Exception as exc:
                print("Action failed for {}: {}".format(iteration, exc))
            finally:
                self.work_queue.task_done()

    def fill_queue(self, iterations):
        """Put every item of ``iterations`` on ``work_queue``."""
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        """Return the benchmark tasks as dicts with ``id``, ``host`` and ``port`` strings."""
        res = []
        for task in self.marathon_cluster.list_tasks():
            if not task.id.startswith(self.app_base_name):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def _run_workers(self, action):
        """Process ``work_queue`` with up to ``concurrency`` threads and wait for completion."""
        for _ in range(self.concurrency):
            if self.work_queue.empty() is True:
                break
            worker = Thread(target=self.repeat, args=(action,))
            worker.start()
        self.work_queue.join()

    def create_apps(self):
        """Create ``app_count`` applications, then wait for the instances of each one."""
        self.fill_queue(range(self.app_count))
        self._run_workers(self.create_app)

        # Move the names of the created apps onto the work queue.
        while self.app_list_queue.empty() is False:
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)

        self._run_workers(self.wait_instances)

    def start_test(self):
        """Run every entry of ``action_list`` against the current benchmark tasks.

        String entries of the form ``sleep=N`` pause for N minutes; callable
        entries are applied to every task using the worker threads.
        """
        task_list = self.get_tasks()
        for action in self.action_list:
            # string_types also matches a plain Python 2 ``str``, which is
            # what 'sleep={}'.format(...) produces there.
            if isinstance(action, six.string_types):
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60*amount)
                continue
            self.fill_queue(task_list)
            self._run_workers(action)

    def generate_report(self):
        """Write all queued results to a timestamped CSV file in the current directory."""
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix +
                     'health_check_result-' +
                     str(self.total_tasks_cout) +
                     'tasks.csv')

        # The context manager closes the file even if a write fails.
        with open(file_name, "w") as f:
            f.write("Task ID,Health check timestamp")

            while self.result_queue.empty() is False:
                try:
                    result = self.result_queue.get_nowait()
                except Empty:
                    continue
                for timestamp in result['data']:
                    f.write("\n%s,%s" % (result['id'], timestamp))
コード例 #11
0
ファイル: fibonaci.py プロジェクト: Vidip/Mesos-Marathon
from marathon import MarathonClient
from marathon.models import MarathonApp
import time
import csv
import json

c = MarathonClient('http://localhost:8080')
# Fetch the task list once and reuse it for the printout and the scan below.
tasks = c.list_tasks()
print(tasks)
task_name = ""
# Bound up front so later code does not hit a NameError when no task matches.
task_id = ""
# Cache of Fibonacci numbers used by Fibonacci() (1-based: F(1)=0, F(2)=1).
fibo_array = [0, 1]
for k in tasks:
    # NOTE(review): the ids are cut out of str(task), so they depend on the
    # task's string representation and keep its quoting; task.app_id / task.id
    # would be more robust - confirm how task_name/task_id are used later.
    new_string = str(k)
    app_name = new_string.split("'app_id': ")[1].split(
        ", 'health_check_results'")[0]
    app_id = new_string.split("'id': ")[1].split(", 'ports'")[0]
    if 'fibonaccitest' in app_name:
        task_name = app_name
        task_id = app_id


def Fibonacci(num):
    """Return the num-th Fibonacci number, counting from 1.

    ``Fibonacci(1)`` is 0 and ``Fibonacci(2)`` is 1. Computed values are
    cached in the module-level ``fibo_array`` list, which is extended as
    needed. For ``num < 1`` the function prints "Invalid input" and
    returns None.
    """
    if num < 1:
        # 0 used to fall through to fibo_array[-1], returning whatever value
        # was cached last.
        print("Invalid input")
        return None
    # Extend the cache iteratively so large inputs cannot exhaust the
    # recursion limit.
    while len(fibo_array) < num:
        fibo_array.append(fibo_array[-1] + fibo_array[-2])
    return fibo_array[num - 1]

コード例 #12
0
class Scaler:
    """Class for Scaling

    Scales one Marathon application up or down by comparing the average
    CPU and memory usage of its containers (read from InfluxDB) with the
    thresholds of the application's policies.
    """

    # Largest number of instances removed by one scale-down decision.
    MAX_SCALE_DOWN_STEP = 10

    def __init__(self, app_name, config):
        """Load the application description and policies and connect to the backends.

        @param string app_name name of the application to scale
        @param dict config settings; the sections MARIA_RESTFUL, INFLUXDB,
            MARATHON and TIME are read by this class
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)

        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'], config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/" + app_name + "/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the JSON parsing fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        self.influx_client = InfluxDBClient(config["INFLUXDB"]["host"], config["INFLUXDB"]["port"], config["INFLUXDB"]["username"], config["INFLUXDB"]["password"], config["INFLUXDB"]["db_name"])
        self.marathon_client = MarathonClient(self._marathon_url())

        # Fetch the app once instead of issuing one request per attribute.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reconfigure_haproxy()

    def _marathon_url(self):
        """Return the Marathon base URL built from the MARATHON config section."""
        return "http://{}:{}".format(self.config["MARATHON"]["host"], self.config["MARATHON"]["port"])

    def _reconfigure_haproxy(self):
        """Run servicerouter.py to regenerate /etc/haproxy/haproxy.cfg."""
        os.system("sudo ./servicerouter.py --marathon " + self._marathon_url() + " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file="autoscaling.log", level=logging.INFO, formatter=None):
        """Attach a file handler to the scaler's logger.

        @param string log_file path of the log file
        @param int level minimum level written to the file
        @param logging.Formatter formatter record format; a timestamped
            default is used when None
        """
        if formatter is None:
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return cpu usage of container_name

        The value is taken from the first point returned by InfluxDB,
        divided by the app's cpus and multiplied by 100.

        @param string container_name container name
        """
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage from stats where container_name = '"+container_name+"' and time > now()-5m group by time(2s) "
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1]/1000000000/self.app["cpus"])*100

    def get_container_name(self, mesos_task_id):
        """Return container name mapping with mesos_task_id in mesos

        @param string mesos_task_id
        """
        query = "select container_name from "+self.config["INFLUXDB"]["ts_mapping"]+" where time>now() - 5m and mesos_task_id = '" +mesos_task_id+"' limit 1"
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all Marathon tasks of this application

        @return list container names, one per task
        """
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return avg memory usage of all containers in list containers_name

        Raises ZeroDivisionError when containers_name is empty.

        @param list containers_name list containers name
        @return float avg mem usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'"+x+"'" for x in containers_name)
        query = "select memory_usage,container_name from stats where  time > now()-5m and  container_name in ("+quoted_names+")  limit "+str(number_container*2)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                sum_memory_usage += point[3]/(self.app["mem"]*1048576)*100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return avg cpu usage of all containers in list containers_name

        Raises ZeroDivisionError when containers_name is empty.

        @param list containers_name list containers name
        @return float avg cpu usage
        """
        number_container = len(containers_name)
        quoted_names = ",".join("'"+x+"'" for x in containers_name)
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage,container_name from stats where  time > now()-5m and  container_name in ("+quoted_names+") group by time(10s),container_name limit "+str(number_container)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1]/1000000000/self.app["cpus"]*100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Scale the application by delta instances (add or remove)

        The target is clamped to the app's min_instances/max_instances; no
        request is sent when the clamped target equals the current count.

        @param int delta number of instances to add (positive) or remove
            (negative)
        """
        new_instance = self.app["instance"] + delta
        if new_instance > self.app['max_instances']:
            new_instance = self.app['max_instances']
        if new_instance < self.app['min_instances']:
            new_instance = self.app['min_instances']
        if new_instance == self.app["instance"]:
            return

        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling "+self.app["name"]+" to: "+str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reconfigure_haproxy()
        self.app["instance"] = self.marathon_client.get_app(self.app["name"]).instances
        self.logger.debug("Sleep "+str(self.config["TIME"]['after_scale'])+"s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Check rule and return number of instances to scale

        @param dict policie policy with metric_type, upper_threshold,
            lower_threshold, instances_in and instances_out
        @param tuple value metric values, indexed by policie["metric_type"]
        @return dict {"up": n, "down": m}; 0 means the threshold was not crossed
        """
        delta = {"up": 0, "down": 0}
        metric = value[policie["metric_type"]]
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]

        return delta

    def _decide_delta(self, avg_cpu, avg_mem):
        """Combine all policies into one signed instance delta.

        Scaling up wins and uses the largest requested step. Scaling down
        only happens when every policy asks for it and uses the smallest
        requested step, capped at MAX_SCALE_DOWN_STEP. Without policies the
        result is 0.
        """
        deltas = [self.check_rule(policie, (avg_cpu, avg_mem)) for policie in self.app["policies"]]
        if not deltas:
            # No policy means no decision; the scale-down cap is only a
            # starting value for the minimum and must not act as a request.
            return 0
        up = max(delta['up'] for delta in deltas)
        if up > 0:
            return up
        down = min([self.MAX_SCALE_DOWN_STEP] + [delta['down'] for delta in deltas])
        if down > 0:
            return 0 - down
        return 0

    def autoscaling(self):
        """Monitor the application forever and scale it according to its policies.

        Each cycle reads the average cpu/memory usage, applies the policies
        and sleeps for config["TIME"]["monitor"] seconds. Errors are logged
        and the loop continues.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                if not containers_name:
                    # The averages divide by the container count.
                    self.logger.warning("No containers found for %s, skipping this cycle", self.app["name"])
                    continue
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info("Avg cpu usage, avg memmory usage, current instance: %f %f %d", avg_cpu, avg_mem, self.app["instance"])
                delta = self._decide_delta(avg_cpu, avg_mem)
                if delta != 0:
                    self.scale(delta)
            except Exception:
                # Log at ERROR with the traceback; DEBUG records are dropped
                # by the default INFO file handler from setup_logging.
                self.logger.exception("Autoscaling cycle failed")
            finally:
                time.sleep(self.config["TIME"]['monitor'])
コード例 #13
0
class HealthCheckBencher(object):
    def __init__(self, marathon_url, image, tasks):
        """Set up the benchmark for ``tasks`` health-checked instances.

        Args:
            marathon_url: address passed to ``MarathonClient``.
            image: Docker image every benchmark app runs.
            tasks: total number of task instances; anything ``int()``
                accepts (e.g. a command-line string).

        Instances are spread over apps of 50 instances each. Fewer than 50
        tasks go into a single app. Otherwise ``app_count`` is the floor of
        ``tasks / 50``, so a remainder is not scheduled.
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        self.instances_per_app = 50
        # Compare the parsed integer, not the raw argument: a string such as
        # "10" never compares correctly against an int.
        if self.total_tasks_cout < self.instances_per_app:
            self.instances_per_app = self.total_tasks_cout
            self.app_count = 1
        else:
            # Floor division keeps app_count an int (it is fed to range()).
            self.app_count = self.total_tasks_cout // self.instances_per_app
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; 'sleep=N' strings are pauses.
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    def remove_apps(self):
        apps = self.marathon_cluster.list_apps()
        for app in apps:
            if app.id.startswith("/"+self.app_base_name):
                self.marathon_cluster.delete_app(app.id)
        active = 0
        while True:
            apps = self.marathon_cluster.list_apps()
            for app in apps:
                if app.id.startswith(self.app_base_name):
                    active += 1
            if active == 0:
                break

    def create_app(self, id):
        """Create one randomly named benchmark app and queue its name.

        The app runs ``self.docker_image`` in a bridged Docker container with
        container port 80 mapped, carries an HTTP health check on ``/status``
        and starts ``self.instances_per_app`` instances. The generated name is
        put on ``self.app_list_queue``.

        Args:
            id: work-queue item handed over by the worker loop; not used.
        """
        container = MarathonContainer(
            docker=MarathonDockerContainer(
                image=self.docker_image,
                network="BRIDGE",
                force_pull_image=True,
                port_mappings=[
                    MarathonContainerPortMapping(container_port=80,
                                                 protocol="tcp"),
                ]))
        status_check = MarathonHealthCheck(protocol="HTTP",
                                           path="/status",
                                           grace_period_seconds=300,
                                           interval_seconds=self.heath_check_interval,
                                           timeout_seconds=20,
                                           max_consecutive_failures=0)

        # Random hex digest as suffix, so every call yields a distinct name.
        digest = md5(str(random())).hexdigest()
        app_name = self.app_base_name + str(digest)

        definition = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                                 container=container,
                                 health_checks=[status_check],
                                 instances=self.instances_per_app,
                                 max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=definition)
        self.app_list_queue.put(app_name)

    def wait_instances(self, app_name):
        health_ok = 0
        while health_ok < self.instances_per_app:
            health_ok = 0
            tasks = self.marathon_cluster.list_tasks(app_name)
            for task in tasks:
                if task.health_check_results:
                    health_ok += 1

    def start_collect(self, task):
        """Request ``/start_collect`` on the task's endpoint and print the outcome.

        Args:
            task: mapping with the keys ``'id'``, ``'host'`` and ``'port'``.
        """
        endpoint = ''.join(('http://', task['host'], ':', str(task['port']),
                            '/start_collect'))
        started = urlopen(endpoint).getcode() == 200
        outcome = ': collecter was started' if started else ': failed to start collecter'
        print(task['id'] + outcome)

    def stop_collect(self, task):
        """Request ``/stop_collect`` on the task's endpoint and print the outcome.

        Args:
            task: mapping with the keys ``'id'``, ``'host'`` and ``'port'``.
        """
        response = urlopen(''.join(['http://', task['host'], ':',
                                    str(task['port']), '/stop_collect']))
        if response.getcode() != 200:
            print(task['id']+': failed to stop collecter')
            return
        print(task['id']+': collecter was stopped')

    def clear_stats(self, task):
        """Request ``/clear_stats`` on the task's endpoint and print the outcome.

        Args:
            task: mapping with the keys ``'id'``, ``'host'`` and ``'port'``.
        """
        url = 'http://'+task['host']+':'+str(task['port'])+'/clear_stats'
        res = urlopen(url)
        if res.getcode() == 200:
            print(task['id']+': stats was dropped')
        else:
            # This branch used to print the success message as well.
            print(task['id']+': failed to drop stats')

    def get_stats(self, task):
        url = 'http://'+task['host']+':'+str(task['port'])+'/get_timestamps'
        try:
            res = urlopen(url)
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        if res.getcode() == 200:
            data = res.read()
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif res.getcode() == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        while self.work_queue.empty() is False:
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            action(iteration)
            self.work_queue.task_done()

    def fill_queue(self, iterations):
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        res = []
        tasks = self.marathon_cluster.list_tasks()
        for task in tasks:
            if not task.id.startswith('health-check-test-'):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def create_apps(self):
        self.fill_queue(range(self.app_count))
        for thread_num in range(self.concurrency):
            if self.work_queue.empty() is True:
                break
            worker = Thread(target=self.repeat, args=(self.create_app,))
            worker.start()
        self.work_queue.join()

        while self.app_list_queue.empty() is False:
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)

        for thread_num in range(self.concurrency):
            if self.work_queue.empty() is True:
                break
            worker = Thread(target=self.repeat, args=(self.wait_instances,))
            worker.start()
        self.work_queue.join()

    def start_test(self):
        task_list = self.get_tasks()
        for action in self.action_list:
            if isinstance(action, basestring):
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60*amount)
                continue
            self.fill_queue(task_list)
            for thread_num in range(self.concurrency):
                if self.work_queue.empty() is True:
                    break
                worker = Thread(target=self.repeat, args=(action,))
                worker.start()
            self.work_queue.join()

    def generate_report(self):
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix +
                     'health_check_result-' +
                     str(self.total_tasks_cout) +
                     'tasks.csv')

        f = open(file_name, "w")
        f.write("Task ID,Health check timestamp")

        while self.result_queue.empty() is False:
            try:
                result = self.result_queue.get_nowait()
            except Empty:
                continue
            for timestamp in result['data']:
                f.write("\n%s,%s" % (result['id'], timestamp))

        f.close()