コード例 #1
0
def in_place_restart(client: MarathonClient, appid: str):
    """Restart an app by scaling it to zero and back to its prior size.

    The instance count reported by ``client.get_app(appid)`` is recorded
    first. The app is then scaled to 0 and afterwards back to that count,
    with ``wait_for_deployment`` called on the result of each scale request.

    :param client: Marathon client used for every call.
    :param appid: ID of the app to restart.
    """
    original_count = client.get_app(appid).instances

    # Phase 1: take every instance down.
    wait_for_deployment(client, client.scale_app(appid, 0))
    print('Scaled {} down to 0'.format(appid))

    # Phase 2: restore the instance count recorded above.
    wait_for_deployment(client, client.scale_app(appid, original_count))
    print('{} back at {} again'.format(appid, original_count))
コード例 #2
0
def launch_elsa(marathon, stats_file, scale_window):
    """Deploy the 'elsa' app on Marathon and autoscale it from a traffic counter.

    The function creates the app, then loops forever: every ``scale_window``
    seconds it reads an integer counter from ``stats_file``, compares it with
    the previous reading and scales the app up or down. The loop ends when
    the user interrupts it (Ctrl-C), at which point the app is deleted.

    :param marathon: value handed to ``MarathonClient`` to reach Marathon.
    :param stats_file: path of a file containing a single integer counter.
    :param scale_window: seconds to sleep between two readings.
    """
    logging.info('Start monitoring the inbound traffic on topics using %s' %
                 (stats_file))
    # make sure the stats file is properly initialized:
    if not os.path.exists(stats_file):
        with open(stats_file, 'w') as f:
            f.write('0')

    # launch the Elsa app via Marathon
    c = MarathonClient(marathon)
    c.create_app(
        'elsa',
        MarathonApp(cmd='/home/vagrant/elsa/launch-elsa.sh',
                    mem=200,
                    cpus=1,
                    user='******'))

    print(
        'ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...'
    )
    time.sleep(5)  # allow time to deploy before autoscaling sets in

    # kick off traffic monitoring and trigger autoscaling:
    previous_topic_traffic = 0
    try:
        while True:
            # Only hold the file open for the read itself, not while
            # talking to Marathon.
            with open(stats_file, 'r') as elsa_file:
                topic_traffic = int(elsa_file.read())

            topic_traffic_diff = topic_traffic - previous_topic_traffic
            print('Difference in traffic in the past %d seconds: %d' %
                  (scale_window, topic_traffic_diff))
            previous_topic_traffic = topic_traffic

            current_instance_num = c.get_app('elsa').instances

            if topic_traffic_diff > TRAFFIC_INCREASE_THRESHOLD:
                # Surge above threshold: multiply the instance count. The
                # multiplier is clamped to 1 because a diff smaller than
                # SCALE_FACTOR would otherwise truncate to 0 and scale the
                # app down to zero instances in the middle of a surge.
                instance_multiplier = max(
                    1, int(topic_traffic_diff / SCALE_FACTOR))
                target_instance_num = (current_instance_num *
                                       instance_multiplier)
                c.scale_app('elsa', target_instance_num)
                print('Increasing number of instances to %d' %
                      (target_instance_num))
            elif topic_traffic_diff < 0:  # negative, back off exponentially
                target_instance_num = int(current_instance_num / 2)
                if target_instance_num > 1:
                    c.scale_app('elsa', target_instance_num)
                    print('Decreasing number of instances to %d' %
                          (target_instance_num))
                else:
                    c.scale_app('elsa', 1)
                    print('Resetting number of instances to 1')
            time.sleep(scale_window)
    except KeyboardInterrupt:
        print(
            'ElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!'
        )
        c.delete_app('elsa', force=True)
コード例 #3
0
ファイル: views.py プロジェクト: huanpc/mesos-admin
def send_to_marathon(request):
    """Run the Marathon action named in a POST request and report the outcome.

    Reads ``action`` and ``id`` (plus action-specific fields) from
    ``request.POST``, performs the matching ``MarathonClient`` call and
    returns an ``HttpResponse`` whose body is a small JSON-formatted status
    string. Any exception raised while handling the request, including
    ``PermissionDenied``, is reported as an error status instead of being
    propagated.
    """
    # Bind these before the try block: the error handler formats ``action``
    # and the final return needs ``result`` even when the request is not a
    # POST or fails before ``action`` has been read.
    action = None
    result = '{"status":"error", "msg": "only POST requests are supported"}'
    try:
        if request.method == 'POST':
            action = request.POST.get('action', None)
            app_id = request.POST.get('id', None)
            mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
            if action == 'stop':
                mc.scale_app(app_id, 0, force=True)
            elif action == 'start':
                mc.scale_app(app_id, 1)
            elif action == 'destroy':
                if request.user.has_perm("auth.can_init_app"):
                    mc.delete_app(app_id)
                else:
                    raise PermissionDenied
            elif action == 'restart':
                mc.restart_app(app_id)
            elif action == 'scale':
                mc.scale_app(app_id, int(request.POST.get('number_instance')))
            elif action == 'update':
                app = mc.get_app(app_id)
                app.cpus = float(request.POST.get('cpus'))
                app.mem = float(request.POST.get('mem'))
                app.container.docker.image = request.POST.get('version')
                mc.update_app(app_id, app)
            elif action == "stop-deployment":
                mc.delete_deployment(app_id)
            # ``action`` is client-supplied, so escape it the same way the
            # error text is escaped before echoing it back.
            result = '{"status":"success", "msg": "%(action)s success"}' % {
                "action": html.escape(str(action))
            }
    except Exception as e:
        result = '{"status":"error", "msg": "%(action)s fail: %(error)s" }' % {
            "action": html.escape(str(action)),
            "error": html.escape(str(e))
        }
    return HttpResponse(result)
コード例 #4
0
ファイル: autoscale.py プロジェクト: SemanticBeeng/elsa
def launch_elsa(marathon, stats_file, scale_window):
    """Deploy the 'elsa' app on Marathon and autoscale it from a traffic counter.

    After creating the app, the function polls ``stats_file`` every
    ``scale_window`` seconds. Each reading is an integer counter; the
    difference from the previous reading decides whether the app is scaled
    up, halved, or left alone. A keyboard interrupt ends the loop and deletes
    the app.

    :param marathon: value handed to ``MarathonClient`` to reach Marathon.
    :param stats_file: path of a file containing a single integer counter.
    :param scale_window: seconds to sleep between two readings.
    """
    logging.info('Start monitoring the inbound traffic on topics using %s' %(stats_file))
    # make sure the stats file is properly initialized:
    if not os.path.exists(stats_file):
        with open(stats_file, 'w') as f:
            f.write('0')

    # launch the Elsa app via Marathon
    c = MarathonClient(marathon)
    c.create_app('elsa', MarathonApp(cmd='/home/vagrant/elsa/launch-elsa.sh', mem=200, cpus=1, user='******'))

    print('ElSA is deployed and running, waiting now 5 sec before starting auto-scale ...')
    time.sleep(5) # allow time to deploy before autoscaling sets in

    # kick off traffic monitoring and trigger autoscaling:
    previous_topic_traffic = 0
    try:
        while True:
            # Close the stats file as soon as the counter has been read.
            with open(stats_file, 'r') as elsa_file:
                topic_traffic = int(elsa_file.read())

            topic_traffic_diff = topic_traffic - previous_topic_traffic
            print('Difference in traffic in the past %d seconds: %d' %(scale_window, topic_traffic_diff))
            previous_topic_traffic = topic_traffic

            current_instance_num = c.get_app('elsa').instances

            if topic_traffic_diff > TRAFFIC_INCREASE_THRESHOLD: # we see a surge of traffic above threshold ...
                # Clamp to 1: a diff below SCALE_FACTOR truncates to 0, which
                # would scale the app to zero instances during a surge.
                instance_multiplier = max(1, int(topic_traffic_diff / SCALE_FACTOR))
                target_instance_num = current_instance_num * instance_multiplier
                c.scale_app('elsa', target_instance_num)
                print('Increasing number of instances to %d' %(target_instance_num))
            elif topic_traffic_diff < 0: # negative, back off exponentially
                target_instance_num = int(current_instance_num/2)
                if target_instance_num > 1:
                    c.scale_app('elsa', target_instance_num)
                    print('Decreasing number of instances to %d' %(target_instance_num))
                else:
                    c.scale_app('elsa', 1)
                    print('Resetting number of instances to 1')
            time.sleep(scale_window)
    except KeyboardInterrupt:
        print('ElSA has been stopped by user, halting app and rolling back deployment. Thanks and bye!')
        c.delete_app('elsa', force=True)
コード例 #5
0
ファイル: views.py プロジェクト: ntk148v/mesos-admin
def send_to_marathon(request):
    """Run the Marathon action named in a POST request and report the outcome.

    Reads ``action`` and ``id`` from ``request.POST``, performs the matching
    ``MarathonClient`` call and returns an ``HttpResponse`` whose body is a
    small JSON-formatted status string. Exceptions raised while handling the
    request are reported as an error status instead of being propagated.
    """
    # Bind these before the try block so neither the error handler nor the
    # final return can hit an unbound local (for example on a GET request).
    action = None
    result = '{"status":"error", "msg": "only POST requests are supported"}'
    try:
        if request.method == "POST":
            action = request.POST.get("action", None)
            app_id = request.POST.get("id", None)
            mc = MarathonClient("http://{}:{}".format(settings.MARATHON["host"], settings.MARATHON["port"]))
            if action == "stop":
                mc.scale_app(app_id, 0)
            elif action == "start":
                mc.scale_app(app_id, 1)
            elif action == "destroy":
                mc.delete_app(app_id)
            elif action == "restart":
                # NOTE(review): restart performs no Marathon call here yet is
                # still reported as a success below - confirm this is intended.
                pass
            elif action == "scale":
                mc.scale_app(app_id, int(request.POST.get("number_instance")))
            # ``action`` is client-supplied, so escape it before echoing it.
            result = '{"status":"success", "msg": "%(action)s success"}' % {
                "action": html.escape(str(action)),
            }
    except Exception as e:
        result = '{"status":"error", "msg": "%(action)s fail: %(error)s" }' % {
            "action": html.escape(str(action)),
            "error": html.escape(str(e)),
        }
    return HttpResponse(result)
コード例 #6
0
ファイル: views.py プロジェクト: cuongtransc/mesos-admin
def send_to_marathon(request):
    """Run the Marathon action named in a POST request and report the outcome.

    Reads ``action`` and ``id`` (plus action-specific fields) from
    ``request.POST``, performs the matching ``MarathonClient`` call and
    returns an ``HttpResponse`` whose body is a small JSON-formatted status
    string. Any exception raised while handling the request, including
    ``PermissionDenied``, is reported as an error status instead of being
    propagated.
    """
    # Defaults for the paths that never reach the assignments below: a
    # non-POST request, or a failure before ``action`` has been read.
    action = None
    result = '{"status":"error", "msg": "only POST requests are supported"}'
    try:
        if request.method == 'POST':
            action = request.POST.get('action', None)
            app_id = request.POST.get('id', None)
            mc = MarathonClient('http://{}:{}'.format(
                settings.MARATHON['host'], settings.MARATHON['port']))
            if action == 'stop':
                mc.scale_app(app_id, 0, force=True)
            elif action == 'start':
                mc.scale_app(app_id, 1)
            elif action == 'destroy':
                if request.user.has_perm("auth.can_init_app"):
                    mc.delete_app(app_id)
                else:
                    raise PermissionDenied
            elif action == 'restart':
                mc.restart_app(app_id)
            elif action == 'scale':
                mc.scale_app(app_id, int(request.POST.get('number_instance')))
            elif action == 'update':
                app = mc.get_app(app_id)
                app.cpus = float(request.POST.get('cpus'))
                app.mem = float(request.POST.get('mem'))
                app.container.docker.image = request.POST.get('version')
                mc.update_app(app_id, app)
            elif action == "stop-deployment":
                mc.delete_deployment(app_id)
            # ``action`` comes from the client; escape it like the error text.
            result = '{"status":"success", "msg": "%(action)s success"}' % {
                "action": html.escape(str(action))
            }
    except Exception as e:
        result = '{"status":"error", "msg": "%(action)s fail: %(error)s" }' % {
            "action": html.escape(str(action)),
            "error": html.escape(str(e))
        }
    return HttpResponse(result)
コード例 #7
0
def deploy(app_definition, marathon_url, instances, auth_token, zero, force):
    """Create a new app version on Marathon and shift traffic onto it.

    The app definition is loaded from a JSON file and created on Marathon.
    Already running apps of the same service (matched on the last path
    segment of the app ID, up to the first dot) are then replaced, either
    gradually (``zero == 'Yes'``) or by stopping them first.

    :param app_definition: path of the Marathon app definition JSON file.
    :param marathon_url: Marathon address handed to ``MarathonClient``.
    :param instances: target instance count, or None to derive it (2 for a
        first deployment, otherwise the count of an existing app).
    :param auth_token: token handed to ``MarathonClient``.
    :param zero: ``'Yes'`` selects the zero-downtime path.
    :param force: ``'Yes'`` updates the app in place when creation fails.
    """
    old_appids = []
    # Connect to Marathon
    print("\nConnecting to Marathon...")
    c = MarathonClient(marathon_url, auth_token=auth_token)
    print("Connected to", marathon_url)

    # Pick up the Marathon App Definition file
    with open(app_definition) as app_file:
        app_json = app_file.read()
    app = MarathonApp.from_json(json.loads(app_json))
    new_app_id = app.id
    service_name = new_app_id.split("/")[-1].split(".")[0]

    # Instantiate the new application on DC/OS but don't launch it yet
    # The application definition instances field should be 0 by default
    # If forced, the application will be relaunched even if the ID already exists
    print("\nInstantiating new application on Marathon with", app.instances,
          "instances...")
    try:
        c.create_app(new_app_id, app)
    except Exception as err:
        if force != 'Yes':
            # Exit with a message and a non-zero status so a failed deploy
            # is visible to whatever invoked it.
            sys.exit("Could not create app {}: {}".format(new_app_id, err))
        print("\nForcing redeploy of the same app id...", new_app_id)
        c.update_app(new_app_id, app, force=True, minimal=True)
        check_deployment(c, new_app_id)
    print("Created app", new_app_id)

    # List and find currently running apps of the same service
    # This assumes the naming convention (id): /some/group/service_name.uniquevalue
    print("\nFinding any existing apps for service:", service_name)
    for existing_app in c.list_apps():
        existing_service_name = existing_app.id.split("/")[-1].split(".")[0]
        if (service_name == existing_service_name
                and existing_app.instances > 0):
            print("Found up and running application id:", existing_app.id)
            old_appids.append(existing_app.id)

    # If it's the first deployment ever, just launch the desired number of instances
    # Otherwise perform a hybrid release
    # Finally clean up any older app instances running
    if not old_appids:
        if instances is None:
            instances = 2
        print("No current apps found. Launching brand new service with",
              instances, "instances...")
        c.scale_app(new_app_id, instances=instances)
        check_deployment(c, new_app_id)
        check_health(c, new_app_id)

    else:
        old_appids.reverse()
        if zero == 'Yes':
            print("\nStarting zero downtime deployment for...", new_app_id)
            for old_appid in old_appids:
                if instances is None:
                    instances = c.get_app(old_appid).instances
                if (old_appid == '' or old_appid == new_app_id
                        or old_appid == '/' + new_app_id):
                    print("Scaling existing app_id", new_app_id, "to",
                          instances, "instances...")
                    c.scale_app(new_app_id, instances=instances)
                    check_deployment(c, new_app_id)
                    check_health(c, new_app_id)

                else:
                    print("Target number of total instances:", instances)
                    # Move half of the target (at least one) in the first step.
                    delta = max(1, int(round(instances * .50)))

                    scale(c, new_app_id, old_appid, delta)

                    if (c.get_app(new_app_id).instances != instances):
                        print("\nLaunch", instances - delta,
                              "remaining instance(s) of the new version...")
                        c.scale_app(new_app_id, instances=instances)
                        check_deployment(c, new_app_id)
                        check_health(c, new_app_id)
                    if (c.get_app(old_appid).instances > 0):
                        print(
                            "Finish shutting down remaining instances of the old version..."
                        )
                        c.scale_app(old_appid, instances=0)
                        check_deployment(c, old_appid)
        else:
            print("Started deployment with downtime...")
            if instances is None:
                # Same fallback as the zero-downtime path: reuse the count of
                # the first old app. It must be read before the old apps are
                # scaled to 0, otherwise the new app would be scaled with
                # instances=None.
                instances = c.get_app(old_appids[0]).instances
            for old_appid in old_appids:
                c.scale_app(old_appid, instances=0)
                check_deployment(c, old_appid)
            c.scale_app(new_app_id, instances=instances)
            check_deployment(c, new_app_id)
            check_health(c, new_app_id)

    print("\nSUCCESS:\nNew application ID:", new_app_id,
          "\nRunning instances:", instances)
コード例 #8
0
ファイル: mmapi.py プロジェクト: kratos7/hydra
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` adding retries and polling helpers."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return the result of ``MarathonClient.list_apps``."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors until ``timeout`` seconds elapse.

        :return: the app, or None when Marathon reports it as not found.
        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                return None
            except Exception:
                # Not a bare except: Ctrl-C and SystemExit must still be
                # able to stop the retry loop.
                l.info("mcli: get_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli get_app timed out, possible zookeper/marathon/mesos malfunction")

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors until ``timeout`` seconds elapse.

        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.delete_app(app_id, force)
                return
            except Exception:
                l.info("mcli: delete_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli delete_app timed out, possible zookeper/marathon/mesos malfunction")

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        :return: None when the app does not exist, otherwise the result of
            ``delete_app``.
        :raises Exception: the last error seen once every attempt has failed.
        """
        last_error = None
        for _ in range(trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                last_error = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        # A bare ``raise`` here has no active exception on Python 3, so the
        # last failure is kept and re-raised explicitly.
        if last_error is not None:
            raise last_error
        raise Exception("delete_app_ifexisting made no attempts (trys=%d)" % trys)

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses characters allowed in an ID."""
        # allowed: lowercase letters, digits, hyphens, slash, dot
        # ``\Z`` instead of ``$`` so a trailing newline is rejected as well.
        return bool(re.match(r"[a-z0-9/.-]*\Z", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` is invalid, or the last
                "app is locked" error after ten attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")

        last_error = None
        for _ in range(10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                # Only the "locked by a deployment" error is worth retrying.
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                last_error = e
                time.sleep(1)
        # Re-raise the last lock error explicitly; a bare ``raise`` outside
        # an except block fails with RuntimeError on Python 3.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            # NOTE(review): always true after the increment, so this logs on
            # every poll - confirm whether throttling was intended.
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Poll until the app has exactly ``running_count`` running tasks.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            # NOTE(review): get_app returns None for a missing app, which
            # would fail on the attribute access below - confirm callers
            # only wait on apps that exist.
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta, running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors until ``timeout`` seconds elapse.

        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.scale_app(app, scale)
                return
            except Exception:
                l.info("mcli: scale_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception("mcli scale_app timed out, possible zookeper/marathon/mesos malfunction")

    def ping(self):
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        return self.mcli.kill_task(app_id, task_id)
コード例 #9
0
import time
from optparse import OptionParser
from marathon import MarathonClient
from marathon.models import MarathonApp

if __name__ == '__main__':
    # ``sys`` is needed for the exit below; imported here so the script
    # section is self-contained.
    import sys

    usage = ('python %prog')
    parser = OptionParser(description='Simple marathon-python based master to launch apps',
                          version="0.1 ", usage=usage)
    (options, args) = parser.parse_args()
    # The script takes no positional arguments.
    if (len(args) != 0):
        parser.print_help()
        sys.exit(2)


    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Initiating marathonclient...")
    c = MarathonClient('http://localhost:8080')
    app_cmd = "python /home/abdullah/cosmic-space/test-mesos/py-zmq/sub_client.py --server_ip_ports 10.10.0.2:5556"

    # launch app
    print("Initiating zmq-client app")
    c.create_app('zmq-client', MarathonApp(cmd=app_cmd, mem=16, cpus=0.01))

    # scale (waits for the operator to press Enter first)
    raw_input("scale_apps upto 400")
    c.scale_app('zmq-client', instances=400)

    # delete (waits for the operator to press Enter first)
    raw_input("delete apps")
    c.delete_app('zmq-client')
コード例 #10
0
    filter_inject = filter_read.read()
    with open('%s/security/config.xml' % GEOSERVER_DATA_DIR) as config_read:
        full_config = config_read.read()
        if 'anonReload' in full_config:
            logging.info('Configuration already supports anonymous REST reloads.')
        # Only shim in anonymous reload and restart GeoServer if it hasn't been done before
        else:
            config_read.seek(0)
            with open('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, 'w') as config_write:
                line_value = config_read.readline()
                while len(line_value):
                    config_write.write('%s' % line_value)
                    if '<filterChain' in line_value:
                        config_write.write('%s' % filter_inject)
                    line_value = config_read.readline()

            shutil.move('%s/security/config.xml-output' % GEOSERVER_DATA_DIR,
                        '%s/security/config.xml' % GEOSERVER_DATA_DIR)

            response = MARATHON_CLIENT.kill_tasks(GEOSERVER_APP)

            if not len(response) == 1:
                logging.critical('Error restarting GeoServer')
                sys.exit(1)

# Ask Marathon to run GEOSERVER_INSTANCES instances of the GeoServer app.
MARATHON_CLIENT.scale_app(GEOSERVER_APP, GEOSERVER_INSTANCES)

# NOTE(review): block_for_healthy_app is defined outside this view;
# presumably it waits until that many instances are healthy - confirm.
block_for_healthy_app(MARATHON_CLIENT, GEOSERVER_APP, GEOSERVER_INSTANCES)

logging.info('Bootstrap complete.')
コード例 #11
0
ファイル: mmapi.py プロジェクト: annym/hydra
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` adding validation and polling helpers."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return the result of ``MarathonClient.list_apps``."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app, or None when Marathon reports it as not found."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        :return: None when the app does not exist, otherwise the result of
            ``delete_app``.
        :raises Exception: the last error seen once every attempt has failed.
        """
        last_error = None
        for _ in range(trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Not a bare except: Ctrl-C must still interrupt the retries.
                last_error = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        # A bare ``raise`` here has no active exception on Python 3, so the
        # last failure is kept and re-raised explicitly.
        if last_error is not None:
            raise last_error
        raise Exception("delete_app_ifexisting made no attempts (trys=%d)" % trys)

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses characters allowed in an ID."""
        # allowed: lowercase letters, digits, hyphens, slash, dot
        # ``\Z`` instead of ``$`` so a trailing newline is rejected as well.
        return bool(re.match(r"[a-z0-9/.-]*\Z", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` is invalid, or the last
                "app is locked" error after ten attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")

        last_error = None
        for _ in range(10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                # Only the "locked by a deployment" error is worth retrying.
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                last_error = e
                time.sleep(1)
        # Re-raise the last lock error explicitly; a bare ``raise`` outside
        # an except block fails with RuntimeError on Python 3.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            # NOTE(review): always true after the increment, so this logs on
            # every poll - confirm whether throttling was intended.
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once a second until the app has ``running_count`` running tasks.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            # NOTE(review): get_app returns None for a missing app, which
            # would fail on the attribute access below - confirm callers
            # only wait on apps that exist.
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        return self.mcli.scale_app(app, scale)

    def ping(self):
        return self.mcli.ping()
コード例 #12
0
class MarathonManager(object):
    """Helper around ``MarathonClient`` for apps tagged with ``_tonomi_*`` labels."""

    def __init__(self, server):
        # Keep the address: __repr__ reports it.
        self.server = server
        self._client = MarathonClient(server)

    def __repr__(self):
        # __repr__ must return a str, whatever type ``server`` was given as.
        return str(self.server)

    def create(self, app):
        app._create(self._client)

    def discover(self, app_filter=None, env_filter=False):
        """List app names, or environment names when ``env_filter`` is set.

        :param app_filter: when truthy, only apps whose
            ``_tonomi_application`` label equals it are considered.
        :param env_filter: when true, return ``/<environment>`` names taken
            from the ``_tonomi_environment`` label in place of app names.
        :return: list of unique names, in no particular order.
        """
        apps = set()
        for app in self._client.list_apps():
            if app_filter and ('_tonomi_application',
                               app_filter) not in app.labels.items():
                continue
            if not env_filter:
                apps.add(reduce_app_name(app.id))
            elif '_tonomi_environment' in app.labels:
                apps.add('/{}'.format(app.labels['_tonomi_environment']))

        return list(apps)

    def get_apps(self, app_type, env_name):
        """Return the apps labelled with the given type and environment.

        Slashes are removed from ``env_name`` before matching. Each match is
        fetched again through ``get_app``.
        """
        env_name = env_name.replace('/', '')
        apps = []
        for app in self._client.list_apps():
            if ('_tonomi_environment', env_name) in app.labels.items() and (
                    '_tonomi_application', app_type) in app.labels.items():
                apps.append(app)
        return [self._client.get_app(app.id) for app in apps]

    def get_app_host(self, app_type, env_name):
        """Return the host of the first task found, polling every 5 seconds."""
        while True:
            apps = self.get_apps(app_type=app_type, env_name=env_name)
            for app in apps:
                for task in app.tasks:
                    return task.host
            time.sleep(5)

    def health_check(self):
        pass

    def destroy(self, name):
        """Delete the group ``name``; failures are deliberately ignored."""
        try:
            self._client.delete_group(name, force=True)
        except Exception:
            # Best-effort cleanup. Narrowed from a bare except so Ctrl-C and
            # SystemExit are no longer swallowed.
            pass

    def update(self):
        pass

    def restart(self):
        pass

    def scale_app(self, app_name, num):
        self._client.scale_app(app_name, num, force=True)

    def free_ports(self, num=1):
        return get_free_ports(self._client, num)
コード例 #13
0
ファイル: mmapi.py プロジェクト: tahir24434/hydra
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` adding retries and polling helpers."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return the result of ``MarathonClient.list_apps``."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch an app, retrying on errors until ``timeout`` seconds elapse.

        :return: the app, or None when Marathon reports it as not found.
        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                return None
            except Exception:
                # Not a bare except: Ctrl-C and SystemExit must still be
                # able to stop the retry loop.
                l.info("mcli: get_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli get_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete an app, retrying on errors until ``timeout`` seconds elapse.

        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.delete_app(app_id, force)
                return
            except Exception:
                l.info("mcli: delete_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli delete_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        :return: None when the app does not exist, otherwise the result of
            ``delete_app``.
        :raises Exception: the last error seen once every attempt has failed.
        """
        last_error = None
        for _ in range(trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                last_error = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        # A bare ``raise`` here has no active exception on Python 3, so the
        # last failure is kept and re-raised explicitly.
        if last_error is not None:
            raise last_error
        raise Exception(
            "delete_app_ifexisting made no attempts (trys=%d)" % trys)

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses characters allowed in an ID."""
        # allowed: lowercase letters, digits, hyphens, slash, dot
        # ``\Z`` instead of ``$`` so a trailing newline is rejected as well.
        return bool(re.match(r"[a-z0-9/.-]*\Z", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` is invalid, or the last
                "app is locked" error after ten attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error(
                "Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s"
                % app_id)
            raise Exception("Invalid app_id")

        last_error = None
        for _ in range(10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                # Only the "locked by a deployment" error is worth retrying.
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                last_error = e
                time.sleep(1)
        # Re-raise the last lock error explicitly; a bare ``raise`` outside
        # an except block fails with RuntimeError on Python 3.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            # NOTE(review): always true after the increment, so this logs on
            # every poll - confirm whether throttling was intended.
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" %
                       (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Poll until the app has exactly ``running_count`` running tasks.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            # NOTE(review): get_app returns None for a missing app, which
            # would fail on the attribute access below - confirm callers
            # only wait on apps that exist.
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta,
                       running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info(
                    "[%d]Waiting for task to move to running stage, " % cnt +
                    "current stat staged=%d running=%d expected Running=%d" %
                    (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale an app, retrying on errors until ``timeout`` seconds elapse.

        :raises Exception: when no attempt succeeds within ``timeout``.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                self.mcli.scale_app(app, scale)
                return
            except Exception:
                l.info("mcli: scale_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli scale_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def ping(self):
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        return self.mcli.kill_task(app_id, task_id)
コード例 #14
0
class MarathonAPI(object):
    """Wrapper around ``MarathonClient`` that tracks apps opted in to autoscaling.

    ``findAppsWithAutoscaleLabels`` fills ``dict_apps`` (app id -> local
    ``MarathonApp`` carrying the label values as attributes) and ``scaleApps``
    walks that registry and applies the delta each app asks for.
    """

    # Class-level defaults; every one of them is rebound per instance in
    # ``__init__``.
    user = None
    password = None
    host = None
    use_https = False
    port = None
    url = None
    marathon_cli = None
    dict_apps = {}

    def __init__(self,
                 host,
                 port=80,
                 use_https=False,
                 user=None,
                 password=None):
        """Build the Marathon URL and connect a ``MarathonClient`` to it.

        :param host: Marathon host name.
        :param port: Marathon port (stored as a string on ``self.port``).
        :param use_https: use ``https`` instead of ``http`` in the URL.
        :param user: optional user name for the client.
        :param password: optional password for the client.
        :raises Exception: whatever ``MarathonClient`` raises; it is logged
            as critical and re-raised.
        """
        self.user = user
        self.password = password
        self.host = host
        self.use_https = use_https
        self.port = str(port)
        self.url = '{}://{}:{}/'.format('https' if use_https else 'http', host,
                                        port)
        # Give each instance its own registry; mutating the class-level dict
        # would leak apps between unrelated MarathonAPI objects.
        self.dict_apps = {}
        try:
            self.marathon_cli = MarathonClient([self.url],
                                               username=self.user,
                                               password=self.password)
        except Exception as e:
            logger.critical(e)
            # Bare raise keeps the original traceback.
            raise

    def scaleOneApp(self, app_id, delta=None):
        """Ask Marathon to change the instance count of ``app_id`` by ``delta``.

        :param app_id: id of an app present in ``self.dict_apps``.
        :param delta: signed instance change forwarded to ``scale_app``.
        :raises KeyError: if ``app_id`` is not in ``self.dict_apps``.

        ``MarathonHttpError`` is logged and swallowed; any other exception
        propagates.
        """
        tracked = self.dict_apps[app_id]
        # ``None > 0`` raises TypeError on Python 3, so guard the default.
        direction = 'up' if delta is not None and delta > 0 else 'down'
        logger.info(
            'App: [{}] :: Scale {} Delta:[{}] Atual:[{}] Staged:[{}]'.format(
                app_id, direction, delta, tracked.tasksRunning,
                tracked.tasksStaged))
        try:
            self.marathon_cli.scale_app(app_id=app_id, delta=delta)
        except MarathonHttpError as e:
            logger.error(e.error_message)

    @staticmethod
    def _copy_label(target, labels, label):
        """Copy ``labels[label]`` onto ``target`` as an attribute.

        Numeric strings are converted to ``int``. Returns ``False`` when the
        label is absent, ``True`` otherwise.
        """
        if label not in labels:
            return False
        value = labels[label]
        if value.isnumeric():
            value = int(value)
        setattr(target, label, value)
        return True

    def findAppsWithAutoscaleLabels(self):
        """Refresh ``self.dict_apps`` from the apps currently known to Marathon.

        Every app carrying ``LABEL_FOR_AUTOSCALE_ENABLE`` is stored under its
        id with its running/staged task counts and its mandatory and optional
        label values. A missing mandatory label is logged as an error, but the
        app is still registered. Apps already in the registry that Marathon no
        longer lists are left untouched.
        """
        apps = self.marathon_cli.list_apps(embed_counts=True,
                                           embed_task_stats=True)
        logger.debug('Lista recebida {}'.format(apps))
        if not apps:
            logger.warning('0 apps loaded. Your marathon have apps?')
        for app in apps:
            if LABEL_FOR_AUTOSCALE_ENABLE not in app.labels:
                logger.debug(
                    'App: [{}] :: dont have {} = True. If you want to scale, please add labels.'
                    .format(app.id, LABEL_FOR_AUTOSCALE_ENABLE))
                continue
            new_app = MarathonApp(app.id)
            new_app.tasksRunning = app.tasks_running
            new_app.tasksStaged = app.tasks_staged
            for label in MANDATORY_LABELS_APP:
                if not self._copy_label(new_app, app.labels, label):
                    logger.error(
                        'App: [{}] :: dont have MANDATORY_LABELS :: {}'.
                        format(app.id, label))
            for label in OPTIONAL_LABELS_APP:
                self._copy_label(new_app, app.labels, label)
            self.dict_apps[app.id] = new_app

    def scaleApps(self, rabbitmq):
        """Scale every registered app that reports a scale is required.

        :param rabbitmq: object handed to each app's ``scaneRequired``.
        """
        for app_id, app in self.dict_apps.items():
            # NOTE(review): ``scaneRequired`` is the spelling used by the
            # local MarathonApp class as called here; confirm before renaming.
            required, delta = app.scaneRequired(rabbitmq)
            if required:
                self.scaleOneApp(app_id=app_id, delta=delta)
            else:
                logger.info('App: [{}] :: Not Required Scale'.format(app_id))
コード例 #15
0
ファイル: mesos_marathon.py プロジェクト: yun-an/deis
class MarathonHTTPClient(AbstractSchedulerClient):
    """Scheduler client that runs each container as a Marathon app.

    One-off ``run`` commands are delegated to a ``FleetHTTPClient``. The
    container itself is inspected and removed through the Docker remote API
    on port 2375 of the host reported for the app's first task.
    """

    def __init__(self, target, auth, options, pkey):
        super(MarathonHTTPClient, self).__init__(target, auth, options, pkey)
        # ``target`` is only forwarded to the base class; the Marathon host
        # actually used is read from settings.
        self.target = settings.MARATHON_HOST
        self.registry = settings.REGISTRY_HOST + ':' + settings.REGISTRY_PORT
        self.client = MarathonClient('http://' + self.target + ':8180')
        self.fleet = FleetHTTPClient('/var/run/fleet.sock', auth, options,
                                     pkey)

    # helpers
    def _app_id(self, name):
        """Return the Marathon app id for ``name`` (underscores become dots)."""
        return name.replace('_', '.')

    # container api
    def create(self, name, image, command='', **kwargs):
        """Create a new container as a Marathon app with zero instances.

        :param name: container name; must match ``MATCH`` and expose a
            ``c_type`` group.
        :param image: image name, prefixed here with the registry address.
        :param command: command appended to the ``docker run`` line.
        :param kwargs: may hold ``memory`` and ``cpu`` dicts keyed by
            container type.
        """
        app_id = self._app_id(name)
        c_type = re.match(MATCH, name).groupdict()['c_type']
        image = self.registry + '/' + image
        mems = kwargs.get('memory', {}).get(c_type)
        m = 0
        if mems:
            mems = mems.lower()
            # Strip a two-letter unit ("mb") down to one letter, then drop it.
            # NOTE(review): the unit letter itself is discarded, so "1g" ends
            # up as mem=1 -- confirm that callers only pass megabytes.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])
        c = 0.5
        cpu = kwargs.get('cpu', {}).get(c_type)
        if cpu:
            c = cpu
        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(
            app_id, MarathonApp(cmd=cmd, mem=m, cpus=c, instances=0))
        # Wait (best effort) until Marathon reports no running tasks.
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container.

        Scales the app to one instance, polls up to ``POLL_ATTEMPTS`` seconds
        for the task to run, then waits for the Docker container on the
        task's host.

        :raises RuntimeError: if the container does not come up in time.
        """
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def destroy(self, name):
        """Destroy a container.

        Deletes the Marathon app and removes the Docker container from the
        task's host. If any step fails, the app deletion is attempted once
        more on its own.
        """
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` if the container is running on ``host``.

        Returns ``JobState.destroyed`` when the inspection fails, and ``None``
        when the container exists but is not running.
        """
        docker_cli = Client("tcp://{}:2375".format(host),
                            timeout=1200,
                            version='1.17')
        try:
            if docker_cli.inspect_container(name)['State']['Running']:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        """Poll once per second, up to ``POLL_WAIT`` times, for a running container.

        :raises RuntimeError: if the container never reaches ``JobState.up``.
        """
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        """Force-remove container ``name`` on ``host`` if Docker reports a state for it."""
        docker_cli = Client("tcp://{}:2375".format(host),
                            timeout=1200,
                            version='1.17')
        if docker_cli.inspect_container(name)['State']:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command."""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Display the given job's running state.

        Returns ``JobState.up`` for one running task, ``JobState.created``
        for none, and ``JobState.destroyed`` when Marathon cannot be queried.
        Falls through (returns ``None``) if neither count is seen within
        ``POLL_ATTEMPTS`` polls.
        """
        app_id = self._app_id(name)
        try:
            for _ in xrange(POLL_ATTEMPTS):
                # Fetch once per poll so both comparisons see one snapshot.
                tasks_running = self.client.get_app(app_id).tasks_running
                if tasks_running == 1:
                    return JobState.up
                elif tasks_running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed
コード例 #16
0
ファイル: mmapi.py プロジェクト: lorenzodavid/hydra
class MarathonIF(object):
    """Convenience interface over a ``MarathonClient`` with retry/wait helpers."""

    def __init__(self, marathon_addr, my_addr, mesos):
        """Connect to Marathon at ``marathon_addr`` and remember the context.

        :param marathon_addr: address handed to ``MarathonClient``.
        :param my_addr: stored on ``self.myAddr``.
        :param mesos: stored on ``self.mesos``.
        """
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return the list of apps reported by Marathon."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app ``app_id``, or ``None`` if Marathon does not know it."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        """Delete ``app_id`` and return the client's result."""
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        """Cancel the deployment ``dep_id`` and return the client's result."""
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        """Return the deployments currently listed by Marathon."""
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, retrying on errors.

        :param app_id: application id.
        :param trys: maximum number of attempts; 10 seconds pass after each
            failed one.
        :returns: the delete result, or ``None`` when the app does not exist.
        :raises Exception: the last error seen once all attempts failed.
        :raises RuntimeError: when ``trys`` allows no attempt at all.
        """
        last_error = None
        for _ in range(0, trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep the error: a bare ``raise`` after the loop runs outside
                # any handler and fails with "No active exception to reraise"
                # on Python 3.
                last_error = err
                e = sys.exc_info()[0]
                pprint("<p>Error: %s</p>" % e)
                time.sleep(10)
        if last_error is not None:
            raise last_error
        raise RuntimeError(
            "delete_app_ifexisting: no attempt was made (trys=%r)" % (trys,))

    def create_app(self, app_id, attr):
        """Create ``app_id``, retrying while the app is locked by a deployment.

        Up to 10 attempts are made, one second apart, as long as Marathon
        answers with its "App is locked" error. Any other HTTP error is
        raised immediately.

        :returns: the created app as returned by the client.
        :raises marathon.exceptions.MarathonHttpError: the last "locked"
            error when every attempt failed, or any other HTTP error.
        """
        locked_msg = 'App is locked by one or more deployments. Override with the option'
        last_error = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as err:
                if locked_msg not in str(err):
                    raise
                last_error = err
                time.sleep(1)
        # Re-raise the stored error; a bare ``raise`` here has no active
        # exception to re-raise on Python 3.
        raise last_error

    def wait_app_removal(self, app):
        """Poll every 0.2 s until ``app`` is gone from Marathon.

        A "Stuck waiting" line is logged every 25 polls (about 5 seconds).

        :returns: ``True`` once ``get_app`` no longer finds the app.
        """
        cnt = 0
        while self.get_app(app):
            time.sleep(0.2)
            cnt += 1
            # The previous ``cnt > 0`` check was always true and logged on
            # every single poll.
            if cnt % 25 == 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once per second until ``app`` has ``running_count`` running tasks.

        Progress is logged on every 30th poll. There is no timeout.

        NOTE(review): ``get_app`` returns ``None`` for an unknown app, which
        makes this method raise ``AttributeError``; confirm callers only wait
        on apps that already exist.

        :returns: the app object from the poll whose count matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        """Scale ``app`` to ``scale`` instances and return the client's result."""
        return self.mcli.scale_app(app, scale)

    def ping(self):
        """Return the result of pinging Marathon."""
        return self.mcli.ping()
コード例 #17
0
ファイル: mesos_marathon.py プロジェクト: ngpestelos/deis
class MarathonHTTPClient(AbstractSchedulerClient):
    """Scheduler client that runs each container as a Marathon app.

    One-off ``run`` commands are delegated to a ``FleetHTTPClient``. The
    container itself is inspected and removed through the Docker remote API
    on port 2375 of the host reported for the app's first task.
    """

    def __init__(self, target, auth, options, pkey):
        super(MarathonHTTPClient, self).__init__(target, auth, options, pkey)
        # ``target`` is only forwarded to the base class; the Marathon host
        # actually used is read from settings.
        self.target = settings.MARATHON_HOST
        self.registry = settings.REGISTRY_HOST + ":" + settings.REGISTRY_PORT
        self.client = MarathonClient("http://" + self.target + ":8180")
        self.fleet = FleetHTTPClient("/var/run/fleet.sock", auth, options, pkey)

    # helpers
    def _app_id(self, name):
        """Return the Marathon app id for ``name`` (underscores become dots)."""
        return name.replace("_", ".")

    # container api
    def create(self, name, image, command="", **kwargs):
        """Create a new container as a Marathon app with zero instances.

        :param name: container name; must match ``MATCH`` and expose a
            ``c_type`` group.
        :param image: image name, prefixed here with the registry address.
        :param command: command appended to the ``docker run`` line.
        :param kwargs: may hold ``memory`` and ``cpu`` dicts keyed by
            container type.
        """
        app_id = self._app_id(name)
        c_type = re.match(MATCH, name).groupdict()["c_type"]
        image = self.registry + "/" + image
        mems = kwargs.get("memory", {}).get(c_type)
        m = 0
        if mems:
            mems = mems.lower()
            # Strip a two-letter unit ("mb") down to one letter, then drop it.
            # NOTE(review): the unit letter itself is discarded, so "1g" ends
            # up as mem=1 -- confirm that callers only pass megabytes.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])
        c = 0.5
        cpu = kwargs.get("cpu", {}).get(c_type)
        if cpu:
            c = cpu
        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(app_id, MarathonApp(cmd=cmd, mem=m, cpus=c, instances=0))
        # Wait (best effort) until Marathon reports no running tasks.
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container.

        Scales the app to one instance, polls up to ``POLL_ATTEMPTS`` seconds
        for the task to run, then waits for the Docker container on the
        task's host.

        :raises RuntimeError: if the container does not come up in time.
        """
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def destroy(self, name):
        """Destroy a container.

        Deletes the Marathon app and removes the Docker container from the
        task's host. If any step fails, the app deletion is attempted once
        more on its own.
        """
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` if the container is running on ``host``.

        Returns ``JobState.destroyed`` when the inspection fails, and ``None``
        when the container exists but is not running.
        """
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version="1.17")
        try:
            if docker_cli.inspect_container(name)["State"]["Running"]:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        """Poll once per second, up to ``POLL_WAIT`` times, for a running container.

        :raises RuntimeError: if the container never reaches ``JobState.up``.
        """
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        """Force-remove container ``name`` on ``host`` if Docker reports a state for it."""
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version="1.17")
        if docker_cli.inspect_container(name)["State"]:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command."""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Display the given job's running state.

        Returns ``JobState.up`` for one running task, ``JobState.created``
        for none, and ``JobState.destroyed`` when Marathon cannot be queried.
        Falls through (returns ``None``) if neither count is seen within
        ``POLL_ATTEMPTS`` polls.
        """
        app_id = self._app_id(name)
        try:
            for _ in xrange(POLL_ATTEMPTS):
                # Fetch once per poll so both comparisons see one snapshot.
                tasks_running = self.client.get_app(app_id).tasks_running
                if tasks_running == 1:
                    return JobState.up
                elif tasks_running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed
コード例 #18
0
class Scaler:
    """Threshold-based autoscaler for a single Marathon application.

    The application record and its policies are loaded from a REST service,
    usage metrics are read from InfluxDB, and instances are added or removed
    through Marathon. After each scale the haproxy configuration is
    regenerated with ``servicerouter.py``.
    """

    def __init__(self, app_name, config):
        """Load the app description and policies and connect the clients.

        :param app_name: name of the application to manage.
        :param config: nested mapping with ``MARIA_RESTFUL``, ``INFLUXDB``,
            ``MARATHON`` and ``TIME`` sections.
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)

        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'],
                                          config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/" + app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/" + app_name + "/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the JSON parse fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        self.influx_client = InfluxDBClient(config["INFLUXDB"]["host"],
                                            config["INFLUXDB"]["port"],
                                            config["INFLUXDB"]["username"],
                                            config["INFLUXDB"]["password"],
                                            config["INFLUXDB"]["db_name"])
        self.marathon_client = MarathonClient('http://' +
                                              config["MARATHON"]['host'] +
                                              ':' + config["MARATHON"]['port'])

        # One request gives a consistent snapshot of all three values.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reload_haproxy()

    def _reload_haproxy(self):
        """Regenerate /etc/haproxy/haproxy.cfg by running servicerouter.py."""
        marathon_cfg = self.config["MARATHON"]
        os.system("sudo ./servicerouter.py --marathon http://" +
                  marathon_cfg["host"] + ":" + marathon_cfg["port"] +
                  " --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self,
                      log_file="autoscaling.log",
                      level=logging.INFO,
                      formatter=None):
        """Attach a file handler to the scaler's logger.

        :param log_file: path of the log file.
        :param level: level set on the new handler.
        :param formatter: ``logging.Formatter`` to use; a timestamped default
            is created when omitted.
        """
        if formatter is None:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return the CPU usage of ``container_name`` as a percentage.

        Uses the first point of the derivative of ``cpu_cumulative_usage``
        over the last five minutes, divided by 1e9 and by the app's CPUs.

        :param container_name: container name as stored in InfluxDB.
        """
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage from stats where container_name = '" + container_name + "' and time > now()-5m group by time(2s) "
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1] / 1000000000 / self.app["cpus"]) * 100

    def get_container_name(self, mesos_task_id):
        """Return the container name mapped to ``mesos_task_id``.

        The mapping is read from the series named by
        ``config["INFLUXDB"]["ts_mapping"]``.

        :param mesos_task_id: Mesos task id.
        """
        query = "select container_name from " + self.config["INFLUXDB"][
            "ts_mapping"] + " where time>now() - 5m and mesos_task_id = '" + mesos_task_id + "' limit 1"
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all tasks of the managed app."""
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return the average memory usage of ``containers_name`` in percent.

        Each point is divided by ``self.app["mem"] * 1048576`` before
        averaging over the number of containers.

        :param containers_name: list of container names.
        :raises ValueError: if ``containers_name`` is empty.
        """
        number_container = len(containers_name)
        if number_container == 0:
            # An empty list would produce an invalid "in ()" query and then a
            # division by zero; fail early with a clear message instead.
            raise ValueError("no containers to average memory usage over")
        quoted = ",".join("'" + x + "'" for x in containers_name)
        query = "select memory_usage,container_name from stats where  time > now()-5m and  container_name in (" + quoted + ")  limit " + str(
            number_container * 2)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                sum_memory_usage += point[3] / (self.app["mem"] *
                                                1048576) * 100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return the average CPU usage of ``containers_name`` in percent.

        :param containers_name: list of container names.
        :raises ValueError: if ``containers_name`` is empty.
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Same guard as in avg_mem_usage: avoid "in ()" and a zero divisor.
            raise ValueError("no containers to average cpu usage over")
        quoted = ",".join("'" + x + "'" for x in containers_name)
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage,container_name from stats where  time > now()-5m and  container_name in (" + quoted + ") group by time(10s),container_name limit " + str(
            number_container)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1] / 1000000000 / self.app["cpus"] * 100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Add (positive) or remove (negative) ``delta`` instances.

        The target is clamped to ``[min_instances, max_instances]``. Nothing
        happens when the clamped target equals the current instance count.
        Otherwise Marathon is scaled, haproxy is reconfigured after
        ``TIME.w_config_ha`` seconds, the instance count is refreshed and the
        method sleeps ``TIME.after_scale`` seconds.

        :param delta: signed number of instances to add or remove.
        """
        new_instance = self.app["instance"] + delta
        new_instance = min(new_instance, self.app['max_instances'])
        new_instance = max(new_instance, self.app['min_instances'])
        if new_instance == self.app["instance"]:
            return
        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling " + self.app["name"] + " to: " +
                          str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reload_haproxy()
        self.app["instance"] = self.marathon_client.get_app(
            self.app["name"]).instances
        self.logger.debug("Sleep " +
                          str(self.config["TIME"]['after_scale']) + "s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Evaluate one policy against the measured metrics.

        :param policie: mapping with ``metric_type`` (index into ``value``),
            ``upper_threshold``, ``lower_threshold``, ``instances_in`` and
            ``instances_out``.
        :param value: sequence of metric values; ``autoscaling`` passes
            ``(avg_cpu, avg_mem)``.
        :returns: dict ``{"up": n, "down": m}`` with the number of instances
            to add and to remove (0 when the threshold is not crossed).
        """
        metric = value[policie["metric_type"]]
        delta = {"up": 0, "down": 0}
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def autoscaling(self):
        """Run the monitoring loop forever.

        Each cycle averages CPU and memory usage, evaluates every policy and
        scales up by the largest requested amount. If no policy asks to scale
        up, it scales down by the smallest amount all policies agree on,
        capped at 10. Errors are logged at debug level and the loop continues
        after ``TIME.monitor`` seconds.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info(
                    "Avg cpu usage, avg memmory usage, current instance: %f %f %d",
                    avg_cpu, avg_mem, self.app["instance"])
                policies = self.app["policies"]
                scale_up = 0
                # Without any policy the cap of 10 must not be read as a
                # request to remove 10 instances.
                scale_down = 10 if policies else 0
                for policie in policies:
                    delta = self.check_rule(policie, (avg_cpu, avg_mem))
                    scale_up = max(scale_up, delta['up'])
                    scale_down = min(scale_down, delta['down'])

                if scale_up > 0:
                    self.scale(scale_up)
                elif scale_down > 0:
                    self.scale(0 - scale_down)
            except Exception as e:
                self.logger.debug(str(e))
            finally:
                time.sleep(self.config["TIME"]['monitor'])
コード例 #19
0
        full_config = config_read.read()
        if 'anonReload' in full_config:
            logging.info('Configuration already supports anonymous REST reloads.')
        # Only shim in anonymous reload and restart GeoServer if it hasn't been done before
        else:
            config_read.seek(0)
            with open('%s/security/config.xml-output' % GEOSERVER_DATA_DIR, 'w') as config_write:
                line_value = config_read.readline()
                while len(line_value):
                    config_write.write('%s' % line_value)
                    if '<filterChain' in line_value:
                        config_write.write('%s' % filter_inject)
                    line_value = config_read.readline()

            shutil.move('%s/security/config.xml-output' % GEOSERVER_DATA_DIR,
                        '%s/security/config.xml' % GEOSERVER_DATA_DIR)

            response = MARATHON_CLIENT.kill_tasks(GEOSERVER_APP)

            if not len(response) == 1:
                logging.critical('Error restarting GeoServer')
                sys.exit(1)

# Final bootstrap steps, run whether or not the config had to be patched above.
# NOTE(review): wait_for_deployment is defined elsewhere; presumably it blocks
# until the app has no pending Marathon deployment -- confirm.
wait_for_deployment(MARATHON_CLIENT, GEOSERVER_APP)

# Bring the GeoServer app to the configured number of instances.
MARATHON_CLIENT.scale_app(GEOSERVER_APP, GEOSERVER_INSTANCES)

# NOTE(review): block_for_healthy_app is defined elsewhere; presumably it
# waits until that many instances report healthy -- confirm.
block_for_healthy_app(MARATHON_CLIENT, GEOSERVER_APP, GEOSERVER_INSTANCES)

logging.info('Bootstrap complete.')
コード例 #20
0
ファイル: mesos_marathon.py プロジェクト: Kazanz/deis
class MarathonHTTPClient(object):
    """Scheduler client that runs each container as a Marathon app.

    One-off ``run`` commands are delegated to a ``FleetHTTPClient``. The
    container itself is inspected and removed through the Docker remote API
    on port 2375 of the host reported for the app's first task.
    """

    def __init__(self, target, auth, options, pkey):
        # ``target`` is accepted for interface compatibility; the Marathon
        # host actually used is read from settings.
        self.target = settings.MARATHON_HOST
        self.auth = auth
        self.options = options
        self.pkey = pkey
        self.registry = settings.REGISTRY_HOST + ':' + settings.REGISTRY_PORT
        self.client = MarathonClient('http://'+self.target+':8180')
        self.fleet = FleetHTTPClient('/var/run/fleet.sock', auth, options, pkey)

    # helpers
    def _app_id(self, name):
        """Return the Marathon app id for ``name`` (underscores become dots)."""
        return name.replace('_', '.')

    # container api
    def create(self, name, image, command='', **kwargs):
        """Create a container as a Marathon app and scale it down to zero.

        :param name: container name; must match ``MATCH`` and expose a
            ``c_type`` group.
        :param image: image name, prefixed here with the registry address.
        :param command: command appended to the ``docker run`` line.
        :param kwargs: may hold ``memory`` and ``cpu`` dicts keyed by
            container type.
        """
        app_id = self._app_id(name)
        c_type = re.match(MATCH, name).groupdict()['c_type']
        image = self.registry + '/' + image
        mems = kwargs.get('memory', {}).get(c_type)
        m = 0
        if mems:
            mems = mems.lower()
            # Strip a two-letter unit ("mb") down to one letter, then drop it.
            # NOTE(review): the unit letter itself is discarded, so "1g" ends
            # up as mem=1 -- confirm that callers only pass megabytes.
            if mems[-2:-1].isalpha() and mems[-1].isalpha():
                mems = mems[:-1]
            m = int(mems[:-1])
        c = 0.5
        cpu = kwargs.get('cpu', {}).get(c_type)
        if cpu:
            c = cpu
        cmd = "docker run --name {name} -P {image} {command}".format(
            name=name, image=image, command=command)
        self.client.create_app(app_id, MarathonApp(cmd=cmd, mem=m, cpus=c))
        self.client.scale_app(app_id, 0, force=True)
        # Wait (best effort) until Marathon reports no running tasks.
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 0:
                return
            time.sleep(1)

    def start(self, name):
        """Start a container.

        Scales the app to one instance, polls up to ``POLL_ATTEMPTS`` seconds
        for the task to run, then waits for the Docker container on the
        task's host.

        :raises RuntimeError: if the container does not come up in time.
        """
        app_id = self._app_id(name)
        self.client.scale_app(app_id, 1, force=True)
        for _ in xrange(POLL_ATTEMPTS):
            if self.client.get_app(app_id).tasks_running == 1:
                break
            time.sleep(1)
        host = self.client.get_app(app_id).tasks[0].host
        self._waitforcontainer(host, name)

    def stop(self, name):
        """Stop a container (not implemented)."""
        raise NotImplementedError

    def destroy(self, name):
        """Destroy a container.

        Deletes the Marathon app and removes the Docker container from the
        task's host. If any step fails, the app deletion is attempted once
        more on its own.
        """
        app_id = self._app_id(name)
        try:
            host = self.client.get_app(app_id).tasks[0].host
            self.client.delete_app(app_id, force=True)
            self._delete_container(host, name)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.client.delete_app(app_id, force=True)

    def _get_container_state(self, host, name):
        """Return ``JobState.up`` if the container is running on ``host``.

        Returns ``JobState.destroyed`` when the inspection fails, and ``None``
        when the container exists but is not running.
        """
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version='1.17')
        try:
            if docker_cli.inspect_container(name)['State']['Running']:
                return JobState.up
        except Exception:
            return JobState.destroyed

    def _waitforcontainer(self, host, name):
        """Poll once per second, up to ``POLL_WAIT`` times, for a running container.

        :raises RuntimeError: if the container never reaches ``JobState.up``.
        """
        for _ in xrange(POLL_WAIT):
            if self._get_container_state(host, name) == JobState.up:
                return
            time.sleep(1)
        raise RuntimeError("App container Not Started")

    def _delete_container(self, host, name):
        """Force-remove container ``name`` on ``host`` if Docker reports a state for it."""
        docker_cli = Client("tcp://{}:2375".format(host), timeout=1200, version='1.17')
        if docker_cli.inspect_container(name)['State']:
            docker_cli.remove_container(name, force=True)

    def run(self, name, image, entrypoint, command):  # noqa
        """Run a one-off command"""
        return self.fleet.run(name, image, entrypoint, command)

    def state(self, name):
        """Return the running state of the job ``name``.

        Returns ``JobState.up`` for one running task, ``JobState.created``
        for none, and ``JobState.destroyed`` when Marathon cannot be queried.
        Falls through (returns ``None``) if neither count is seen within
        ``POLL_ATTEMPTS`` polls.
        """
        app_id = self._app_id(name)
        try:
            for _ in xrange(POLL_ATTEMPTS):
                # Fetch once per poll so both comparisons see one snapshot.
                tasks_running = self.client.get_app(app_id).tasks_running
                if tasks_running == 1:
                    return JobState.up
                elif tasks_running == 0:
                    return JobState.created
                time.sleep(1)
        except Exception:
            return JobState.destroyed

    def attach(self, name):
        """
        Attach to a job's stdin, stdout and stderr
        """
        raise NotImplementedError
コード例 #21
0
class Scaler:
    """Threshold-based autoscaler for a single Marathon application.

    The application record and its policies are loaded from a REST service,
    usage metrics are read from InfluxDB, and instances are added or removed
    through Marathon. After each scale the haproxy configuration is
    regenerated with ``servicerouter.py``.
    """

    def __init__(self, app_name, config):
        """Load the app description and policies and connect the clients.

        :param app_name: name of the application to manage.
        :param config: nested mapping with ``MARIA_RESTFUL``, ``INFLUXDB``,
            ``MARATHON`` and ``TIME`` sections.
        """
        self.logger = logging.getLogger("autoscaling")
        self.logger.setLevel(logging.DEBUG)

        self.logger.debug("Init object scaler...")
        self.config = config

        self.logger.debug("Connect RESTful mariadb and get policies...")
        conn = http.client.HTTPConnection(config["MARIA_RESTFUL"]['host'], config["MARIA_RESTFUL"]['port'])
        try:
            conn.request("GET", "/app/name/"+app_name)
            json_app = conn.getresponse().read().decode("utf-8")
            self.app = json.loads(json_app)
            conn.request("GET", "/app/name/"+app_name+"/policies")
            json_policies = conn.getresponse().read().decode("utf-8")
            self.app["policies"] = json.loads(json_policies)
        finally:
            # Release the socket even when a request or the JSON parse fails.
            conn.close()

        self.logger.debug("Connect influxdb and marathon...")
        self.influx_client = InfluxDBClient(config["INFLUXDB"]["host"], config["INFLUXDB"]["port"], config["INFLUXDB"]["username"], config["INFLUXDB"]["password"], config["INFLUXDB"]["db_name"])
        self.marathon_client = MarathonClient('http://'+config["MARATHON"]['host']+':'+config["MARATHON"]['port'])

        # One request gives a consistent snapshot of all three values.
        marathon_app = self.marathon_client.get_app(app_name)
        self.app["instance"] = marathon_app.instances
        self.app["mem"] = marathon_app.mem
        self.app["cpus"] = marathon_app.cpus

        self.logger.debug("Reconfig haproxy.cfg...")
        self._reload_haproxy()

    def _reload_haproxy(self):
        """Regenerate /etc/haproxy/haproxy.cfg by running servicerouter.py."""
        marathon_cfg = self.config["MARATHON"]
        os.system("sudo ./servicerouter.py --marathon http://"+marathon_cfg["host"]+":"+marathon_cfg["port"]+" --haproxy-config /etc/haproxy/haproxy.cfg")

    def setup_logging(self, log_file = "autoscaling.log", level = logging.INFO, formatter = None):
        """Attach a file handler to the scaler's logger.

        :param log_file: path of the log file.
        :param level: level set on the new handler.
        :param formatter: ``logging.Formatter`` to use; a timestamped default
            is created when omitted.
        """
        if formatter is None:
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh = logging.FileHandler(log_file)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def get_cpu_usage(self, container_name):
        """Return the CPU usage of ``container_name`` as a percentage.

        Uses the first point of the derivative of ``cpu_cumulative_usage``
        over the last five minutes, divided by 1e9 and by the app's CPUs.

        :param container_name: container name as stored in InfluxDB.
        """
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage from stats where container_name = '"+container_name+"' and time > now()-5m group by time(2s) "
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return (points[0][1]/1000000000/self.app["cpus"])*100

    def get_container_name(self, mesos_task_id):
        """Return the container name mapped to ``mesos_task_id``.

        The mapping is read from the series named by
        ``config["INFLUXDB"]["ts_mapping"]``.

        :param mesos_task_id: Mesos task id.
        """
        query = "select container_name from "+self.config["INFLUXDB"]["ts_mapping"]+" where time>now() - 5m and mesos_task_id = '" +mesos_task_id+"' limit 1"
        result = self.influx_client.query(query)
        points = result[0]["points"]
        return points[0][2]

    def get_containers_name(self):
        """Return the container names of all tasks of the managed app."""
        tasks = self.marathon_client.list_tasks(self.app["name"])
        return [self.get_container_name(task.id) for task in tasks]

    def avg_mem_usage(self, containers_name):
        """Return the average memory usage of ``containers_name`` in percent.

        Each point is divided by ``self.app["mem"] * 1048576`` before
        averaging over the number of containers.

        :param containers_name: list of container names.
        :raises ValueError: if ``containers_name`` is empty.
        """
        number_container = len(containers_name)
        if number_container == 0:
            # An empty list would produce an invalid "in ()" query and then a
            # division by zero; fail early with a clear message instead.
            raise ValueError("no containers to average memory usage over")
        quoted = ",".join("'"+x+"'" for x in containers_name)
        query = "select memory_usage,container_name from stats where  time > now()-5m and  container_name in ("+quoted+")  limit "+str(number_container*2)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_memory_usage = 0
        for point in points:
            if point[3] is not None:
                sum_memory_usage += point[3]/(self.app["mem"]*1048576)*100
        return sum_memory_usage / number_container

    def avg_cpu_usage(self, containers_name):
        """Return the average CPU usage of ``containers_name`` in percent.

        :param containers_name: list of container names.
        :raises ValueError: if ``containers_name`` is empty.
        """
        number_container = len(containers_name)
        if number_container == 0:
            # Same guard as in avg_mem_usage: avoid "in ()" and a zero divisor.
            raise ValueError("no containers to average cpu usage over")
        quoted = ",".join("'"+x+"'" for x in containers_name)
        query = "select DERIVATIVE(cpu_cumulative_usage)  as cpu_usage,container_name from stats where  time > now()-5m and  container_name in ("+quoted+") group by time(10s),container_name limit "+str(number_container)
        result = self.influx_client.query(query)
        points = result[0]["points"]
        sum_cpu_usage = 0
        for point in points:
            sum_cpu_usage += point[1]/1000000000/self.app["cpus"]*100
        return sum_cpu_usage / number_container

    def scale(self, delta):
        """Add (positive) or remove (negative) ``delta`` instances.

        The target is clamped to ``[min_instances, max_instances]``. Nothing
        happens when the clamped target equals the current instance count.
        Otherwise Marathon is scaled, haproxy is reconfigured after
        ``TIME.w_config_ha`` seconds, the instance count is refreshed and the
        method sleeps ``TIME.after_scale`` seconds.

        :param delta: signed number of instances to add or remove.
        """
        new_instance = self.app["instance"] + delta
        new_instance = min(new_instance, self.app['max_instances'])
        new_instance = max(new_instance, self.app['min_instances'])
        if new_instance == self.app["instance"]:
            return
        self.marathon_client.scale_app(self.app["name"], new_instance)
        self.logger.debug("Scaling "+self.app["name"]+" to: "+str(new_instance))
        self.logger.debug("Waiting for config file haproxy.cfg...")
        time.sleep(self.config["TIME"]['w_config_ha'])
        self.logger.debug("Config file haproxy.cfg...")
        self._reload_haproxy()
        self.app["instance"] = self.marathon_client.get_app(self.app["name"]).instances
        self.logger.debug("Sleep "+str(self.config["TIME"]['after_scale'])+"s...")
        time.sleep(self.config["TIME"]['after_scale'])

    def check_rule(self, policie, value):
        """Evaluate one policy against the measured metrics.

        :param policie: mapping with ``metric_type`` (index into ``value``),
            ``upper_threshold``, ``lower_threshold``, ``instances_in`` and
            ``instances_out``.
        :param value: sequence of metric values; ``autoscaling`` passes
            ``(avg_cpu, avg_mem)``.
        :returns: dict ``{"up": n, "down": m}`` with the number of instances
            to add and to remove (0 when the threshold is not crossed).
        """
        metric = value[policie["metric_type"]]
        delta = {"up": 0, "down": 0}
        # Check upper_threshold
        if metric > policie["upper_threshold"]:
            delta['up'] = policie["instances_in"]
        # Check lower_threshold
        if metric < policie["lower_threshold"]:
            delta['down'] = policie["instances_out"]
        return delta

    def autoscaling(self):
        """Run the monitoring loop forever.

        Each cycle averages CPU and memory usage, evaluates every policy and
        scales up by the largest requested amount. If no policy asks to scale
        up, it scales down by the smallest amount all policies agree on,
        capped at 10. Errors are logged at debug level and the loop continues
        after ``TIME.monitor`` seconds.
        """
        while True:
            try:
                containers_name = self.get_containers_name()
                avg_cpu = self.avg_cpu_usage(containers_name)
                avg_mem = self.avg_mem_usage(containers_name)
                self.logger.info("Avg cpu usage, avg memmory usage, current instance: %f %f %d", avg_cpu, avg_mem, self.app["instance"])
                policies = self.app["policies"]
                scale_up = 0
                # Without any policy the cap of 10 must not be read as a
                # request to remove 10 instances.
                scale_down = 10 if policies else 0
                for policie in policies:
                    delta = self.check_rule(policie, (avg_cpu, avg_mem))
                    scale_up = max(scale_up, delta['up'])
                    scale_down = min(scale_down, delta['down'])

                if scale_up > 0:
                    self.scale(scale_up)
                elif scale_down > 0:
                    self.scale(0-scale_down)
            except Exception as e:
                self.logger.debug(str(e))
            finally:
                time.sleep(self.config["TIME"]['monitor'])
コード例 #22
0
def scale_application(client: MarathonClient, appid: str, instances: int):
    """Force-scale ``appid`` to ``instances`` and wait for the deployment.

    The value returned by ``client.scale_app`` is handed straight to
    ``wait_for_deployment`` together with the client.
    """
    wait_for_deployment(client,
                        client.scale_app(appid, instances, force=True))
コード例 #23
0
# Endless control loop: every two seconds, move the app's instance count one
# step towards the length of the "celery" list, within the configured bounds.
while True:
    print("Loop!")

    # Keep asking Marathon for the app until a request succeeds; a failed
    # request is printed and retried after one second.
    while True:
        try:
            app = c.get_app(marathon_app)
        except MarathonError as err:
            print(err)
            time.sleep(1)
            continue
        if app is not None:
            break

    waitingDocs = r.llen("celery")

    # Step by one instance towards the queue length.
    instances = app.instances
    if waitingDocs != instances:
        instances += 1 if waitingDocs > instances else -1

    # Clamp to [min_instances, max_instances]; the upper bound is applied last.
    lower_bounded = max(min_instances, instances)
    instances = min(max_instances, lower_bounded)
    print("App instances: ", app.instances, " - New value: ", instances)

    if app.instances != instances:
        print("Delta: ", (app.instances - instances))
        c.scale_app(marathon_app, instances=instances, force=True)

    sys.stdout.flush()
    time.sleep(2)