コード例 #1
0
ファイル: cluster.py プロジェクト: Vlad777/memsql-mesos
def cluster_create(root, params):
    """Create a new cluster record from ``params`` and return it serialized.

    The request is rejected when ``root.clusters`` already holds a cluster
    with the same ``display_name``. Otherwise ``params`` is stamped in place
    with the CREATING status and the current ``unix_timestamp()`` and passed
    to ``root.clusters.create``.

    Raises:
        G.Invalid: a cluster with this display name already exists.
        ApiException: a RecordValidationError was raised while stamping or
            creating the record; its message is carried over.
    """
    requested_name = params.display_name
    duplicate = root.clusters.find(display_name=requested_name)
    if duplicate is not None:
        raise G.Invalid(
            "Cluster name must be unique",
            "unique name",
            requested_name,
            ["display_name"])

    try:
        # Note: the caller's params object is mutated here.
        params["status"] = ClusterStatus.CREATING
        params["created"] = unix_timestamp()
        return root.clusters.create(params).serialize()
    except RecordValidationError as validation_error:
        raise ApiException(str(validation_error))
コード例 #2
0
    def work(self):
        """Advance every cluster in ``self.root.clusters`` by one step.

        A cluster that has not successfully started and whose ``created``
        time plus ``CLUSTER_TIMEOUT`` is already in the past is logged and
        deleted (rolled back). Otherwise the action depends on its status:
        DELETING -> ``_delete_cluster``, WAITING_FOR_AGENTS ->
        ``_deploy_memsql``, WAITING_FOR_MEMSQL -> ``_check_memsql``. Any
        other status is left untouched.
        """
        for cluster in self.root.clusters:
            # Only clusters that never finished starting can time out.
            timed_out = (
                not cluster.data.successfully_started
                and cluster.data.created + CLUSTER_TIMEOUT < unix_timestamp()
            )

            if timed_out:
                logger.warning(
                    "Cluster %s has taken more than %d seconds to start up, "
                    "so we are rolling it back"
                    % (cluster.name, CLUSTER_TIMEOUT))
                self._delete_cluster(cluster)
                continue

            status = cluster.data.status
            if status == const.ClusterStatus.DELETING:
                self._delete_cluster(cluster)
            elif status == const.ClusterStatus.WAITING_FOR_AGENTS:
                self._deploy_memsql(cluster)
            elif status == const.ClusterStatus.WAITING_FOR_MEMSQL:
                self._check_memsql(cluster)
コード例 #3
0
def run(scheduler_pid):
    """Run the resiliency test against the scheduler and print a report.

    Until ``TEST_RUN_TIME`` has elapsed (or Ctrl-C is pressed), the loop
    wakes up every 5 seconds, asks the scheduler API to create a cluster
    whenever fewer than ``MAX_CLUSTERS`` are tracked and more than
    ``CLUSTER_CREATE_INTERVAL`` has passed since the last attempt, and
    deletes the oldest tracked cluster once ``MAX_CLUSTERS`` is reached.
    Afterwards all remaining clusters are deleted, the function waits 120
    seconds, and the before/after counts are printed.

    Args:
        scheduler_pid: PID whose ``/proc/<pid>/fd`` directory is listed to
            count open file descriptors.
    """

    def count_open_resources():
        # Same sampling order as before: fd directory first, then netstat.
        fd_count = len(os.listdir("/proc/%d/fd" % scheduler_pid))
        netstat_output = run_shell_command(["netstat", "-nap", "tcp"])
        # The "socket" figure is the number of newline-separated pieces of
        # the netstat output, not a parsed socket count.
        return fd_count, len(netstat_output.split("\n"))

    log.setup()
    logger.info("Running resiliency test for MemSQL Mesos framework")

    initial_fds, initial_sockets = count_open_resources()

    live_clusters = []
    cluster_seq = 1
    last_create_attempt = 0
    start_time = unix_timestamp()
    try:
        now = unix_timestamp()
        while now < start_time + TEST_RUN_TIME:
            # The timestamp is taken before the sleep, so it is 5 seconds
            # old by the time it is used below.
            now = unix_timestamp()
            time.sleep(5)

            has_room = len(live_clusters) < MAX_CLUSTERS
            if has_room and last_create_attempt + CLUSTER_CREATE_INTERVAL < now:
                # Recorded before the API call, so a failed attempt also
                # waits a full interval before the next try.
                last_create_attempt = now
                logger.info("Creating new cluster")
                request = {
                    "display_name": "resiliency-test-cluster-%d" % cluster_seq,
                    "num_leaves": 1,
                    "num_aggs": 1,
                    "flavor": "small",
                    "install_demo": True,
                    "high_availability": False
                }
                try:
                    created = call_scheduler_api("cluster/create", request)
                except Exception as e:
                    # Best effort: log and fall through. The list is still
                    # below MAX_CLUSTERS here, so the check below is a no-op.
                    logger.warning("Exception when calling cluster/create: %s" % str(e))
                else:
                    cluster_seq += 1
                    live_clusters.append(created)

            if len(live_clusters) == MAX_CLUSTERS:
                # Retire the oldest cluster to make room for the next one.
                delete_cluster(live_clusters.pop(0))
    except KeyboardInterrupt:
        # Ctrl-C ends the test loop early but still runs cleanup and report.
        pass

    for leftover in live_clusters:
        delete_cluster(leftover)

    print("Sleeping for 120 seconds to let TCP sockets close themselves")
    try:
        time.sleep(120)
    except KeyboardInterrupt:
        pass

    final_fds, final_sockets = count_open_resources()

    elapsed = unix_timestamp() - start_time
    report_lines = [
        "MemSQL Mesos framework resiliency test ran for %s seconds" % elapsed,
        "We initially had:",
        "%d open file descriptors" % initial_fds,
        "%d open TCP sockets" % initial_sockets,
        "We finished with:",
        "%d open file descriptors" % final_fds,
        "%d open TCP sockets" % final_sockets,
    ]
    for line in report_lines:
        print(line)