def create_pod_config(self, sleep_time, dynamic_factor, host_limit_1=False):
    """Build a single-container pod spec that echoes a value and then sleeps.

    The container runs ``echo <dynamic_factor> && sleep <sleep_time>`` through
    the shell with fixed resource limits (0.1 CPU, 32 MB memory, 32 MB disk).
    The returned spec always carries the ``peloton/instance`` label.

    Args:
        sleep_time: Value substituted into the ``sleep`` part of the command.
        dynamic_factor: Value substituted into the ``echo`` part of the
            command.
        host_limit_1: When truthy, a label constraint referencing the
            instance label is attached to the pod spec; otherwise the
            constraint is ``None``.
            NOTE(review): presumably this limits placement to one such pod
            per host -- confirm against the constraint semantics.

    Returns:
        A ``pod.PodSpec`` with one container, the instance label, and the
        optional constraint.
    """
    limits = pod.ResourceSpec(
        cpu_limit=0.1,
        mem_limit_mb=32,
        disk_limit_mb=32,
    )
    shell_line = "echo %s && sleep %s" % (
        str(dynamic_factor),
        str(sleep_time),
    )
    container = pod.ContainerSpec(
        resource=limits,
        command=mesos.CommandInfo(shell=True, value=shell_line),
    )

    label = v1alpha_peloton.Label(
        key="peloton/instance",
        value="instance-label",
    )

    if not host_limit_1:
        placement_constraint = None
    else:
        label_rule = pod.LabelConstraint(
            kind=1,  # Label
            condition=2,  # Equal
            requirement=0,
            label=label,
        )
        placement_constraint = pod.Constraint(
            type=1,  # Label constraint
            label_constraint=label_rule,
        )

    return pod.PodSpec(
        containers=[container],
        labels=[label],
        constraint=placement_constraint,
    )
Example #2
0
def test__update_with_host_maintenance_and_agent_down(stateless_job,
                                                      maintenance):
    """
    1. Create a large stateless job (that takes up more than two-thirds of
       the cluster resources) with MaximumUnavailableInstances=2.
    2. Start host maintenance on one of the hosts (say A) having pods of the job.
       MaximumUnavailableInstances=2 ensures that not more than 2 pods are
       unavailable due to host maintenance at a time.
    3. Take down another host which has pods running on it. This will cause
       TASK_LOST to be sent for all pods on the host after 75 seconds.
    4. Start an update to modify the instance spec of one of the pods.
    5. Since TASK_LOST would cause the job SLA to be violated, instances on the
       host A should not be killed once LOST event is received. Verify that
       host A does not transition to DOWN.
    """
    stateless_job.job_spec.instance_count = 30
    stateless_job.job_spec.default_spec.containers[0].resource.cpu_limit = 0.3
    stateless_job.job_spec.sla.maximum_unavailable_instances = 2
    stateless_job.create()
    stateless_job.wait_for_all_pods_running()

    # Order hosts by how many of the job's tasks they run, busiest first.
    hosts = [h.hostname for h in query_hosts([]).host_infos]
    host_to_task_count = get_host_to_task_count(hosts, stateless_job)
    sorted_hosts = [
        t[0] for t in sorted(host_to_task_count.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
    ]

    # Pick a host that has pods running on it to start maintenance on it.
    test_host = sorted_hosts[0]
    # pick another host which has pods of the job to take down
    host_container = get_container([sorted_hosts[1]])

    try:
        host_container.stop()
        maintenance["start"]([test_host])

        # Add an extra container to instance 10's spec so the update has a
        # per-instance change to roll out.
        stateless_job.job_spec.instance_spec[10].containers.extend([
            pod_pb2.ContainerSpec(resource=pod_pb2.ResourceSpec(
                disk_limit_mb=20))
        ])
        update = StatelessUpdate(stateless_job,
                                 updated_job_spec=stateless_job.job_spec,
                                 batch_size=0)
        update.create()
        update.wait_for_state(goal_state="SUCCEEDED")

        stateless_job.stop()

        wait_for_host_state(test_host, host_pb2.HOST_STATE_DOWN)
        assert False, 'Host should not transition to DOWN'
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit and other BaseException-derived
        # signals are not swallowed. AssertionError is still caught here,
        # so the `assert False` above also lands in this handler; the check
        # below is what actually fails the test if the host went DOWN.
        # NOTE(review): presumably wait_for_host_state raises when the host
        # never reaches DOWN, which is the expected path -- confirm.
        assert is_host_in_state(test_host, host_pb2.HOST_STATE_DRAINING), \
            'Host should still be DRAINING and must not transition to DOWN'
    finally:
        # Always bring the stopped host back, whatever the outcome.
        host_container.start()
 def create_pod_config(self, sleep_time, dynamic_factor):
     """Build a single-container pod spec that echoes a value and then sleeps.

     The container runs ``echo <dynamic_factor> && sleep <sleep_time>``
     through the shell with fixed resource limits (0.1 CPU, 32 MB memory,
     32 MB disk).

     Args:
         sleep_time: Value substituted into the ``sleep`` part of the command.
         dynamic_factor: Value substituted into the ``echo`` part of the
             command.

     Returns:
         A ``pod.PodSpec`` containing exactly one container.
     """
     limits = pod.ResourceSpec(
         cpu_limit=0.1,
         mem_limit_mb=32,
         disk_limit_mb=32,
     )
     shell_line = "echo %s && sleep %s" % (
         str(dynamic_factor),
         str(sleep_time),
     )
     container = pod.ContainerSpec(
         resource=limits,
         command=mesos.CommandInfo(shell=True, value=shell_line),
     )
     return pod.PodSpec(containers=[container])
 def new_resource_constraint(self, cpu):
     """Return a resource constraint whose minimum spec has ``cpu_limit=cpu``.

     Args:
         cpu: Value used as ``cpu_limit`` of the minimum ``pod.ResourceSpec``.

     Returns:
         A ``v1hostmgr.ResourceConstraint`` with only ``minimum`` set.
     """
     minimum_resources = pod.ResourceSpec(cpu_limit=cpu)
     return v1hostmgr.ResourceConstraint(minimum=minimum_resources)