Example #1: the constructor of a simpy-based simulation environment, wiring a Topology, a (possibly cached) ClusterContext, a Scheduler, and the supporting simulation components together.
    def __init__(self, topology: Topology, cluster_context: ClusterContext = None, initial_time=0,
                 scheduler_params=None):
        super().__init__(initial_time)

        self.request_generator = object  # placeholder; overwritten once a request generator is attached
        self.request_queue = simpy.Store(self)
        self.scheduler_queue = simpy.Store(self)
        self.topology: Topology = topology
        topology.create_index()

        # allows us to inject a pre-calculated bandwidth graph that was cached
        if cluster_context is None:
            self.cluster: ClusterContext = topology.create_cluster_context()
        else:
            self.cluster: ClusterContext = cluster_context

        if scheduler_params:
            self.scheduler = Scheduler(self.cluster, **scheduler_params)
        else:
            self.scheduler = Scheduler(self.cluster)

        self.faas_gateway = FaasGateway(self)
        self.execution_simulator = ExecutionSimulator(self)

        self.clock = SimulatedClock(self, start=datetime(1970, 1, 1, 0, 0, 0))
        self.metrics = Metrics(self, RuntimeLogger(self.clock))

        self.execution_time_oracle = oracles.FittedExecutionTimeOracle()
        self.startup_time_oracle = oracles.HackedFittedStartupTimeOracle()
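A minimal sketch of how this constructor might be called with a pre-computed cluster context; the class name FaasSimEnvironment, the example_topology helper, and the scheduler parameter shown are assumptions for illustration, not taken from the snippet:

topology = example_topology()  # any Topology factory (assumed name)
# reusing a context across runs skips recomputing the bandwidth graph
cached_context = topology.create_cluster_context()

sim_env = FaasSimEnvironment(
    topology,
    cluster_context=cached_context,
    scheduler_params={'percentage_of_nodes_to_score': 100},  # assumed parameter name
)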
Example #2: a simpy worker process that takes pods off a queue, measures the wall-clock duration of each scheduling call, replays it as simulated time, and logs the placement.
def run_scheduler_worker(env: simpy.Environment, queue: simpy.Store, context: ClusterContext, scheduler: Scheduler,
                         oracles: List[Oracle], log: List[LoggingRow]):
    while True:
        logging.debug('Scheduler waiting for pod...')
        pod = yield queue.get()

        # TODO fix time not changing (env.now)
        logging.debug('Pod received by scheduler at %.2f', env.now)
        log.append(LoggingRow(env.now, EventType.POD_RECEIVED, pod.name))

        # execute scheduling algorithm
        then = time.time()
        result = scheduler.schedule(pod)
        duration = time.time() - then

        yield env.timeout(duration)

        logging.debug('Pod scheduling took %.2f ms, and yielded %s', duration * 1000, result)

        # weight the startup
        metadata = dict(o.estimate(context, pod, result) for o in oracles)

        # also add the image name and the selected node to the metadata
        metadata['image'] = pod.spec.containers[0].image
        metadata['suggested_host'] = None if result.suggested_host is None else result.suggested_host.name

        log.append(LoggingRow(env.now, EventType.POD_SCHEDULED, pod.name, metadata))
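Since run_scheduler_worker is an ordinary simpy process function, wiring it up only takes an environment and a store. A sketch, assuming context, scheduler, and oracles are constructed as in the other examples:

import simpy
from typing import List

env = simpy.Environment()
queue = simpy.Store(env)
log: List[LoggingRow] = []

# start the worker loop as a simpy process and drive the simulation
env.process(run_scheduler_worker(env, queue, context, scheduler, oracles, log))
env.run(until=100)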
Example #3: an environment factory that assembles metrics, the FaaS system, the scheduler, and optional background processes (resource monitor, autoscaler, label solver) from an EnvSettings object.
def configure_env(settings: EnvSettings, topology: Topology):
    env = Environment()
    env.simulator_factory = settings.simulator_factory
    if settings.null_logger:
        env.metrics = OurMetrics(env, log=NullLogger())
    else:
        env.metrics = OurMetrics(env, log=RuntimeLogger(SimulatedClock(env)))
    env.topology = topology
    env.faas = OurFaas(env, settings.scale_by_requests,
                       settings.scale_by_requests_per_replica,
                       settings.scale_by_queue_requests_per_replica)
    env.container_registry = ContainerRegistry()
    env.storage_index = settings.storage_index
    env.cluster = SimulationClusterContext(env)
    env.scheduler = Scheduler(env.cluster, **settings.sched_params)
    env.metrics_server = MetricsServer()

    # TODO inject resource oracle
    resource_monitor = ResourceMonitor(
        env, ResourceOracle(resources_per_node_image))
    env.background_processes.append(lambda env: resource_monitor.run())

    if settings.scale_by_resources:
        hpa_settings = settings.hpaSettings
        hpa = HorizontalPodAutoscaler(
            env,
            average_window=hpa_settings.average_window,
            reconcile_interval=hpa_settings.reconcile_interval,
            target_tolerance=hpa_settings.target_tolerance)
        env.background_processes.append(lambda env: hpa.run())

    if settings.label_problem_solver_settings is not None:
        solver = LabelSolverProcess(settings.label_problem_solver_settings)
        env.background_processes.append(lambda env: solver.solve(env))
    return env
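Each entry of env.background_processes is a callable that receives the environment and returns a simpy process generator. A minimal sketch of a compatible process (heartbeat and its interval are illustrative, not part of the project):

import logging

def heartbeat(env):
    # illustrative background process: wakes up once per simulated minute
    while True:
        yield env.timeout(60)
        logging.debug('heartbeat at t=%.2f', env.now)

# registered the same way as the monitor and autoscaler above:
# env.background_processes.append(lambda env: heartbeat(env))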
Example #4: a small experiment driver that synthesizes a multi-region cloud topology and runs the scheduler over generated pods.
def run_experiment(num_nodes, sched_params):
    synth = CloudRegionsSynthesizer(regions=10,
                                    vms_per_region=int(num_nodes / 10))
    t = synth.create_topology()
    t._bandwidth_graph = RandomBwGraph()  # inject a synthetic bandwidth graph via the private attribute
    ctx = t.create_cluster_context()
    scheduler = Scheduler(ctx, **sched_params)
    run_loop(ctx, pod_gen(ctx), scheduler)
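A sketch of sweeping this driver over cluster sizes; the node counts and the scheduler parameter are illustrative assumptions:

for num_nodes in (50, 100, 500):  # kept divisible by the 10 regions above
    run_experiment(num_nodes, sched_params={'percentage_of_nodes_to_score': 50})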
Example #5: a factory hook returning a Scheduler with the default predicates and priorities.
def create_scheduler(self, env):
    return Scheduler(env.cluster)
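Overriding this hook is the natural place to customize scheduling. A sketch, assuming the priorities keyword behaves as in Example #6 and BalancedResourcePriority is importable:

def create_scheduler(self, env):
    # illustrative override: score nodes by balanced resource usage only
    return Scheduler(env.cluster, priorities=[(1.0, BalancedResourcePriority())])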
Example #6: the entry point of a standalone Kubernetes scheduler that watches for pending pods and places them with the skippy Scheduler.
def main():
    # Parse the arguments
    parser = argparse.ArgumentParser(description='Skippy - Navigating functions to the edge of the world (i.e. K8s)')
    parser.add_argument('-s', '--scheduler-name', action='store', dest='scheduler_name',
                        help='Change the name of the scheduler. New pods which should be placed by this scheduler need '
                             'to define this name. Set \'None\' to disable the name check (if this scheduler is '
                             'completely replacing the kube-scheduler).', default='skippy-scheduler')
    parser.add_argument('-n', '--namespace', action='store', dest='namespace',
                        help='Only watch pods of a specific namespace.')
    parser.add_argument('-w', '--weights', action='store', dest='weights',
                        help='An array of floats defining the weights of the different priority functions',
                        default=None)
    parser.add_argument('-u', '--use-default', action='store_true', dest='default',
                        help='Use the predicate and priority functions of the default scheduler.', default=False)
    parser.add_argument('-c', '--kube-config', action='store_true', dest='kube_config',
                        help='Load kube-config from home dir instead of in-cluster-config from envs.', default=False)
    parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                        help='Enable debug logs.', default=False)
    args = parser.parse_args()
    level = logging.DEBUG if args.debug else logging.INFO
    scheduler_name = None if args.scheduler_name == 'None' else args.scheduler_name
    namespace = None if args.namespace == 'None' else args.namespace
    weights = None if args.weights is None else [float(x) for x in ast.literal_eval(args.weights)]

    # Set the log level
    logging.getLogger().setLevel(level)

    # Load the kubernetes API config
    if args.kube_config:
        # Load the configuration from ~/.kube
        logging.debug('Loading kube config...')
        config.load_kube_config()
    else:
        # Load the configuration when running inside the cluster (by reading envs set by k8s)
        logging.debug('Loading in-cluster config...')
        config.load_incluster_config()

    if weights:
        logging.info('Using custom weights: %s', weights)

    # Initialize the API, context and scheduler
    cluster_context = KubeClusterContext()
    api = client.CoreV1Api()
    if args.default:
        logging.debug('Using default scheduler priority functions')
        priorities = [(1.0, BalancedResourcePriority()),
                      (1.0, ImageLocalityPriority())] if weights is None \
            else [(weights[0], BalancedResourcePriority()),
                  (weights[1], ImageLocalityPriority())]
    else:
        priorities = None if weights is None \
            else [(weights[0], BalancedResourcePriority()),
                  (weights[1], LatencyAwareImageLocalityPriority()),
                  (weights[2], LocalityTypePriority()),
                  (weights[3], DataLocalityPriority()),
                  (weights[4], CapabilityPriority())]
    scheduler = Scheduler(cluster_context, priorities=priorities)

    # Either watch all namespaces or only the one set as argument
    w = watch.Watch()
    if namespace is not None:
        logging.debug('Watching for new pod events in namespace %s...', args.namespace)
        stream = w.stream(api.list_namespaced_pod, args.namespace)
    else:
        logging.debug('Watching for new pod events across all namespaces...')
        stream = w.stream(api.list_pod_for_all_namespaces)

    if scheduler_name:
        logging.debug('Watching for new pods with defined scheduler-name \'%s\'...', scheduler_name)
    else:
        logging.debug('Watching for pods without caring about defined scheduler-name...')

    # Start the liveness probe (used by kubernetes to restart the service if it's not responding anymore)
    logging.debug('Starting liveness / readiness probe...')
    LivenessProbe.start()

    # Main event loop watching for new pods
    logging.info('Everything is in place for new pods to be scheduled. Waiting for new events...')
    try:
        for event in stream:
            # noinspection PyBroadException
            try:
                if event['object'].status.phase == 'Pending' and \
                        (scheduler_name is None or event['object'].spec.scheduler_name == scheduler_name) and \
                        event['type'] == 'ADDED':
                    pod = create_pod(event['object'])
                    logging.debug("There's a new pod to schedule: %s", pod.name)
                    result = scheduler.schedule(pod)
                    logging.debug('Pod yielded %s', result)
            except ApiException as e:
                # Parse the JSON message body of the exception
                logging.exception('ApiExceptionMessage: %s', json.loads(e.body)['message'])
            except Exception:
                # We really don't want the scheduler to die, therefore we catch Exception here
                logging.exception('Exception in outer event loop caught. '
                                  'It will be ignored to make sure the scheduler continues to run.')
    except KeyboardInterrupt:
        logging.info('Shutting down after receiving a keyboard interrupt.')
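Because --weights is parsed with ast.literal_eval, it accepts a Python-style list literal, and the custom-priority branch expects five values. A sketch of the parsing with illustrative weights:

import ast

weights_arg = '[1.0, 2.0, 0.5, 1.5, 1.0]'  # as passed via --weights
weights = [float(x) for x in ast.literal_eval(weights_arg)]
# five weights, one per priority function in the custom branch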
Example #7: a complete simulation run, from topology setup to extracting the result dataframes from the metrics.
# Initialize topology
storage_index = StorageIndex()
topology = urban_sensing_topology(ether_nodes, storage_index)

# Initialize environment
env = Environment()

env.simulator_factory = AIPythonHTTPSimulatorFactory(
    get_raith21_function_characterizations(resource_oracle, fet_oracle))
env.metrics = Metrics(env, log=RuntimeLogger(SimulatedClock(env)))
env.topology = topology
env.faas = DefaultFaasSystem(env, scale_by_requests=True)
env.container_registry = ContainerRegistry()
env.storage_index = storage_index
env.cluster = SimulationClusterContext(env)
env.scheduler = Scheduler(env.cluster, **sched_params)

sim = Simulation(env.topology, benchmark, env=env)
result = sim.run()

dfs = {
    "invocations_df": sim.env.metrics.extract_dataframe('invocations'),
    "scale_df": sim.env.metrics.extract_dataframe('scale'),
    "schedule_df": sim.env.metrics.extract_dataframe('schedule'),
    "replica_deployment_df": sim.env.metrics.extract_dataframe('replica_deployment'),
    "function_deployments_df": sim.env.metrics.extract_dataframe('function_deployments'),
}
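A sketch of persisting the extracted results, assuming extract_dataframe returns pandas DataFrames; the file naming is illustrative:

for name, df in dfs.items():
    df.to_csv(f'{name}.csv', index=False)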