def start_self_optimizer_strategy(wf):
    """ executes a self optimizing strategy """
    info("> ExecStrategy | SelfOptimizer", Fore.CYAN)
    optimizer_method = wf.execution_strategy["optimizer_method"]
    wf.totalExperiments = wf.execution_strategy["optimizer_iterations"]
    optimizer_random_starts = wf.execution_strategy["optimizer_random_starts"]
    info("> Optimizer | " + optimizer_method, Fore.CYAN)
    # we look at the ranges the user has specified in the knobs
    knobs = wf.execution_strategy["knobs"]
    # we create a list of variable names and a list of knob (from, to) ranges
    variables = []
    range_tuples = []
    # we fill the arrays and use the index to map from gauss-optimizer-value to variable
    for key in knobs:
        variables += [key]
        range_tuples += [(knobs[key][0], knobs[key][1])]
    # we give the minimization function a callback to execute
    # it uses the return value (it tries to minimize it) to select new knobs to test
    optimizer_result = gp_minimize(
        lambda opti_values: self_optimizer_execution(wf, opti_values, variables),
        range_tuples,
        n_calls=wf.totalExperiments,
        n_random_starts=optimizer_random_starts)
    # optimizer is done, print results
    info(">")
    info("> OptimalResult | Knobs: " + str(
        recreate_knob_from_optimizer_values(variables, optimizer_result.x)))
    info("> | Result: " + str(optimizer_result.fun))
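# Illustrative only: the self_optimizer strategy expects "knobs" to map each knob
# name to a (from, to) range, which is turned into the variables / range_tuples
# lists above. The knob names, method label, and numbers below are hypothetical,
# not taken from a shipped definition file.
example_self_optimizer_strategy = {
    "type": "self_optimizer",
    "optimizer_method": "gauss_process",      # illustrative method label
    "optimizer_iterations": 20,
    "optimizer_random_starts": 5,
    "knobs": {
        "route_random_sigma": (0.0, 0.3),     # hypothetical knob name and range
        "exploration_percentage": (0.0, 0.5)  # hypothetical knob name and range
    }
}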
def start_discrete_optimizer_strategy(wf):
    """ executes a discrete value optimizing strategy """
    info("> ExecStrategy | DiscreteOptimizer", Fore.CYAN)
    optimizer_method = wf.execution_strategy["optimizer_method"]
    wf.totalExperiments = wf.execution_strategy["optimizer_iterations"]
    optimizer_random_starts = wf.execution_strategy["optimizer_random_starts"]
    info("> Optimizer | " + optimizer_method, Fore.CYAN)
    retrieve_true_knobs(wf)
    knobs = wf.execution_strategy["knobs"]
    # the knobs are given as a list of dicts; the first entry holds the variable
    # names and the discrete categories the optimizer may choose from
    variables = list(knobs[0].keys())
    list_categories = list(knobs[0].values())[0]
    # we give the minimization function a callback to execute
    # it uses the return value (it tries to minimize it) to select new knobs to test
    optimizer_result = gp_minimize(
        lambda opti_values: self_optimizer_execution(wf, opti_values, variables),
        dimensions=list_categories,
        n_calls=wf.totalExperiments,
        n_random_starts=optimizer_random_starts)
    # optimizer is done, print results
    info(">")
    info("> OptimalResult | Knobs: " + str(
        recreate_knob_from_optimizer_values(variables, optimizer_result.x)))
    info("> | Result: " + str(optimizer_result.fun))
def __init__(self, wf, cp): # load config try: info("> EWSChangePro | ", Fore.CYAN) except KeyError: error("EWSChangePro was incomplete") exit(1)
def run_execution_strategy(wf):
    """ we run the correct execution strategy """
    applyInitKnobs(wf)
    try:
        # start the right execution strategy
        if wf.execution_strategy["type"] == "sequential":
            log_results(wf.folder, list(wf.execution_strategy["knobs"][0].keys()) + ["result"],
                        append=False)
            start_sequential_strategy(wf)
        elif wf.execution_strategy["type"] == "self_optimizer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"],
                        append=False)
            start_self_optimizer_strategy(wf)
        elif wf.execution_strategy["type"] == "uncorrelated_self_optimizer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"],
                        append=False)
            start_uncorrelated_self_optimizer_strategy(wf)
        elif wf.execution_strategy["type"] == "step_explorer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"],
                        append=False)
            start_step_strategy(wf)
        elif wf.execution_strategy["type"] == "forever":
            start_forever_strategy(wf)
    except RuntimeError:
        error("Stopped the whole workflow as requested by a RuntimeError")
    # finished
    info(">")
    applyDefaultKnobs(wf)
def start_step_strategy(wf):
    """ implements the step strategy, a way to explore a whole feature area """
    info("> ExecStrategy | Step", Fore.CYAN)
    # we look at the ranges and the steps the user has specified in the knobs
    knobs = wf.execution_strategy["knobs"]
    # we create a list of variable names and a list of lists of values:
    # [[par1_val1, par1_val2, par1_val3], [par2_val1, par2_val2, par2_val3], [...], ...]
    variables = []
    parameters_values = []
    # we create a list of parameters to look at
    for key in knobs:
        variables += [key]
        lower = knobs[key][0][0]
        upper = knobs[key][0][1]
        step = knobs[key][1]
        value = lower
        parameter_values = []
        while value <= upper:
            # create a new list for each item
            parameter_values += [[value]]
            value += step
        parameters_values += [parameter_values]
    list_of_configurations = reduce(
        lambda list1, list2: [x + y for x in list1 for y in list2],
        parameters_values)
    # we run the list of experiments
    wf.totalExperiments = len(list_of_configurations)
    info("> Steps Created | Count: " + str(wf.totalExperiments), Fore.CYAN)
    for configuration in list_of_configurations:
        step_execution(wf, configuration, variables)
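# Illustrative only: the step_explorer strategy reads each knob as [(lower, upper), step]
# and expands it into a list of values; the reduce() above then builds the cross product
# of all knob value lists. Knob names and numbers below are hypothetical.
example_step_knobs = {
    "route_random_sigma": [(0.0, 0.2), 0.1],  # expands to [0.0, 0.1, 0.2]
    "max_speed_delta": [(1, 2), 1],           # expands to [1, 2]
}
# cross product -> 6 configurations, e.g. [0.0, 1], [0.0, 2], [0.1, 1], ...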
def evolutionary_execution(wf, opti_values, variables):
    """ this is the function we call and that returns a value for optimization """
    global crowdnav_instance_number
    knob_object = recreate_knob_from_optimizer_values(variables, opti_values)
    # create a new experiment to run in execution
    exp = dict()
    # TODO where do we start multiple threads to call the experimentFunction concurrently,
    #      once for each experiment and crowdnav instance?
    # TODO should we create new/fresh CrowdNav instances for each iteration/generation?
    #      Otherwise, we use the same instance to evaluate individuals across
    #      iterations/generations.
    if wf.execution_strategy["parallel_execution_of_individuals"]:
        wf.primary_data_provider["instance"].topic = \
            original_primary_data_provider_topic + "-" + str(crowdnav_instance_number)
        wf.change_provider["instance"].topic = \
            original_change_provider_topic + "-" + str(crowdnav_instance_number)
        info("Listening on " + wf.primary_data_provider["instance"].topic)
        info("Posting changes to " + wf.change_provider["instance"].topic)
        crowdnav_instance_number = crowdnav_instance_number + 1
        if crowdnav_instance_number == wf.execution_strategy["population_size"]:
            crowdnav_instance_number = 0
    exp["ignore_first_n_results"] = wf.execution_strategy["ignore_first_n_results"]
    exp["sample_size"] = wf.execution_strategy["sample_size"]
    exp["knobs"] = knob_object
    # the experiment function returns what the evaluator in definition.py is computing
    return experimentFunction(wf, exp)
def __init__(self, wf, p):
    try:
        self.submit_mode = p["submit_mode"]
        self.job_file = p["job_file"]
        self.job_class = p["job_class"]
        info("> PreProcessor | Spark | Mode: " + str(self.submit_mode)
             + " | Args: " + str(self.job_class), Fore.CYAN)
    except KeyError as e:
        error("configuration.spark was incomplete: " + str(e))
        exit(1)
    spark_home = os.environ.get("SPARK_HOME")
    spark_bin = "/bin/spark-submit"
    # now we start spark to run the job
    # http://stackoverflow.com/questions/13243807/popen-waiting-for-child-process-even-when-the-immediate-child-has-terminated/13256908#13256908
    # set system/version dependent "start_new_session" analogs
    kwargs = {}
    if platform.system() == 'Windows':
        CREATE_NEW_PROCESS_GROUP = 0x00000200  # note: could get it from subprocess
        DETACHED_PROCESS = 0x00000008          # 0x8 | 0x200 == 0x208
        kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP)
    elif sys.version_info < (3, 2):
        # assume posix
        kwargs.update(preexec_fn=os.setsid)
    else:
        # Python 3.2+ and Unix
        kwargs.update(start_new_session=True)
    # starting a subprocess to allow termination of spark after we are done
    self.process = subprocess.Popen(
        spark_home + spark_bin + ' --class ' + self.job_class +
        ' ./' + wf.folder + '/' + self.job_file,
        stdout=subprocess.PIPE, shell=True, **kwargs)
    # register a shutdown callback on this thread
    atexit.register(self.shutdown)
    # wait for some time to give spark time to boot up
    time.sleep(10)
def __init__(self, wf, cp):
    # load config
    try:
        self.kafka_uri = cp["kafka_uri"]
        self.topic = cp["topic"]
        self.serializer = cp["serializer"]
        info("> KafkaProducer | " + self.serializer + " | URI: " + self.kafka_uri
             + " | Topic: " + self.topic, Fore.CYAN)
    except KeyError:
        error("configuration.kafka_producer was incomplete")
        exit(1)
    # look at the serializer
    if self.serializer == "JSON":
        self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
    else:
        error("serializer not implemented")
        exit(1)
    # try to connect
    try:
        # stop annoying logging
        logging.getLogger("kafka.coordinator.consumer").setLevel("ERROR")
        logging.getLogger("kafka.conn").setLevel("ERROR")
        self.producer = KafkaProducer(bootstrap_servers=self.kafka_uri,
                                      value_serializer=self.serialize_function,
                                      request_timeout_ms=5000)
    except:
        error("connection to kafka failed")
        exit(1)
def __init__(self, wf, cp):
    self.callBackFunction = None
    # load the configuration
    try:
        self.queue = []
        self.host = cp["host"]
        self.port = cp["port"]
        self.topic = cp["topic"]
        self.serializer = cp["serializer"]
        info("> MQTTListener | " + self.serializer + " | URI: " + self.host + ":"
             + str(self.port) + " | Topic: " + self.topic, Fore.CYAN)
    except KeyError as e:
        error("mqttListener definition was incomplete: " + str(e))
        exit(1)
    # create serializer
    if self.serializer == "JSON":
        self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
    else:
        error("serializer not implemented")
        exit(1)
    try:
        # create mqtt client and connect
        self.mqtt = mqtt.Client()
        self.mqtt.connect(self.host, port=self.port)
        # register callback
        self.mqtt.on_message = self.on_message
        # subscribe and start listening on a second thread
        self.mqtt.subscribe(self.topic, 0)
        self.mqtt.loop_start()
    except RuntimeError as e:
        error("connection to mqtt failed: " + str(e))
        exit(1)
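# Illustrative only: the MQTTListener above reads its connection settings from the
# data provider section of the definition; the type label, host, port, and topic
# values here are hypothetical.
example_mqtt_listener_config = {
    "type": "mqtt_listener",     # assumed type label, illustrative
    "host": "localhost",
    "port": 1883,
    "topic": "crowd-nav-trips",  # hypothetical topic name
    "serializer": "JSON",
}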
def recreate_knob_from_optimizer_values(variables, opti_values):
    """ recreates knob values from a variable """
    knob_object = {}
    # create the knobObject based on the position of the opti_values and variables in their array
    for idx, val in enumerate(variables):
        knob_object[val] = opti_values[idx]
    info(">> knob object " + str(knob_object))
    return knob_object
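# Small usage sketch (values are illustrative): the helper zips the ordered variable
# names and the optimizer-proposed values back into a knob dict, e.g.
#
#   recreate_knob_from_optimizer_values(["route_random_sigma", "exploration_percentage"],
#                                       [0.12, 0.35])
#   -> {"route_random_sigma": 0.12, "exploration_percentage": 0.35}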
def kill_pre_processors(wf):
    """ after the experiment, we stop all preprocessors """
    try:
        for p in wf.pre_processors:
            p["instance"].shutdown()
            info("> Shutting down Spark preprocessor")
    except AttributeError:
        pass
def start_simple_am(wf):
    """ executes forever - changes must come from definition file """
    info("> ExecStrategy | simple_am ", Fore.CYAN)
    wf.totalExperiments = -1
    server_state = effector(wf, 'initial')
    while True:
        # server_state = effector(wf, '')
        response_time = 0
        print("current state:\n")
        print(server_state)
        print("end")
        if not server_state:
            print("No more connection")
            wf.close_socket()
            return
        try:
            dimmer = float(server_state.get('dimmer'))
            response_time = float(server_state.get('average_rt'))
            activeServers = float(server_state.get("active_servers"))
            servers = float(server_state.get("servers"))
            max_servers = float(server_state.get("max_servers"))
            total_util = float(server_state.get("total_util"))
            # ["dimmer", "servers", "active_servers", "basic_rt", "optional_rt",
            #  "basic_throughput", "opt_throughput"]
            is_server_boot = (servers > activeServers)
            print("Is server boot?: " + str(is_server_boot))
        except:
            continue
        if response_time > RT_THRESHOLD:
            if (not is_server_boot) and servers < max_servers:
                server_state = effector(wf, "add_server")
            elif dimmer > 0.0:
                new_dimmer = max(0.0, dimmer - DIMMER_STEP)
                server_state = effector(wf, "set_dimmer " + str(new_dimmer))
            else:
                server_state = effector(wf, 'data')
        elif response_time < RT_THRESHOLD:
            spare_util = activeServers - total_util
            if spare_util > 1:
                if dimmer < 1.0:
                    new_dimmer = min(1.0, dimmer + DIMMER_STEP)
                    server_state = effector(wf, "set_dimmer " + str(new_dimmer))
                elif (not is_server_boot) and servers > 1:
                    server_state = effector(wf, "remove_server")
                else:
                    server_state = effector(wf, 'data')
            else:
                server_state = effector(wf, 'data')
        else:
            server_state = effector(wf, 'data')
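# Note: start_simple_am relies on module-level constants RT_THRESHOLD and DIMMER_STEP
# that are defined elsewhere and are not part of this listing. Purely for illustration,
# they might look like the following (hypothetical values, not the original ones):
#
#   RT_THRESHOLD = 0.75   # response-time threshold in seconds above which we adapt
#   DIMMER_STEP = 0.1     # step by which the dimmer is raised or lowered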
def __init__(self, wf, dp):
    self.callBackFunction = None
    # load config
    try:
        self.chosen_metric = dp["chosen_metric"]
        info("> EWSDataPro | Metric: " + self.chosen_metric, Fore.CYAN)
    except KeyError as e:
        error("EWSDataPro definition was incomplete: " + str(e))
        exit(1)
def __init__(self, wf, cp):
    self.timer = 0
    # load config
    try:
        self.seconds = cp["seconds"]
        info("> Interval | Seconds: " + str(self.seconds), Fore.CYAN)
    except KeyError as e:
        error("IntervalDataProvider definition was incomplete: " + str(e))
        exit(1)
def nsga2(variables, range_tuples, wf):
    optimizer_iterations = wf.execution_strategy["optimizer_iterations"]
    population_size = wf.execution_strategy["population_size"]
    crossover_probability = wf.execution_strategy["crossover_probability"]
    mutation_probability = wf.execution_strategy["mutation_probability"]
    info("> Parameters:\noptimizer_iterations: " + str(optimizer_iterations)
         + "\npopulation_size: " + str(population_size)
         + "\ncrossover_probability: " + str(crossover_probability)
         + "\nmutation_probability: " + str(mutation_probability))
def applyDefaultKnobs(wf):
    """ we are done, so revert to default if given """
    if "post_workflow_knobs" in wf.execution_strategy:
        try:
            info("> Applied the post_workflow_knobs")
            wf.change_provider["instance"] \
                .applyChange(wf.change_event_creator(wf.execution_strategy["post_workflow_knobs"]))
        except:
            error("apply changes did not work")
def run_execution_strategy(wf): """ we run the correct execution strategy """ applyInitKnobs(wf) print(wf.execution_strategy["type"]) try: # start the right execution strategy if wf.execution_strategy["type"] == "sequential": log_results(wf.folder, list(wf.execution_strategy["knobs"][0].keys()) + ["result"], append=False) start_sequential_strategy(wf) elif wf.execution_strategy["type"] == "self_optimizer": log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False) start_self_optimizer_strategy(wf) # elif wf.execution_strategy["type"] == "discrete_optimizer": # log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False) # start_discrete_optimizer_strategy(wf) elif wf.execution_strategy["type"] == "uncorrelated_self_optimizer": log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False) start_uncorrelated_self_optimizer_strategy(wf) elif wf.execution_strategy["type"] == "step_explorer": log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False) start_step_strategy(wf) elif wf.execution_strategy["type"] == "forever": start_forever_strategy(wf) elif wf.execution_strategy["type"] == "simple_am": start_simple_am(wf) # elif wf.execution_strategy["type"] == "sequential_runtime": # print("got here") # log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False) # start_seq_runtime_stategy(wf) # elif wf.execution_strategy["type"] == "mabandit_ucb1": # print(type(wf.execution_strategy["knobs"])) # log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False) # start_mab_ucb1_strategy(wf) # elif wf.execution_strategy["type"] == "discount_ucb": # log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False) # start_mab_discount_ucb_strategy(wf) # elif wf.execution_strategy["type"] == "sliding_ucb": # log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False) # start_mab_sw_ucb_strategy(wf) except RuntimeError: error("Stopped the whole workflow as requested by a RuntimeError") # finished info(">") applyDefaultKnobs(wf)
def init_pre_processors(wf):
    """ we look into the workflow's definition and run the required preprocessors """
    if hasattr(wf, "pre_processors"):
        pp = wf.pre_processors
        for p in pp:
            if p["type"] == "spark":
                p["instance"] = SparkPreProcessor(wf, p)
    else:
        info("> Preprocessor | None", Fore.CYAN)
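# Illustrative only: init_pre_processors expects wf.pre_processors to be a list of
# dicts; for type "spark" the keys below are the ones SparkPreProcessor reads in its
# constructor. The mode value, file name, and class name are hypothetical.
example_pre_processors = [
    {
        "type": "spark",
        "submit_mode": "client",               # illustrative value
        "job_file": "crowdnav-analysis.jar",   # hypothetical job file
        "job_class": "de.example.AnalysisJob"  # hypothetical main class
    }
]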
def __init__(self, wf, cp):
    # load config
    self.host = wf.host
    self.port = wf.port
    self.send_message = wf.send_message
    try:
        info("> SWIMChangePro | ", Fore.CYAN)
    except KeyError:
        error("SWIMChangePro was incomplete")
        exit(1)
def start_sequential_strategy(wf):
    """ executes all experiments from the definition file """
    info("> ExecStrategy | Sequential", Fore.CYAN)
    wf.totalExperiments = len(wf.execution_strategy["knobs"])
    for kn in wf.execution_strategy["knobs"]:
        experimentFunction(wf, {
            "knobs": kn,
            "ignore_first_n_results": wf.execution_strategy["ignore_first_n_results"],
            "sample_size": wf.execution_strategy["sample_size"],
        })
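# Illustrative only: the sequential strategy expects "knobs" to be a list of complete
# knob dicts, one per experiment; knob names and values below are hypothetical.
example_sequential_strategy = {
    "type": "sequential",
    "ignore_first_n_results": 100,
    "sample_size": 1000,
    "knobs": [
        {"route_random_sigma": 0.0},   # first experiment
        {"route_random_sigma": 0.2},   # second experiment
    ],
}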
def execute_workflow(wf):
    """ this is the main workflow for executing a given workflow """
    try:
        # check that the definition is correct
        info("######################################", Fore.CYAN)
        info("> Workflow | " + str(wf.name), Fore.CYAN)
        # check variables
        b = wf.change_provider
        c = wf.primary_data_provider
        d = wf.execution_strategy
    except (KeyError, AttributeError) as e:
        error("definition.py is missing value " + str(e))
        exit(1)
    # initialize the test environment
    init_pre_processors(wf)
    init_change_provider(wf)
    init_data_providers(wf)
    # here we also execute the strategy
    run_execution_strategy(wf)
    # we are done, now we clean up
    kill_pre_processors(wf)
    info("> Finished workflow")
    print("\n")
    info("> start comparison of methods now")
    from compare_methods import regressor_compare_methods, classifier_compare_methods
    classifier_compare_methods()
    regressor_compare_methods()
def __init__(self, wf, cp):
    # load config
    try:
        self.url = cp["url"]
        self.serializer = cp["serializer"]
        info("> HTTPChangePro | " + self.serializer + " | URL: " + self.url, Fore.CYAN)
    except KeyError:
        error("HTTPChangePro was incomplete")
        exit(1)
    # look at the serializer
    if self.serializer == "JSON":
        self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
    else:
        error("serializer not implemented")
        exit(1)
def start_forever_strategy(wf):
    """ executes forever - changes must come from definition file """
    info("> ExecStrategy | Forever ", Fore.CYAN)
    wf.totalExperiments = -1
    while True:
        experimentFunction(wf, {
            "knobs": {
                "forever": True
            },
            "ignore_first_n_results": wf.execution_strategy["ignore_first_n_results"],
            "sample_size": wf.execution_strategy["sample_size"],
        })
def __init__(self, wf, dp):
    self.callBackFunction = None
    # load config
    try:
        self.host = wf.host
        self.port = wf.port
        self.metricslist = dp["server_metrics"]
        self.delays = wf.require_delays
        self.last_action = wf.last_action
        self.send_message = wf.send_message
        info("> SWIMDataPro | Metric: " + self.host + ":" + str(self.port), Fore.CYAN)
    except KeyError as e:
        error("SWIMDataPro definition was incomplete: " + str(e))
        exit(1)
def __init__(self, wf, cp):
    self.callBackFunction = None
    # load config
    try:
        self.url = cp["url"]
        self.serializer = cp["serializer"]
        info("> HTTPDataPro | " + self.serializer + " | URL: " + self.url, Fore.CYAN)
    except KeyError as e:
        error("HTTPDataPro definition was incomplete: " + str(e))
        exit(1)
    if self.serializer == "JSON":
        self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
    else:
        error("serializer not implemented")
        exit(1)
def start_seq_runtime_stategy(wf):
    """ executes all experiments from the definition file """
    info("> ExecStrategy | SequentialRuntimeConfigs", Fore.CYAN)
    retrieve_true_knobs(wf)
    # the regular sequential code starts here
    wf.totalExperiments = len(wf.execution_strategy["knobs"])
    for kn in wf.execution_strategy["knobs"]:
        experimentFunction(wf, {
            "knobs": kn,
            "ignore_first_n_results": wf.execution_strategy["ignore_first_n_results"],
            "sample_size": wf.execution_strategy["sample_size"],
        })
def __init__(self, wf, cp):
    # load config
    try:
        self.queue = []
        self.host = cp["host"]
        self.port = cp["port"]
        self.topic = cp["topic"]
        self.serializer = cp["serializer"]
        info("> MQTTPublisher | " + self.serializer + " | URI: " + self.host + ":"
             + str(self.port) + " | Topic: " + self.topic, Fore.CYAN)
    except KeyError:
        error("mqttPublisher definition was incomplete")
        exit(1)
    # look at the serializer
    if self.serializer == "JSON":
        self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
    else:
        error("serializer not implemented")
        exit(1)
def start_evolutionary_strategy(wf):
    global original_primary_data_provider_topic
    global original_change_provider_topic
    info("> ExecStrategy | Evolutionary", Fore.CYAN)
    optimizer_method = wf.execution_strategy["optimizer_method"]
    wf.totalExperiments = wf.execution_strategy["optimizer_iterations"]
    info("> Optimizer | " + optimizer_method, Fore.CYAN)
    original_primary_data_provider_topic = wf.primary_data_provider["instance"].topic
    original_change_provider_topic = wf.change_provider["instance"].topic
    # we look at the ranges the user has specified in the knobs
    knobs = wf.execution_strategy["knobs"]
    # we create a list of variable/knob names and a list of ranges (from, to) for each knob
    variables = []
    range_tuples = []
    # we fill the arrays and use the index to map from gauss-optimizer-value to variable
    for key in knobs:
        variables += [key]
        range_tuples += [(knobs[key][0], knobs[key][1])]
    info("> Run Optimizer | " + optimizer_method, Fore.CYAN)
    if optimizer_method == "GA":
        ga(variables, range_tuples, wf)
    elif optimizer_method == "NSGAII":
        nsga2(variables, range_tuples, wf)
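# Illustrative only: the evolutionary strategy combines (from, to) knob ranges with
# the GA/NSGA-II parameters read in nsga2() and the parallel-execution flag used by
# evolutionary_execution(). The type label, knob name, and all values are hypothetical.
example_evolutionary_strategy = {
    "type": "evolutionary",                  # assumed type label, illustrative
    "optimizer_method": "NSGAII",            # or "GA"
    "optimizer_iterations": 10,
    "population_size": 8,
    "crossover_probability": 0.7,
    "mutation_probability": 0.2,
    "parallel_execution_of_individuals": False,
    "ignore_first_n_results": 100,
    "sample_size": 1000,
    "knobs": {
        "route_random_sigma": (0.0, 0.3),    # hypothetical knob name and range
    },
}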
def __init__(self, wf, cp):
    self.callBackFunction = None
    # load config
    try:
        self.kafka_uri = cp["kafka_uri"]
        self.topic = cp["topic"]
        self.serializer = cp["serializer"]
        info("> KafkaConsumer | " + self.serializer + " | URI: " + self.kafka_uri
             + " | Topic: " + self.topic, Fore.CYAN)
    except KeyError as e:
        error("system.kafkaConsumer was incomplete: " + str(e))
        exit(1)
    # look at the serializer
    if self.serializer == "JSON":
        self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
    else:
        error("serializer not implemented")
        exit(1)
    # try to connect
    try:
        # disable annoying logging
        logging.getLogger("kafka.coordinator.consumer").setLevel("ERROR")
        logging.getLogger("kafka.conn").setLevel("ERROR")
        # connect to kafka
        self.consumer = KafkaConsumer(bootstrap_servers=self.kafka_uri,
                                      value_deserializer=self.serialize_function,
                                      enable_auto_commit=False,
                                      group_id=None,
                                      consumer_timeout_ms=3000)
        # subscribe to the requested topic
        self.consumer.subscribe([self.topic])
    except RuntimeError as e:
        error("connection to kafka failed: " + str(e))
        exit(1)
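# Illustrative only: the KafkaConsumer wrapper above reads these keys from the
# primary data provider section of the definition; the type label, URI, and topic
# below are hypothetical.
example_kafka_consumer_config = {
    "type": "kafka_consumer",      # assumed type label, illustrative
    "kafka_uri": "localhost:9092",
    "topic": "crowd-nav-trips",    # hypothetical topic name
    "serializer": "JSON",
}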
def start_uncorrelated_self_optimizer_strategy(wf):
    """ executes a self optimizing strategy """
    optimizer_method = wf.execution_strategy["optimizer_method"]
    info("> ExecStrategy | UncorrelatedSelfOptimizer", Fore.CYAN)
    info("> Optimizer | " + optimizer_method, Fore.CYAN)
    knobs = wf.execution_strategy["knobs"]
    wf.totalExperiments = len(knobs) * wf.execution_strategy["optimizer_iterations"]
    total_result = dict()
    # we optimize each knob on its own and collect the best value per knob
    for key in knobs:
        optimal_knob_value = optimizeOneVariable(
            wf, wf.execution_strategy["optimizer_iterations"], key,
            (knobs[key][0], knobs[key][1]))
        total_result[key] = optimal_knob_value
    wf.change_provider["instance"].applyChange(total_result)
    info(">")
    info("> FinalResult | Best Values: " + str(total_result))
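# optimizeOneVariable is not part of this listing. A minimal sketch of what it could
# look like, assuming it mirrors start_self_optimizer_strategy but runs gp_minimize
# over a single knob and returns the best value found (an assumption, not the actual
# implementation):
def optimize_one_variable_sketch(wf, iterations, key, value_range):
    # minimize the experiment result over one (from, to) dimension only
    result = gp_minimize(
        lambda opti_values: self_optimizer_execution(wf, opti_values, [key]),
        [value_range],
        n_calls=iterations)
    # return the single optimal value for this knob
    return result.x[0]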