Example #1
0
def execute_workflow(wf):
    """ this is the main workflow for executing a given workflow """
    try:
        # check that the definition is correct
        info("######################################", Fore.CYAN)
        info("> Workflow       | " + str(wf.name), Fore.CYAN)
        # access the required attributes so a missing one fails fast
        change_provider = wf.change_provider
        primary_data_provider = wf.primary_data_provider
        execution_strategy = wf.execution_strategy
    except (KeyError, AttributeError) as e:
        error("definition.py is missing value " + str(e))
        exit(1)
    # initialize the test environment
    init_pre_processors(wf)
    init_change_provider(wf)
    init_data_providers(wf)
    # here we also execute the strategy
    run_execution_strategy(wf)
    # we are done, now we clean up
    kill_pre_processors(wf)
    info("> Finished workflow")
    print("\n")
    info(">start comparison of methods now")
    from compare_methods import regressor_compare_methods, classifier_compare_methods
    classifier_compare_methods()
    regressor_compare_methods()
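
# A minimal sketch of the attributes a definition.py must expose for the
# checks in execute_workflow above to pass. The attribute names come from
# the accesses in the try block; the concrete values are made-up placeholders.
name = "my-experiment"
change_provider = {"type": "dummy"}
primary_data_provider = {"type": "interval", "seconds": 1}
execution_strategy = {"type": "forever"}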
Example #2
0
    def __init__(self, wf, p):
        try:
            self.submit_mode = p["submit_mode"]
            self.job_file = p["job_file"]
            self.job_class = p["job_class"]
            info(
                "> PreProcessor   | Spark  | Mode: " + str(self.submit_mode) +
                " | Class: " + str(self.job_class), Fore.CYAN)
        except KeyError as e:
            error("configuration.spark was incomplete: " + str(e))
            exit(1)
        spark_home = os.environ.get("SPARK_HOME")
        if spark_home is None:
            error("SPARK_HOME environment variable is not set")
            exit(1)
        spark_bin = "/bin/spark-submit"

        # now we start spark-submit to run the job in a detached process
        # http://stackoverflow.com/questions/13243807/popen-waiting-for-child-process-even-when-the-immediate-child-has-terminated/13256908#13256908
        # set system/version dependent "start_new_session" analogs
        kwargs = {}
        if platform.system() == 'Windows':
            CREATE_NEW_PROCESS_GROUP = 0x00000200  # note: could get it from subprocess
            DETACHED_PROCESS = 0x00000008  # 0x8 | 0x200 == 0x208
            kwargs.update(creationflags=DETACHED_PROCESS
                          | CREATE_NEW_PROCESS_GROUP)
        elif sys.version_info < (3, 2):  # assume posix
            kwargs.update(preexec_fn=os.setsid)
        else:  # Python 3.2+ and Unix
            kwargs.update(start_new_session=True)
        # starting a subprocess to allow termination of spark after we are done
        self.process = subprocess.Popen(spark_home + spark_bin + ' --class ' + self.job_class + \
                                        ' ./' + wf.folder + '/' + self.job_file, stdout=subprocess.PIPE, shell=True,
                                        **kwargs)
        # register a shutdown callback on this thread
        atexit.register(self.shutdown)
        # give spark some time to boot up
        time.sleep(10)
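
    # A minimal sketch of the shutdown callback registered via atexit above
    # (an assumption, not the original source). The child was started in its
    # own session/process group, so on POSIX we terminate the whole group to
    # also kill the spark-submit process spawned through the shell.
    def shutdown(self):
        import signal  # local import to keep this sketch self-contained
        if self.process.poll() is None:  # still running
            os.killpg(os.getpgid(self.process.pid), signal.SIGTERM)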
Example #3
0
def run_execution_strategy(wf):
    """ we run the correct execution strategy """
    applyInitKnobs(wf)
    try:
        # start the right execution strategy
        if wf.execution_strategy["type"] == "sequential":
            log_results(wf.folder, wf.execution_strategy["knobs"][0].keys() + ["result"], append=False)
            start_sequential_strategy(wf)

        elif wf.execution_strategy["type"] == "self_optimizer":
            log_results(wf.folder, wf.execution_strategy["knobs"].keys() + ["result"], append=False)
            start_self_optimizer_strategy(wf)

        elif wf.execution_strategy["type"] == "uncorrelated_self_optimizer":
            log_results(wf.folder, wf.execution_strategy["knobs"].keys() + ["result"], append=False)
            start_uncorrelated_self_optimizer_strategy(wf)

        elif wf.execution_strategy["type"] == "step_explorer":
            log_results(wf.folder, wf.execution_strategy["knobs"].keys() + ["result"], append=False)
            start_step_strategy(wf)

        elif wf.execution_strategy["type"] == "forever":
            start_forever_strategy(wf)
    except RuntimeError:
        error("Stopped the whole workflow as requested by a RuntimeError")
    # finished
    info(">")
    applyDefaultKnobs(wf)
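
# A minimal sketch of the execution_strategy section a definition.py might
# declare for the "sequential" type; the key names are assumptions inferred
# from this file and experimentFunction, and the knob name
# "route_random_sigma" is a made-up placeholder. Note that "sequential"
# expects a list of knob dicts, while the optimizer strategies read knobs
# as a single dict.
execution_strategy = {
    "type": "sequential",
    "ignore_first_n_results": 10,
    "sample_size": 100,
    "knobs": [
        {"route_random_sigma": 0.0},
        {"route_random_sigma": 0.2},
    ],
}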
Example #4
0
 def __init__(self, wf, cp):
     # this provider has no required configuration keys; just log its creation
     info("> EWSChangePro  | ", Fore.CYAN)
Example #5
0
 def returnDataListNonBlocking(self):
     """ by logic this can not be non-blocking, so it is implemented as returnData """
     try:
         r = requests.get(self.url)
         return [self.serialize_function(r.content)]
     except Exception:
         error("HTTP Connection Problems")
         return []  # an empty list so callers can safely iterate the result
Example #6
0
 def returnData(self):
     """ does a http GET request and returns the result value """
     try:
         r = requests.get(self.url)
         return self.serialize_function(r.content)
     except Exception:
         error("HTTP Connection Problems")
         return None
Example #7
0
def applyDefaultKnobs(wf):
    """ we are done, so revert to default if given """
    if "post_workflow_knobs" in wf.execution_strategy:
        try:
            wf.change_provider["instance"] \
                .applyChange(wf.change_event_creator(wf.execution_strategy["post_workflow_knobs"]))
            info("> Applied the post_workflow_knobs")
        except Exception:
            error("apply changes did not work")
Example #8
0
def run_execution_strategy(wf):
    """ we run the correct execution strategy """
    applyInitKnobs(wf)
    info("> Strategy       | " + str(wf.execution_strategy["type"]))
    try:
        # start the right execution strategy
        if wf.execution_strategy["type"] == "sequential":
            log_results(wf.folder, list(wf.execution_strategy["knobs"][0].keys()) + ["result"], append=False)
            start_sequential_strategy(wf)

        elif wf.execution_strategy["type"] == "self_optimizer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False)
            start_self_optimizer_strategy(wf)

        # elif wf.execution_strategy["type"] == "discrete_optimizer":
        #     log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False)
        #     start_discrete_optimizer_strategy(wf)

        elif wf.execution_strategy["type"] == "uncorrelated_self_optimizer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False)
            start_uncorrelated_self_optimizer_strategy(wf)

        elif wf.execution_strategy["type"] == "step_explorer":
            log_results(wf.folder, list(wf.execution_strategy["knobs"].keys()) + ["result"], append=False)
            start_step_strategy(wf)
    
        elif wf.execution_strategy["type"] == "forever":
            start_forever_strategy(wf)

        elif wf.execution_strategy["type"] == "simple_am":
            start_simple_am(wf)

        # elif wf.execution_strategy["type"] == "sequential_runtime":
        #     print("got here")
        #     log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False)
        #     start_seq_runtime_stategy(wf)
            
        # elif wf.execution_strategy["type"] == "mabandit_ucb1":
        #     print(type(wf.execution_strategy["knobs"]))
        #     log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False)
        #     start_mab_ucb1_strategy(wf)

        # elif wf.execution_strategy["type"] == "discount_ucb":
            
        #     log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False)
        #     start_mab_discount_ucb_strategy(wf)

        # elif wf.execution_strategy["type"] == "sliding_ucb":
            
        #     log_results(wf.folder, wf.execution_strategy["knobs"] + ["result"], append=False)
        #     start_mab_sw_ucb_strategy(wf)
            
        else:
            error("Unknown execution strategy type: " + str(wf.execution_strategy["type"]))
    except RuntimeError:
        error("Stopped the whole workflow as requested by a RuntimeError")
    # finished
    info(">")
    applyDefaultKnobs(wf)
Example #9
0
 def __init__(self, wf, cp):
     self.timer = 0
     # load config
     try:
         self.seconds = cp["seconds"]
         info("> Interval       | Seconds: " + str(self.seconds), Fore.CYAN)
     except KeyError as e:
         error("IntervalDataProvider definition was incomplete: " + str(e))
         exit(1)
Example #10
0
 def __init__(self, wf, dp):
     self.callBackFunction = None
     # load config
     try:
         self.chosen_metric = dp["chosen_metric"]
         info(">EWSDataPro    | Metric: " +  self.chosen_metric, Fore.CYAN)
     except KeyError as e:
         error("HTTPDataPro definition was incomplete: " + str(e))
         exit(1)
Example #11
0
 def __init__(self, wf, cp):
     # load config from the workflow; a missing attribute fails fast
     try:
         self.host = wf.host
         self.port = wf.port
         self.send_message = wf.send_message
         info("> SWIMChangePro  | ", Fore.CYAN)
     except AttributeError as e:
         error("SWIMChangePro was incomplete: " + str(e))
         exit(1)
Example #12
0
 def reset(self):
     """ creates a new consumer to get to the current position of the queue """
     try:
         self.consumer = KafkaConsumer(
             bootstrap_servers=self.kafka_uri,
             value_deserializer=self.serialize_function,
             group_id=None,
             consumer_timeout_ms=3000)
         self.consumer.subscribe([self.topic])
     except RuntimeError as e:
         error("connection to kafka failed: " + str(e))
         exit(1)
Example #13
0
def init_change_provider(wf):
    """ loads the specified change provider into the workflow """
    cp = wf.change_provider
    if cp["type"] == "kafka_producer":
        cp["instance"] = KafkaProducerChangeProvider(wf, cp)
    elif cp["type"] == "mqtt_publisher":
        cp["instance"] = MQTTPublisherChangeProvider(wf, cp)
    elif cp["type"] == "http_request":
        cp["instance"] = HTTPRequestChangeProvider(wf, cp)
    elif cp["type"] == "dummy":
        cp["instance"] = DummyChangeChangeProvider(wf, cp)
    else:
        error("Not a valid changeProvider")
        exit(1)
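
# A minimal sketch of the change_provider section a definition.py might
# declare; the keys for the "kafka_producer" type mirror what the
# KafkaProducerChangeProvider __init__ shown in Example #19 reads, and the
# URI and topic values are made-up placeholders.
change_provider = {
    "type": "kafka_producer",
    "kafka_uri": "localhost:9092",
    "topic": "change-events",
    "serializer": "JSON",
}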
Example #14
0
def plot(wf):
    """ here we try to generate automatic plotting of the experiments results """

    info("######################################", Fore.CYAN)
    info("> Reporting on   | " + str(wf.name), Fore.CYAN)

    plot_file_dir = './' + str(wf.folder)

    # try to access the results.csv values
    try:
        with open(plot_file_dir + '/results.csv', 'r') as csv_file:
            reader = csv.reader(csv_file, dialect='excel')
            header = next(reader)
    except IOError:
        error('Please first generate a "' + plot_file_dir +
              '/results.csv" file by running the start command')
        return
    results_data_frame = pd.read_csv(plot_file_dir + '/results.csv')

    # 1 input -> 1 output variable case
    if len(header) == 2:
        info("> Found 1 knob, creating scatter plot...", Fore.CYAN)
        plot_file = plot_file_dir + '/scatter_plot.png'
        ax = sns.regplot(x=header[0],
                         y=header[1],
                         data=results_data_frame,
                         fit_reg=False)
        fig = ax.get_figure()
        fig.savefig(plot_file)
        info("> Plot saved at " + plot_file, Fore.CYAN)

    # 2 input -> 1 output variables case
    elif len(header) == 3:

        info("> Found 2 knobs, creating heat map...", Fore.CYAN)
        plot_file = plot_file_dir + '/heatmap.png'
        results = results_data_frame.pivot(index=header[0], columns=header[1], values=header[2])
        try:
            ax = sns.heatmap(results, annot=True, fmt=".1f", linewidths=.5)
            fig = ax.get_figure()
            fig.savefig(plot_file)
            fig.show()
            info("> Plot saved at " + plot_file, Fore.CYAN)
        except ValueError:
            info("> This strategy does not support heatmap generation")

    else:
        info(
            "> Cannot plot these results (RTX can only plot experiments of one of two variables for now)",
            Fore.CYAN)
Example #15
0
 def __init__(self, wf, cp):
     # load config
     try:
         self.url = cp["url"]
         self.serializer = cp["serializer"]
         info("> HTTPChangePro  | " + self.serializer + " | URL: " + self.url, Fore.CYAN)
     except KeyError as e:
         error("HTTPChangePro was incomplete: " + str(e))
         exit(1)
     # look at the serializer
     if self.serializer == "JSON":
         self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
     else:
         error("serializer not implemented")
         exit(1)
Example #16
0
 def __init__(self, wf, dp):
     self.callBackFunction = None
     # load config
     try:
         self.host = wf.host
         self.port = wf.port
         self.metricslist = dp["server_metrics"]
         self.delays = wf.require_delays
         self.last_action = wf.last_action
         self.send_message = wf.send_message
         info(
             "> SWIMDataPro    | Host: " + self.host + ":" +
             str(self.port), Fore.CYAN)
     except (KeyError, AttributeError) as e:
         error("SWIMDataPro definition was incomplete: " + str(e))
         exit(1)
Example #17
0
 def __init__(self, wf, cp):
     self.callBackFunction = None
     # load config
     try:
         self.url = cp["url"]
         self.serializer = cp["serializer"]
         info(
             "> HTTPDataPro    | " + self.serializer + " | URL: " +
             self.url, Fore.CYAN)
     except KeyError as e:
         error("HTTPDataPro definition was incomplete: " + str(e))
         exit(1)
     if self.serializer == "JSON":
         self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
     else:
         error("serializer not implemented")
         exit(1)
Example #18
0
 def __init__(self, wf, cp):
     # load config
     try:
         self.queue = []
         self.host = cp["host"]
         self.port = cp["port"]
         self.topic = cp["topic"]
         self.serializer = cp["serializer"]
         info("> MQTTPublisher  | " + self.serializer + " | URI: " + self.host + ":" + self.port + " | Topic: " +
              self.topic, Fore.CYAN)
     except KeyError as e:
         error("mqttPublisher definition was incomplete: " + str(e))
         exit(1)
     # look at the serializer
     if self.serializer == "JSON":
         self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
     else:
         error("serializer not implemented")
         exit(1)
Example #19
0
 def __init__(self, wf, cp):
     # load config
     try:
         self.kafka_uri = cp["kafka_uri"]
         self.topic = cp["topic"]
         self.serializer = cp["serializer"]
         info("> KafkaProducer  | " + self.serializer + " | URI: " + self.kafka_uri + " | Topic: " +
              self.topic, Fore.CYAN)
     except KeyError as e:
         error("configuration.kafka_producer was incomplete: " + str(e))
         exit(1)
     # look at the serializer
     if self.serializer == "JSON":
         self.serialize_function = lambda v: json.dumps(v).encode('utf-8')
     else:
         error("serializer not implemented")
         exit(1)
     # try to connect
     try:
         # stop annoying logging
         logging.getLogger("kafka.coordinator.consumer").setLevel("ERROR")
         logging.getLogger("kafka.conn").setLevel("ERROR")
         self.producer = KafkaProducer(bootstrap_servers=self.kafka_uri,
                                       value_serializer=self.serialize_function,
                                       request_timeout_ms=5000)
     except Exception as e:
         error("connection to kafka failed: " + str(e))
         exit(1)
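
 # A minimal sketch of the applyChange method the workflow calls on change
 # providers (an assumption, not the original source): publish the change
 # message to the configured topic and flush so it is on the wire before
 # the experiment continues.
 def applyChange(self, message):
     self.producer.send(self.topic, message)
     self.producer.flush()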
Example #20
0
def retrieve_true_knobs(wf):
    """ rebuilds the knob list from the configuration model: every relation
    with alternatives becomes one knob whose values are the child components """
    knobs = wf.execution_strategy["knobs"][0]

    if "source" in knobs:
        wf.execution_strategy["knobs"].clear()

        # TODO: the source should still be loaded in (replace the IP used somehow)
        config_model = ConfigurationModel()
        list_rel_w_alt = config_model.relations_with_alternatives()

        for rel_w_alt in list_rel_w_alt:
            # collect the relation itself plus all of its alternatives
            rel_alts = config_model.relation_alternative(rel_w_alt)
            rel_alts.append(rel_w_alt)

            values = []
            for alt in rel_alts:
                values.append(alt.child_comp)

            variable = {rel_w_alt.get_name(): values}
            wf.execution_strategy["knobs"].append(variable)

        # debug output of the rebuilt knob list
        print(wf.execution_strategy["knobs"])

        # global_vars.IP = "wrong" #knobs["source"] #this doesn't work
        # configs = eRI.get_all_configs()
        # for config_obj in configs:
        #     wf.execution_strategy["knobs"].append({"config": config_obj.original_json})
    else:
        error("source of configurations not included")
        exit(1)
Example #21
0
 def __init__(self, wf, cp):
     self.callBackFunction = None
     # load the configuration
     try:
         self.queue = []
         self.host = cp["host"]
         self.port = cp["port"]
         self.topic = cp["topic"]
         self.serializer = cp["serializer"]
         info(
             "> MQTTListener   | " + self.serializer + " | URI: " +
             self.host + ":" + str(self.port) + " | Topic: " + self.topic,
             Fore.CYAN)
     except KeyError as e:
         error("mqttListener definition was incomplete: " + str(e))
         exit(1)
     # create serializer
     if self.serializer == "JSON":
         self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
     else:
         error("serializer not implemented")
         exit(1)
     try:
         # create mqtt client and connect
         self.mqtt = mqtt.Client()
         self.mqtt.connect(self.host, port=self.port)
         # register callback
         self.mqtt.on_message = self.on_message
         # subscribe and start listening on a second thread
         self.mqtt.subscribe(self.topic, 0)
         self.mqtt.loop_start()
     except RuntimeError as e:
         error("connection to mqtt failed: " + str(e))
         exit(1)
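
 # A minimal sketch of the on_message callback registered above (an
 # assumption, not the original source): deserialize each incoming MQTT
 # payload and buffer it in self.queue for later, non-blocking retrieval.
 def on_message(self, client, userdata, message):
     self.queue.append(self.serialize_function(message.payload))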
Example #22
0
def loadDefinition(folder):
    """ opens the given folder and searches for a definition.py file and checks if it looks valid"""
    if len(sys.argv) != 3:
        error("missing experiment folder")
        exit(1)
    try:
        wf = imp.load_source('wf', './' + folder + '/definition.py')
        wf.folder = sys.argv[2]
        testName = wf.name  # access the name to verify the workflow defines one
        return wf
    except IOError:
        error("Folder is not a valid experiment folder (does not contain definition.py)")
        exit(1)
    except AttributeError:
        error("Workflow did not had a name attribute")
        exit(1)
    except ImportError as e:
        error("Import failed: " + str(e))
        exit(1)
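
# A minimal usage sketch; the CLI shape is an assumption based on the
# sys.argv checks above and the "start command" mentioned in plot(), e.g.
# an invocation like "python rtx.py start myExperimentFolder" so that
# sys.argv[2] names the folder containing definition.py.
wf = loadDefinition(sys.argv[2])
info("> Loaded workflow | " + str(wf.name))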
Example #23
0
 def __init__(self, wf, cp):
     self.callBackFunction = None
     # load config
     try:
         self.kafka_uri = cp["kafka_uri"]
         self.topic = cp["topic"]
         self.serializer = cp["serializer"]
         info(
             "> KafkaConsumer  | " + self.serializer + " | URI: " +
             self.kafka_uri + " | Topic: " + self.topic, Fore.CYAN)
     except KeyError as e:
         error("system.kafkaConsumer was incomplete: " + str(e))
         exit(1)
     # look at the serializer
     if self.serializer == "JSON":
         self.serialize_function = lambda m: json.loads(m.decode('utf-8'))
     else:
         error("serializer not implemented")
         exit(1)
     # try to connect
     try:
         # disable annoying logging
         logging.getLogger("kafka.coordinator.consumer").setLevel("ERROR")
         logging.getLogger("kafka.conn").setLevel("ERROR")
         # connect to kafka
         self.consumer = KafkaConsumer(
             bootstrap_servers=self.kafka_uri,
             value_deserializer=self.serialize_function,
             enable_auto_commit=False,
             group_id=None,
             consumer_timeout_ms=3000)
         # subscribe to the requested topic
         self.consumer.subscribe([self.topic])
     except RuntimeError as e:
         error("connection to kafka failed: " + str(e))
         exit(1)
Example #24
0
def experimentFunction(wf, exp):
    """ executes a given experiment """
    start_time = current_milli_time()
    # remove all old data from the queues
    wf.primary_data_provider["instance"].reset()

    # load change event creator or use a default
    if hasattr(wf, "change_event_creator"):
        change_creator = wf.change_event_creator
    else:
        change_creator = _defaultChangeProvider

    # start
    info(">")
    info("> KnobValues     | " + str(exp["knobs"]))
    # create new state
    exp["state"] = wf.state_initializer(dict(),wf)

    # apply changes to system
    try:
        wf.change_provider["instance"].applyChange(change_creator(exp["knobs"], wf))
    except Exception:
        error("apply changes did not work")

    # ignore the first data sets
    to_ignore = exp["ignore_first_n_results"]
    if to_ignore > 0:
        i = 0
        while i < to_ignore:
            new_data = wf.primary_data_provider["instance"].returnData()
            if new_data is not None:
                i += 1
                process("IgnoreSamples  | ", i, to_ignore)
        print("")

    # start collecting data
    sample_size = exp["sample_size"]
    i = 0
    try:
        while i < sample_size:
            # we start with the primary data provider using blocking returnData
            new_data = wf.primary_data_provider["instance"].returnData()
            if new_data is not None:
                try:
                    # print(new_data)
                    exp["state"] = wf.primary_data_provider["data_reducer"](exp["state"], new_data, wf)
                except (StopIteration, RuntimeError):
                    raise  # just forward to the strategy
                except Exception:
                    error("could not reduce data set: " + str(new_data))
                i += 1
                process("CollectSamples | ", i, sample_size)
            # now we use returnDataListNonBlocking on all secondary data providers
            if hasattr(wf, "secondary_data_providers"):
                for cp in wf.secondary_data_providers:
                    new_data = cp["instance"].returnDataListNonBlocking()
                    for nd in new_data or []:  # guard against providers that return None
                        try:
                            exp["state"] = cp["data_reducer"](exp["state"], nd, wf)
                        except (StopIteration, RuntimeError):
                            raise  # just forward to the strategy
                        except Exception:
                            error("could not reduce data set: " + str(nd))
        print("")
    except StopIteration:
        # this iteration should stop asap
        error("This experiment got stopped as requested by a StopIteration exception")
    try:
        result = wf.evaluator(exp["state"], wf)
    except Exception:
        result = 0
        error("evaluator failed")
    # we store the counter of this experiment in the workflow
    if hasattr(wf, "experimentCounter"):
        wf.experimentCounter += 1
    else:
        wf.experimentCounter = 1
    # print the results
    duration = current_milli_time() - start_time
    # do not show stats for forever strategy
    if wf.totalExperiments > 0:
        info("> Statistics     | " + str(wf.experimentCounter) + "/" + str(wf.totalExperiments)
             + " took " + str(duration) + "ms" + " - remaining ~" + str(
            (wf.totalExperiments - wf.experimentCounter) * duration / 1000) + "sec")
    info("> FullState      | " + str(exp["state"]))
    info("> ResultValue    | " + str(result))
    # log the result values into a csv file
    log_results(wf.folder, exp["knobs"].values() + [result])
    # return the result value of the evaluator
    return result
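
# A minimal sketch (an assumption, not from the original source) of the
# callbacks experimentFunction expects a definition.py to provide; the
# signatures mirror the calls above. The "duration" field of the incoming
# sample is a made-up placeholder.
def state_initializer(state, wf):
    state["durations"] = []
    return state

def data_reducer(state, new_data, wf):
    state["durations"].append(new_data["duration"])
    return state

def evaluator(state, wf):
    # condense the collected samples into the single result value
    return sum(state["durations"]) / len(state["durations"])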
Example #25
0
 def returnDataListNonBlocking(self):
     """ does wait for x seconds and then return the timer counter value """
     error(
         "IntervalDataProvider is not able to work as a secondary data provider"
     )
     exit(1)
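
 # A minimal sketch of the blocking returnData counterpart (an assumption,
 # not the original source), based on the "seconds" and "timer" fields set
 # up in the IntervalDataProvider __init__ of Example #9: wait for the
 # configured interval, then return the advanced timer counter.
 def returnData(self):
     time.sleep(self.seconds)
     self.timer += self.seconds
     return self.timer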