Example 1
def main(experimentname):
    # Open the per-experiment output file; error-rate rows are appended at the end
    f_write = open(experimentname + ".txt", "a")

    # set up the base config
    mlepConfig = io_utils.load_json("./MLEPServer.json")

    # Override the options this experiment varies
    mlepConfig["config"]["weight_method"] = "unweighted"
    mlepConfig["config"]["select_method"] = "recent"
    mlepConfig["config"]["filter_select"] = "nearest"

    # Note: this experiment does not advance the adaptor's internal timer.
    streamData = StreamLocal.StreamLocal(
        data_source="./data/realisticStreamComb_2013_feb19.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source="./data/collectedIrrelevant.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source="./data/initialTrainingData.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    MLEPLearner = MLEPModelDriftAdaptor.MLEPModelDriftAdaptor(
        config_dict=mlepConfig)
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0
    implicit_mistakes = 0.0
    implicit_count = 0
    explicit_mistakes = 0.0
    explicit_count = 0
    implicit_error_rate = []
    explicit_error_rate = []
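    # Unlabeled stream items are scored in "implicit" mode against their stored
    # "true_label" field; labeled items are scored in "explicit" mode against the
    # stream label. Error rates are reported and reset every 100 samples.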
    while streamData.next():
        if streamData.getLabel() is None:
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  classify_mode="implicit")
            if classification != streamData.getObject().getValue("true_label"):
                implicit_mistakes += 1.0
            implicit_count += 1
        else:
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  classify_mode="explicit")
            if classification != streamData.getLabel():
                explicit_mistakes += 1.0
            explicit_count += 1

        totalCounter += 1

        if totalCounter % 100 == 0 and totalCounter > 0:
            # 2.00 is a sentinel meaning "no samples of this kind in this window"
            implicit_running_error = 2.00
            explicit_running_error = 2.00
            if implicit_count:
                implicit_running_error = implicit_mistakes / float(
                    implicit_count)
            if explicit_count:
                explicit_running_error = explicit_mistakes / float(
                    explicit_count)
            io_utils.std_flush(
                "Fin: %6i samples\t\texplicit error: %2.4f\t\t implicit error: %2.4f"
                %
                (totalCounter, explicit_running_error, implicit_running_error))
            implicit_error_rate.append(implicit_running_error)
            explicit_error_rate.append(explicit_running_error)
            implicit_mistakes = 0.0
            implicit_count = 0
            explicit_mistakes = 0.0
            explicit_count = 0
    f_write.write(experimentname + ",implicit," +
                  ",".join([str(item) for item in implicit_error_rate]) + "\n")
    f_write.write(experimentname + ",explicit," +
                  ",".join([str(item) for item in explicit_error_rate]) + "\n")
    f_write.close()
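
A minimal invocation sketch for this example (hypothetical; it assumes the module-level imports for io_utils, time_utils, StreamLocal, BatchedLocal, PseudoJsonTweets and MLEPModelDriftAdaptor are already in place):

if __name__ == "__main__":
    # "baseline" is a hypothetical experiment name; error rates land in baseline.txt
    main("baseline")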
Example 2
def main(experimentname, allow_explicit_drift, explicit_drift_class,
         explicit_drift_mode, explicit_update_mode, allow_unlabeled_drift,
         unlabeled_drift_class, unlabeled_drift_mode, unlabeled_update_mode,
         allow_update_schedule, update_schedule, schedule_update_mode,
         weight_method, select_method, filter_method, kval, update_prune):

    # Tracking URI -- note: hard-coding credentials like this is not secure
    # mlflow.set_tracking_uri("mysql://*****:*****@127.0.0.1:3306/mlflow_runs")
    # Where to save data:
    # mlflow.start_run(run_name=experimentname)

    # We'll load the config file, make changes, and write a secondary file for experiments
    mlepConfig = io_utils.load_json('./MLEPServer.json')

    # Use the option if it was passed as a launch argument; otherwise keep the
    # value already present in the config file.
    for _item in mlepConfig["config"]:
        try:
            mlepConfig["config"][_item] = eval(
                _item
            )  # If the option was not given at launch, eval(_item) raises NameError
        except NameError:
            pass  # Fall back to the value from MLEPServer.json

    internalTimer = 0
    streamData = StreamLocal.StreamLocal(
        data_source="data/realisticStreamComb_2013_feb19.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source='data/collectedIrrelevant.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source='data/initialTrainingData.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    # Now we have the data
    MLEPLearner = MLEPDriftAdaptor.MLEPDriftAdaptor(config_dict=mlepConfig,
                                                    safe_mode=False)

    # Perform initial training
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0
    implicit_mistakes = 0.0
    implicit_count = 0
    explicit_mistakes = 0.0
    explicit_count = 0

    while streamData.next():
        if internalTimer < streamData.getObject().getValue("timestamp"):
            # Advance the adaptor's internal timer to the newest stream timestamp
            internalTimer = streamData.getObject().getValue("timestamp")
            MLEPLearner.updateTime(internalTimer)

        if streamData.getLabel() is None:
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  "implicit")
            if classification != streamData.getObject().getValue("true_label"):
                implicit_mistakes += 1.0
            implicit_count += 1
        else:
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  "explicit")
            if classification != streamData.getLabel():
                explicit_mistakes += 1.0
            explicit_count += 1
        totalCounter += 1

        if totalCounter % 100 == 0 and totalCounter > 0:
            # 2.00 is a sentinel meaning "no samples of this kind in this window"
            implicit_running_error = 2.00
            explicit_running_error = 2.00
            if implicit_count:
                implicit_running_error = implicit_mistakes / float(
                    implicit_count)
            if explicit_count:
                explicit_running_error = explicit_mistakes / float(
                    explicit_count)
            io_utils.std_flush(
                "Fin: %6i samples\t\texplicit error: %2.4f\t\t implicit error: %2.4f"
                %
                (totalCounter, explicit_running_error, implicit_running_error))

            implicit_mistakes = 0.0
            implicit_count = 0
            explicit_mistakes = 0.0
            explicit_count = 0

    MLEPLearner.shutdown()

    io_utils.std_flush(
        "\n-----------------------------\nCOMPLETED\n-----------------------------\n"
    )
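
The eval() loop above copies any launch argument whose name matches a config key. A minimal eval-free sketch of the same idea (apply_overrides and the launch_args mapping are hypothetical, not part of the project):

def apply_overrides(mlepConfig, launch_args):
    # launch_args: mapping of option name -> value supplied at launch (hypothetical)
    for key in mlepConfig["config"]:
        if key in launch_args and launch_args[key] is not None:
            mlepConfig["config"][key] = launch_args[key]
    return mlepConfig

# Usage sketch: only keys present in both the config and the launch arguments change.
# mlepConfig = apply_overrides(mlepConfig, {"weight_method": "performance"})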
Example 3
def main(runname, expstatslog, mlflowlog, earlystop):
    if not mlflowlog:
        # Replace mlflow with a no-op stand-in when mlflow logging is disabled
        global mlflow
        mlflow = dumbflow()
    if expstatslog:
        exp_status_write = open(EXP_STATUS, "a")
    else:
        exp_status_write = sys.stdout

    exp_status_write.write("\n\n\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write("  BEGINNING NEW EXECUTION (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write("  ------------------------" + "\n\n")
    # We are tracking drift adaptivity, specifically labeled (explicit) drift detection.

    # Set up explicit drift detection params
    explicit_drift_param_grid = {
        "allow_explicit_drift": [(True, "ExpDr")],
        "explicit_drift_class": [("LabeledDriftDetector", "LDD")],
        "explicit_drift_mode": [("PageHinkley", "PageHinkley"),
                                ("ADWIN", "ADWIN"), ("EDDM", "EDDM"),
                                ("DDM", "DDM")],
        "explicit_update_mode": [("all", "A"), ("errors", "E")],
        "allow_unlabeled_drift": [(False, "")],
        "allow_update_schedule": [(False, "")],
        "weight_method": [("unweighted", "U"), ("performance", "P")],
        "select_method": [("recent", "RR"), ("recent-new", "RN"),
                          ("recent-updates", "RU")],
        "filter_method": [("no-filter", "F"), ("top-k", "T"),
                          ("nearest", "N")],
        "kval": [(5, "5"), (10, "10")]
    }
    explicit_drift_params = ParameterGrid(explicit_drift_param_grid)
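    # ParameterGrid (presumably sklearn.model_selection.ParameterGrid) enumerates
    # the cross-product of the options above: 4 * 2 * 2 * 3 * 3 * 2 = 288
    # combinations with this grid.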

    for param_set in explicit_drift_params:
        # Each param_set defines one experiment; skip the "all" update mode
        if param_set["explicit_update_mode"][0] == "all":
            continue
        # Load up configuration file
        mlepConfig = io_utils.load_json('./MLEPServer.json')

        # Update config file and generate an experiment name
        experiment_name = ''
        for _param in param_set:
            if param_set[_param][1] != "":
                experiment_name += param_set[_param][1] + '-'
            mlepConfig["config"][_param] = param_set[_param][0]
        experiment_name = experiment_name[:-1]
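        # Hypothetical illustration: PageHinkley + "errors" + unweighted + "recent"
        # + no-filter + k=5 joins the abbreviations into something like
        # "ExpDr-LDD-PageHinkley-E-F-5-RR-U" (exact order follows the key order
        # of param_set).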

        # Now we have the experimental config and name for this run
        exp_status_write.write("--STATUS-- " + experiment_name + "   ")
        exp_status_write.flush()
        try:
            runExperiment(runname, mlepConfig, experiment_name, expstatslog,
                          earlystop)
            exp_status_write.write("SUCCESS\n")
        except Exception as e:
            exp_status_write.write("FAILED\n")
            exp_status_write.write(traceback.format_exc())
            exp_status_write.write(str(e))
            exp_status_write.write("\n")
            exp_status_write.flush()
            mlflow.end_run()
        exp_status_write.flush()

    exp_status_write.write("\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write("  FINISHED EXECUTION OF (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write("  ------------------------" + "\n\n")
    exp_status_write.close()
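
A hypothetical invocation sketch for this driver (EXP_STATUS, dumbflow and runExperiment are assumed to be defined elsewhere in the module):

if __name__ == "__main__":
    # Append status lines to the EXP_STATUS file, use the real mlflow, no early stop.
    main("drift-sweep", expstatslog=True, mlflowlog=True, earlystop=False)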
Example 4
def main(experimentname, allow_explicit_drift, explicit_drift_class,
         explicit_drift_mode, explicit_update_mode, allow_unlabeled_drift,
         unlabeled_drift_class, unlabeled_drift_mode, unlabeled_update_mode,
         allow_update_schedule, update_schedule, schedule_update_mode,
         weight_method, select_method, filter_method, kval, update_prune):

    # Tracking URI -- note: hard-coding credentials like this is not secure
    # mlflow.set_tracking_uri("mysql://*****:*****@127.0.0.1:3306/mlflow_runs")
    # Where to save data:
    # mlflow.start_run(run_name=experimentname)

    # We'll load the config file, make changes, and write a secondary file for experiments
    mlepConfig = io_utils.load_json('./MLEPServer.json')

    # As in Example 2: use launch arguments where given, otherwise keep the
    # values from MLEPServer.json
    for _item in mlepConfig["config"]:
        try:
            mlepConfig["config"][_item] = eval(_item)
        except NameError:
            pass

    # Log relevant details
    """
    for _key in mlepConfig["config"]:
        # possible error
        if _key != "drift_metrics":
            mlflow.log_param(_key, mlepConfig["config"][_key])
    """

    internalTimer = 0
    streamData = StreamLocal.StreamLocal(
        data_source="data/2014_to_dec2018.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source='data/collectedIrrelevant.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source='data/initialTrainingData.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    # Now we have the data
    MLEPLearner = MLEPServer.MLEPLearningServer(config_dict=mlepConfig,
                                                safe_mode=False)

    # Perform initial training
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0.0
    mistakes = []
    while streamData.next():
        if internalTimer < streamData.getObject().getValue("timestamp"):
            internalTimer = streamData.getObject().getValue("timestamp")
            MLEPLearner.updateTime(internalTimer)

        classification = MLEPLearner.classify(streamData.getObject())
        totalCounter += 1.0
        if classification != streamData.getLabel():
            mistakes.append(1.0)
        else:
            mistakes.append(0.0)
        if totalCounter % 1000 == 0 and totalCounter > 0.0:
            io_utils.std_flush("Completed", int(totalCounter),
                               " samples, with running error (past 100) of",
                               sum(mistakes[-100:]) / 100.0)
        if totalCounter % 100 == 0 and totalCounter > 0.0:
            running_error = sum(mistakes[-100:]) / 100.0
            io_utils.std_flush("\tCompleted", int(totalCounter),
                               " samples, with running error (past 100) of",
                               running_error)
            #mlflow.log_metric("running_err"+str(int(totalCounter/100)), running_error)

    MLEPLearner.shutdown()

    io_utils.std_flush(
        "\n-----------------------------\nCOMPLETED\n-----------------------------\n"
    )
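
The running error above is the mean of the last 100 entries of an ever-growing mistakes list. A minimal standalone sketch of the same bookkeeping with bounded memory (collections.deque is standard library; the record helper is hypothetical):

from collections import deque

window = deque(maxlen=100)  # keeps only the 100 most recent outcomes

def record(is_mistake):
    # Append the newest outcome and return the error rate over the current window.
    window.append(1.0 if is_mistake else 0.0)
    return sum(window) / len(window)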