def main():
    io_utils.std_flush("Initialized at %s" % time_utils.readable_time("%H:%M:%S"))

    # Set up the word2vec (GoogleNews) encoder
    _encoder = w2vGoogleNews.w2vGoogleNews()
    _encoder.setup()
    io_utils.std_flush("Set up encoder at %s" % time_utils.readable_time("%H:%M:%S"))

    # Load the training data from local JSON
    trainingData = BatchedLocal.BatchedLocal(
        data_source='./data/pure_new_dataset.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()
    io_utils.std_flush("Loaded training data at %s" % time_utils.readable_time("%H:%M:%S"))

    # Encode the raw text into feature vectors
    X_train = _encoder.batchEncode(trainingData.getData())
    y_train = trainingData.getLabels()
    io_utils.std_flush("Batch encoded data at %s" % time_utils.readable_time("%H:%M:%S"))

    model = kerasComplex.kerasComplex()
    io_utils.std_flush("Generated model at %s" % time_utils.readable_time("%H:%M:%S"))

    io_utils.std_flush("Starting training at %s" % time_utils.readable_time("%H:%M:%S"))
    precision, recall, score = model.fit_and_test(X_train, y_train)
    io_utils.std_flush(
        "Completed training with precision: %f\trecall: %f\tscore: %f"
        % (precision, recall, score))

    pdb.set_trace()  # debugging breakpoint: inspect the trained model interactively
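# Presumed standalone entry point for the script above (not shown in the
# original). The modules it uses (io_utils, time_utils, w2vGoogleNews,
# kerasComplex, BatchedLocal, PseudoJsonTweets, pdb) are project-local;
# their exact package paths are assumptions, so imports are omitted here.
if __name__ == "__main__":
    main()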
def main(experimentname):
    # The results file must be opened here; it is written to after the stream loop
    f_write = open(experimentname + ".txt", "a")

    # Set up the base config
    mlepConfig = io_utils.load_json("./MLEPServer.json")

    # Update as per experiment requirements
    mlepConfig["config"]["weight_method"] = "unweighted"
    mlepConfig["config"]["select_method"] = "recent"
    mlepConfig["config"]["filter_select"] = "nearest"

    # Note: we are not updating the internal timer in this experiment
    streamData = StreamLocal.StreamLocal(
        data_source="./data/realisticStreamComb_2013_feb19.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source="./data/collectedIrrelevant.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source="./data/initialTrainingData.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    MLEPLearner = MLEPModelDriftAdaptor.MLEPModelDriftAdaptor(
        config_dict=mlepConfig)
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0
    implicit_mistakes = 0.0
    implicit_count = 0
    explicit_mistakes = 0.0
    explicit_count = 0
    implicit_error_rate = []
    explicit_error_rate = []

    while streamData.next():
        if streamData.getLabel() is None:
            # Unlabeled sample: classify implicitly, score against the true label
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  classify_mode="implicit")
            if classification != streamData.getObject().getValue("true_label"):
                implicit_mistakes += 1.0
            implicit_count += 1
        else:
            # Labeled sample: classify explicitly, score against the given label
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  classify_mode="explicit")
            if classification != streamData.getLabel():
                explicit_mistakes += 1.0
            explicit_count += 1
        totalCounter += 1

        if totalCounter % 100 == 0 and totalCounter > 0.0:
            # 2.00 is a sentinel for "no samples of this kind in the window"
            implicit_running_error = 2.00
            explicit_running_error = 2.00
            if implicit_count:
                implicit_running_error = implicit_mistakes / float(implicit_count)
            if explicit_count:
                explicit_running_error = explicit_mistakes / float(explicit_count)
            io_utils.std_flush(
                "Fin: %6i samples\t\texplicit error: %2.4f\t\t implicit error: %2.4f"
                % (totalCounter, explicit_running_error, implicit_running_error))
            implicit_error_rate.append(implicit_running_error)
            explicit_error_rate.append(explicit_running_error)
            # Reset the window counters
            implicit_mistakes = 0.0
            implicit_count = 0
            explicit_mistakes = 0.0
            explicit_count = 0

    f_write.write(experimentname + ",implicit," +
                  ",".join([str(item) for item in implicit_error_rate]) + "\n")
    f_write.write(experimentname + ",explicit," +
                  ",".join([str(item) for item in explicit_error_rate]) + "\n")
    f_write.close()
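# A plausible command-line wrapper for main(experimentname) above; the
# argument name and description are assumptions, not confirmed by this file:
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Run a single drift-adaptation experiment.")
    parser.add_argument("experimentname",
                        help="prefix for the <experimentname>.txt results file")
    args = parser.parse_args()
    main(args.experimentname)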
def main(runname, expstatslog, mlflowlog, earlystop):
    if not mlflowlog:
        # Replace mlflow with a no-op stand-in when mlflow logging is disabled
        global mlflow
        mlflow = dumbflow()
    if expstatslog:
        exp_status_write = open(EXP_STATUS, "a")
    else:
        exp_status_write = sys.stdout

    exp_status_write.write("\n\n\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write(" BEGINNING NEW EXECUTION (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write(" ------------------------" + "\n\n")

    # We are tracking drift adaptivity, namely labeled drift detection.
    # Set up the explicit drift detection parameter grid; each entry is a
    # (config value, experiment-name tag) pair.
    explicit_drift_param_grid = {
        "allow_explicit_drift": [(True, "ExpDr")],
        "explicit_drift_class": [("LabeledDriftDetector", "LDD")],
        "explicit_drift_mode": [("PageHinkley", "PageHinkley"), ("ADWIN", "ADWIN"),
                                ("EDDM", "EDDM"), ("DDM", "DDM")],
        "explicit_update_mode": [("all", "A"), ("errors", "E")],
        "allow_unlabeled_drift": [(False, "")],
        "allow_update_schedule": [(False, "")],
        "weight_method": [("unweighted", "U"), ("performance", "P")],
        "select_method": [("recent", "RR"), ("recent-new", "RN"),
                          ("recent-updates", "RU")],
        "filter_method": [("no-filter", "F"), ("top-k", "T"), ("nearest", "N")],
        "kval": [(5, "5"), (10, "10")]
    }
    explicit_drift_params = ParameterGrid(explicit_drift_param_grid)

    for param_set in explicit_drift_params:
        # Each param_set is one experiment
        if param_set["explicit_update_mode"][0] == "all":
            continue

        # Load up the configuration file
        mlepConfig = io_utils.load_json('./MLEPServer.json')

        # Update the config and generate an experiment name from the tags
        experiment_name = ''
        for _param in param_set:
            if param_set[_param][1] != "":
                experiment_name += param_set[_param][1] + '-'
            mlepConfig["config"][_param] = param_set[_param][0]
        experiment_name = experiment_name[:-1]  # trim the trailing hyphen

        # Now we have the experimental config we can use for running an experiment
        exp_status_write.write("--STATUS-- " + experiment_name + " ")
        exp_status_write.flush()

        try:
            runExperiment(runname, mlepConfig, experiment_name, expstatslog,
                          earlystop)
            exp_status_write.write("SUCCESS\n")
        except Exception as e:
            exp_status_write.write("FAILED\n")
            exp_status_write.write(traceback.format_exc())
            exp_status_write.write(str(e))
            exp_status_write.write("\n")
            exp_status_write.flush()
            mlflow.end_run()  # close the run left open by the failed experiment
        exp_status_write.flush()

    exp_status_write.write("\n\n")
    exp_status_write.write("--------------------------")
    exp_status_write.write(" FINISHED EXECUTION OF (" + runname + ") AT " +
                           str(time_utils.readable_time("%Y-%m-%d %H:%M:%S")))
    exp_status_write.write(" ------------------------" + "\n\n")
    exp_status_write.close()
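# dumbflow is referenced above but defined elsewhere in the project. A minimal
# sketch of a no-op mlflow stand-in, inferred from how it is used here (the
# method set below is an assumption):
class dumbflow:
    """Swallows mlflow calls so the same code runs with logging disabled."""
    def set_tracking_uri(self, uri):
        pass
    def start_run(self, run_name=None):
        pass
    def end_run(self):
        pass
    def log_param(self, key, value):
        pass
    def log_metric(self, key, value):
        pass
    def log_artifact(self, path):
        pass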
def runExperiment(runname, mlepConfig, experiment_name, expstatuslog, earlystop):
    # Set up mlflow access
    # mlflow.set_tracking_uri -- not needed, defaults to mlruns
    # mlflow.create_experiment -- needs an experiment name. Should I programmatically create one, or go by timestamp?
    if expstatuslog:
        sys.stdout = open(LOG_FILE, "w")
    else:
        sys.stdout = dumbwrite()  # silence stdout entirely

    mlflow.set_tracking_uri("mysql://*****:*****@127.0.0.1:3306/mlflow_runs")
    mlflow.start_run(run_name=runname)

    # Log relevant config details
    for _key in mlepConfig["config"]:
        # "drift_metrics" is skipped because logging it can raise an error
        if _key != "drift_metrics":
            mlflow.log_param(_key, mlepConfig["config"][_key])
    mlflow.log_param("experiment_name", experiment_name)

    internalTimer = 0
    streamData = StreamLocal.StreamLocal(
        data_source="data/2014_to_dec2018.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source='data/collectedIrrelevant.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source='data/initialTrainingData.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    # Now we have the data
    MLEPLearner = MLEPServer.MLEPLearningServer(config_dict=mlepConfig,
                                                safe_mode=False)

    # Perform initial training
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0.0
    mistakes = []
    _earlystopcond = False

    while streamData.next() and not _earlystopcond:
        # Advance the learner's internal clock to the sample's timestamp
        if internalTimer < streamData.getObject().getValue("timestamp"):
            internalTimer = streamData.getObject().getValue("timestamp")
            MLEPLearner.updateTime(internalTimer)

        classification = MLEPLearner.classify(streamData.getObject())
        totalCounter += 1.0
        if classification != streamData.getLabel():
            mistakes.append(1.0)
        else:
            mistakes.append(0.0)

        if totalCounter % 1000 == 0 and totalCounter > 0.0:
            io_utils.std_flush("Completed", int(totalCounter),
                               " samples, with running error (past 100) of",
                               sum(mistakes[-100:]) / 100.0)
        if earlystop and totalCounter == earlystop:
            _earlystopcond = True
        if totalCounter % 100 == 0 and totalCounter > 0.0:
            running_error = sum(mistakes[-100:]) / 100.0
            mlflow.log_metric("running_err" + str(int(totalCounter / 100)),
                              running_error)

    MLEPLearner.shutdown()
    io_utils.std_flush(
        "\n-----------------------------\nCOMPLETED\n-----------------------------\n"
    )

    mlflow.log_param("total_samples", totalCounter)
    if expstatuslog:
        mlflow.log_artifact(LOG_FILE)
    mlflow.log_param("run_complete", True)
    mlflow.end_run()

    # Restore stdout (closing the log file first if one was opened)
    if expstatuslog:
        sys.stdout.close()
    sys.stdout = sys.__stdout__
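# dumbwrite is likewise defined elsewhere; a minimal sketch of a file-like
# sink that discards writes, consistent with its use above as a sys.stdout
# replacement (an assumption):
class dumbwrite:
    def write(self, text):
        return len(text)  # report the write as successful, but discard it
    def flush(self):
        pass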
def main(experimentname, allow_explicit_drift, explicit_drift_class,
         explicit_drift_mode, explicit_update_mode, allow_unlabeled_drift,
         unlabeled_drift_class, unlabeled_drift_mode, unlabeled_update_mode,
         allow_update_schedule, update_schedule, schedule_update_mode,
         weight_method, select_method, filter_method, kval, update_prune):

    # Tracking URI -- not secure, but acceptable for local use
    # mlflow.set_tracking_uri("mysql://*****:*****@127.0.0.1:3306/mlflow_runs")
    # Where to save data:
    # mlflow.start_run(run_name=experimentname)

    # Load the config file and apply the launch-time overrides for this experiment
    mlepConfig = io_utils.load_json('./MLEPServer.json')

    # Use the option if given in args, else keep the one present in the config
    for _item in mlepConfig["config"]:
        try:
            # eval(_item) resolves the config key to the same-named function
            # argument; if the option was not given during launch, eval raises
            # NameError and the value from MLEPServer.json is kept
            mlepConfig["config"][_item] = eval(_item)
        except NameError:
            pass

    internalTimer = 0
    streamData = StreamLocal.StreamLocal(
        data_source="data/realisticStreamComb_2013_feb19.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source='data/collectedIrrelevant.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source='data/initialTrainingData.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    # Now we have the data
    MLEPLearner = MLEPDriftAdaptor.MLEPDriftAdaptor(config_dict=mlepConfig,
                                                    safe_mode=False)

    # Perform initial training
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0
    implicit_mistakes = 0.0
    implicit_count = 0
    explicit_mistakes = 0.0
    explicit_count = 0

    while streamData.next():
        # Advance the learner's internal clock to the sample's timestamp
        if internalTimer < streamData.getObject().getValue("timestamp"):
            internalTimer = streamData.getObject().getValue("timestamp")
            MLEPLearner.updateTime(internalTimer)

        if streamData.getLabel() is None:
            # Unlabeled sample: classify implicitly, score against the true label
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  "implicit")
            if classification != streamData.getObject().getValue("true_label"):
                implicit_mistakes += 1.0
            implicit_count += 1
        else:
            # Labeled sample: classify explicitly, score against the given label
            classification = MLEPLearner.classify(streamData.getObject(),
                                                  "explicit")
            if classification != streamData.getLabel():
                explicit_mistakes += 1.0
            explicit_count += 1
        totalCounter += 1

        if totalCounter % 100 == 0 and totalCounter > 0.0:
            # 2.00 is a sentinel for "no samples of this kind in the window"
            implicit_running_error = 2.00
            explicit_running_error = 2.00
            if implicit_count:
                implicit_running_error = implicit_mistakes / float(implicit_count)
            if explicit_count:
                explicit_running_error = explicit_mistakes / float(explicit_count)
            io_utils.std_flush(
                "Fin: %6i samples\t\texplicit error: %2.4f\t\t implicit error: %2.4f"
                % (totalCounter, explicit_running_error, implicit_running_error))
            # Reset the window counters
            implicit_mistakes = 0.0
            implicit_count = 0
            explicit_mistakes = 0.0
            explicit_count = 0

    MLEPLearner.shutdown()
    io_utils.std_flush(
        "\n-----------------------------\nCOMPLETED\n-----------------------------\n"
    )
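# A self-contained illustration of the eval-based override used above; the
# function name, arguments, and config values below are hypothetical:
def demo_override(weight_method, kval):
    config = {"weight_method": "performance", "kval": 10, "select_method": "recent"}
    for _item in config:
        try:
            # eval("weight_method") resolves to demo_override's argument
            config[_item] = eval(_item)
        except NameError:
            pass  # "select_method" has no matching argument, so the default survives
    return config

# demo_override("unweighted", 5) returns
# {"weight_method": "unweighted", "kval": 5, "select_method": "recent"}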
def main(experimentname, allow_explicit_drift, explicit_drift_class,
         explicit_drift_mode, explicit_update_mode, allow_unlabeled_drift,
         unlabeled_drift_class, unlabeled_drift_mode, unlabeled_update_mode,
         allow_update_schedule, update_schedule, schedule_update_mode,
         weight_method, select_method, filter_method, kval, update_prune):

    # Tracking URI -- not secure, but acceptable for local use
    # mlflow.set_tracking_uri("mysql://*****:*****@127.0.0.1:3306/mlflow_runs")
    # Where to save data:
    # mlflow.start_run(run_name=experimentname)

    # Load the config file and apply the launch-time overrides for this experiment
    mlepConfig = io_utils.load_json('./MLEPServer.json')
    for _item in mlepConfig["config"]:
        try:
            # Same eval-based override as elsewhere: use the launch argument
            # if given, else keep the value from MLEPServer.json
            mlepConfig["config"][_item] = eval(_item)
        except NameError:
            pass

    # Log relevant details (disabled)
    """
    for _key in mlepConfig["config"]:
        # "drift_metrics" is skipped because logging it can raise an error
        if _key != "drift_metrics":
            mlflow.log_param(_key, mlepConfig["config"][_key])
    """

    internalTimer = 0
    streamData = StreamLocal.StreamLocal(
        data_source="data/2014_to_dec2018.json",
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)

    augmentation = BatchedLocal.BatchedLocal(
        data_source='data/collectedIrrelevant.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    augmentation.load_by_class()

    trainingData = BatchedLocal.BatchedLocal(
        data_source='data/initialTrainingData.json',
        data_mode="single",
        data_set_class=PseudoJsonTweets.PseudoJsonTweets)
    trainingData.load()

    # Now we have the data
    MLEPLearner = MLEPServer.MLEPLearningServer(config_dict=mlepConfig,
                                                safe_mode=False)

    # Perform initial training
    MLEPLearner.initialTrain(traindata=trainingData)
    io_utils.std_flush("Completed training at", time_utils.readable_time())
    MLEPLearner.addAugmentation(augmentation)
    io_utils.std_flush("Added augmentation at", time_utils.readable_time())

    totalCounter = 0.0
    mistakes = []

    while streamData.next():
        # Advance the learner's internal clock to the sample's timestamp
        if internalTimer < streamData.getObject().getValue("timestamp"):
            internalTimer = streamData.getObject().getValue("timestamp")
            MLEPLearner.updateTime(internalTimer)

        classification = MLEPLearner.classify(streamData.getObject())
        totalCounter += 1.0
        if classification != streamData.getLabel():
            mistakes.append(1.0)
        else:
            mistakes.append(0.0)

        if totalCounter % 1000 == 0 and totalCounter > 0.0:
            io_utils.std_flush("Completed", int(totalCounter),
                               " samples, with running error (past 100) of",
                               sum(mistakes[-100:]) / 100.0)
        if totalCounter % 100 == 0 and totalCounter > 0.0:
            running_error = sum(mistakes[-100:]) / 100.0
            io_utils.std_flush("\tCompleted", int(totalCounter),
                               " samples, with running error (past 100) of",
                               running_error)
            # mlflow.log_metric("running_err" + str(int(totalCounter / 100)), running_error)

    MLEPLearner.shutdown()
    io_utils.std_flush(
        "\n-----------------------------\nCOMPLETED\n-----------------------------\n"
    )
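# If the mlflow calls above are re-enabled, reading the tracking URI from the
# environment avoids hard-coding database credentials. A sketch; the
# environment variable and run name here are illustrative:
import os
import mlflow

tracking_uri = os.environ.get("MLFLOW_TRACKING_URI")
if tracking_uri:
    mlflow.set_tracking_uri(tracking_uri)
with mlflow.start_run(run_name="example-run"):
    mlflow.log_metric("running_err", 0.04)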