def run_all(self):
    arg_parse = self.arg_parse()
    parsed_cli = arg_parse.parse_args()
    parsed_cli_dict = parsed_cli.__dict__
    logging.debug("command line arguments: %s", parsed_cli_dict)

    parameter_assignments, num_epochs = self.get_run_arguments(parsed_cli_dict)
    training_data, validation_data, augmented_data_directory = self.load_datasets(
        parsed_cli_arguments=parsed_cli_dict,
        dataset_parameters=parameter_assignments)

    sigopt.log_metadata('augmented_directory_name', augmented_data_directory)
    logging.info("augmentation data directory at: {}".format(augmented_data_directory))

    parameter_assignments.update(parsed_cli_dict)
    self.run(parameter_assignments, num_epochs, training_data, validation_data)

    # Clean up the augmented data unless it was explicitly kept on disk (or was pushed to S3)
    if parsed_cli_dict[AugmentCLI.STORE_TO_DISK.value] is False or \
            parsed_cli_dict[AugmentCLI.STORE_TO_S3.value] is True:
        logging.info("deleting augmented data directory stored on disk: {}".format(augmented_data_directory))
        shutil.rmtree(augmented_data_directory)
def train_keras_model(dataset):
    tf.debugging.set_log_device_placement(True)

    # set tf seed
    seed_value = sigopt.get_parameter('random_seed', default=1)
    tf.compat.v1.set_random_seed(seed_value)

    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(dataset)
    scaled_trainX, scaled_testX = load_transform_data.scale_dataset(trainX, testX)

    # logging to sigopt Run
    sigopt.log_model("keras.Sequential")  # model_keras.__class__
    sigopt.log_dataset('Scaled Anomaly detection')
    sigopt.log_metadata('Training Records', len(scaled_trainX))
    sigopt.log_metadata('Testing Records', len(scaled_testX))
    sigopt.log_metadata("Platform", platform.uname())

    learning_rate = sigopt.get_parameter('learning_rate', default=0.01)
    loss_fn = sigopt.get_parameter('loss_function', default='binary_crossentropy')
    batch_size = sigopt.get_parameter('batch_size', default=4096)
    sigopt.get_parameter('layers', default=3)  # tracking number of layers on the SigOpt Run
    num_epochs = sigopt.get_parameter('epochs', default=6)

    keras_model = KerasModel()
    model_keras = keras_model.create_model(trainX)
    model_keras.compile(optimizer=Adam(lr=learning_rate),
                        loss=loss_fn,
                        metrics=[tf.keras.metrics.AUC()])
    model_keras.fit(
        scaled_trainX,
        trainY,
        batch_size=batch_size,
        epochs=num_epochs,
        callbacks=[CheckpointCB()],
        validation_data=(scaled_testX, testY),
    )

    # Collect model metrics
    start = time.perf_counter()
    probability = model_keras.predict(scaled_testX).flatten()
    prediction = probability > 0.5
    sigopt.log_metric('Inference Time', time.perf_counter() - start)
    log_inference_metrics(prediction, probability, testY, testX)
def train_xgboost_model(dataset, random_state=1):
    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(dataset)

    # logging to sigopt Run
    sigopt.log_model("XGBClassifier")
    sigopt.log_dataset('Unscaled')
    sigopt.log_metadata('Training Records', len(trainX))
    sigopt.log_metadata('Testing Records', len(testX))
    sigopt.log_metadata("Platform", platform.uname())

    parameters = {
        'objective': 'binary:logistic',
        'learning_rate': sigopt.get_parameter('learning_rate', default=0.3),
        'n_estimators': sigopt.get_parameter('n_estimators', default=20),
        'max_depth': sigopt.get_parameter('max_depth', default=5),
        'gamma': sigopt.get_parameter('gamma', default=0),
        'min_child_weight': sigopt.get_parameter('min_child_weight', default=1),
        'random_state': random_state,
        'importance_type': 'gain',
        'missing': None,
        'verbosity': 2,
    }
    model = XGBClassifier(**parameters)
    modelfit = model.fit(trainX, trainY)

    # Collect model metrics
    start = time.perf_counter()
    prediction = modelfit.predict(testX)
    sigopt.log_metric("Inference Time", time.perf_counter() - start)
    probability = modelfit.predict_proba(testX)[:, 1]
    log_inference_metrics(prediction, probability, testY, testX)
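Both training functions delegate metric reporting to log_inference_metrics, whose definition is not shown here. A minimal sketch of what such a helper could look like, assuming scikit-learn is available and that standard classification metrics are the ones being tracked (the actual implementation may log a different set):

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score


def log_inference_metrics(prediction, probability, testY, testX):
    # Hypothetical sketch: score the hold-out predictions and report
    # each metric to the active SigOpt run.
    sigopt.log_metric('accuracy', accuracy_score(testY, prediction))
    sigopt.log_metric('precision', precision_score(testY, prediction))
    sigopt.log_metric('recall', recall_score(testY, prediction))
    sigopt.log_metric('f1', f1_score(testY, prediction))
    sigopt.log_metric('auc', roc_auc_score(testY, probability))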
import argparse

import sigopt


def execute_model(run):
    # train a model
    # evaluate a model
    # return the accuracy
    raise NotImplementedError("Return a number, which represents your metric for this run")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--budget', type=int, default=20)
    parser.add_argument('--client_token', required=True,
                        help="Find your CLIENT_TOKEN at https://sigopt.com/tokens")
    the_args = parser.parse_args()

    # Descriptor of what kind of dataset you are modeling
    sigopt.log_dataset("Example dataset")
    # Useful for keeping track of where you got the data
    sigopt.log_metadata(key="Dataset Source", value="Example Source")
    # e.g. sklearn, xgboost, etc.
    sigopt.log_metadata(key="Feature Pipeline Name", value="Example Pipeline")
    # What kind of learning you are attempting
    sigopt.log_model("Example Model Technique")

    # Create an experiment with one parameter, x
    experiment = sigopt.create_experiment(
        name="Basic Test experiment",
        parameters=[{'name': 'x', 'bounds': {'max': 50.0, 'min': 0.0}, 'type': 'double'}],
        metrics=[{"name": "holdout_accuracy", "objective": "maximize"}],
        parallel_bandwidth=1,
        budget=the_args.budget,
    )
    print('Created experiment id {0}'.format(experiment.id))

    # In a loop: receive a suggestion, evaluate the metric, report an observation
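The final comment describes the optimization loop itself. A minimal sketch of that loop, assuming a sigopt client version in which create_experiment returns an experiment object exposing a loop() iterator of run contexts; execute_model is the placeholder defined above and must be filled in before this will produce meaningful results:

    # For each budgeted run: open a run context, evaluate the model with the
    # suggested parameters, and report the metric back to SigOpt.
    for run in experiment.loop():
        with run:
            holdout_accuracy = execute_model(run)
            run.log_metric("holdout_accuracy", holdout_accuracy)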
diff = df["diff"].to_numpy() if (des == "vae"): temp = df["mat"].tolist() mat = list([i.flatten() for i in temp]) elif (des == "auto"): temp = df["mat"].tolist() mat = list([i.flatten() for i in temp]) else: mat = df["mat"].to_numpy() if (sigopt_tf == True): sigopt.log_dataset(name=dir_temp + " " + des) sigopt.log_model(type=algo) sigopt.log_metadata('input_features', np.shape(mat[0])) try: mat = preprocessing.scale(np.array(mat)) except: mat = list(mat) mat = preprocessing.scale(np.array(mat)) print("Using " + des + " as the descriptor") print("Matrix Dimensions: {0}".format(np.shape(mat))) # finish optimization if (homo_tf == True): des = des + "_homo" print(".........................H**O..................") scale_HOMO = (np.max(H**O) - np.min(H**O)) H**O = (H**O - np.min(H**O)) / scale_HOMO