def run_all(self):
    arg_parse = self.arg_parse()
    parsed_cli = arg_parse.parse_args()
    parsed_cli_dict = parsed_cli.__dict__
    logging.debug("command line arguments: %s", parsed_cli_dict)
    parameter_assignments, num_epochs = self.get_run_arguments(parsed_cli_dict)
    training_data, validation_data, augmented_data_directory = self.load_datasets(
        parsed_cli_arguments=parsed_cli_dict, dataset_parameters=parameter_assignments)
    sigopt.log_metadata('augmented_directory_name', augmented_data_directory)
    logging.info("augmentation data directory at: {}".format(augmented_data_directory))
    parameter_assignments.update(parsed_cli_dict)
    self.run(parameter_assignments, num_epochs, training_data, validation_data)
    if (parsed_cli_dict[AugmentCLI.STORE_TO_DISK.value] is False
            or parsed_cli_dict[AugmentCLI.STORE_TO_S3.value] is True):
        logging.info("deleting augmented data directory stored on disk: {}".format(augmented_data_directory))
        shutil.rmtree(augmented_data_directory)
Example #2
def train_keras_model(dataset):
    tf.debugging.set_log_device_placement(True)

    # set tf seed
    seed_value = sigopt.get_parameter('random_seed', default=1)
    tf.compat.v1.set_random_seed(seed_value)

    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(
        dataset)
    scaled_trainX, scaled_testX = load_transform_data.scale_dataset(
        trainX, testX)

    # logging to sigopt Run
    sigopt.log_model("keras.Sequential")  # model_keras.__class__
    sigopt.log_dataset('Scaled Anomaly detection')
    sigopt.log_metadata('Training Records', len(scaled_trainX))
    sigopt.log_metadata('Testing Records', len(scaled_testX))
    sigopt.log_metadata("Platform", platform.uname())

    learning_rate = sigopt.get_parameter('learning_rate', default=0.01)
    loss_fn = sigopt.get_parameter('loss_function',
                                   default='binary_crossentropy')
    batch_size = sigopt.get_parameter('batch_size', default=4096)
    sigopt.get_parameter('layers', default=3)  # track the number of layers in the SigOpt Run
    num_epochs = sigopt.get_parameter('epochs', default=6)

    keras_model = KerasModel()
    model_keras = keras_model.create_model(trainX)
    model_keras.compile(optimizer=Adam(lr=learning_rate),
                        loss=loss_fn,
                        metrics=[tf.keras.metrics.AUC()])

    model_keras.fit(
        scaled_trainX,
        trainY,
        batch_size=batch_size,
        epochs=num_epochs,
        callbacks=[CheckpointCB()],
        validation_data=(scaled_testX, testY),
    )

    # Collect model metrics
    start = time.perf_counter()
    probability = model_keras.predict(scaled_testX).flatten()
    prediction = probability > 0.5

    sigopt.log_metric('Inference Time', time.perf_counter() - start)
    log_inference_metrics(prediction, probability, testY, testX)
Example #3
def train_xgboost_model(dataset, random_state=1):
    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(
        dataset)

    # model architecture
    sigopt.log_model("XGBClassifier")  # model.__class__
    sigopt.log_dataset('Unscaled')
    sigopt.log_metadata('Training Records', len(trainX))
    sigopt.log_metadata('Testing Records', len(testX))
    sigopt.log_metadata("Platform", platform.uname())

    parameters = {
        'objective': 'binary:logistic',
        'learning_rate': sigopt.get_parameter('learning_rate', default=0.3),
        'n_estimators': sigopt.get_parameter('n_estimators', default=20),
        'max_depth': sigopt.get_parameter('max_depth', default=5),
        'gamma': sigopt.get_parameter('gamma', default=0),
        'min_child_weight': sigopt.get_parameter('min_child_weight',
                                                 default=1),
        'random_state': random_state,
        'importance_type': 'gain',
        'missing': None,
        'verbosity': 2
    }

    model = XGBClassifier(**parameters)

    modelfit = model.fit(trainX, trainY)

    # Collect model metrics
    start = time.perf_counter()
    prediction = modelfit.predict(testX)
    sigopt.log_metric("Inference Time", time.perf_counter() - start)
    probability = modelfit.predict_proba(testX)[:, 1]
    log_inference_metrics(prediction, probability, testY, testX)
Example #4
def execute_model(run):
  # train a model
  # evaluate a model
  # return the accuracy
  raise NotImplementedError("Return a number, which represents your metric for this run")

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--budget', type=int, default=20)
  parser.add_argument('--client_token', required=True, help="Find your CLIENT_TOKEN at https://sigopt.com/tokens")
  the_args = parser.parse_args()

  # Descriptor of what kind of dataset you are modeling
  sigopt.log_dataset("Example dataset")
  # Useful for keeping track of where you got the data
  sigopt.log_metadata(key="Dataset Source", value="Example Source")
  # e.g. sklearn, xgboost, etc.
  sigopt.log_metadata(key="Feature Pipeline Name", value="Example Pipeline")
  # What kind of learning you are attempting
  sigopt.log_model("Example Model Technique")
  # Create an experiment with one parameter, x
  experiment = sigopt.create_experiment(
    name="Basic Test experiment",
    parameters=[{'name': 'x', 'bounds': {'max': 50.0, 'min': 0.0}, 'type': 'double'}],
    metrics=[{"name":"holdout_accuracy", "objective":"maximize"}],
    parallel_bandwidth=1,
    budget=the_args.budget,
  )
  print('Created experiment id {0}'.format(experiment.id))

  # In a loop: receive a suggestion, evaluate the metric, report an observation
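  # The example is truncated here. A minimal sketch of that loop, assuming the
  # SigOpt AI Experiments client (experiment.loop() yields runs; the metric name
  # must match the experiment definition above):
  for run in experiment.loop():
    with run:
      holdout_accuracy = execute_model(run)
      run.log_metric("holdout_accuracy", holdout_accuracy)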
Example #5
    diff = df["diff"].to_numpy()

    if des in ("vae", "auto"):
        temp = df["mat"].tolist()
        mat = [i.flatten() for i in temp]
    else:
        mat = df["mat"].to_numpy()

    if (sigopt_tf == True):
        sigopt.log_dataset(name=dir_temp + " " + des)
        sigopt.log_model(type=algo)
        sigopt.log_metadata('input_features', np.shape(mat[0]))
    try:
        mat = preprocessing.scale(np.array(mat))
    except Exception:
        # retry after converting mat to a plain Python list
        mat = list(mat)
        mat = preprocessing.scale(np.array(mat))

    print("Using " + des + " as the descriptor")
    print("Matrix Dimensions: {0}".format(np.shape(mat)))

    # finish optimization
    if (homo_tf == True):
        des = des + "_homo"
        print(".........................H**O..................")
        scale_HOMO = (np.max(H**O) - np.min(H**O))
        H**O = (H**O - np.min(H**O)) / scale_HOMO