Example #1
# Note: LoadTransformData, KerasModel, CheckpointCB and log_inference_metrics
# are project-local helpers defined elsewhere in the source repository.
import platform
import time

import sigopt
import tensorflow as tf
from tensorflow.keras.optimizers import Adam


def train_keras_model(dataset):
    # log which device (CPU/GPU) each TF op is placed on
    tf.debugging.set_log_device_placement(True)

    # set tf seed
    seed_value = sigopt.get_parameter('random_seed', default=1)
    tf.compat.v1.set_random_seed(seed_value)

    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(
        dataset)
    scaled_trainX, scaled_testX = load_transform_data.scale_dataset(
        trainX, testX)

    # logging to sigopt Run
    sigopt.log_model("keras.Sequential")  # model_keras.__class__
    sigopt.log_dataset('Scaled Anomaly detection')
    sigopt.log_metadata('Training Records', len(scaled_trainX))
    sigopt.log_metadata('Testing Records', len(scaled_testX))
    sigopt.log_metadata("Platform", platform.uname())

    learning_rate = sigopt.get_parameter('learning_rate', default=0.01)
    loss_fn = sigopt.get_parameter('loss_function',
                                   default='binary_crossentropy')
    batch_size = sigopt.get_parameter('batch_size', default=4096)
    sigopt.get_parameter('layers', default=3)  # record the fixed layer count on the SigOpt Run
    num_epochs = sigopt.get_parameter('epochs', default=6)

    keras_model = KerasModel()
    model_keras = keras_model.create_model(trainX)
    model_keras.compile(optimizer=Adam(learning_rate=learning_rate),
                        loss=loss_fn,
                        metrics=[tf.keras.metrics.AUC()])

    model_keras.fit(
        scaled_trainX,
        trainY,
        batch_size=batch_size,
        epochs=num_epochs,
        callbacks=[CheckpointCB()],
        validation_data=(scaled_testX, testY),
    )

    # Collect model metrics
    start = time.perf_counter()
    probability = model_keras.predict(scaled_testX).flatten()
    prediction = probability > 0.5

    sigopt.log_metric('Inference Time', time.perf_counter() - start)
    log_inference_metrics(prediction, probability, testY, testX)
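
CheckpointCB above is a project-local callback whose definition is not shown on this page. As a rough sketch only, assuming the sigopt client's log_checkpoint API, such a callback could report each epoch's metrics to the active run:

class CheckpointCB(tf.keras.callbacks.Callback):
    """Report each epoch's Keras metrics as a SigOpt checkpoint."""

    def on_epoch_end(self, epoch, logs=None):
        # logs maps metric names (loss, auc, val_loss, ...) to this epoch's values
        sigopt.log_checkpoint(logs or {})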
Example #2
# Note: LoadTransformData and log_inference_metrics are project-local helpers
# defined elsewhere in the source repository.
import platform
import time

import sigopt
from xgboost import XGBClassifier


def train_xgboost_model(dataset, random_state=1):
    print("loading and transforming data")
    load_transform_data = LoadTransformData()
    trainX, testX, trainY, testY = load_transform_data.load_split_dataset(
        dataset)

    # model architecture
    sigopt.log_model("XGBClassifier")  # model_keras.__class__
    sigopt.log_dataset('Unscaled')
    sigopt.log_metadata('Training Records', len(trainX))
    sigopt.log_metadata('Testing Records', len(testX))
    sigopt.log_metadata("Platform", platform.uname())

    parameters = {
        'objective': 'binary:logistic',
        'learning_rate': sigopt.get_parameter('learning_rate', default=0.3),
        'n_estimators': sigopt.get_parameter('n_estimators', default=20),
        'max_depth': sigopt.get_parameter('max_depth', default=5),
        'gamma': sigopt.get_parameter('gamma', default=0),
        'min_child_weight': sigopt.get_parameter('min_child_weight',
                                                 default=1),
        'random_state': random_state,
        'importance_type': 'gain',
        'missing': None,
        'verbosity': 2
    }

    model = XGBClassifier(**parameters)

    modelfit = model.fit(trainX, trainY)

    # Collect model metrics
    start = time.perf_counter()
    prediction = modelfit.predict(testX)
    sigopt.log_metric("Inference Time", time.perf_counter() - start)
    probability = modelfit.predict_proba(testX)[:, 1]
    log_inference_metrics(prediction, probability, testY, testX)
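
log_inference_metrics, called at the end of both examples above, is another project-local helper whose definition is not shown. A minimal sketch of what it plausibly logs, assuming standard scikit-learn classification metrics (the helper's actual name and signature come from the source project):

from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)


def log_inference_metrics(prediction, probability, testY, testX):
    # testX is accepted to match the call sites but is not needed here;
    # record standard binary-classification metrics on the active SigOpt run
    sigopt.log_metric('Accuracy', accuracy_score(testY, prediction))
    sigopt.log_metric('Precision', precision_score(testY, prediction))
    sigopt.log_metric('Recall', recall_score(testY, prediction))
    sigopt.log_metric('F1', f1_score(testY, prediction))
    sigopt.log_metric('AUC', roc_auc_score(testY, probability))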
Example #3
import argparse

import sigopt


# Take a suggestion from SigOpt and evaluate your function
def execute_model(run):
  # train a model
  # evaluate a model
  # return the accuracy
  raise NotImplementedError("Return a number, which represents your metric for this run")

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--budget', type=int, default=20)
  parser.add_argument('--client_token', required=True, help="Find your CLIENT_TOKEN at https://sigopt.com/tokens")
  the_args = parser.parse_args()

  # Descriptor of what kind of dataset you are modeling
  sigopt.log_dataset("Example dataset")
  # Useful for keeping track of where you got the data
  sigopt.log_metadata(key="Dataset Source", value="Example Source")
  # e.g. sklearn, xgboost, etc.
  sigopt.log_metadata(key="Feature Pipeline Name", value="Example Pipeline")
  # What kind of learning you are attempting
  sigopt.log_model("Example Model Technique")
  # Create an experiment with one parameter, x
  experiment = sigopt.create_experiment(
    name="Basic Test experiment",
    parameters=[{'name': 'x', 'bounds': {'max': 50.0, 'min': 0.0}, 'type': 'double'}],
    metrics=[{"name":"holdout_accuracy", "objective":"maximize"}],
    parallel_bandwidth=1,
    budget=the_args.budget,
  )
  print('Created experiment id {0}'.format(experiment.id))
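  # Hypothetical continuation (not part of the original snippet): consume the
  # experiment budget with the sigopt client's loop interface, logging the
  # metric declared above for each suggested run.
  for run in experiment.loop():
    with run:
      run.log_metric("holdout_accuracy", execute_model(run))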
Example #4
import sigopt
sigopt.log_dataset("rdkit")
Example #5
# model.py  
import sklearn.datasets 
import sklearn.metrics 
from xgboost import XGBClassifier 
import sigopt 
 
# Data preparation required to run and evaluate the sample model 
X, y = sklearn.datasets.load_iris(return_X_y=True) 
Xtrain, ytrain = X[:100], y[:100] 

# Track the name of the dataset used for your Run 
sigopt.log_dataset('iris 2/3 training, full test') 
# Set n_estimators as the hyperparameter to explore for your Experiment 
sigopt.params.setdefault("n_estimators", 100) 
# Track the name of the model used for your Run 
sigopt.log_model('xgboost') 

# Instantiate and train your sample model 
model = XGBClassifier( 
  n_estimators=sigopt.params.n_estimators, 
  use_label_encoder=False, 
  eval_metric='logloss', 
) 
model.fit(Xtrain, ytrain) 
pred = model.predict(X) 

# Track the metric value and metric name for each Run 
sigopt.log_metric("accuracy", sklearn.metrics.accuracy_score(pred, y)) 
Example #6
    H**O = df["H**O"].to_numpy()
    HOMO_1 = df["H**O-1"].to_numpy()
    diff = df["diff"].to_numpy()

    if (des == "vae"):
        temp = df["mat"].tolist()
        mat = list([i.flatten() for i in temp])

    elif (des == "auto"):
        temp = df["mat"].tolist()
        mat = list([i.flatten() for i in temp])
    else:
        mat = df["mat"].to_numpy()

    if sigopt_tf:
        sigopt.log_dataset(name=dir_temp + " " + des)
        sigopt.log_model(type=algo)
        sigopt.log_metadata('input_features', np.shape(mat[0]))

    # standardize features; fall back to materializing a list first when the
    # array cannot be built directly
    try:
        mat = preprocessing.scale(np.array(mat))
    except Exception:
        mat = preprocessing.scale(np.array(list(mat)))

    print("Using " + des + " as the descriptor")
    print("Matrix Dimensions: {0}".format(np.shape(mat)))

    # finish optimization
    if homo_tf:
        des = des + "_homo"
        print(".........................HOMO..................")