def main():
    """Entry point: dynamically load config/model/dataset/trainer modules
    named on the CLI, then run training in one of two modes.

    - If the config dict contains an 'algorithm' key, a comet_ml Optimizer
      drives one training run per suggested hyperparameter set.
    - Otherwise, a single optional comet_ml Experiment is shared across runs;
      the config module is reloaded between runs so on-disk edits are picked
      up, and each not-yet-tried configuration is trained exactly once.
    """
    # Rebound here; presumably read by helpers elsewhere in this file — TODO confirm.
    global model_def
    global dataset
    global train
    args = argument_parser()
    # The CLI supplies dotted module paths for every pluggable component.
    config_module = importlib.import_module(args.config)
    configs = config_module.config
    model_def = importlib.import_module(args.model).model
    dataset = importlib.import_module(args.dataset).dataset
    train = importlib.import_module(args.trainer).train
    if 'algorithm' in configs.keys():
        # Comet optimizer mode: one experiment per suggested parameter set.
        comet_config = parse_comet_config(configs)
        opt = Optimizer(comet_config,
                        api_key=configs['API_KEY'],
                        project_name=configs['project_name'])
        for exp in opt.get_experiments():
            experiment = exp
            config = get_parameters(experiment, configs)
            train(**{
                'config': config,
                'model_def': model_def,
                'dataset': dataset,
                'experiment': experiment
            })
    else:
        # Manual sweep mode: optionally log everything to one Experiment.
        if args.experiment:
            experiment = Experiment(api_key=configs['API_KEY'],
                                    project_name=configs['project_name'],
                                    workspace=configs['workspace'])
        else:
            experiment = None
        tried_configs = []
        end = False
        while True:
            # Re-read the config file each sweep so new configs added on
            # disk while training runs are discovered.
            importlib.reload(config_module)
            configs = config_module.config
            possible_configs = get_configurations(configs)
            for config_idx, config in enumerate(possible_configs):
                if config_idx == len(possible_configs) - 1:
                    # Last known config seen: stop after this sweep.
                    end = True
                if config in tried_configs:
                    continue
                else:
                    tried_configs.append(config)
                    train(**{
                        'config': config,
                        'model_def': model_def,
                        'dataset': dataset,
                        'experiment': experiment
                    })
                    # Train at most one new config per sweep, then re-scan.
                    break
            if end:
                break
    print("******************End of the training session****************")
def main():
    """Search the first-layer width of an MNIST classifier with Comet.

    Loads MNIST, flattens and normalises it, one-hot encodes the labels,
    then lets a Bayes optimizer suggest `first_layer_units` values; each
    suggestion is trained via `fit` and its loss reported back.
    """
    num_classes = 10
    # MNIST arrives pre-split; flatten 28x28 images into 784-vectors in [0, 1].
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(60000, 784).astype("float32") / 255
    x_test = x_test.reshape(10000, 784).astype("float32") / 255
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")
    # One-hot encode the integer class labels.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    search_config = {
        "algorithm": "bayes",
        "name": "Optimize MNIST Network",
        "spec": {
            "maxCombo": 10,
            "objective": "minimize",
            "metric": "loss"
        },
        "parameters": {
            "first_layer_units": {
                "type": "integer",
                "min": 1,
                "max": 1000
            }
        },
        "trials": 1,
    }
    opt = Optimizer(search_config)
    for experiment in opt.get_experiments():
        units = experiment.get_parameter("first_layer_units")
        # 3 epochs, batch size 120 per trial; the returned loss steers the search.
        loss = fit(experiment, x_train, y_train, x_test, y_test, 3, 120, units)
        experiment.log_metric("loss", loss)
def main(cfg: DictConfig):
    """Bayes-search GCN hyperparameters on the KOSPI200 stock dataset.

    NOTE(review): the Comet API key is hard-coded below (twice) — move it to
    configuration or the environment before sharing this code.
    """
    # Fixed chronological split sizes (in trading days).
    train_length = 500
    val_length = 30
    test_length = 585
    opt = Optimizer(
        config,
        api_key='SWhvV0XPkHV8tPdU8Nv67EXxU',
        project_name=f'stock-gcn-experiment-sequences-{train_length}days')
    for experiment in opt.get_experiments():
        # Same seed for every trial so runs differ only in hyperparameters.
        seed = 1
        seed_everything(seed)
        # Fetch this trial's suggested hyperparameters in one pass.
        suggested = {
            name: experiment.get_parameter(name)
            for name in ('hidden_layer', 'hidden_feature', 'activation',
                         'sequence_length')
        }
        hidden_layer = suggested['hidden_layer']
        hidden_feature = suggested['hidden_feature']
        activation = suggested['activation']
        sequence_length = suggested['sequence_length']
        # 2018.01.02 is 615 trading days before 2020.07.03.
        KOSPI200Dataset.setup(train_length=train_length,
                              val_length=val_length,
                              test_length=test_length,
                              sequence_length=sequence_length)
        comet_logger = CometLogger(
            api_key="SWhvV0XPkHV8tPdU8Nv67EXxU",
            workspace="dldjwls3",  # Optional
            project_name=f'stock-gcn-experiment-sequences-{train_length}days',
            experiment_name=
            f'gcn_{sequence_length}_{activation}_{hidden_layer}_{hidden_feature}',
            experiment_key=experiment.get_key())
        model = Baseline(seed=seed,
                         sequence_length=sequence_length,
                         num_feature=5,
                         hidden_layer=hidden_layer,
                         hidden_feature=hidden_feature,
                         activation=activation)
        trainer = Trainer(max_epochs=120, gpus=-1, logger=comet_logger)
        trainer.fit(model)
def optimize(max_n, model_name, dataset_name, batch_size, n_epochs, use_comet, comet_name, model_module_index=None):
    """Run a Comet hyperparameter search for `model_name` on `dataset_name`.

    Each suggested parameter set is written onto the model, the model is
    trained for `n_epochs`, and the first test result is reported back to
    the optimizer as "loss".

    :param max_n: maximum number of parameter combinations to try (maxCombo).
    :param model_module_index: forwarded to initialize_model; optional.
    NOTE(review): `use_comet` is currently unused — the CometConnection
    hookup below is commented out.
    """
    dataset_config, model = initialize_model(dataset_name, model_name, model_module_index)
    rocket = None
    #if use_comet:
    #    rocket = CometConnection(comet_name=comet_name, dataset_config=dataset_config)
    # TODO: add the optimizer code to comet_connection.py
    params_range = get_params_range(model)
    params_range['spec']['maxCombo'] = max_n
    optimizer = Optimizer(params_range, api_key=COMET_KEY)
    for experiment in optimizer.get_experiments(project_name=PROJECT_NAME):
        experiment.set_name(comet_name)
        experiment.add_tag("optimizer_experiment")
        # model_exp aliases `model`, so assigning .params below also mutates
        # the shared model object that train_model receives.
        model_exp = model
        p = {
            k: experiment.get_parameter(k)
            for k in params_range['parameters'].keys()
        }
        model_exp.params = p
        # Rebinds model_exp to train_model's return value; `model` (carrying
        # the freshly-set params) is the object actually trained.
        model_exp = train_model(model, dataset_name, dataset_config, batch_size, n_epochs, compile_dict=COMPILE_DICT)
        loss = model_exp.test_results[0]
        experiment.log_metric("loss", loss)
"spec": { "metric": "ROC", "objective": "maximize", }, } parameters = open("parameters.yml") yamlparameters = yaml.load(parameters, Loader=yaml.FullLoader) opt = Optimizer(config, api_key=yamlparameters["comet_api_key"], project_name="NNqhmv6", auto_metric_logging=True) X_train, X_test, y_train, y_test = get_features(yamlparameters["DataDir"]) for experiment in opt.get_experiments(): keras_model = models.qdense_model( Input(shape=X_train.shape[1:]), l1Reg=experiment.get_parameter("Regularization"), bits=14, ints=2) #keras_model = models.dense_model(Input(shape=X_train.shape[1:]), l1Reg=experiment.get_parameter("Regularization")) startlearningrate = experiment.get_parameter("learning_rate") adam = Adam(lr=startlearningrate, beta_1=experiment.get_parameter("learning_beta1"), beta_2=experiment.get_parameter("learning_beta2"), amsgrad=experiment.get_parameter("Adagrad")) keras_model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_accuracy'])
drug_feature_type=DrugFeatureType.GRAPH, drug_nan_processing=NanProcessing.NONE, drug_scaling_method=ScalingMethod.NONE, drug_featurizer_kwargs=None, # Random split disjoint_cells=False, disjoint_drugs=False, summary=True) node_attr_dim = trn_dset[0].x.shape[1] edge_attr_dim = trn_dset[0].edge_attr.shape[1] cell_input_dim = trn_dset[0].cell_data.shape[0] # Iterate through all different experiment configurations for experiment in comet_opt.get_experiments(): # Disable auto-collection of any metrics experiment.disable_mp() graph_model = experiment.get_parameter(name='graph_model') graph_state_dim = experiment.get_parameter(name='graph_state_dim') graph_num_conv = experiment.get_parameter(name='graph_num_conv') graph_out_dim = experiment.get_parameter(name='graph_out_dim') graph_attention_pooling = \ (experiment.get_parameter(name='graph_attention_pooling') == 'True') uno_dropout = experiment.get_parameter(name='uno_dropout') uno_state_dim = experiment.get_parameter(name='uno_state_dim') cell_state_dim = experiment.get_parameter(name='cell_state_dim')
self.fc1 = nn.Linear(4 * 4 * 50, 500) self.fc2 = nn.Linear(500, 10) def forward(self, x): x = F.relu(self.conv1(x)) x = F.max_pool2d(x, 2, 2) x = F.relu(self.conv2(x)) x = F.max_pool2d(x, 2, 2) x = x.view(-1, 4 * 4 * 50) x = F.relu(self.fc1(x)) x = self.fc2(x) return F.log_softmax(x, dim=1) # experiment = Experiment(project_name="pytorch") for experiment in optimizer.get_experiments(): # MNIST Dataset train_dataset = dsets.MNIST(root='./data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = dsets.MNIST(root='./data/', train=False, transform=transforms.ToTensor()) # Data Loader (Input Pipeline) train_loader = torch.utils.data.DataLoader( dataset=train_dataset, batch_size=experiment.get_parameter('batch_size'),
def optimise(self):
    """Bayes-search gradient-boosting hyperparameters on self.model via Comet.

    Bounds come from the self.* dicts (each exposing 'min'/'max'); note that
    the 'reg_alpha' bounds are read from self.alpha. Each trial writes the
    suggested values onto self.model, runs self.train()/self.test(), and
    logs ROC AUC (the maximised metric) plus binary accuracy.
    """
    config = {
        # We pick the Bayes algorithm:
        "algorithm": "bayes",
        # Declare your hyperparameters in the Vizier-inspired format:
        "parameters": {
            "n_estimators": {
                "type": "integer",
                "min": self.n_estimators["min"],
                "max": self.n_estimators["max"],
                "scalingType": "uniform"
            },
            "max_depth": {
                "type": "integer",
                "min": self.max_depth["min"],
                "max": self.max_depth["max"],
                "scalingType": "uniform"
            },
            "learning_rate": {
                "type": "float",
                "min": self.learning_rate["min"],
                "max": self.learning_rate["max"],
                "scalingType": "uniform"
            },
            "gamma": {
                "type": "float",
                "min": self.gamma["min"],
                "max": self.gamma["max"],
                "scalingType": "uniform"
            },
            "subsample": {
                "type": "float",
                "min": self.subsample["min"],
                "max": self.subsample["max"],
                "scalingType": "uniform"
            },
            "min_child_weight": {
                "type": "float",
                "min": self.min_child_weight["min"],
                "max": self.min_child_weight["max"],
                "scalingType": "uniform"
            },
            # Bounds read from self.alpha, not self.reg_alpha — TODO confirm intended.
            "reg_alpha": {
                "type": "float",
                "min": self.alpha["min"],
                "max": self.alpha["max"],
                "scalingType": "uniform"
            },
        },
        # Declare what we will be optimizing, and how:
        "spec": {
            "metric": "ROC",
            "objective": "maximize",
        },
    }
    opt = Optimizer(config,
                    api_key=self.comet_api_key,
                    project_name=self.comet_project_name,
                    auto_metric_logging=True)
    for experiment in opt.get_experiments():
        # Push the suggested values onto the wrapped model, then run the
        # train/test/evaluate cycle defined elsewhere on this class.
        self.model.learning_rate = experiment.get_parameter(
            "learning_rate")
        self.model.n_estimators = experiment.get_parameter("n_estimators")
        self.model.subsample = experiment.get_parameter("subsample")
        self.model.max_depth = experiment.get_parameter("max_depth")
        self.model.gamma = experiment.get_parameter("gamma")
        self.model.reg_alpha = experiment.get_parameter("reg_alpha")
        self.model.min_child_weight = experiment.get_parameter(
            "min_child_weight")
        self.train()
        self.test()
        auc, binary_accuracy = self.evaluate()
        experiment.log_metric("ROC", auc)
        experiment.log_metric("Binary_Accuracy", binary_accuracy)
def optimise(self):
    """Bayes-search booster hyperparameters (self.param-based variant).

    Unlike the model-attribute variant, this writes suggested values into
    the self.param dict consumed by train(); n_estimators is fixed (used as
    num_rounds) while early stopping, DART rate_drop/skip_drop and the
    usual tree parameters are tuned. Logs ROC AUC, binary accuracy, the
    best boosting round, and a combined "score" that rewards both AUC and
    finishing in fewer rounds.
    """
    config = {
        # We pick the Bayes algorithm:
        "algorithm": "bayes",
        # Declare your hyperparameters in the Vizier-inspired format:
        "parameters": {
            "max_depth": {
                "type": "integer",
                "min": self.max_depth["min"],
                "max": self.max_depth["max"],
                "scalingType": "uniform"
            },
            "learning_rate": {
                "type": "float",
                "min": self.learning_rate["min"],
                "max": self.learning_rate["max"],
                "scalingType": "uniform"
            },
            "gamma": {
                "type": "float",
                "min": self.gamma["min"],
                "max": self.gamma["max"],
                "scalingType": "uniform"
            },
            "subsample": {
                "type": "float",
                "min": self.subsample["min"],
                "max": self.subsample["max"],
                "scalingType": "uniform"
            },
            "min_child_weight": {
                "type": "float",
                "min": self.min_child_weight["min"],
                "max": self.min_child_weight["max"],
                "scalingType": "uniform"
            },
            # Bounds read from self.alpha, not self.reg_alpha — TODO confirm intended.
            "reg_alpha": {
                "type": "float",
                "min": self.alpha["min"],
                "max": self.alpha["max"],
                "scalingType": "uniform"
            },
            "early_stopping": {
                "type": "integer",
                "min": self.early_stopping["min"],
                "max": self.early_stopping["max"],
                "scalingType": "uniform"
            },
            "rate_drop": {
                "type": "float",
                "min": self.rate_drop["min"],
                "max": self.rate_drop["max"],
                "scalingType": "uniform"
            },
            "skip_drop": {
                "type": "float",
                "min": self.skip_drop["min"],
                "max": self.skip_drop["max"],
                "scalingType": "uniform"
            }
        },
        # Declare what we will be optimizing, and how:
        "spec": {
            "metric": "ROC",
            "objective": "maximize",
        },
    }
    opt = Optimizer(config,
                    api_key=self.comet_api_key,
                    project_name=self.comet_project_name,
                    auto_metric_logging=True)
    for experiment in opt.get_experiments():
        # Overlay this trial's suggested values onto the booster params.
        self.param["learning_rate"] = experiment.get_parameter(
            "learning_rate")
        # n_estimators is not tuned here; it caps the boosting rounds.
        self.num_rounds = self.n_estimators
        self.param["subsample"] = experiment.get_parameter("subsample")
        self.param["max_depth"] = experiment.get_parameter("max_depth")
        self.param["gamma"] = experiment.get_parameter("gamma")
        self.param["reg_alpha"] = experiment.get_parameter("reg_alpha")
        self.param["min_child_weight"] = experiment.get_parameter(
            "min_child_weight")
        self.param["rate_drop"] = experiment.get_parameter("rate_drop")
        self.param["skip_drop"] = experiment.get_parameter("skip_drop")
        self.early_stopping_rounds = experiment.get_parameter(
            "early_stopping")
        self.train()
        self.test()
        auc, binary_accuracy = self.evaluate()
        experiment.log_metric("ROC", auc)
        experiment.log_metric("Binary_Accuracy", binary_accuracy)
        experiment.log_metric("Best Boost Round", self.boost_rounds)
        # AUC gain over chance plus a bonus for stopping early.
        experiment.log_metric("score",
                              (auc / 0.5 - 1) +
                              (1 - self.boost_rounds / self.n_estimators))
def main(alpha=0.5):
    """Bayes-optimize LightGBM hyperparameters for the color-ml project.

    Each optimizer-suggested parameter set is evaluated via `fit`, and the
    resulting loss and std are reported back to Comet.

    :param alpha: forwarded unchanged to `fit` (default 0.5).
    """
    scaler, yscaler, X_train, X_test, y_train, y_test = process_data()
    config = {
        # We pick the Bayes algorithm:
        'algorithm': 'bayes',
        'name': 'fine tune LightGBM',
        # Declare your hyperparameters in the Vizier-inspired format:
        'parameters': {
            'n_estimators': {
                'type': 'integer',
                'min': 50,
                'max': 5000
            },
            'max_depth': {
                'type': 'integer',
                'min': 10,
                'max': 50
            },
            'num_leaves': {
                'type': 'integer',
                'min': 100,
                'max': 500
            },
            'reg_alpha': {
                'type': 'float',
                'min': 0.00001,
                'max': 0.2,
                'scalingType': 'loguniform',
            },
            'reg_lambda': {
                'type': 'float',
                'min': 0.00001,
                'max': 0.2,
                'scalingType': 'loguniform',
            },
            'subsample': {
                'type': 'float',
                'min': 0.2,
                'max': 1.0
            },
            'colsample_bytree': {
                'type': 'float',
                'min': 0.2,
                'max': 1.0
            },
            'min_child_weight': {
                'type': 'float',
                'min': 0.001,
                'max': 0.1,
                'scalingType': 'loguniform',
            },
        },
        # Declare what we will be optimizing, and how:
        'spec': {
            'metric': 'loss',
            'objective': 'minimize'
        },
    }
    # Next, create an optimizer, passing in the config:
    # (You can leave out API_KEY if you already set it)
    opt = Optimizer(config,
                    api_key=os.environ['COMET_API_KEY'],
                    project_name='color-ml')
    # FIX: the previous loop used enumerate() but discarded the index.
    for experiment in opt.get_experiments():
        experiment.log_parameter('colorspace', 'rgb')
        # Collect this trial's suggested LightGBM parameters.
        params = {
            name: experiment.get_parameter(name)
            for name in ('n_estimators', 'colsample_bytree', 'num_leaves',
                         'max_depth', 'reg_alpha', 'reg_lambda', 'subsample',
                         'min_child_weight')
        }
        loss, std = fit(experiment, X_train, y_train, params, alpha)
        experiment.log_metric('loss', loss)
        experiment.log_metric('std', std)
def train(self):
    """Fit the model with a Comet Bayes search over batch size, then reload
    the best checkpoint and record the final run's history/metrics on self.

    Side effects: sets self.duration, self.history, self.best_model and the
    various metric attributes; extends the loss/acc/precision/recall/f1
    lists with the last fit's history.
    """
    # The "encoder" model was saved with standalone Keras; everything else
    # uses tf.keras — the matching load_model must be used below.
    if (self.config.model.name == "encoder"):
        import keras
    else:
        import tensorflow.keras as keras
    start_time = time.time()
    ## bayes optimization, refer to https://www.comet.ml/docs/python-sdk/introduction-optimizer/
    config = {
        "algorithm": "bayes",
        "parameters": {
            # Only the batch size (as a raw integer 8..10) is tuned here.
            "batch": {
                "type": "integer",
                "min": 8,
                "max": 10
            }
        },
        "spec": {
            "metric": "val_loss",
            "objective": "minimize"
        },
    }
    opt = Optimizer(config,
                    api_key=self.config.comet_api_key,
                    project_name=self.config.exp_name)
    for exp in opt.get_experiments():
        history = self.model.fit(
            self.data[0],
            self.data[1],
            epochs=self.config.trainer.num_epochs,
            verbose=self.config.trainer.verbose_training,
            batch_size=exp.get_parameter('batch'),
            validation_split=self.config.trainer.validation_split,
            callbacks=self.callbacks,
        )
        # The best (lowest) validation loss of this run steers the optimizer.
        val_loss = min(history.history['val_loss'])
        print(val_loss)
        exp.log_metric("val_loss", val_loss)
    # history = self.model.fit(
    #     self.data[0], self.data[1],
    #     epochs=self.config.trainer.num_epochs,
    #     verbose=self.config.trainer.verbose_training,
    #     batch_size=self.config.trainer.batch_size,
    #     validation_split=self.config.trainer.validation_split,
    #     callbacks=self.callbacks,
    # )
    self.duration = time.time() - start_time
    # NOTE: `history` is the last optimizer trial's run, not the best one.
    self.history = history
    # if(self.config.model.name == "encoder"):
    #     self.best_model = keras.models.load_model(os.path.join(self.config.callbacks.checkpoint_dir,'best_model-%s.hdf5'%self.config.callbacks.checkpoint_monitor),
    #         custom_objects={'precision': precision, 'recall': recall,'f1': f1,
    #                         'InstanceNormalization': keras_contrib.layers.InstanceNormalization()})
    # else:
    # Reload the checkpointed best model with the custom metric functions.
    self.best_model = keras.models.load_model(os.path.join(
        self.config.callbacks.checkpoint_dir,
        'best_model-%s.hdf5' % self.config.callbacks.checkpoint_monitor),
        custom_objects={
            'precision': precision,
            'recall': recall,
            'f1': f1
        })
    # Accumulate the last run's per-epoch metrics onto the instance.
    self.loss.extend(history.history['loss'])
    self.acc.extend(history.history['accuracy'])
    self.val_loss.extend(history.history['val_loss'])
    self.val_acc.extend(history.history['val_accuracy'])
    self.precision.extend(history.history['precision'])
    self.recall.extend(history.history['recall'])
    self.f1.extend(history.history['f1'])
    self.val_precision.extend(history.history['val_precision'])
    self.val_recall.extend(history.history['val_recall'])
    self.val_f1.extend(history.history['val_f1'])
    # Persist logs; returns a one-row frame describing the best epoch.
    best_model = save_training_logs(self.config.log_dir, history)
    self.best_model_train_loss = best_model.loc[0, 'best_model_train_loss']
    self.best_model_val_loss = best_model.loc[0, 'best_model_val_loss']
    self.best_model_train_acc = best_model.loc[0, 'best_model_train_acc']
    self.best_model_val_acc = best_model.loc[0, 'best_model_val_acc']
    self.best_model_train_precision = best_model.loc[
        0, 'best_model_train_precision']
    self.best_model_val_precision = best_model.loc[
        0, 'best_model_val_precision']
    self.best_model_train_recall = best_model.loc[
        0, 'best_model_train_recall']
    self.best_model_val_recall = best_model.loc[0, 'best_model_val_recall']
    self.best_model_train_f1 = best_model.loc[0, 'best_model_train_f1']
    self.best_model_val_f1 = best_model.loc[0, 'best_model_val_f1']
    self.best_model_learning_rate = best_model.loc[
        0, 'best_model_learning_rate']
    self.best_model_nb_epoch = best_model.loc[0, 'best_model_nb_epoch']
if __name__ == '__main__':
    repo = Repo("./")
    assert not repo.bare
    project_name = "candidate-tests"
    if len(sys.argv) > 1:  # a config file was passed on the command line
        print("-> Loading the optimizer...")
        opt = CometOptimizer(config=sys.argv[1])
        # Batch experiments so one runs per available GPU
        # (or a single one at a time when no CUDA device exists).
        pending = []
        for experiment in opt.get_experiments(project_name=project_name,
                                              workspace="olibd"):
            print("-> Registering experiment {} with Comet...".format(
                experiment.get_key()))
            pending.append(experiment)
            batch_full = len(pending) == torch.cuda.device_count() or (
                not cuda_is_available() and len(pending) == 1)
            if batch_full:
                launch_experiment(pending, repo)
                pending = []
        # A final batch smaller than the GPU count has not run yet —
        # launch whatever is left over.
        if len(pending) > 0:
            launch_experiment(pending, repo)
def main():
    """Tune an airline-tweet sentiment MLP with a Comet Bayes optimizer.

    Loads Tweets.csv, preprocesses and TF-IDF-vectorises the text, one-hot
    encodes the three sentiment classes, then trains one small Keras network
    per optimizer-suggested hyperparameter set.
    """
    experiment = Experiment(api_key="API_KEY",
                            project_name="PROJECT",
                            workspace="WORKSPACE")
    raw_df = pd.read_csv(f'{DATA}Tweets.csv')
    df = raw_df[['tweet_id', 'text', 'airline_sentiment']]
    # Preprocess text and put it in a new column
    preprocessor = PreProcessor(df, 'text')
    df['cleaned_text'] = preprocessor.full_preprocess()
    # Shuffle so the held-out first 1000 rows form a random test set
    df = shuffle(df, random_state=seed)
    test_set = df[:1000]
    # Get training and validation data from the remaining rows
    X_train, X_val, y_train, y_val = train_test_split(
        df['cleaned_text'][1000:],
        df['airline_sentiment'][1000:],
        test_size=0.2,
        random_state=seed)
    y_test = test_set['airline_sentiment']
    # TF-IDF matrix fitted on training text only
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(X_train)
    X_val = vectorizer.transform(X_val)
    X_test = vectorizer.transform(test_set['cleaned_text'])
    # One-hot encode targets:
    # Negative = [1,0,0], Neutral = [0,1,0], Positive = [0,0,1]
    onehot_encoder = OneHotEncoder(sparse=False)
    integer_encoded_train = np.array(y_train).reshape(len(y_train), 1)
    onehot_encoded_train = onehot_encoder.fit_transform(integer_encoded_train)
    # FIX: fit the encoder on the training labels only; val/test must reuse
    # the same category->column mapping (fit_transform here would silently
    # re-derive it from the split's own labels).
    integer_encoded_val = np.array(y_val).reshape(len(y_val), 1)
    onehot_encoded_val = onehot_encoder.transform(integer_encoded_val)
    integer_encoded_test = np.array(y_test).reshape(len(y_test), 1)
    onehot_encoded_test = onehot_encoder.transform(integer_encoded_test)
    from comet_ml import Optimizer
    config = {
        "algorithm": "bayes",
        "parameters": {
            "batch_size": {
                "type": "integer",
                "min": 16,
                "max": 128
            },
            "dropout": {
                "type": "float",
                "min": 0.1,
                "max": 0.5
            },
            "lr": {
                "type": "float",
                "min": 0.0001,
                "max": 0.001
            },
            "beta1": {
                "type": "float",
                "min": 0.95,
                "max": 0.999
            },
            "beta2": {
                "type": "float",
                "min": 0.95,
                "max": 0.999
            },
            "epsilon": {
                "type": "float",
                "min": 1e-9,
                "max": 1e-7
            },
            "patience": {
                "type": "integer",
                "min": 3,
                "max": 7
            }
        },
        "spec": {
            "metric": "loss",
            "objective": "minimize",
        },
    }
    opt = Optimizer(config,
                    api_key="ERPBfa6mmwJzQnk61oiqLOCie",
                    project_name="nlp-airline",
                    workspace="demo")
    for experiment in opt.get_experiments():
        experiment.add_tag('LR-Optimizer')
        # Neural network architecture
        initializer = keras.initializers.he_normal(seed=seed)
        activation = keras.activations.elu
        optimizer = keras.optimizers.Adam(
            lr=experiment.get_parameter("lr"),
            beta_1=experiment.get_parameter("beta1"),
            beta_2=experiment.get_parameter("beta2"),
            epsilon=experiment.get_parameter('epsilon'))
        # FIX: use the tuned "patience" parameter — it was declared in the
        # search space but never read (patience was hard-coded to 4).
        # NOTE(review): monitor='val_acc' assumes standalone-Keras metric
        # naming; tf.keras 2.x names it 'val_accuracy' — confirm.
        es = EarlyStopping(monitor='val_acc',
                           mode='max',
                           verbose=1,
                           patience=experiment.get_parameter("patience"))
        batch_size = experiment.get_parameter("batch_size")
        # Build model architecture
        model = Sequential()
        model.add(
            Dense(20,
                  activation=activation,
                  kernel_initializer=initializer,
                  input_dim=X_train.shape[1]))
        model.add(Dropout(experiment.get_parameter("dropout")))
        model.add(
            Dense(3, activation='softmax', kernel_initializer=initializer))
        # FIX: 3-class softmax output requires categorical_crossentropy;
        # binary_crossentropy computed a per-class binary loss and produced
        # misleading accuracy numbers.
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # Fit the model using the batch_generator
        hist = model.fit_generator(
            generator=batch_generator(X_train,
                                      onehot_encoded_train,
                                      batch_size=batch_size,
                                      shuffle=True),
            epochs=5,
            validation_data=(X_val, onehot_encoded_val),
            steps_per_epoch=X_train.shape[0] / batch_size,
            callbacks=[es])
        score = model.evaluate(X_test, onehot_encoded_test, verbose=0)
        logging.info("Score %s", score)
# Comet optimizer driver for the fastdepth project: sys.argv[1] is the
# optimizer config file; suggested hyperparameters are overlaid onto the
# JSON defaults each trial.
from comet_ml import Optimizer
import sys
import os
import train
import evaluate

# Default training parameters; optimizer suggestions overwrite entries below.
params_file = "config/parameters.json"

# NOTE(review): the Comet API key is hard-coded here — move it to the
# environment or a config file before sharing.
opt = Optimizer(sys.argv[1], trials=1, api_key="jBFVYFo9VUsy0kb0lioKXfTmM")

for experiment in opt.get_experiments(project_name="fastdepth"):
    params = train.get_params(params_file)
    # try:
    #     pid = os.environ["COMET_OPTIMIZER_PROCESS_ID"]
    #     params["device"] = pid
    # except KeyError:
    #     pass
    # Overlay this trial's suggested hyperparameters.
    params["batch_size"] = experiment.get_parameter("batch_size")
    params["optimizer"]["lr"] = experiment.get_parameter("learning_rate")
    params["optimizer"]["momentum"] = experiment.get_parameter("momentum")
    params["optimizer"]["weight_decay"] = experiment.get_parameter(
        "weight_decay")
    print("Batch Size: ", params["batch_size"])
    print("Learning Rate: ", params["optimizer"]["lr"])
    print("Momentum: ", params["optimizer"]["momentum"])
    print("Weight Decay: ", params["optimizer"]["weight_decay"])
    print("Device: ", params["device"])
def main(): os.environ['CUDA_VISIBLE_DEVICES'] = '2' # capture the config path from the run arguments # then process the json configuration file # try: args = get_args() config, _ = get_config_from_json(args.config) bayes_config = { "algorithm": "bayes", "parameters": { # "model": {"type": "categorical", "values": ['cnn','mlp']}, "learning_rate": { "type": "float", "min": 0.001, "max": 0.01 }, # "batch_size": {"type": "integer", "min": 16, "max": 32}, # "num_epochs": {"type": "integer", "min": 5, "max": 10}, }, "spec": { "maxCombo": 10, "objective": "minimize", "metric": "test_f1", "minSampleSize": 100, "retryAssignLimit": 0, }, "trials": 1, "name": "Bayes", } opt = Optimizer(bayes_config, api_key=config.comet_api_key, project_name=config.exp_name) for exp in opt.get_experiments(): args = get_args() # config = process_config_UtsClassification_bayes_optimization(args.config, exp.get_parameter('model'),exp.get_parameter('learning_rate'), # exp.get_parameter('batch_size'), exp.get_parameter('num_epochs')) config = process_config_UtsClassification_bayes_optimization( args.config, exp.get_parameter('learning_rate')) # except: # print("missing or invalid arguments") # exit(0) # create the experiments dirs print('Create the data generator.') data_loader = UtsClassificationDataLoader(config) print('Create the model.') model = UtsClassificationModel(config, data_loader.get_inputshape(), data_loader.get_nbclasses()) print('Create the trainer') trainer = UtsClassificationTrainer(model.model, data_loader.get_train_data(), config) print('Start training the model.') trainer.train() # print('Create the evaluater.') # evaluater = UtsClassificationEvaluater(trainer.best_model, data_loader.get_test_data(), data_loader.get_nbclasses(), # config) # # print('Start evaluating the model.') # evaluater.evluate() exp.log_metric("test_f1", trainer.best_model_val_loss) print('done')
def main(): """ Main tracking script """ args = parse_args() print(args) from comet_ml import Optimizer # We only need to specify the algorithm and hyperparameters to use: config = { # We pick the Bayes algorithm: "algorithm": "bayes", # Declare your hyperparameters in the Vizier-inspired format: "parameters": { "alpha": { "type": "float", "min": 0.01, "max": 0.3 }, "lr": { "type": "float", "min": 5e-5, "max": 5e-2 }, "gamma": { "type": "float", "min": 0.75, "max": 0.99 }, }, # Declare what we will be optimizing, and how: "spec": { "metric": "VC", "objective": "maximize", "seed": args.rng_seed, }, } # Next, create an optimizer, passing in the config: opt = Optimizer(config, project_name=args.experiment) for experiment in opt.get_experiments(): experiment.auto_metric_logging = False experiment.workspace = 'TrackToLearn' experiment.parse_args = False experiment.disabled = not args.use_comet gamma = experiment.get_parameter("gamma") alpha = experiment.get_parameter("alpha") lr = experiment.get_parameter("lr") td3_experiment = SACTrackToLearnTraining( # Dataset params args.path, args.experiment, args.name, args.dataset_file, args.subject_id, args.test_dataset_file, args.test_subject_id, args.reference_file, args.ground_truth_folder, # RL params args.max_ep, args.log_interval, args.action_std, args.valid_noise, lr, gamma, alpha, # TD3 params args.training_batch_size, # Env params args.n_seeds_per_voxel, args.max_angle, args.min_length, args.max_length, args.step_size, # Step size (in mm) args.tracking_batch_size, args.n_signal, args.n_dirs, # Model params args.n_latent_var, args.hidden_layers, args.add_neighborhood, # Experiment params args.use_gpu, args.rng_seed, experiment, args.render, args.run_tractometer, args.load_teacher, args.load_policy, ) td3_experiment.run()
# Initialize the dataset and embedding processor data_set = data_processor.DataProcessor(task_name, dataset_dir, max_seq_length, vocab_size=vocab_size, to_tokens=to_tokens) embedding = embedding_processor.get_embedding(embedding_type) # If dataset folder is empty get the metadata and datasets if not os.listdir(dataset_dir): data_set.get_dataset() # Load the metadata vocabulary, labels = data_set.load_metadata() # Generate the embedding matrix embedding_matrix = embedding.get_embedding_matrix(embeddings_dir, embedding_source, embedding_dim, vocabulary) # Loop over each experiment in the optimiser for experiment in model_optimiser.get_experiments(project_name=experiment_params['project_name'], workspace="nathanduran", auto_output_logging='simple'): # Set up comet experiment experiment.set_name(experiment_name) # Get model params from optimiser experiment model_params = {} for key in optimiser_config['parameters'].keys(): model_params[key] = experiment.get_parameter(key) # Log parameters experiment.log_parameters(model_params) for key, value in experiment_params.items(): experiment.log_other(key, value) # Build tensorflow datasets from .npz files
def score_bayes_trainer_harness(self):
    """Bayes-tune each configured GluonTS estimator with Comet, then fetch
    and store the winning hyperparameters per model.

    Returns
    -------
    :return: ``None``
        None; trial losses are logged to Comet and the best parameter dict
        for each model is stored in self.best_params keyed by model name.
    """
    self.__log.info("Starting generic score train loop")
    # Local aliases for the instance state used below.
    train_data = self.train_data
    val_data = self.val_data
    models = self.model
    out_path = self.out_path  # NOTE(review): unused in this method
    type_time_series = self.type_time_series
    param_search = self.param_search
    trainer = self.trainer
    api_key = self.api_key
    rest_api_key = self.rest_api_key
    workspace = self.workspace
    for models_ in models.get(type_time_series):
        # Each entry maps model_name -> (estimator_class, kwargs_dict).
        for key, value in models_.items():
            model_name = key
            model = value[0]
            model_kwarg = value[1]
            if param_search == 'bayes':
                search_space = GluonTSBayesEstimatorKwargs.BayesModelLookup.get(
                    model_name)
                # comet-ml hyperparameter optimization configuration (bayes in this case)
                config = {
                    "algorithm": "bayes",
                    "spec": {
                        "maxCombo": 5,  # no of combinations to try
                        "objective": "minimize",
                        "metric": "loss",
                        "seed": 42,
                        "gridSize": 10,
                        "minSampleSize": 100,
                        "retryLimit": 20,
                        "retryAssignLimit": 0,
                    },
                    "name": "My Bayesian Search",
                    "trials": 1,
                }
                config['parameters'] = search_space
                # current time — makes each optimization project unique
                timestr = time.strftime("%Y%m%d-%H%M%S")
                # comet-ml project name for the optimization
                project_name = f"optimizer-{model_name}-{timestr}"
                # initialize the comet-ml optimizer
                optimizer = Optimizer(config=config,
                                      api_key=api_key,
                                      project_name=project_name)
                # loop through the parameter combinations that the bayes optimizer suggests
                for experiment in optimizer.get_experiments():
                    # explicitly set the model parameters (should be generic for any model)
                    if model_name == "SimpleFeedForward":
                        hidden1 = experiment.get_parameter(
                            "hidden_layer_size")
                        hidden2 = experiment.get_parameter(
                            "hidden2_layer_size")
                        model_kwarg['num_hidden_dimensions'] = [
                            hidden1, hidden2
                        ]
                        self.__log.info(
                            f"model_kwarg['num_hidden_dimensions'] : {model_kwarg['num_hidden_dimensions']}"
                        )
                    elif model_name == "DeepAREstimate":
                        model_kwarg[
                            'num_layers'] = experiment.get_parameter(
                                "num_layers")
                        model_kwarg[
                            'num_cells'] = experiment.get_parameter(
                                "num_cells")
                        model_kwarg[
                            'cell_type'] = experiment.get_parameter(
                                "cell_type")
                        model_kwarg[
                            'dropout_rate'] = experiment.get_parameter(
                                "dropout_rate")
                    # set trainer params
                    trainer.learning_rate = experiment.get_parameter(
                        "learning_rate")
                    trainer.batch_size = experiment.get_parameter(
                        "batch_size")
                    trainer.epochs = 2  # short fixed budget per trial
                    # initialize model from the suggested hyperparameters
                    model = model.from_hyperparameters(**model_kwarg)
                    # set the trainer
                    model.trainer = trainer
                    self.__log.info(
                        f'\n model.trainer.lr : {model.trainer.learning_rate}'
                    )
                    self.__log.info(
                        f'model.trainer.epochs : {model.trainer.epochs}\n')
                    # train the model
                    predictor = model.train(train_data)
                    # make predictions
                    forecast_it, ts_it = make_evaluation_predictions(
                        dataset=val_data,  # test dataset
                        predictor=predictor,  # predictor
                        num_eval_samples=1,  # number of sample paths we want for evaluation
                    )
                    # convert gluonts objects to lists
                    forecasts = list(forecast_it)
                    tss = list(ts_it)  # NOTE(review): unused below
                    # Trial score: MAE of the mean forecast over the window.
                    prediction_length = forecasts[0].mean.shape[0]
                    y_test_ = list(val_data)[0]['target']
                    y_preds_ = forecasts[0].mean
                    y_test_ = y_test_[-prediction_length:]
                    mae_ = mean_absolute_error(y_test_, y_preds_)
                    # Report the loss to comet
                    experiment.log_metric("loss", mae_)
                    experiment.end()
                # initialize comet REST API to retrieve the best hyperparameters
                comet_api = comet_ml.API(rest_api_key=rest_api_key)
                project = comet_api.get(workspace=workspace,
                                        project_name=optimizer.
                                        experiment_kwargs['project_name'].lower())
                # get the experiment ids
                exp_ids = [x.id for x in project]
                scores_df = pd.DataFrame(index=exp_ids, columns=['metric'])
                # loop through the experiments within the comet project
                for exp_id in exp_ids:
                    exp = comet_api.get(
                        f"{workspace}/{project_name.lower()}/{exp_id}")
                    scores_df.at[exp_id,
                                 'metric'] = exp.get_metrics()[0]['metricValue']
                scores_df.metric = scores_df.metric.map(float)
                # get experiment_id of the best (lowest-loss) score
                best_exp_id = scores_df.metric.idxmin()
                # get the best experiment
                exp = comet_api.get(
                    f"{workspace}/{project_name.lower()}/{best_exp_id}")
                # get the best hyperparameters ('f' is filtered as a non-parameter entry)
                best_params = {
                    x['name']: x['valueCurrent']
                    for x in exp.get_parameters_summary() if x['name'] != 'f'
                }
                # save best params in model_name-keyed dictionary for later use
                self.best_params[model_name] = best_params