print("Creating Param List") lsm_nodes = [32] loss = ["mean_squared_error"] activation = ["relu", "tanh", "sigmoid", "linear"] recurrent_activation = ["hard_sigmoid", "sigmoid", "relu"] param_grid = { "nodes": lsm_nodes, "loss": loss, "input_dim": [input_dim], "output_dim": [12], "timesteps": [timesteps], "activation": activation, "recurrent_activation": recurrent_activation, "batch_size": [batch_size] } params = ms.ParameterGrid(param_grid) print("Evaluating Models") scores = [] for param in params: print(param) np.random.seed(42) model = create_lstm(**param) if stateful: for i in range(epochs): print("Epoch ", i) model.reset_states() model.fit(X_train, y_train,
grid_lk = selection.ParameterGrid(
    dict(
        winSize=[(5, 5), (15, 15), (31, 31)],
        maxLevel=[0, 3, 6],
        # Termination criteria: both epsilon AND iteration count are used.
        criteria=[
            (3, 4, 0.5),
            (3, 10, 0.03),
            (3, 30, 0.03),
        ],
        # NOTE: the three 'surf' entries below are identical, so that
        # configuration is evaluated three times.
        detec=[('sift', {'sigma': 2.5, 'nOctaveLayers': 3,
                         'edgeThreshold': 2, 'contrastThreshold': 0.1}),
               ('sift', {'sigma': 2.5, 'nOctaveLayers': 5,
                         'edgeThreshold': 2, 'contrastThreshold': 0.1}),
               ('sift', {'sigma': 1.6, 'nOctaveLayers': 3,
                         'edgeThreshold': 2, 'contrastThreshold': 0.1}),
               ('surf', {'nOctaves': 0, 'nOctaveLayers': 0,
                         'hessianThreshold': 0}),
               ('surf', {'nOctaves': 0, 'nOctaveLayers': 0,
                         'hessianThreshold': 0}),
               ('surf', {'nOctaves': 0, 'nOctaveLayers': 0,
                         'hessianThreshold': 0}),
               ('good', {'qualityLevel': 0.5, 'minDistance': 61,
                         'maxCorners': 510, 'blockSize': 5}),
               ('good', {'qualityLevel': 0.5, 'minDistance': 61,
                         'maxCorners': 10, 'blockSize': 5})]))
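# --- Aside (added for reference): grid values need not be scalars. Each
# 'detec' entry above is a (name, kwargs) tuple that ParameterGrid treats as
# one opaque candidate; the consuming code has to unpack it. A hedged sketch
# of that consuming side (the make_detector helper is invented here; 'good'
# maps to cv2.goodFeaturesToTrack arguments rather than a detector object,
# so it is left out):
import cv2

def make_detector(spec):
    name, kwargs = spec  # e.g. ('sift', {'sigma': 2.5, ...})
    if name == 'sift':
        return cv2.SIFT_create(**kwargs)  # OpenCV >= 4.4
    if name == 'surf':
        # SURF requires an opencv-contrib build.
        return cv2.xfeatures2d.SURF_create(**kwargs)
    raise ValueError(f'unsupported detector: {name}')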
    # f'model-{int(time.time())}.hdf5',
    'seed': [42, 0],
    'val_split': [0.2],  # 0.3],
    'generator': [
        generators.luke.standard_generators,
    ],
    'model': model_selection.ParameterGrid({
        # The callable must take in **kwargs as an argument
        'model_callable': [
            models.luke.resnet,
            models.luke.inception,
            models.luke.inception_resnet,
        ],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.7, 0.8],
        # TODO: These are coupled with the generator too
        'batch_size': [8, 32],
        'rotation_range': [20],  # , 30],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-4),
            keras.optimizers.Adam(lr=1e-5),
            # TODO: Learning rate seems too low
            # keras.optimizers.SGD(lr=1e-5, momentum=0.9)
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
            keras.losses.binary_crossentropy,
        ],
    }),
}
LOG_DIR = f'{BLUENO_HOME}logs/'
SLACK_TOKEN = 'xoxp-314216549302-331430419907-396979178437-' \
              'ae769a026a3c0f91623e9a6565f0d9ee'
NUM_GPUS = 1
GPU_OFFSET = 2

# Expects a lot of .npy files in data/ and preprocessed/, and a labels.csv
# (patient_id, label 0 or 1).
model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))
model_list = [blueno.ModelConfig(**m) for m in model_list]

PARAM_GRID = model_selection.ParameterGrid({
    'data': [
        blueno.DataConfig(
            data_dir=str(
                pathlib.Path(DATA_DIR) / 'processed-new-training-2/arrays/'),
            labels_path=str(
                pathlib.Path(DATA_DIR) /
def ParameterGrid(self, param_grid):
    """Materialize sklearn's ParameterGrid for param_grid as a list."""
    return list(_sklearn_model_selection.ParameterGrid(param_grid))
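# --- Aside (added for reference): ParameterGrid also accepts a list of
# dicts, treated as a union of independent grids, so a thin wrapper like the
# one above can cover several disjoint search spaces in a single call.
# A small sketch (the parameter names are illustrative):
from sklearn.model_selection import ParameterGrid

combos = list(ParameterGrid([
    {"kernel": ["linear"], "C": [1, 10]},      # 2 configurations
    {"kernel": ["rbf"], "gamma": [0.1, 1.0]},  # 2 configurations
]))
assert len(combos) == 4  # union of the two grids: 2 + 2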
try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:
            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
        grid_prep = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid_prep = selection.ParameterGrid(dict(
        resize=[(1/2, 1/2), (3/4, 3/4), (1, 1)],
        filterType=['median', 'gauss'],
        filterSize=[(5, 5), (15, 15), (21, 21)],
        detec=[('sift', {'sigma': 1.0, 'nOctaveLayers': 5,
                         'edgeThreshold': 10, 'contrastThreshold': 0.01}),
               ('surf', {'nOctaves': 1, 'nOctaveLayers': 5,
                         'hessianThreshold': 100}),
               ('good', {'qualityLevel': 0.5, 'minDistance': 1,
                         'maxCorners': 260, 'blockSize': 2}),
               ]))
    print(len(grid_prep))
    print(grid_prep[1])
    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid_prep, file)

video_path = folder + video
print('video_path: ' + video_path)
if last_stop[-1] < len(grid_prep) - 1:
    for y in range(last_stop[-1], len(grid_prep)):
resize = (3/4, 3/4)
try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:
            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
        grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(
        dict(hessianThreshold=np.array([10, 50, 100, 200, 500]),
             nOctaves=np.array([1, 3, 5]),
             nOctaveLayers=np.array([1, 3, 5])))
    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)
if last_stop[-1] < len(grid) - 1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' % (str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [video_path, [grid[y]], [lk_params],
                   filterType, filterSize, resize]
        start_t = timeit.default_timer()
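# --- Aside (added for reference): the checkpoint/resume pattern used in
# this script (and repeated in the similar snippets below) works because a
# pickled ParameterGrid indexes deterministically, so a saved counter is
# enough to skip finished jobs. A condensed, runnable sketch (state.pkl and
# the process stub are hypothetical names):
import pickle
from sklearn.model_selection import ParameterGrid

def process(params):
    print('running', params)  # stand-in for the per-configuration job

try:
    with open('state.pkl', 'rb') as f:
        saved_grid, saved_stop = pickle.load(f)
except (EOFError, FileNotFoundError):
    saved_grid = ParameterGrid({'blockSize': [2, 3, 5],
                                'qualityLevel': [0.1, 0.5, 0.9]})
    saved_stop = 0

for y in range(saved_stop, len(saved_grid)):
    process(saved_grid[y])
    with open('state.pkl', 'wb') as f:
        pickle.dump((saved_grid, y + 1), f)  # checkpoint after each job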
                                random_state=42, n_jobs=1))])
'''
gs_cluster = GridSearchCV(estimator=texf_cluster, param_grid=parms,
                          scoring="v_measure_score",
                          # do not need CV
                          cv=[(range(0, len(data)), range(0, len(data)))])
parms_result = gs_cluster.fit(data, text_data.labels_true())
print(parms_result.best_score_)
print(parms_result.best_params_)
'''

result = []
for g in list(model_selection.ParameterGrid(params)):
    print()
    print(g)
    texf_cluster.set_params(**g)
    labels_pred = texf_cluster.fit_predict(data)
    print(labels_pred)
    count_table = score_data.count_table(text_data.init_num_by_cls,
                                         labels_pred,
                                         g['KMeans__n_clusters'])
    print(count_table)
    # total_entropy = score_data.total_entropy(count_table)
    # print("Total Entropy:", total_entropy)
    print("homogeneity score, completeness score, v score:",
          metrics.homogeneity_completeness_v_measure(
              text_data.labels_true(), labels_pred))
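# --- Aside (added for reference): the 'KMeans__n_clusters' key above uses
# sklearn's '<step name>__<parameter>' convention, so the same grid dict
# works both for Pipeline.set_params (as in the loop above) and for
# GridSearchCV. A minimal self-contained sketch:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import ParameterGrid
from sklearn.pipeline import Pipeline

pipe = Pipeline([('tfidf', TfidfVectorizer()), ('KMeans', KMeans())])
for g in ParameterGrid({'KMeans__n_clusters': [2, 4],
                        'tfidf__max_features': [1000]}):
    pipe.set_params(**g)  # routes each value to the matching step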
DATA_DIR = ''
LOG_DIR = ''
NUM_GPUS = 1
GPU_OFFSET = 3
SLACK_TOKEN = ''

model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))
model_list = [blueno.ModelConfig(**m) for m in model_list]

data_list = list(
    model_selection.ParameterGrid({
        'pipeline_callable': [preprocessors.luke.preprocess_data],
        'data_dir': [str(pathlib.Path(DATA_DIR) / 'numpy_compressed')],
        'labels_path': [str(pathlib.Path(DATA_DIR) / 'metadata')],
        'index_col': ['Anon ID'],
        'label_col': ['occlusion_exists'],
def __init__(self, estimator, param_grid, verbose=0):
    self.param_grid = cv.ParameterGrid(param_grid)
    self.parent_estimator = estimator
    self.verbose = verbose
resize = (3/4, 3/4)
try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:
            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
        grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(
        dict(nOctaveLayers=np.array([1, 3, 5]),
             contrastThreshold=np.array([0.01, 0.04, 0.1]),
             edgeThreshold=np.array([2, 10, 15]),
             sigma=np.array([1.0, 1.6, 2.5])))
    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)
if last_stop[-1] < len(grid) - 1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' % (str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [video_path, [grid[y]], [lk_params],
                   filterType, filterSize, resize]
        start_t = timeit.default_timer()
rfest.fit(trainX[varsUsed], trainY[['click_bool', 'booking_bool']])
a4 = rfest.predict(testX[varsUsed])
print(mean_squared_error(testY['click_bool'] + 5 * testY['booking_bool'],
                         a4[:, 0] + 5 * a4[:, 1]))
a2 = rfest.predict(testX[varsUsed])
rfest_tuned_parameters = [{'max_depth': [1, 2, 4, 5],
                           'n_estimators': [50, 100, 150, 200, 300]}]
rfestGS = ms.GridSearchCV(RandomForestClassifier(), rfest_tuned_parameters,
                          cv=5, scoring='neg_mean_squared_error')
rfestGS.fit(trainX[varsUsed], trainY[['click_bool', 'booking_bool']])

rfReg = RandomForestRegressor(n_estimators=100)
rfReg.fit(trainX[varsUsed], trainY['score'])
a3 = rfReg.predict(testX[varsUsed])
print(mean_squared_error(testY['score'], a3))
rfreg_tuned_parameters = [{'max_depth': [1, 2, 3],
                           'n_estimators': [50, 100, 150, 200]}]
rfregGS = ms.GridSearchCV(RandomForestRegressor(), rfreg_tuned_parameters,
                          cv=5, scoring='neg_mean_squared_error')
rfregGS.fit(trainX[varsUsed], trainY['score'])

a1 = ms.ParameterGrid(rfreg_tuned_parameters)
scoresave = np.zeros(len(a1))
for i in range(len(a1)):
    rfregmgs = RandomForestRegressor(**a1[i])
    rfregmgs.fit(trainX[varsUsed], trainY['score'])
    y_pred = rfregmgs.predict(testX[varsUsed])
    ndcg = util.ndcg.ndcg(testX[['srch_id', 'prop_id']], testY['score'], y_pred)
    scoresave[i] = ndcg

adaReg = AdaBoostRegressor()
adaReg.fit(trainX[varsUsed], trainY['score'])
print(mean_squared_error(testY['score'], adaReg.predict(testX[varsUsed])))
ada_tuned_parameters = [{'loss': ['linear', 'square'],
                         'learning_rate': [0.5, 1, 2],
                         'n_estimators': [50, 100, 150, 25]}]
adaGS = ms.GridSearchCV(AdaBoostRegressor(), ada_tuned_parameters,
                        cv=5, scoring='neg_mean_squared_error')
adaGS.fit(trainX[varsUsed], trainY['score'])
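# --- Aside (added for reference): the manual loop over ms.ParameterGrid
# above is the usual workaround when the selection metric (here NDCG) is not
# a built-in GridSearchCV scorer. Since ParameterGrid supports integer
# indexing, the best configuration is one argmax away (assumes a1 and
# scoresave as defined above):
best_idx = int(np.argmax(scoresave))
print('best params:', a1[best_idx], 'ndcg:', scoresave[best_idx])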
DATA_DIR = ''
LOG_DIR = ''
NUM_GPUS = 1
GPU_OFFSET = 3
SLACK_TOKEN = ''

model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))
model_list = [blueno.ModelConfig(**m) for m in model_list]

data_list = list(
    model_selection.ParameterGrid({
        'data_dir': [str(pathlib.Path(DATA_DIR) / 'processed-lower' /
                         'arrays')],
        'labels_path': [str(pathlib.Path(DATA_DIR) / 'processed-lower' /
                            'labels.csv')],
        'index_col': ['Anon ID'],
def _get_param_iterator(self):
    """Return a ParameterGrid instance for the given param_grid."""
    return model_selection.ParameterGrid(self.param_grid)
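# --- Aside (added for reference): this mirrors the hook that older sklearn
# releases used internally, where BaseSearchCV called _get_param_iterator to
# enumerate candidates (newer releases expose _run_search instead).
# Overriding it lets a subclass control which grid points are evaluated.
# An illustrative, hedged sketch against that older private API (the class
# name and the every-other-point rule are invented, and private APIs can
# change between releases):
from sklearn.model_selection import GridSearchCV, ParameterGrid

class EveryOtherGridSearchCV(GridSearchCV):
    def _get_param_iterator(self):
        grid = ParameterGrid(self.param_grid)
        return [grid[i] for i in range(0, len(grid), 2)]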
def run_search(path, window_sizes, angles, models, size=0,
               result_name="search_results", novelty=False, kfold_splits=5):
    DATA_PATH = path
    grid = model_selection.ParameterGrid(get_search_parameter())
    results = pd.DataFrame(
        columns=["model", "model_parameter", "minimal_movement", "sma",
                 "window_overlap", "pls", "window_size", "angle",
                 "sensitivity", "specificity"])

    if not os.path.exists("tmp"):
        os.mkdir("tmp")

    for i, params in enumerate(grid):
        print(f"Running with params: \n{params}")
        if run_done(i, len(window_sizes) * len(angles)):
            print("Found parameters in checkpoints, skipping...")
            continue

        generate_fourier(DATA_PATH, window_sizes, size, params)
        # print(f"The number of methods without k-folding are: {str(len(models))}")
        pbar = tqdm(total=len(models) * len(window_sizes) * len(angles)
                          * kfold_splits)

        def update_progress(*a):
            pbar.update()

        for window_size, angle in iterate_angles():
            if result_exist(result_name, i, window_size, angle):
                print("Found this combination in checkpoints, skipping...")
                pbar.update(len(models) * kfold_splits)
                continue

            with Manager() as manager:
                synced_results = manager.list()
                data, labels = load_fourier_angle(window_size, angle)
                data_amount = data.shape[0]
                print(f"Data amount: {data_amount}")

                for batch in chunkify(models, 1):
                    pool = Pool()
                    kfold_parameters = {
                        "batch": batch,
                        "pool": pool,
                        "angle": angle,
                        "splits": kfold_splits,
                        "pls_components": params["pls"],
                        "novelty": novelty
                    }
                    async_kfold(data, labels, kfold_parameters,
                                synced_results, update_progress)
                    pool.close()
                    pool.join()

                print("\nCheckpoint created.")
                checkpoint_name = (f"{result_name.split('/')[-1]}_"
                                   f"{str(window_size)}_{angle}_{i}.csv")
                dump_results(params, synced_results, window_size,
                             checkpoint_name)
        pbar.close()
    save_and_clean(result_name)
def hyperoptimize(hyperparams: Union[blueno.ParamGrid,
                                     List[blueno.ParamConfig]],
                  username: str,
                  slack_token: str = None,
                  num_gpus=1,
                  gpu_offset=0,
                  log_dir: str = None) -> None:
    """
    Runs training jobs on the input hyperparameter grid.

    :param hyperparams: a dictionary of parameters. See blueno/types for a
        specification
    :param username: your name
    :param slack_token: a Slack token, for uploading results to Slack
    :param num_gpus: the number of GPUs you will use
    :param gpu_offset: your GPU offset
    :param log_dir: the directory you will log to. This directory should
        already exist
    :return:
    """
    if isinstance(hyperparams, blueno.ParamGrid):
        param_list = model_selection.ParameterGrid(hyperparams.__dict__)
    else:
        param_list = hyperparams

    logging.info(
        'optimizing grid with {} configurations'.format(len(param_list)))

    gpu_index = 0
    processes = []
    for params in param_list:
        if isinstance(params, dict):
            params = blueno.ParamConfig(**params)

        check_data_in_sync(params)

        # This is where we'd run preprocessing. To run in a reasonable amount
        # of time, the raw data must be cached in-memory.
        arrays = preprocessing.prepare_data(params, train_test_val=False)
        x_train, x_valid, y_train, y_valid, id_train, id_valid = arrays

        # Start the model training job.
        # Run in a separate process to avoid memory issues.
        # Note how this depends on the offset.
        os.environ['CUDA_VISIBLE_DEVICES'] = f'{gpu_index + gpu_offset}'

        if params.job_fn is None:
            job_fn = start_job
        else:
            job_fn = params.job_fn
        logging.debug('using job fn {}'.format(job_fn))

        # Uses the parent of the data_dir to name the job,
        # which may not work for all data formats.
        if params.job_name:
            job_name = params.job_name
        else:
            job_name = str(pathlib.Path(params.data.data_dir).parent.name)
            job_name += f'_{y_train.shape[1]}-classes'

        process = multiprocessing.Process(target=job_fn,
                                          args=(x_train, y_train,
                                                x_valid, y_valid),
                                          kwargs={
                                              'params': params,
                                              'job_name': job_name,
                                              'username': username,
                                              'slack_token': slack_token,
                                              'log_dir': log_dir,
                                              'id_valid': id_valid,
                                          })
        gpu_index += 1
        gpu_index %= num_gpus
        logging.debug(f'gpu_index is now {gpu_index + gpu_offset}')

        process.start()
        processes.append(process)
        if gpu_index == 0:
            logging.info(f'all gpus used, calling join on processes:'
                         f' {processes}')
            p: multiprocessing.Process
            for p in processes:
                p.join()
            processes = []
            time.sleep(60)
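# --- Aside (added for reference): the scheduling idea above, round-robin
# assignment of grid configurations to GPUs via CUDA_VISIBLE_DEVICES with a
# join once every slot is busy, isolated into a runnable sketch. The
# run_config worker is a stand-in for start_job:
import multiprocessing
import os

def run_config(params):
    # The child sees only the single GPU chosen by the parent.
    print(os.environ.get('CUDA_VISIBLE_DEVICES'), params)

def round_robin(param_list, num_gpus=1, gpu_offset=0):
    processes = []
    for i, params in enumerate(param_list):
        slot = i % num_gpus
        # Must be set before the child starts; children inherit the
        # parent's environment.
        os.environ['CUDA_VISIBLE_DEVICES'] = f'{slot + gpu_offset}'
        p = multiprocessing.Process(target=run_config, args=(params,))
        p.start()
        processes.append(p)
        if slot == num_gpus - 1:
            for p in processes:  # all GPUs busy: drain the batch
                p.join()
            processes = []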
resize = (3 / 4, 3 / 4)
try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:
            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
        grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(
        dict(maxCorners=np.arange(10, 511, 250),
             qualityLevel=np.array([0.1, 0.5, 0.9]),
             minDistance=np.arange(1, 62, 30),
             blockSize=np.array([2, 3, 5])))
    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)
if last_stop[-1] < len(grid) - 1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' % (str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [
            video_path, [grid[y]], [lk_params], filterType, filterSize, resize