Example #1
print("Creating Param List")
lstm_nodes = [32]
loss = ["mean_squared_error"]
activation = ["relu", "tanh", "sigmoid", "linear"]
recurrent_activation = ["hard_sigmoid", "sigmoid", "relu"]
param_grid = {
    "nodes": lsm_nodes,
    "loss": loss,
    "input_dim": [input_dim],
    "output_dim": [12],
    "timesteps": [timesteps],
    "activation": activation,
    "recurrent_activation": recurrent_activation,
    "batch_size": [batch_size]
}
params = ms.ParameterGrid(param_grid)

print("Evaluating Models")

scores = []
for param in params:
    print(param)
    np.random.seed(42)
    model = create_lstm(**param)

    if stateful:
        for i in range(epochs):
            print("Epoch ", i)
            model.reset_states()
            model.fit(X_train,
                      y_train,
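The snippet is cut off mid-`fit` call; the key pattern it demonstrates is that `ms.ParameterGrid` expands a dict of value lists into the full cross product of configurations. A minimal, self-contained sketch of that expansion (the grid keys here are illustrative only, not the ones above):

from sklearn import model_selection as ms

# Each iteration yields one concrete configuration as a plain dict,
# ready to be unpacked into a model factory via **param.
demo_grid = ms.ParameterGrid({"nodes": [32, 64], "activation": ["relu", "tanh"]})
for param in demo_grid:
    print(param)  # e.g. {'activation': 'relu', 'nodes': 32}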
Example #2
 grid_lk = selection.ParameterGrid(
     dict(
         winSize=[(5, 5), (15, 15), (31, 31)],
         maxLevel=[0, 3, 6],
         criteria=[
             (3, 4, 0.5),
             (3, 10, 0.03),
             (3, 30, 0.03),
         ],
         # termination criteria: both epsilon AND the iteration count are used.
         detec=[('sift', {
             'sigma': 2.5,
             'nOctaveLayers': 3,
             'edgeThreshold': 2,
             'contrastThreshold': 0.1
         }),
                ('sift', {
                    'sigma': 2.5,
                    'nOctaveLayers': 5,
                    'edgeThreshold': 2,
                    'contrastThreshold': 0.1
                }),
                ('sift', {
                    'sigma': 1.6,
                    'nOctaveLayers': 3,
                    'edgeThreshold': 2,
                    'contrastThreshold': 0.1
                }),
                ('surf', {
                    'nOctaves': 0,
                    'nOctaveLayers': 0,
                    'hessianThreshold': 0
                }),
                ('surf', {
                    'nOctaves': 0,
                    'nOctaveLayers': 0,
                    'hessianThreshold': 0
                }),
                ('surf', {
                    'nOctaves': 0,
                    'nOctaveLayers': 0,
                    'hessianThreshold': 0
                }),
                ('good', {
                    'qualityLevel': 0.5,
                    'minDistance': 61,
                    'maxCorners': 510,
                    'blockSize': 5
                }),
                ('good', {
                    'qualityLevel': 0.5,
                    'minDistance': 61,
                    'maxCorners': 10,
                    'blockSize': 5
                })]))
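The `criteria` triples above follow OpenCV's termination-criteria convention `(type, maxCount, epsilon)`. A short sketch confirming what type `3` encodes (assumes OpenCV is installed):

import cv2

# Type 3 is the bitwise OR of the two flags: iteration stops when either
# the count limit or the epsilon threshold is reached.
crit_type, max_count, epsilon = (3, 10, 0.03)
assert crit_type == cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT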
Example #3
    #                    f'model-{int(time.time())}.hdf5',
    'seed': [42, 0],
    'val_split': [0.2],  # 0.3],
    'generator': [
        generators.luke.standard_generators,
    ],
    'model':
    model_selection.ParameterGrid({
        # The callable must take in **kwargs as an argument
        'model_callable': [
            models.luke.resnet, models.luke.inception,
            models.luke.inception_resnet
        ],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.7, 0.8],
        # TODO: These are coupled with the generator too
        'batch_size': [8, 32],
        'rotation_range': [20],  # , 30],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-4),
            keras.optimizers.Adam(lr=1e-5),
            # TODO: Learning rate seems too low
            # keras.optimizers.SGD(lr=1e-5, momentum=0.9)
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
            keras.losses.binary_crossentropy,
        ]
    }),
}
Example #4
LOG_DIR = f'{BLUENO_HOME}logs/'
SLACK_TOKEN = 'xoxp-314216549302-331430419907-396979178437-' \
              'ae769a026a3c0f91623e9a6565f0d9ee'

NUM_GPUS = 1
GPU_OFFSET = 2

# Expects many .npy files in data/ and preprocessed/, plus labels.csv (patient_id, label 0 or 1)

model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))

model_list = [blueno.ModelConfig(**m) for m in model_list]

PARAM_GRID = model_selection.ParameterGrid({
    'data': [
        blueno.DataConfig(
            data_dir=str(
                pathlib.Path(DATA_DIR) / 'processed-new-training-2/arrays/'),
            labels_path=str(
                pathlib.Path(DATA_DIR) /
Example #5
 def ParameterGrid(self, param_grid):
     return list(_sklearn_model_selection.ParameterGrid(param_grid))
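Materializing the grid with `list(...)` gives an eager, pickleable list; note `ParameterGrid` itself already supports `len()` and indexing. A short usage sketch:

from sklearn import model_selection as _sklearn_model_selection

grid = list(_sklearn_model_selection.ParameterGrid({'a': [1, 2], 'b': [True]}))
print(len(grid))  # 2
print(grid[0])    # {'a': 1, 'b': True}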
Example #6
try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:

            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
            grid_prep = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid_prep = selection.ParameterGrid(dict(
        resize=[(1/2, 1/2), (3/4, 3/4), (1, 1)],
        filterType=['median', 'gauss'],
        filterSize=[(5, 5), (15, 15), (21, 21)],
        detec=[('sift', {'sigma': 1.0, 'nOctaveLayers': 5,
                         'edgeThreshold': 10, 'contrastThreshold': 0.01}),
               ('surf', {'nOctaves': 1, 'nOctaveLayers': 5,
                         'hessianThreshold': 100}),
               ('good', {'qualityLevel': 0.5, 'minDistance': 1,
                         'maxCorners': 260, 'blockSize': 2})],
    ))
    print(len(grid_prep))
    print(grid_prep[1])
    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid_prep, file)

video_path = folder + video
print('video_path: ' + video_path)

if last_stop[-1] < len(grid_prep)-1:
    for y in range(last_stop[-1], len(grid_prep)):
Example #7
resize = (3/4, 3/4)

try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:

            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
            grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(dict(hessianThreshold=np.array([10, 50, 100, 200, 500]),
                                        nOctaves=np.array([1, 3, 5]),
                                        nOctaveLayers=np.array([1, 3, 5])))

    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)

if last_stop[-1] < len(grid)-1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' %(str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [video_path, [grid[y]], [lk_params], filterType, filterSize, resize]
        start_t = timeit.default_timer()
Example #8
                                     random_state=42,
                                     n_jobs=1))])
    '''
    gs_cluster=GridSearchCV(estimator=texf_cluster,
                            param_grid=parms,
                            scoring="v_measure_score",
                            cv=[(range(0,len(data)), range(0,len(data)))]) # do not need CV

    parms_result=gs_cluster.fit(data,text_data.labels_true())
    print(parms_result.best_score_)
    print(parms_result.best_params_)
    '''

    result = []

    for g in list(model_selection.ParameterGrid(params)):
        print()
        print(g)
        texf_cluster.set_params(**g)
        labels_pred = texf_cluster.fit_predict(data)
        print(labels_pred)
        count_table = score_data.count_table(text_data.init_num_by_cls,
                                             labels_pred,
                                             g['KMeans__n_clusters'])
        print(count_table)
        #total_entropy=score_data.total_entropy(count_table)
        #print("Total Entropy:",total_entropy)
        print(
            "homogeneity score, completeness score, v score:",
            metrics.homogeneity_completeness_v_measure(text_data.labels_true(),
                                                       labels_pred))
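For reference, `homogeneity_completeness_v_measure` is invariant to relabeling of the predicted clusters; a tiny sketch with made-up labels:

from sklearn import metrics

# The prediction is a pure relabeling of the truth, so homogeneity,
# completeness, and v-measure all come out as 1.0.
truth = [0, 0, 1, 1]
pred = [1, 1, 0, 0]
print(metrics.homogeneity_completeness_v_measure(truth, pred))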
Example #9
DATA_DIR = ''
LOG_DIR = ''

NUM_GPUS = 1
GPU_OFFSET = 3

SLACK_TOKEN = ''

model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))

model_list = [blueno.ModelConfig(**m) for m in model_list]

data_list = list(
    model_selection.ParameterGrid({
        'pipeline_callable': [preprocessors.luke.preprocess_data],
        'data_dir': [str(pathlib.Path(DATA_DIR) / 'numpy_compressed')],
        'labels_path': [str(pathlib.Path(DATA_DIR) / 'metadata')],
        'index_col': ['Anon ID'],
        'label_col': ['occlusion_exists'],
Example #10
 def __init__(self, estimator, param_grid, verbose=0):
     self.param_grid = cv.ParameterGrid(param_grid)
     self.parent_estimator = estimator
     self.verbose = verbose
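The rest of the class is not shown; presumably its search loop clones `parent_estimator` once per grid point so that fits stay independent. A hedged sketch of that pattern, with `LogisticRegression` as a stand-in estimator:

from sklearn import base
from sklearn import model_selection as cv
from sklearn.linear_model import LogisticRegression

parent_estimator = LogisticRegression()
for params in cv.ParameterGrid({'C': [0.1, 1.0]}):
    estimator = base.clone(parent_estimator)  # fresh, unfitted copy
    estimator.set_params(**params)
    # ... fit and score the clone here ...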
Example #11
resize = (3/4, 3/4)

try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:

            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
            grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(dict(nOctaveLayers=np.array([1, 3, 5]),
                                        contrastThreshold=np.array([0.01, 0.04, 0.1]),
                                        edgeThreshold=np.array([2, 10, 15]),
                                        sigma=np.array([1.0, 1.6, 2.5])))

    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)

if last_stop[-1] < len(grid)-1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' %(str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [video_path, [grid[y]], [lk_params], filterType, filterSize, resize]
        start_t = timeit.default_timer()
Example #12
rfest.fit(trainX[varsUsed], trainY[list(['click_bool', 'booking_bool'])])
a4 = rfest.predict(testX[varsUsed])
print(mean_squared_error(testY['click_bool'] + 5*testY['booking_bool'], a4[:, 0] + 5*a4[:, 1]))
a2 = rfest.predict(testX[varsUsed])
rfest_tuned_parameters = [{'max_depth':[1,2,4,5],'n_estimators':[50,100,150,200,300]}]
rfestGS = ms.GridSearchCV(RandomForestClassifier(),rfest_tuned_parameters,cv=5,scoring='neg_mean_squared_error')
rfestGS.fit(trainX[varsUsed],trainY[list(['click_bool', 'booking_bool'])])

rfReg = RandomForestRegressor(n_estimators=100)
rfReg.fit(trainX[varsUsed], trainY['score'])
a3 = rfReg.predict(testX[varsUsed])
print(mean_squared_error(testY['score'], a3))
rfreg_tuned_parameters = [{'max_depth':[1,2,3],'n_estimators':[50,100,150,200]}]
rfregGS = ms.GridSearchCV(RandomForestRegressor(),rfreg_tuned_parameters,cv=5,scoring='neg_mean_squared_error')
rfregGS.fit(trainX[varsUsed],trainY['score'])
a1 = ms.ParameterGrid(rfreg_tuned_parameters)
scoresave = np.zeros(len(a1))
for i in range(len(a1)):
    rfregmgs = RandomForestRegressor(**a1[i])
    rfregmgs.fit(trainX[varsUsed],trainY['score'])    
    y_pred = rfregmgs.predict(testX[varsUsed])
    ndcg = util.ndcg.ndcg(testX[['srch_id', 'prop_id']], testY['score'], y_pred)
    scoresave[i] = ndcg
    
adaReg = AdaBoostRegressor()
adaReg.fit(trainX[varsUsed],trainY['score'])
print(mean_squared_error(testY['score'],adaReg.predict(testX[varsUsed])))
ada_tuned_parameters = [{'loss':['linear','square'],'learning_rate':[0.5,1,2],'n_estimators':[50,100,150,25]}]
adaGS = ms.GridSearchCV(AdaBoostRegressor(),ada_tuned_parameters,cv=5,scoring='neg_mean_squared_error')
adaGS.fit(trainX[varsUsed],trainY['score'])
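As a follow-up, each fitted `GridSearchCV` above exposes the standard scikit-learn attributes for reading back the winning configuration; continuing from `adaGS`:

print(adaGS.best_params_)  # best AdaBoost settings found by the search
print(adaGS.best_score_)   # corresponding mean cross-validated score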
Example #13
DATA_DIR = ''
LOG_DIR = ''

NUM_GPUS = 1
GPU_OFFSET = 3

SLACK_TOKEN = ''

model_list = list(
    model_selection.ParameterGrid({
        'model_callable': [models.luke.resnet],
        'dropout_rate1': [0.8],
        'dropout_rate2': [0.8],
        'optimizer': [
            keras.optimizers.Adam(lr=1e-5),
        ],
        'loss': [
            keras.losses.categorical_crossentropy,
        ],
        'freeze': [False],
    }))

model_list = [blueno.ModelConfig(**m) for m in model_list]

data_list = list(
    model_selection.ParameterGrid({
        'data_dir':
        [str(pathlib.Path(DATA_DIR) / 'processed-lower' / 'arrays')],
        'labels_path':
        [str(pathlib.Path(DATA_DIR) / 'processed-lower' / 'labels.csv')],
        'index_col': ['Anon ID'],
Example #14
 def _get_param_iterator(self):
     """Return ParameterGrid instance for the given param_grid"""
     return model_selection.ParameterGrid(self.param_grid)
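This mirrors scikit-learn's own `GridSearchCV._get_param_iterator`. Note that `ParameterGrid` accepts either a single dict or a list of dicts (disjoint sub-grids), just like `param_grid` itself; a small sketch:

from sklearn import model_selection

# Two disjoint sub-grids: two linear combinations plus one rbf combination.
grid = model_selection.ParameterGrid([
    {'kernel': ['linear'], 'C': [1, 10]},
    {'kernel': ['rbf'], 'gamma': [0.1]},
])
print(len(grid))  # 3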
Example #15
def run_search(path, window_sizes, angles, models, size=0, result_name="search_results", novelty=False, kfold_splits=5):
    DATA_PATH = path
    grid = model_selection.ParameterGrid(get_search_parameter())

    results = pd.DataFrame(
        columns=["model", "model_parameter", "minimal_movement", "sma",
                 "window_overlap", "pls", "window_size", "angle",
                 "sensitivity", "specificity"]
    )

    if not os.path.exists("tmp"):
        os.mkdir("tmp")

    for i, params in enumerate(grid):

        print(f"Running with params: \n{params}")

        if run_done(i, len(window_sizes) * len(angles)):
            print(f"Found parameters in checkpoints, skipping...")
            continue

        generate_fourier(DATA_PATH, window_sizes, size, params)

        # print(f"The number of methods without k-folding are: {str(len(models))}")
        pbar = tqdm(total=len(models) * len(window_sizes) * len(angles) * kfold_splits)

        def update_progress(*a):
            pbar.update()

        for window_size, angle in iterate_angles():

            if result_exist(result_name, i, window_size, angle):
                print(f"Found this combination in checkpoints, skipping...")
                pbar.update(len(models)*kfold_splits)
                continue

            with Manager() as manager:
                synced_results = manager.list()

                data, labels = load_fourier_angle(window_size, angle)

                data_amount = data.shape[0]

                print(f"Data amount:  {data_amount}")

                for batch in chunkify(models, 1):
                    pool = Pool()

                    kfold_parameters = {
                        "batch": batch,
                        "pool": pool,
                        "angle": angle,
                        "splits": kfold_splits,
                        "pls_components": params["pls"],
                        "novelty": novelty
                    }

                    async_kfold(data, labels, kfold_parameters, synced_results, update_progress)

                    pool.close()
                    pool.join()

                print("\nCheckpoint created.")
                checkpoint_name = f"{result_name.split('/')[-1]}_{str(window_size)}_{angle}_{i}.csv"
                dump_results(params, synced_results, window_size, checkpoint_name)
        pbar.close()
    save_and_clean(result_name)
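`chunkify` is not defined in this excerpt; a plausible minimal implementation (an assumption, shown only to make the batching step concrete):

def chunkify(items, n):
    """Yield successive batches of at most n items (hypothetical helper)."""
    for i in range(0, len(items), n):
        yield items[i:i + n]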
Example #16
def hyperoptimize(hyperparams: Union[blueno.ParamGrid,
                                     List[blueno.ParamConfig]],
                  username: str,
                  slack_token: str = None,
                  num_gpus=1,
                  gpu_offset=0,
                  log_dir: str = None) -> None:
    """
    Runs training jobs on input hyperparameter grid.

    :param hyperparams: a dictionary of parameters. See blueno/types for
    a specification
    :param username: your name
    :param slack_token: a Slack token, used to upload results to Slack
    :param num_gpus: the number of GPUs you will use
    :param gpu_offset: your GPU offset
    :param log_dir: the directory you will log to. This directory should
    already exist
    :return:
    """
    if isinstance(hyperparams, blueno.ParamGrid):
        param_list = model_selection.ParameterGrid(hyperparams.__dict__)
    else:
        param_list = hyperparams

    logging.info(
        'optimizing grid with {} configurations'.format(len(param_list)))

    gpu_index = 0
    processes = []
    for params in param_list:
        if isinstance(params, dict):
            params = blueno.ParamConfig(**params)

        check_data_in_sync(params)

        # This is where we'd run preprocessing. To run in a reasonable amount
        # of time, the raw data must be cached in-memory.
        arrays = preprocessing.prepare_data(params, train_test_val=False)
        x_train, x_valid, y_train, y_valid, id_train, id_valid = arrays

        # Start the model training job
        # Run in a separate process to avoid memory issues
        # Note how this depends on offset
        os.environ['CUDA_VISIBLE_DEVICES'] = f'{gpu_index + gpu_offset}'

        if params.job_fn is None:
            job_fn = start_job
        else:
            job_fn = params.job_fn

        logging.debug('using job fn {}'.format(job_fn))

        # Uses the parent of the data_dir to name the job,
        # which may not work for all data formats.
        if params.job_name:
            job_name = params.job_name
        else:
            job_name = str(pathlib.Path(params.data.data_dir).parent.name)
        job_name += f'_{y_train.shape[1]}-classes'

        process = multiprocessing.Process(target=job_fn,
                                          args=(x_train, y_train,
                                                x_valid, y_valid),
                                          kwargs={
                                              'params': params,
                                              'job_name': job_name,
                                              'username': username,
                                              'slack_token': slack_token,
                                              'log_dir': log_dir,
                                              'id_valid': id_valid,
                                          })
        gpu_index += 1
        gpu_index %= num_gpus

        logging.debug(f'gpu_index is now {gpu_index + gpu_offset}')
        process.start()
        processes.append(process)
        if gpu_index == 0:
            logging.info(f'all gpus used, calling join on processes:'
                         f' {processes}')
            p: multiprocessing.Process
            for p in processes:
                p.join()
            processes = []
            time.sleep(60)
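The GPU round-robin works because each child process inherits the environment, including `CUDA_VISIBLE_DEVICES`, as it stood at `start()` time. A minimal, self-contained sketch of the pattern (the worker body is illustrative):

import multiprocessing
import os


def worker():
    # Each child sees the value set just before its start() call.
    print('visible GPU:', os.environ.get('CUDA_VISIBLE_DEVICES'))


if __name__ == '__main__':
    for gpu_index in range(2):
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_index)
        multiprocessing.Process(target=worker).start()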
Example #17
resize = (3 / 4, 3 / 4)

try:
    with open(save_path + video + '_test' + '.pickle', 'rb') as file:
        with open(save_path + 'last_stop' + video + '.npy', 'rb') as stop_file:

            last_stop = np.fromfile(stop_file, dtype=np.uint32)
            print(last_stop)
            grid = pickle.load(file)
        print('No error in filehandling')
except (EOFError, FileNotFoundError) as e:
    print('First time for this video.')
    last_stop = [np.uint32(0)]
    grid = selection.ParameterGrid(
        dict(maxCorners=np.arange(10, 511, 250),
             qualityLevel=np.array([0.1, 0.5, 0.9]),
             minDistance=np.arange(1, 62, 30),
             blockSize=np.array([2, 3, 5])))

    with open(save_path + video + '_test' + '.pickle', 'wb') as file:
        pickle.dump(grid, file)

video_path = folder + video
print('video_path: ' + video_path)

if last_stop[-1] < len(grid) - 1:
    for y in range(last_stop[-1], len(grid)):
        print('Job %s of %s' % (str(y), str(len(grid))))
        kt = tr.keypoint_tracker(video_path, start_frame=start, end_frame=end)
        results = [
            video_path, [grid[y]], [lk_params], filterType, filterSize, resize