Example #1
def optimize_a_b_step(data, template, y, a, b, idx_out_train, idx_out_test,
                      pipeline_params, pad_size):
    # resolve the prediction and gradient callables named in the config
    test_score_prediction = import_func(**pipeline_params['prediction_func'])
    count_grads_a_b = import_func(**pipeline_params['count_grads_a_b'])
    reg = pipeline_params['pipeline_optimization_params']

    y_out_train = y[idx_out_train]
    # pairwise distance matrix to the template plus its derivatives w.r.t. a and b
    kwargs = create_kwargs(pipeline_params, data, template, a, b,
                           idx_out_train, False, reg['add_padding'], pad_size)
    K, da, db = count_dist_matrix_to_template(**kwargs)

    # restrict the kernel to the train+val subset
    K_out_train = K[np.ix_(idx_out_train, idx_out_train)]

    # grid-search the downstream classifier on the training kernel
    best_params = find_pipeline_params(
        K_out_train,
        y_out_train,
        pipeline_params['ml_params'],
        pipeline_params['n_jobs'],
        random_state=pipeline_params['random_state'],
        scaled=pipeline_params['scaled'],
        scoring='roc_auc',
        n_splits=pipeline_params['n_splits'],
        kernel=pipeline_params['kernel'])

    test_score, test_loss = test_score_prediction(K=K,
                                                  y=y,
                                                  idx_train=idx_out_train,
                                                  idx_test=idx_out_test,
                                                  params=best_params)

    # gradients of the loss w.r.t. a and b on the training fold
    grads_da, grads_db, train_score, train_loss = count_grads_a_b(
        exp_K=K_out_train,
        y=y_out_train,
        da=da[np.ix_(idx_out_train, idx_out_train)],
        db=db[np.ix_(idx_out_train, idx_out_train)],
        params=best_params,
        n_splits=pipeline_params['n_splits'],
        random_state=pipeline_params['random_state'])

    gc.collect()

    return best_params, grads_da, grads_db, train_score, test_score, train_loss, test_loss
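
import_func appears in every example here to resolve a callable from a config entry. A minimal sketch of what it presumably does, assuming each entry carries a module path and a function name (the 'module'/'func' key names are an assumption, not the project's documented signature):

import importlib

def import_func(module, func):
    # resolve the named attribute from the named module
    return getattr(importlib.import_module(module), func)

# e.g. {'module': 'my_pkg.scoring', 'func': 'svm_test_score'} would resolve
# my_pkg.scoring.svm_test_score (hypothetical names, for illustration only)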
Example #2
def create_template(path_to_data, train_idx, path_to_template, template_name,
                    resolution, sigma=0.01, load_func_template=None):
    images = []

    # data can arrive either as file paths or as already-loaded arrays
    # (np.string_/np.unicode_ were removed in NumPy 2.0; str/bytes cover both)
    if isinstance(path_to_data[0], (str, bytes)):
        assert load_func_template is not None, \
            "if data is given by full paths, you should provide a loader"
        load_images = import_func(**load_func_template)
        images = load_images(path_to_data[train_idx])
    elif isinstance(path_to_data[0], np.ndarray):
        images = path_to_data[train_idx]

    if resolution != 1:
        images = change_resolution(images, resolution, sigma)

    # the template is the voxel-wise mean over the training images
    images = np.array(images)
    template = np.mean(images, axis=0)

    save_nii(template, os.path.join(path_to_template, template_name))
    return template
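
change_resolution is not defined in these examples. A minimal sketch of a plausible implementation, assuming it Gaussian-smooths each image and then rescales it with scipy (the smoothing/zoom convention is an assumption):

import numpy as np
from scipy.ndimage import gaussian_filter, zoom

def change_resolution(images, resolution, sigma=0.01):
    # hypothetical: smooth to suppress aliasing, then resample each image;
    # 'resolution' is treated as a downscale factor (2 -> half size),
    # which is an assumption about the project's convention
    return [zoom(gaussian_filter(np.asarray(img), sigma=sigma),
                 1.0 / resolution) for img in images]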
Example #3
def metric_learning_to_template(PATH):
    # PATH is the path to the experiment config (JSON)
    print("START EXPERIMENT")
    with open(PATH, 'r') as f:
        pipeline_params = json.load(f)
    # extract params for shorter usage
    pipeline_main_loop = import_func(**pipeline_params['pipeline_main_loop'])

    random_state = pipeline_params['random_state']
    experiment_name = pipeline_params['experiment_name']
    experiment_path = os.path.join(pipeline_params['path_to_exp'],
                                   experiment_name)
    path_to_template = os.path.join(experiment_path, 'templates/')
    template_name = 'template_0.nii'

    # create folder and path
    create_exp_folders(experiment_path, params=pipeline_params)

    print('experiment name: ', experiment_name)

    load_data = import_func(**pipeline_params['load_func'])
    data, y = load_data(**pipeline_params['load_params'])

    # np.str/np.unicode aliases were removed in modern NumPy; str/bytes suffice
    if isinstance(data[0], (str, bytes)):
        np.savez(os.path.join(experiment_path, 'data_path.npz'),
                 np.array(data))

    if pipeline_params['subset'] != 1.:
        data, y = get_subset(data, y, pipeline_params['subset'],
                             pipeline_params['random_state'])
    np.savez(os.path.join(experiment_path, 'target.npz'), y)
    print("Data size: ", data.shape, " target mean: ", y.mean())

    # create splits for (train+val) and test
    idx_out_train, idx_out_test = list(
        StratifiedShuffleSplit(n_splits=1,
                               test_size=0.3,
                               random_state=random_state).split(
                                   np.arange(len(data)), y))[0]

    splits = {
        'train_val': idx_out_train.tolist(),
        'test': idx_out_test.tolist()
    }
    save_params(experiment_path, 'splits_indices', splits)

    # create template on train data and save it
    template = create_template(
        path_to_data=data,
        train_idx=idx_out_train,
        path_to_template=path_to_template,
        template_name=template_name,
        resolution=pipeline_params['resolution'],
        load_func_template=pipeline_params['load_func_template'])

    # check if template needs padding
    if check_for_padding(template):
        template = pad_template_data_after_loop(
            template.copy(),
            os.path.join(path_to_template, template_name),
            pad_size=pipeline_params['pipeline_optimization_params']['pad_size'],
            ndim=pipeline_params['ndim'])

        pipeline_params['pipeline_optimization_params']['add_padding'] = True
        pad_size = pipeline_params['pipeline_optimization_params']['pad_size']
    else:
        pad_size = 0

    pipeline_main_loop(data=data,
                       template=template,
                       y=y,
                       idx_out_train=idx_out_train,
                       idx_out_test=idx_out_test,
                       experiment_path=experiment_path,
                       path_to_template=path_to_template,
                       template_name=template_name,
                       pipeline_params=pipeline_params,
                       pad_size=pad_size)

    print("FINISHED")
Example #4
def sparse_dot_product_forward(vector,
                               ndim,
                               mat_shape,
                               T,
                               loss,
                               window,
                               params_grad,
                               param_der,
                               n_jobs=10):
    mat_len = int(np.prod(mat_shape))  # number of spatial grid points
    # assert ndim * mat_len == len(vector), "not correct shape of vector"

    derivative_func = import_func(**param_der)

    deltas = list(
        itertools.product(range(-window, window + 1), repeat=vector.ndim - 2))
    mn, mx = (0, ) * ndim, vector.shape[2:]

    data, rows, cols = [], [], []

    for ax in range(ndim):
        # expand '~' explicitly so the temp folder resolves to the home directory
        result = Parallel(n_jobs=n_jobs,
                          temp_folder=os.path.expanduser('~/JOBLIB_TMP_FOLDER/'))(
            delayed(one_der)(i, ax, T, mat_shape, mat_len, deltas, mn, mx,
                             derivative_func, vector, loss, params_grad)
            for i in tqdm(np.ndindex(*vector.shape[2:]), desc='dJ_der'))

        loc_data, loc_rows, loc_cols = map(np.concatenate, zip(*result))
        data.extend(loc_data)
        rows.extend(loc_rows)
        cols.extend(loc_cols)

    gc.collect()
    # fill the symmetric cross-axis (mixed-derivative) entries
    for i in range(min(mat_shape)):
        I = matrix_to_vec_indices(i, mat_shape)
        for ax in itertools.combinations(range(ndim), 2):
            der = derivative_func(
                i=(T, ax[0]) + (i,) * ndim,
                j=(T, ax[1]) + (i,) * ndim,
                vf=np.copy(vector),
                loss=loss,
                **params_grad)
            # (a small-magnitude guard, np.abs(der) > 1e-15, was disabled here)

            i_loc = I + ax[0] * mat_len
            j_loc = I + ax[1] * mat_len

            data.extend([der, der])
            rows.extend([i_loc, j_loc])
            cols.extend([j_loc, i_loc])

    shape = (ndim * mat_len, ndim * mat_len)
    result = coo_matrix((data, (rows, cols)), shape=shape)

    regul = np.real(ifftn(params_grad['reg'].regularizer.operator))
    r = np.arange(int(ndim * mat_len))

    reg = coo_matrix((np.repeat(regul.reshape(-1), ndim), (r, r)), shape=shape)
    try:
        return inv(result + reg)
    except Exception:
        # fall back to extra diagonal damping if the matrix is singular
        regul2 = coo_matrix((np.repeat(1e-8, len(r)), (r, r)), shape=shape)
        return inv(result + reg + regul2)
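
matrix_to_vec_indices is an external helper. A minimal sketch of the obvious implementation, assuming it maps a grid index to its position in the flattened vector in row-major (C) order, with a scalar argument interpreted as a point on the grid diagonal as in the loop above (both conventions are assumptions):

import numpy as np

def matrix_to_vec_indices(indices, mat_shape):
    # hypothetical: linearize an n-d grid index, C order
    idx = np.atleast_1d(indices)
    if idx.size == 1:
        idx = np.repeat(idx, len(mat_shape))  # scalar -> diagonal point
    return int(np.ravel_multi_index(tuple(idx), mat_shape))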
Example #5
def pipeline_main_loop_template_only(data, template, y, idx_out_train,
                                     idx_out_test, experiment_path,
                                     path_to_template, template_name,
                                     pipeline_params, pad_size):
    # initialize learning rate changing strategy
    lr_change = import_func(**pipeline_params['lr_type'])
    lr_params = pipeline_params['lr_change_params'][
        pipeline_params['lr_type']['func']]
    template_updates = pipeline_params['template_updates']
    reg = pipeline_params['pipeline_optimization_params']
    lr = template_updates['lr']
    it = 1
    a = reg['a']
    b = reg['b']

    # create a resulting data frame
    if pipeline_params['kernel']:
        results = pd.DataFrame(columns=[
            "iter", "a", "b", "kernel gamma", "LR C", "train_score",
            "train_loss", "test_score", "test_loss", "one_loop_time",
            "pad_size"
        ])
    else:
        results = pd.DataFrame(columns=[
            "iter", "a", "b", "LR C", "train_score", "train_loss",
            "test_score", "test_loss", "one_loop_time", "pad_size"
        ])

    print('For params a {} and b {}'.format(a, b))
    test_score_prediction = import_func(**pipeline_params['prediction_func'])
    count_grads_template = import_func(
        **pipeline_params['count_grads_template'])

    add_padding = reg['add_padding']

    kwargs = create_kwargs(pipeline_params, data, template, a, b,
                           idx_out_train, True, add_padding, pad_size)
    y_out_train = y[idx_out_train]

    while it < pipeline_params['Number_of_iterations']:

        st = time.time()

        print('For iter {}'.format(int(it)))

        K, da, db, dJ = count_dist_matrix_to_template(**kwargs)

        K_path = os.path.join(pipeline_params['path_to_exp'],
                              pipeline_params['experiment_name'], 'kernel')
        np.savez(os.path.join(K_path, 'dJ_' + str(it) + '.npz'), dJ)
        np.savez(os.path.join(K_path, 'kernel_' + str(it) + '.npz'), K)

        K_out_train = K[np.ix_(idx_out_train, idx_out_train)]

        best_params = find_pipeline_params(
            K_out_train,
            y_out_train,
            pipeline_params['ml_params'],
            pipeline_params['n_jobs'],
            random_state=pipeline_params['random_state'],
            scaled=pipeline_params['scaled'],
            scoring='roc_auc',
            n_splits=pipeline_params['n_splits'],
            kernel=pipeline_params['kernel'])

        test_score, test_loss = test_score_prediction(K=K,
                                                      y=y,
                                                      idx_train=idx_out_train,
                                                      idx_test=idx_out_test,
                                                      params=best_params)

        grads_da, grads_db, grads_dJ, train_score, train_loss = count_grads_template(
            exp_K=K_out_train,
            y=y_out_train,
            da=da[np.ix_(idx_out_train, idx_out_train)],
            db=db[np.ix_(idx_out_train, idx_out_train)],
            dJ=dJ,
            params=best_params,
            n_splits=pipeline_params['n_splits'],
            ndim=pipeline_params['ndim'],
            random_state=pipeline_params['random_state'],
            kernel=pipeline_params['kernel'])
        np.savez(os.path.join(experiment_path, f'grads_dJ/grad_dJ_{it}.npz'),
                 grads_dJ)
        # delta = preprocess_delta_template(grads_dJ, axis=template_updates['template_axis'],
        #                                   contour_color=template_updates['color'],
        #                                   width=template_updates['width'],
        #                                   ndim=pipeline_params['ndim'])
        # np.savez(os.path.join(experiment_path, f'grads_dJ/delta_dJ_{it}.npz'), delta)
        # alternative: lr * int(np.max(template)) / np.max(grads_dJ)  # normalize on 255
        # use the decayed lr (updated by lr_change below) rather than the fixed
        # template_updates['lr'], so the schedule actually takes effect
        lr_loc = lr * np.max(template) / np.std(grads_dJ)
        template_name = template_name.split('_')[0] + '_' + str(it) + '.nii'
        template = update_template(template, path_to_template, template_name,
                                   grads_dJ, lr_loc)

        if check_for_padding(template):
            template = pad_template_data_after_loop(
                template.copy(),
                os.path.join(path_to_template, template_name),
                pad_size=reg['pad_size'],
                ndim=pipeline_params['ndim'])

            kwargs['add_padding'] = True
            kwargs['pad_size'] += reg['pad_size']

        if pipeline_params['kernel']:
            results.loc[it - 1] = [
                it, a, b, best_params['kernel__gamma'], best_params['ml__C'],
                train_score, train_loss, test_score, test_loss,
                time.time() - st, pad_size
            ]
        else:
            results.loc[it - 1] = [
                it, a, b, best_params['ml__C'], train_score, train_loss,
                test_score, test_loss,
                time.time() - st, pad_size
            ]

        kwargs['template'] = template

        it += 1

        lr = lr_change(prev_lr=lr,
                       it=it,
                       step=lr_params['step'],
                       decay=lr_params['decay'])

        results.to_csv(os.path.join(experiment_path, 'results.csv'))

        gc.collect()
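
The lr_change strategy is resolved from the config by import_func. A minimal step-decay sketch matching the call signature used above (the decay rule itself is an assumption):

def step_decay(prev_lr, it, step, decay):
    # hypothetical schedule: multiply the learning rate by `decay`
    # every `step` iterations, otherwise keep it unchanged
    return prev_lr * decay if it % step == 0 else prev_lr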
Example #6
def optimize_template_step(data, template, y, a, b, idx_out_train,
                           idx_out_test, pipeline_params, template_name,
                           path_to_template, pad_size, it, lr):
    test_score_prediction = import_func(**pipeline_params['prediction_func'])
    count_grads_template = import_func(
        **pipeline_params['count_grads_template'])

    experiment_name = pipeline_params['experiment_name']
    experiment_path = os.path.join(pipeline_params['path_to_exp'],
                                   experiment_name)

    template_updates = pipeline_params['template_updates']
    reg = pipeline_params['pipeline_optimization_params']

    add_padding = reg['add_padding']
    y_out_train = y[idx_out_train]

    kwargs = create_kwargs(pipeline_params, data, template, a, b,
                           idx_out_train, True, add_padding, pad_size)
    K, da, db, dJ = count_dist_matrix_to_template(**kwargs)

    K_path = os.path.join(experiment_path, 'kernel')
    np.savez(os.path.join(K_path, 'dJ_' + str(it) + '.npz'), dJ)
    np.savez(os.path.join(K_path, 'kernel_' + str(it) + '.npz'), K)

    K_out_train = K[np.ix_(idx_out_train, idx_out_train)]

    best_params = find_pipeline_params(
        K_out_train,
        y_out_train,
        pipeline_params['ml_params'],
        pipeline_params['n_jobs'],
        random_state=pipeline_params['random_state'],
        scaled=pipeline_params['scaled'],
        scoring='roc_auc',
        n_splits=pipeline_params['n_splits'],
        kernel=pipeline_params['kernel'])

    test_score, test_loss = test_score_prediction(K=K,
                                                  y=y,
                                                  idx_train=idx_out_train,
                                                  idx_test=idx_out_test,
                                                  params=best_params)

    grads_da, grads_db, grads_dJ, train_score, train_loss = count_grads_template(
        exp_K=K_out_train,
        y=y_out_train,
        da=da[np.ix_(idx_out_train, idx_out_train)],
        db=db[np.ix_(idx_out_train, idx_out_train)],
        dJ=dJ,
        params=best_params,
        n_splits=pipeline_params['n_splits'],
        ndim=pipeline_params['ndim'],
        random_state=pipeline_params['random_state'],
        kernel=pipeline_params['kernel'])
    np.savez(os.path.join(experiment_path, f'grads_dJ/grad_dJ_{it}.npz'),
             grads_dJ)
    # delta = preprocess_delta_template(grads_dJ, axis=template_updates['template_axis'],
    #                                   contour_color=template_updates['color'],
    #                                   width=template_updates['width'],
    #                                   ndim=pipeline_params['ndim'])

    # np.savez(os.path.join(experiment_path, f'grads_dJ/delta_dJ_{it}.npz'), delta)

    # scale the caller-supplied lr to the template's intensity range
    # (alternative: lr * int(np.max(template)) / np.max(grads_dJ))
    lr_loc = lr * np.max(template) / np.std(grads_dJ)
    template_name = template_name.split('_')[0] + '_' + str(it) + '.nii'
    template = update_template(template, path_to_template, template_name,
                               grads_dJ, lr_loc)

    if check_for_padding(template):
        template = pad_template_data_after_loop(
            template.copy(),
            os.path.join(path_to_template, template_name),
            pad_size=reg['pad_size'],
            ndim=pipeline_params['ndim'])

        add_padding = True
        pad_size += reg['pad_size']
        kwargs['add_padding'] = True
        kwargs['pad_size'] += reg['pad_size']

    gc.collect()
    return template, best_params, grads_da, grads_db, train_score, test_score, train_loss, test_loss, add_padding, pad_size
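
check_for_padding and pad_template_data_after_loop are external helpers. A minimal sketch of plausible implementations, assuming padding is triggered when the template carries non-negligible intensity on any face of the volume (the threshold, the zero-padding, and the nibabel save are all assumptions):

import numpy as np
import nibabel as nib

def check_for_padding(template, eps=1e-3):
    # hypothetical: does the template "touch" any border of the volume?
    thr = eps * np.abs(template).max()
    return any(np.abs(np.take(template, [0, -1], axis=ax)).max() > thr
               for ax in range(template.ndim))

def pad_template_data_after_loop(template, save_path, pad_size=2, ndim=3):
    # hypothetical: add pad_size zero voxels on every side, then re-save
    padded = np.pad(template, [(pad_size, pad_size)] * ndim, mode='constant')
    nib.save(nib.Nifti1Image(padded, affine=np.eye(4)), save_path)
    return padded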
Example #7
def pipeline_main_loop(data, template, y, idx_out_train, idx_out_test,
                       experiment_path, path_to_template, template_name,
                       pipeline_params, pad_size):
    # initialize learning rate changing strategy
    lr_change = import_func(**pipeline_params['lr_type'])
    lr_params = pipeline_params['lr_change_params'][
        pipeline_params['lr_type']['func']]
    lr = lr_params['init_lr']
    it = 1
    a_it = [0., pipeline_params['pipeline_optimization_params']['a']]
    b_it = [0., pipeline_params['pipeline_optimization_params']['b']]

    # Adam first/second-moment accumulators for a and b
    mta, vta = 0., 0.
    mtb, vtb = 0., 0.
    # start param for optimization
    optim_template = pipeline_params['start_optim_template']
    # create a resulting data frame
    if pipeline_params['kernel']:
        results = pd.DataFrame(columns=[
            "iter", "a", "b", "kernel gamma", "LR C", "train_score",
            "train_loss", "test_score", "test_loss", "one_loop_time",
            "pad_size"
        ])
    else:
        results = pd.DataFrame(columns=[
            "iter", "a", "b", "LR C", "train_score", "train_loss",
            "test_score", "test_loss", "one_loop_time", "pad_size"
        ])

    # run until the (a, b) updates converge, capped at Number_of_iterations
    while (abs(a_it[-1] - a_it[-2]) + abs(b_it[-1] - b_it[-2])) > 1e-10 and \
            it < pipeline_params['Number_of_iterations']:

        st = time.time()

        print('For iter {}'.format(int(it)))
        print('For params a {} and b {}'.format(a_it[-1], b_it[-1]))

        if optim_template:

            (template, best_params, grads_da, grads_db, train_score,
             test_score, train_loss, test_loss, add_padding,
             pad_size) = optimize_template_step(
                 data.copy(), template, y.copy(), a_it[-1], b_it[-1],
                 idx_out_train, idx_out_test, pipeline_params, template_name,
                 path_to_template, pad_size, it,
                 pipeline_params['template_updates']['lr'])

            if add_padding:
                pipeline_params['pipeline_optimization_params'][
                    'add_padding'] = add_padding

        else:
            (best_params, grads_da, grads_db, train_score, test_score,
             train_loss, test_loss) = optimize_a_b_step(
                 data.copy(), template, y.copy(), a_it[-1], b_it[-1],
                 idx_out_train, idx_out_test, pipeline_params, pad_size)

        adam_grad_da, mta, vta = adam_step(grads_da, mta, vta, it)
        adam_grad_db, mtb, vtb = adam_step(grads_db, mtb, vtb, it)

        if pipeline_params['kernel']:
            results.loc[it - 1] = [
                it, a_it[-1], b_it[-1], best_params['kernel__gamma'],
                best_params['ml__C'], train_score, train_loss, test_score,
                test_loss,
                time.time() - st, pad_size
            ]
        else:
            results.loc[it - 1] = [
                it, a_it[-1], b_it[-1], best_params['ml__C'], train_score,
                train_loss, test_score, test_loss,
                time.time() - st, pad_size
            ]

        print("one loop time: ", time.time() - st)

        a_it += [a_it[-1] - lr * adam_grad_da]
        # b is currently frozen; re-enable this line to optimize it as well:
        # b_it += [b_it[-1] - lr * adam_grad_db]

        it += 1

        lr = lr_change(prev_lr=lr,
                       it=it,
                       step=lr_params['step'],
                       decay=lr_params['decay'])

        optim_template = optim_template_strategy(
            it, pipeline_params['step_size_optim_template'])

        results.to_csv(os.path.join(experiment_path, 'results.csv'))

        gc.collect()
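
adam_step is not shown in these examples. A minimal sketch of a standard Adam moment update matching the call adam_grad, mt, vt = adam_step(grad, mt, vt, it) (the beta/eps defaults are assumptions):

import numpy as np

def adam_step(grad, mt, vt, it, beta1=0.9, beta2=0.999, eps=1e-8):
    # standard Adam: update biased moments, bias-correct, precondition
    mt = beta1 * mt + (1 - beta1) * grad
    vt = beta2 * vt + (1 - beta2) * grad ** 2
    m_hat = mt / (1 - beta1 ** it)
    v_hat = vt / (1 - beta2 ** it)
    return m_hat / (np.sqrt(v_hat) + eps), mt, vt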