Beispiel #1
0
def svr_lin_objective(input_dict):
    """Return the 3-fold cross-validation loss of a linear-kernel multichannel SVR.

    Parameters
    ----------
    input_dict : dict
        Must contain 'C', the SVR regularisation parameter to evaluate.

    Returns
    -------
    dict
        'loss' is 1 minus the mean of the fold scores; 'eval_time' is the
        ``time.time()`` timestamp taken right after the cross-validation.

    Side effects
    ------------
    Appends one row ``[loss, eval_time, C]`` to 'svr_lin_log.csv' in the
    current working directory and prints the loss and ``input_dict``.

    NOTE(review): if this objective is driven by hyperopt's ``fmin``, dict
    results are normally expected to carry a 'status' key as well -- confirm
    against the caller.
    """
    from sklearn.svm import SVR
    from sklearn.metrics.pairwise import linear_kernel
    from sklearn.externals import joblib
    from multichannel import MultiChannelModel, multichannel_KFoldCV
    import numpy as np
    import csv, os, time

    #load the training set only; the test set is never used by the
    #cross-validation, so it is no longer unpickled on every evaluation
    dataset_root = '/home/luke/projects/THE_dataset' #directory where features/labels kept
    train_path = os.path.join(dataset_root, 'train_set_wc3d.pkl')
    X_train, y_train = joblib.load(train_path)

    #run the exp
    C = input_dict['C']
    #a single kernel spec (linear kernel, no parameters) is handed to the model
    kpt = {'kernel_func': linear_kernel, 'param_dict': {}}
    model = SVR(kernel='precomputed', C=C)
    mcm = MultiChannelModel(num_channels=6, model=model, kernel_param_tuple=kpt)
    scores = multichannel_KFoldCV(mcm, X_train, y_train, n_folds=3, verbose=False)
    loss = 1 - np.mean(scores)
    eval_time = time.time()

    #logging
    with open('svr_lin_log.csv', 'a') as f:
        csv.writer(f).writerow([loss, eval_time, C])

    #single-argument print: same output under Python 2, valid under Python 3
    print('{0} {1}'.format(loss, input_dict))
    return {'loss': loss, 'eval_time': eval_time}
Beispiel #2
0
    def test_kfold(self):
        """2-fold CV on data whose two halves are identical gives two equal scores above 0.5."""

        #dummy data: the four samples are stacked twice, so both halves are the same
        X_channel = np.array([[0.1, 0.5], [0.5, 0.1], [0.1, 0.6],
                              [0.7, 0.1]])
        X_channel = np.vstack((X_channel, X_channel))
        #second channel is the first one shifted by a small constant
        X_train = (X_channel, X_channel + .01)

        Y = np.array([1, 10, 1, 10])
        Y = np.hstack((Y, Y))

        #set up the model
        model = SVR(kernel='precomputed', C=1)
        mcm = MultiChannelModel(num_channels=2, model=model)

        #run a CV
        score_list = multichannel_KFoldCV(mcm,
                                          X_train,
                                          Y,
                                          n_folds=2,
                                          verbose=True)

        #assertions (specific assert methods report the offending values on failure)
        print(score_list)
        self.assertEqual(len(score_list), 2)
        #the original chained comparison `a == b > .5` means: a == b and b > .5
        self.assertEqual(score_list[0], score_list[1])
        self.assertGreater(score_list[1], .5)
Beispiel #3
0
    def test_mcm(self):
        """Fit, predict and score a two-channel model on a tiny dataset.

        Checks that the prediction has the shape of the targets, that both
        scores are floats, and that the score on the perturbed inputs is lower
        than the score on the training inputs.
        """
        X_channel = np.array([[0.1, 0.5], [0.5, 0.1], [0.1, 0.6], [0.7, 0.1]])
        X_train = (X_channel, X_channel)
        #test inputs are the training inputs shifted by a small constant per channel
        X_test = (X_channel + .1, X_channel + .01)
        Y = np.array([1, 10, 1, 10])

        #set up the model
        model = SVR(kernel='precomputed', C=1)
        mcm = MultiChannelModel(num_channels=2, model=model)

        #train
        mcm.fit(X_train, Y)

        #predict
        Y_pred = mcm.predict(X_test)

        #assertions
        #the original asserted on Y (the input), which can never fail and left
        #the prediction unchecked; one prediction per sample is expected here
        self.assertEqual(Y.shape, (4, ))
        self.assertEqual(Y_pred.shape, Y.shape)

        #scores
        train_score = mcm.score(X_train, Y)
        test_score = mcm.score(X_test, Y)

        #assertions
        print('train_score = {0}, test_score = {1}'.format(
            train_score, test_score))
        self.assertIsInstance(test_score, float)
        self.assertIsInstance(train_score, float)
        self.assertLess(test_score, train_score)
def objective(x):
    """Return the 3-fold cross-validation loss of a chi2-kernel multichannel SVR.

    The model is built on the first five channels of the training set, each
    with its own chi2 kernel and its own gamma.

    Parameters
    ----------
    x : dict
        Must contain 'C' and the per-channel gammas 'traj_gamma', 'hog_gamma',
        'hof_gamma', 'mbhx_gamma' and 'mbhy_gamma' (used for channels 0-4 in
        that order; assumes the stored channel order matches -- TODO confirm).

    Returns
    -------
    dict
        'loss' is 1 minus the mean of the fold scores; 'eval_time' is the
        ``time.time()`` timestamp taken right after the cross-validation.

    Side effects
    ------------
    Appends ``[loss, eval_time, C] + gammas`` to 'svr_nonlin_IDT_log.csv' in
    the current working directory and prints the parameters, the loss and the
    wall-clock time of the call.

    NOTE(review): if this objective is driven by hyperopt's ``fmin``, dict
    results are normally expected to carry a 'status' key as well -- confirm
    against the caller.
    """
    from sklearn.svm import SVR
    from sklearn.externals import joblib
    from multichannel import MultiChannelModel, multichannel_KFoldCV, theano_chi2 as chi2_kernel
    import numpy as np
    import csv, os, time

    #output the time taken
    t0 = time.time()

    def create_kpt(num_channels, gammas):
        #one independent chi2 kernel spec per channel. The original only
        #assigned a spec for channel < 4, so channel 4 silently re-used
        #channel 3's dict and 'mbhy_gamma' never had any effect.
        return tuple(
            {'kernel_func': chi2_kernel, 'param_dict': {'gamma': gammas[channel]}}
            for channel in range(num_channels))

    #load the training set (the test set is not used by the cross-validation)
    dataset_root = '/home/luke/projects/THE_dataset' #directory where features/labels kept
    train_path = os.path.join(dataset_root, 'train_set_wc3d.pkl')
    X_train, y_train = joblib.load(train_path)
    num_channels = 5

    #keep the first num_channels channels as a real tuple. The original built a
    #one-shot generator over num_channels-1 channels, which neither matches the
    #5-channel model/kernel tuple nor survives being read more than once.
    X_train = tuple(X_train[i] for i in range(num_channels))

    #run the exp
    gammas = [x['traj_gamma'], x['hog_gamma'], x['hof_gamma'],
              x['mbhx_gamma'], x['mbhy_gamma']]
    kpt = create_kpt(num_channels, gammas)
    C = x['C']
    model = SVR(kernel='precomputed', C=C)
    mcm = MultiChannelModel(num_channels=num_channels, model=model, kernel_param_tuple=kpt)
    scores = multichannel_KFoldCV(mcm, X_train, y_train, n_folds=3, verbose=False)
    loss = 1 - np.mean(scores)
    eval_time = time.time()

    #logging
    with open('svr_nonlin_IDT_log.csv', 'a') as f:
        csv.writer(f).writerow([loss, eval_time, C] + gammas)

    #single-argument print: same output under Python 2, valid under Python 3
    print('{0} {1} time taken: {2}'.format(x, loss, time.time() - t0))
    return {'loss': loss, 'eval_time': eval_time}
def objective(x):
    """Return the 3-fold cross-validation loss of a six-channel kernel Ridge model.

    Kernel gammas are fixed constants; only the Ridge alpha and the six
    channel weights are taken from ``x``.

    Parameters
    ----------
    x : dict
        Must contain 'alpha' and the channel weights 'traj_cw', 'hog_cw',
        'hof_cw', 'mbhx_cw', 'mbhy_cw' and 'c3d_cw'.

    Returns
    -------
    dict
        'loss' is 1 minus the mean of the fold scores; 'eval_time' is the
        ``time.time()`` timestamp taken right after the cross-validation.

    Side effects
    ------------
    Prints the parameters, the loss and the wall-clock time of the call.

    NOTE(review): if this objective is driven by hyperopt's ``fmin``, dict
    results are normally expected to carry a 'status' key as well -- confirm
    against the caller.
    """
    from sklearn.linear_model import Ridge
    from sklearn.externals import joblib
    from multichannel import MultiChannelModel, multichannel_KFoldCV, theano_rbf as rbf_kernel, theano_chi2 as chi2_kernel
    import numpy as np
    import os, time

    #output the time taken
    t0 = time.time()

    def create_kpt(num_channels, gammas):
        kernel_param_list = []
        for channel in range(num_channels):
            if channel < 4:
                kernel_func, gamma = chi2_kernel, gammas[channel]
            elif channel == 4:
                #NOTE(review): the original had no branch for channel 4 and
                #re-appended channel 3's spec through a stale local, so this
                #channel has always run chi2 with gammas[3]. That effective
                #kernel is kept on purpose: the fixed gammas below were
                #presumably tuned under it, and gammas[4] was never applied.
                #Switch to gammas[channel] only together with a re-tuned value.
                kernel_func, gamma = chi2_kernel, gammas[3]
            else:
                kernel_func, gamma = rbf_kernel, gammas[channel]
            kernel_param_list.append({
                'kernel_func': kernel_func,
                'param_dict': {
                    'gamma': gamma
                }
            })
        return tuple(kernel_param_list)

    #load the training set (the test set is not used by the cross-validation)
    dataset_root = '/home/luke/projects/THE_dataset'  #directory where features/labels kept
    train_path = os.path.join(dataset_root, 'train_set_wc3d.pkl')
    X_train, y_train = joblib.load(train_path)

    #run the exp
    #kernel gammas taken from a previous experiment
    gammas = [0.06386, 0.117941, 0.060457, 0.180092, 3405.482, 0.79326]
    kpt = create_kpt(6, gammas)

    #set the alpha level
    alpha = x['alpha']

    #set the channel weights
    cw = [
        x['traj_cw'], x['hog_cw'], x['hof_cw'], x['mbhx_cw'], x['mbhy_cw'],
        x['c3d_cw']
    ]

    model = Ridge(alpha=alpha)
    mcm = MultiChannelModel(num_channels=6,
                            model=model,
                            kernel_param_tuple=kpt,
                            channel_weights=cw)
    scores = multichannel_KFoldCV(mcm,
                                  X_train,
                                  y_train,
                                  n_folds=3,
                                  verbose=False)
    loss = 1 - np.mean(scores)
    eval_time = time.time()

    print('params: {0}, loss: {1}, time taken: {2}'.format(
        x, loss,
        time.time() - t0))
    return {'loss': loss, 'eval_time': eval_time}
Beispiel #6
0
def objective(x):
    """Return the 3-fold cross-validation loss of a six-channel kernel Ridge model.

    Kernel gammas, channel weights and the Ridge alpha are all taken from
    ``x``. Channels 0-4 use a chi2 kernel and channel 5 an rbf kernel, each
    with its own gamma.

    Parameters
    ----------
    x : dict
        Must contain 'alpha', the gammas 'traj_gamma', 'hog_gamma',
        'hof_gamma', 'mbhx_gamma', 'mbhy_gamma', 'c3d_gamma' and the channel
        weights 'traj_cw', 'hog_cw', 'hof_cw', 'mbhx_cw', 'mbhy_cw', 'c3d_cw'
        (applied to channels 0-5 in that order; assumes the stored channel
        order matches -- TODO confirm).

    Returns
    -------
    dict
        'loss' is 1 minus the mean of the fold scores; 'eval_time' is the
        ``time.time()`` timestamp taken right after the cross-validation.

    Side effects
    ------------
    Appends ``[loss, eval_time, alpha] + gammas + cw`` to
    'ridge_nonlin_cweights_log.csv' in the current working directory and
    prints the parameters, the loss and the wall-clock time of the call.

    NOTE(review): if this objective is driven by hyperopt's ``fmin``, dict
    results are normally expected to carry a 'status' key as well -- confirm
    against the caller.
    """
    from sklearn.linear_model import Ridge
    from sklearn.externals import joblib
    from multichannel import MultiChannelModel, multichannel_KFoldCV, theano_rbf as rbf_kernel, theano_chi2 as chi2_kernel
    import numpy as np
    import csv, os, time

    #output the time taken
    t0 = time.time()

    def create_kpt(num_channels, gammas):
        #The original had branches only for channel < 4 and channel == 5, so
        #channel 4 re-appended channel 3's spec through a stale local and the
        #searched (and logged) 'mbhy_gamma' never influenced the model. Every
        #channel now gets its own spec built from its own gamma.
        kernel_param_list = []
        for channel in range(num_channels):
            kernel_func = chi2_kernel if channel < 5 else rbf_kernel
            kernel_param_list.append({
                'kernel_func': kernel_func,
                'param_dict': {
                    'gamma': gammas[channel]
                }
            })
        return tuple(kernel_param_list)

    #load the training set (the test set is not used by the cross-validation)
    dataset_root = '/home/luke/projects/THE_dataset'  #directory where features/labels kept
    train_path = os.path.join(dataset_root, 'train_set_wc3d.pkl')
    X_train, y_train = joblib.load(train_path)

    #run the exp
    #set the kernel gammas
    gammas = [
        x['traj_gamma'], x['hog_gamma'], x['hof_gamma'], x['mbhx_gamma'],
        x['mbhy_gamma'], x['c3d_gamma']
    ]
    kpt = create_kpt(6, gammas)

    #set the alpha level
    alpha = x['alpha']

    #set the channel weights
    cw = [
        x['traj_cw'], x['hog_cw'], x['hof_cw'], x['mbhx_cw'], x['mbhy_cw'],
        x['c3d_cw']
    ]

    model = Ridge(alpha=alpha)
    mcm = MultiChannelModel(num_channels=6,
                            model=model,
                            kernel_param_tuple=kpt,
                            channel_weights=cw)
    scores = multichannel_KFoldCV(mcm,
                                  X_train,
                                  y_train,
                                  n_folds=3,
                                  verbose=False)
    loss = 1 - np.mean(scores)
    eval_time = time.time()

    #logging
    with open('ridge_nonlin_cweights_log.csv', 'a') as f:
        csv.writer(f).writerow([loss, eval_time, alpha] + gammas + cw)

    #single-argument print: same output under Python 2, valid under Python 3
    print('{0} {1} time taken: {2}'.format(x, loss, time.time() - t0))
    return {'loss': loss, 'eval_time': eval_time}