Example #1
    def InitialAlignment(self, scale=0.15):
        """ Compute SVD and align object to be in a certain coordinate frame.
        
        Usage: model.InitialAlignment(scale)

        Input:
            scale - Desired scale for object. Scale is defined as the length
            along the leading eigenvector, in meters.
        """

        pts3D = self.pts3D

        # Compute eigenvecs and rotate according to them
        pc, evals, mean = utils.pca(pts3D, remove_mean=True)
        pts3D_rot = np.dot(pc.T, pts3D)

        # Find length according to max eigenvector
        mins = np.min(pts3D_rot, axis=1)
        maxs = np.max(pts3D_rot, axis=1)
        max_length = maxs[0] - mins[0]

        # Build the rotation from the eigenvectors, with Z as the leading
        # eigenvector:
        rot = np.c_[-pc[2], pc[1], pc[0]]

        # Transform model to have zero mean, reasonable scale and rotation.
        self.transform(rot, np.dot(rot, -mean), float(scale) / max_length)
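A minimal sketch of the utils.pca helper this alignment example assumes: data as a (d, N) array, principal axes returned as the columns of pc sorted by decreasing eigenvalue, plus the data mean. The name and conventions are inferred from the call above, not taken from the original utils module.

import numpy as np

def pca(data, remove_mean=False):
    """Return (principal_axes, eigenvalues, mean) for (d, N) data."""
    mean = np.mean(data, axis=1, keepdims=True)
    if remove_mean:
        data = data - mean
    evals, evecs = np.linalg.eigh(np.cov(data))  # ascending eigenvalues
    order = np.argsort(evals)[::-1]              # leading eigenvector first
    return evecs[:, order], evals[order], mean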
Example #3
def exercicio1():
    utils.print_header(1)
    x, y, labels = load_iris(os.path.join(constants.DATA_DIR, constants.FILENAME_IRIS_DATABASE))
    n, d = x.shape  # n samples, d attributes

    print('a)')
    for i in range(d):
        print('\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(i, utils.mean(x[:, i]), utils.variance(x[:, i])))

    print('b)')
    for i in range(labels.shape[0]):
        print('\tClass {}: {}'.format(i, labels[i]))
        for j in range(d):
            print('\t\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
                j, utils.mean(x[(y == i)[:, 0], j]), utils.variance(x[(y == i)[:, 0], j]))
            )

    print('c)')
    print('\tThe histograms will be displayed')
    f, ax = plt.subplots(1, d, sharex=False, sharey=True)
    for j in range(d):
        # show the attribute title at the top of each subplot
        ax[j].set_title('Attribute {}'.format(j))
        hist_bins = np.linspace(x[:, j].min(), x[:, j].max(), num=16)
        ax[j].hist(np.vstack([
            x[(y == i)[:, 0], j]
            for i in range(labels.shape[0])
        ]).T, bins=hist_bins, linewidth=0, color=['r', 'b', 'g'])
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-c.pdf')
    plt.legend(labels, loc='upper center', bbox_to_anchor=(0.5, 0.07), ncol=3, bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)
    f.set_figheight(3)
    f.set_figwidth(8)
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('d)')
    print('\tA plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    # format the plot to mimic Slide 21 of Lecture 3
    x_pca[:, 1] *= -1
    a = plt.scatter(x_pca[np.where(y == 0)[0], 1], x_pca[np.where(y == 0)[0], 0], c='r', marker='^', lw=0, s=100)
    b = plt.scatter(x_pca[np.where(y == 1)[0], 1], x_pca[np.where(y == 1)[0], 0], c='b', marker='o', lw=0, s=100)
    c = plt.scatter(x_pca[np.where(y == 2)[0], 1], x_pca[np.where(y == 2)[0], 0], c='g', marker='s', lw=0, s=100)
    plt.xlim([-1.5, 1.5])
    plt.ylim([-4, 4])
    plt.legend((a, b, c), tuple(labels), loc='upper left', fontsize=10)
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-d.pdf')
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))
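The utils.pca used in exercicio1 (and in exercicio2 below) has sklearn-like semantics: it takes (N, d) data and returns the projection onto the first n_components principal axes, optionally whitened. A sketch under that assumption; the implementation details are not from the original utils module.

import numpy as np

def pca(x, n_components=2, whiten=False):
    """Project (N, d) data onto its first n_components principal axes."""
    x_centered = x - x.mean(axis=0)
    u, s, vt = np.linalg.svd(x_centered, full_matrices=False)
    x_proj = x_centered @ vt[:n_components].T
    if whiten:
        # rescale each component to unit variance
        x_proj /= s[:n_components] / np.sqrt(len(x) - 1)
    return x_proj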
Example #4
def extract_feature(train_img, test_img, path=None):
    """
    This help to compute feature for knn from pretrained network
    :param FLAGS:
    :param ckpt_path:
    :return:
    """

    # check if a certain variable has been saved in the model

    if config.extract_feature == 'feature':
        dir_path = os.path.join(config.save_model, config.dataset)
        dir_path = os.path.join(dir_path,
                                'knn_num_neighbor_' + str(config.nb_teachers))
        filename = str(config.nb_teachers) + '_stdnt_resnet.checkpoint.pth.tar'
        filename = os.path.join(dir_path, filename)
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        print('shape of extract feature', train_feature.shape)
        return train_feature, test_feature
        #return utils.pca(test_feature, train_feature)
    if config.extract_feature == 'hog':
        # usually the file holding all HOG features is too large, so we split it into 10 pieces.
        train_data = None
        each_length = int((9 + len(train_img)) / 10)
        for idx in range(10):
            # save the pickle in several small pieces

            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval {}: {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)

            with open(train_hog_path, 'rb') as f:
                if train_data is not None:
                    train_data = np.vstack((train_data, pickle.load(f)))
                else:
                    train_data = pickle.load(f)
            print('load hog feature shape', train_data.shape)
        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)
        return train_data, test_data
    if config.extract_feature == 'pca':
        return utils.pca(test_img, train_img)
Example #5
def extract_feature(train_img, test_img, path=None):
    """
    This help to compute feature for knn from pretrained network
    :param FLAGS:
    :param ckpt_path:
    :return:
    """

    # check if a certain variable has been saved in the model
    if config.extract_feature == 'feature':
        # Update the feature extractor using the student model (filename) from the last iteration.
        # Replace filename with your saved student model; the following is an example checkpoint path:
        filename = 'save_model/svhn/knn_num_neighbor_800/800_stdnt_.checkpoint.pth.tar'
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        return train_feature, test_feature
    train_img = [np.asarray(data) for data in train_img]
    test_img = [np.asarray(data) for data in test_img]

    if config.extract_feature == 'hog':
        # usually the file holding all HOG features is too large, so we split it into 10 pieces.
        train_data = None
        each_length = int((9 + len(train_img)) / 10)
        for idx in range(10):
            # Save the pickle in several small pieces, in case the private dataset is too large

            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval {}: {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)

            with open(train_hog_path, 'rb') as f:
                if train_data is not None:
                    train_data = np.vstack((train_data, pickle.load(f)))
                else:
                    train_data = pickle.load(f)
            print('load hog feature shape', train_data.shape)
        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)

        return train_data, test_data
    if config.extract_feature == 'pca':
        return utils.pca(test_img, train_img)
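Both versions of extract_feature fall back to utils.pca(test_img, train_img). The argument order (test first) and the (train, test) return order are inferred from the callers, so this is only a guess at a compatible helper; the n_components default is an assumption.

import numpy as np
from sklearn.decomposition import PCA

def pca(test_img, train_img, n_components=64):
    # flatten each image to a vector before fitting on the training set
    train = np.asarray(train_img).reshape(len(train_img), -1)
    test = np.asarray(test_img).reshape(len(test_img), -1)
    model = PCA(n_components=n_components).fit(train)
    return model.transform(train), model.transform(test)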
Example #6
def exercicio2():
    utils.print_header(2)
    x, y, labels = load_cnae9_reduzido(os.path.join(constants.DATA_DIR, constants.FILENAME_CNAE_DATABASE))

    def display_plot(_x, _labels, fname, is_1d=False):
        plt_axes = []
        colors = 'bgrcm'
        hist_bins = np.linspace(_x.min(), _x.max(), num=16)
        if is_1d:
            plt.hist(np.vstack([_x[np.where(y == label)[0], 0] for label in _labels]).T,
                     bins=hist_bins, linewidth=0, color=colors)
        for i, label in enumerate(_labels):
            x2 = _x[np.where(y == label)[0], 0]
            y2 = _x[np.where(y == label)[0], 1] if not is_1d else -1 * np.ones(np.where(y == label)[0].shape[0])
            plt_axes.append(
                plt.scatter(x2, y2, c=colors[i], lw=0)
            )
        plt.legend(tuple(plt_axes), list(_labels), loc='upper left', fontsize=10)
        fig_fname = os.path.join(constants.OUTPUT_DIR, fname)
        plt.savefig(fig_fname, bbox_inches='tight')
        plt.show()
        return fig_fname

    print('a) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-a.pdf')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('b) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=2, whiten=True)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-b.pdf')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('c) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=1, whiten=True)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-c.pdf', is_1d=True)
    print('\tThis plot was saved: {}'.format(plot_fname))
Example #7
def generate_data(num_points, seed):
    scale = np.diag(np.sqrt(np.array([0.01, 0.1, 1][::-1])))
    rotate1 = R.from_rotvec(np.array([np.deg2rad(45.), 0, 0])).as_matrix()
    rotate2 = R.from_rotvec(np.array([0, np.deg2rad(45.), 0])).as_matrix()
    rotate3 = R.from_rotvec(np.array([0, 0, np.deg2rad(45.)])).as_matrix()
    chol = rotate3 @ rotate2 @ rotate1 @ scale

    cov = chol @ chol.T
    pca_w, _ = pca(cov)
    assert np.allclose(pca_w, np.array([1., 0.1, 0.01]))

    rs = np.random.RandomState(seed=seed)
    samples = rs.randn(3, num_points)
    data = (chol @ samples).T
    return data
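The assert above only holds if pca(cov) returns eigenvalues sorted in descending order. A sketch consistent with that contract (and with the pca(estimated_cov) call in Example #14 below); assumed, not taken from the source.

import numpy as np

def pca(cov):
    # eigh returns ascending eigenvalues for symmetric matrices
    evals, evecs = np.linalg.eigh(cov)
    order = np.argsort(evals)[::-1]
    return evals[order], evecs[:, order]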
Example #8
    def transform(self, X):
        """
        Reduces the dimensionality of X with t-SNE via gradient descent
        """
        print("Start transforming X...")
        begin = time()

        if self.random_state is not None:
            print("transforming X with random state: " +
                  str(self.random_state))
            np.random.seed(self.random_state)
        else:
            print("No random state specified...")

        if self.initialization == "PCA":
            print("First reducing dimensions of X with PCA to %d dimensions" %
                  self.initial_dims)
            X, _ = pca(X, self.initial_dims)

        (n, d) = X.shape
        Y = np.random.randn(n,
                            self.d_components)  # initialize a random solution

        cond_P, _ = cond_probs(X, perplexity=self.perplexity)
        P = joint_average_P(cond_P)
        #np.savetxt('results/' + self.data_name + 'Probabilities'+self.grad_method + '.csv', P, delimiter=',' )

        print("Start gradient descent...")
        t0 = time()
        if self.grad_method == 'ADAM':
            Y, cost, grad_value = self.grad_descent_ADAM(X, Y, P)
        elif self.grad_method == 'gains':
            Y, cost, grad_value = self.grad_descent_gains(X, Y, P)
        elif self.grad_method == 'SGD':
            Y, cost, grad_value = self.grad_descent(X, Y, P)

        #np.savetxt('results/' + self.data_name + '/' +self.grad_method  + 'cost' + str(self.d_components) +'.csv', cost, delimiter=',' )
        #np.savetxt('results/' + self.data_name + '/'+ self.grad_method +  'Y' +str(self.d_components) +'.csv', Y, delimiter=',')

        print("Gradient descent took %.4f seconds" % (time() - t0))

        return Y, cost, grad_value
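The pca(X, self.initial_dims) call above returns a tuple and is used only as t-SNE preprocessing, which matches the convention of van der Maaten's reference implementation. A sketch under that assumption:

import numpy as np

def pca(X, no_dims=50):
    """Reduce (n, d) X to its first no_dims principal components."""
    X = X - np.mean(X, axis=0)
    evals, evecs = np.linalg.eigh(np.cov(X.T))
    order = np.argsort(evals)[::-1][:no_dims]
    return X @ evecs[:, order], evals[order]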
Example #9
def generate_work_data(dataset, labels, colors, parameters, pca_enabled=False):
    X_img = np.load('./data/' + dataset + '.npy')
    y_img = np.load('./data/' + dataset + '_labels.npy')
    save_image(color_true_map(y_img, labels_colors=colors),
               dataset + "_labels")

    X = utils.flat(X_img)
    y = utils.flat(y_img)
    train_ratio, val_ratio = 0.1, 0.1
    test_ratio = 1 - (train_ratio + val_ratio)
    tv_mask, test_mask = utils.balanced_train_test_mask(
        y, np.isin(y, labels), test_ratio)
    train_mask, val_mask = utils.balanced_train_test_mask(
        y, tv_mask, val_ratio / (val_ratio + train_ratio))

    np.save("./data/" + dataset + "_train_mask.npy", train_mask)
    np.save("./data/" + dataset + "_val_mask.npy", val_mask)
    np.save("./data/" + dataset + "_test_mask.npy", test_mask)

    if pca_enabled:
        pca = utils.pca(X[tv_mask, :], 0.99)
        utils.save_model(pca, dataset + '_pca')
        train = pca.transform(X[train_mask, :])
        test = pca.transform(X[test_mask])
        flat = pca.transform(X)
    else:
        train = X[train_mask, :]
        test = X[test_mask, :]
        flat = X

    svc = utils.svc(train, y[train_mask], parameters["C"], parameters["gamma"])
    utils.save_model(svc, dataset + '_svc')
    test_pred = svc.predict(test)
    np.save("./data/" + dataset + "_test_pred.npy", test_pred)
    classification = svc.predict(flat).reshape(y_img.shape).astype(np.uint8)
    np.save("./data/" + dataset + "_classification.npy", classification)
    save_image(color_true_map(classification, labels_colors=colors),
               dataset + "_classification")

    score = utils.balanced_score(y[test_mask], test_pred)
    utils.save_json({"original": score}, dataset + "_original_score")
    print("Test Score:", score)
Example #10
def generate_raw_image_pixels(list_of_demonstrations):
	"""
	PCA and t-SNE on raw image pixels
    """

	# Design matrix of raw image pixels
	X = None

	for demonstration in list_of_demonstrations:
		print "Raw image pixels ", demonstration
		PATH_TO_ANNOTATION = constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER + demonstration + "_" + str(constants.CAMERA) + ".p"

		start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
		for frm in range(start, end + 1):
			if ((frm % 6) == 0):
				PATH_TO_IMAGE = utils.get_full_image_path(constants.PATH_TO_DATA + constants.NEW_FRAMES_FOLDER + demonstration + "_" + constants.CAMERA + "/", frm)
			print(demonstration, str(frm))
				img = utils.reshape(cv2.imread(PATH_TO_IMAGE).flatten())
				X = utils.safe_concatenate(X, img)

	X_pca = utils.pca(X, PC=2)
	X_tsne = utils.tsne(X)
	data_dimred = [X_pca, X_tsne]
	pickle.dump(data_dimred, open("raw_pixel_" + demonstration + "_dimred.p", "wb"))
Example #11
def generate_SIFT():
	data = pickle.load(open("sift_features/SIFT_plane_9_1.p", "rb"))
	X_pca = utils.pca(data, PC=2)
	X_tsne = utils.tsne(data)
	data_dimred = [X_pca, X_tsne]
	pickle.dump(data_dimred, open("SIFT_plane_9_dimred.p", "wb"))
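Examples #10 and #11 call utils.pca(X, PC=2) and pair the result with t-SNE, so it presumably returns the (N, PC) projection. A sketch under that assumption:

import numpy as np

def pca(X, PC=2):
    X = X - X.mean(axis=0)
    _, _, vt = np.linalg.svd(X, full_matrices=False)
    return X @ vt[:PC].T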
Example #13
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    
    

train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

y = train['y']
test_ids = test['ID']
train, test = utils.label_encode_categorical(train, test)
train = train.drop(['ID', 'y'], axis=1)
test = test.drop('ID', axis=1)

### PCA ###
df_pca, df_test_pca = utils.pca(train, test, 0.99)

### ICA ###
columns = ['ICA_{}'.format(i) for i in range(10)]
ica = FastICA(n_components=10, random_state=42)
df_ica = pd.DataFrame(ica.fit_transform(train), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(test), columns=columns)

train = pd.concat([train, df_pca, df_ica], axis=1)
test = pd.concat([test, df_test_pca, df_test_ica], axis=1)


predictors = [x for x in train.columns if x not in ['ID', 'y']]
xgb1 = XGBRegressor(learning_rate=0.1,
                    n_estimators=1000,
                    max_depth=5,
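The utils.pca in these XGBoost feature-engineering snippets returns DataFrames with PCA_i columns (see the scatter plots in the final example) and accepts either a component count or a variance fraction. A compatible sketch, with names inferred from the callers:

import pandas as pd
from sklearn.decomposition import PCA

def pca(train, test, n_components):
    model = PCA(n_components=n_components).fit(train)
    cols = ['PCA_{}'.format(i) for i in range(model.n_components_)]
    return (pd.DataFrame(model.transform(train), columns=cols),
            pd.DataFrame(model.transform(test), columns=cols))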
Example #14
import argparse

import numpy as np

from synthetic.data import generate_data, TRAIN_SEED, VALIDATION_SEED
from utils import pca

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_train_points', type=int, default=10000)
    parser.add_argument('--num_eval_points', type=int, default=10000)

    args = parser.parse_args()

    train_data = generate_data(num_points=args.num_train_points,
                               seed=TRAIN_SEED)
    eval_data = generate_data(num_points=args.num_eval_points,
                              seed=VALIDATION_SEED)

    estimated_cov = np.cov(train_data, rowvar=False)
    pca_w, pca_v = pca(estimated_cov)

    for keep in [3, 2, 1]:
        pca_v = pca_v[:, :keep]

        # Project and re-construct
        val_data_proj = eval_data @ pca_v
        val_data_reconstr = val_data_proj @ pca_v.T

        mse = np.mean((eval_data - val_data_reconstr) ** 2)
        print(f"mse ({keep}) {mse:.3f}")
Example #15
    model.add(Dense(units=num_classes, kernel_initializer='glorot_uniform', bias_initializer='zeros',
                    activation='softmax'))
    return model

if __name__ == "__main__":
    num_train = 50000
    num_test = 10000
    feature_preserve_ratio = 0.95
    x_train, y_train, x_test, y_test = utils.load_data()
    x_train = x_train[0:num_train, :]
    y_train = y_train[0:num_train]
    x_test = x_test[0:num_test, :]
    y_test = y_test[0:num_test]
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train_pca, x_test_pca = utils.pca(x_train, x_test, feature_preserve_ratio)
    '''x_train_pca, x_test_pca = utils.pca_with_model(pca_model_name='pca_model.sav',
                                                   scaler_model_name='scaler_model.sav',
                                                   x_train=x_train, x_test=x_test)'''
    y_train = utils.convert_to_one_hot(y_train, 10)
    y_test = utils.convert_to_one_hot(y_test, 10)

    model = get_model(x_train_pca.shape[1:], 10)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    tic = time.time()
    history = model.fit(x=x_train_pca, y=y_train, epochs=20, batch_size=256, validation_data=(x_test_pca, y_test),
                        callbacks=[TensorBoard(log_dir='./logs')])
    toc = time.time()
    print("train time: " + str(1000 * (toc - tic)) + "ms")
    utils.plot_history(history)
Example #16
train, test = utils.label_encode_categorical(train, test)

# Remove constant features
desc = train.describe()
feat_to_drop = [c for c in desc.columns if desc[c][2] == 0]
train.drop(feat_to_drop, axis=1, inplace=True)
test.drop(feat_to_drop, axis=1, inplace=True)

y = train['y']
test_ids = test['ID']
test.drop('ID', axis=1, inplace=True)
train.drop(['ID', 'y'], axis=1, inplace=True)

n_components = 12
# PCA
df_pca, df_test_pca = utils.pca(train, test, n_components)

# ICA
columns = ['ICA_{}'.format(i) for i in range(n_components)]
ica = FastICA(n_components=n_components,
              random_state=420,
              max_iter=10000,
              tol=0.001)
df_ica = pd.DataFrame(ica.fit_transform(train), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(test), columns=columns)

# Truncated SVD
columns = ['TSVD_{}'.format(i) for i in range(n_components)]
tsvd = TruncatedSVD(n_components=n_components, random_state=420)
df_tsvd = pd.DataFrame(tsvd.fit_transform(train), columns=columns)
df_test_tsvd = pd.DataFrame(tsvd.transform(test), columns=columns)
Example #17
    print('Time:', time() - t)

else:
    kernel = {'linear': linear, 'rbf': rbf, 'linearbf': linearbf}[KERNEL]
    print('Generating L ...')
    t = time()
    N = len(table)
    W = [[kernel(i, j, table) for j in range(N)] for i in range(N)]
    D = [[sum(W[i]) if i == j else 0 for j in range(N)] for i in range(N)]
    L = np.array(D) - np.array(W)
    print('Time:', time() - t)

    print('Calculating eigenvector ...')
    t = time()
    ''' use second method provided on the hangout for normalized cut '''
    w, v = LA.eig(L) if TYPE == 'ratio' else LA.eig(np.dot(LA.inv(D), L))
    np.save(open('w.npy', 'wb'), w)
    np.save(open('v.npy', 'wb'), v)
    print('Time:', time() - t)

idx = np.argsort(w)
w = w[idx]
v = v[:, idx]
print('Eigenvalue:', w[:4])

U = np.array([v[:, u] for u in range(1, K + 1)]).T

print('Kmean ...')
labels = kmean(U, K)
pca(X_train, labels, FILENAME)
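In this snippet pca(X_train, labels, FILENAME) is evidently a plotting helper rather than a transform, and Example #19 calls the same helper with a special= keyword for support-vector indices. A sketch of what it might do; the 2-D projection and scatter styling are assumptions:

import numpy as np
import matplotlib.pyplot as plt

def pca(X, labels, filename, special=None):
    X = np.asarray(X, dtype=float)
    X = X - X.mean(axis=0)
    _, _, vt = np.linalg.svd(X, full_matrices=False)
    proj = X @ vt[:2].T                       # project onto first two axes
    plt.scatter(proj[:, 0], proj[:, 1], c=labels, s=8)
    if special is not None:                   # highlight, e.g., support vectors
        plt.scatter(proj[special, 0], proj[special, 1],
                    facecolors='none', edgecolors='k', s=40)
    plt.savefig(filename)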
Example #18
def xgb_r2_score(preds, dtrain):
    labels = dtrain.get_label()
    return 'r2', r2_score(labels, preds)

df = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

y = df['y']
df = df.drop(['ID', 'y'], axis=1)
test_ids = df_test['ID']
df_test = df_test.drop('ID', axis=1)
df, df_test = utils.label_encode_categorical(df, df_test)


### PCA ###
df_pca, df_test_pca = utils.pca(df, df_test, 10)

### ICA ###
columns = ['ICA_{}'.format(i) for i in range(10)]
ica = FastICA(n_components=10, random_state=42)
df_ica = pd.DataFrame(ica.fit_transform(df), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(df_test), columns=columns)

### XGBOOST ###
y_mean = y.mean()
# prepare dict of params for xgboost to run with
xgb_params = { 
    'eta': 0.05,
    'max_depth': 4,
    'subsample': 0.9,
    'objective': 'reg:linear',
Example #19
''' setting parameters according to the README '''
prob = svm_problem(train_labels, train_images)
param = svm_parameter('-q')
param_best = svm_parameter('-c 32 -g 0.0078125 -q')
param_linear = svm_parameter('-t 0 -q')
param_poly = svm_parameter('-t 1 -g 1 -q')
param_rbf = svm_parameter('-g 0.0078125 -q')

model = svm_train(prob, param)


"""
''' precompute-kernel in generate by precompute-kernel.py '''
pre_train_labels, pre_train_images = svm_read_problem('../../../lab5/data/precompute-kernel-train')
pre_test_labels, pre_test_images = svm_read_problem('../../../lab5/data/precompute-kernel-test')

print('File loaded')
prob_pre = svm_problem(pre_train_labels, pre_train_images, isKernel=True)
param_pre = svm_parameter('-t 4')

model = svm_train(prob_pre, param_pre)
"""

''' get support vectors '''
n = model.get_sv_indices()
n = [i-1 for i in n]

''' draw support vectors and dots in 2D space with PCA '''
images, labels = preprocess(path='../../../lab5/data/')
pca(images, labels, special=n)
Example #20
	return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca

if __name__ == "__main__":
	parser = argparse.ArgumentParser()
	parser.add_argument("file_name", help = "Please specify MAIN file name")
	parser.add_argument("layer", help = "Please specify layer")
	parser.add_argument("PATH_TO_DATA", help="Please specify the path to the feature data")
	parser.add_argument("--a", help = "Annotated frames")
	parser.add_argument("--PATH_TO_DATA_2", help="Please specify the path to 2nd set of feature data")
	parser.add_argument("--a_2", help="Annotated frames for 2nd set of data")
	parser.add_argument("--image", help="Parse image mode", default = None)
	args = parser.parse_args()
	if args.a_2 and args.PATH_TO_DATA_2 and not args.image:
		X1, label_map_1, index_map_1 = parse_annotations_pickle(args.a, args.PATH_TO_DATA, args.layer)
		X2, label_map_2, index_map_2 = parse_annotations_pickle(args.a_2, args.PATH_TO_DATA_2, args.layer)
		X1_pca = utils.pca(X1)
		X2_pca = utils.pca(X2)
		plot_annotated_joint(X1_pca, X2_pca, label_map_1, index_map_1, label_map_2, index_map_2, figure_name = args.file_name +".png", title = "PCA " + args.layer)
	elif args.image and not args.PATH_TO_DATA_2:
		X, label_map, index_map = utils.parse_annotations_images(args.a, args.PATH_TO_DATA)
		pickle.dump(X, open(args.file_name + "_allimages.p", "wb"))
		pickle.dump(label_map, open(args.file_name + "_labelmap.p", "wb"))
		pickle.dump(index_map, open(args.file_name + "_indexmap.p", "wb"))
		IPython.embed()
		X_pca = utils.pca(X)
		X_tsne = utils.tsne(X)
		X_tsne_pca = utils.tsne_pca(X)
		utils.plot_annotated_embedding(X_pca, label_map, index_map, args.file_name + '_' + args.layer + '_pca.png', 'PCA ' + args.layer)
		utils.plot_annotated_embedding(X_tsne, label_map, index_map, args.file_name + '_' + args.layer + '_tsne.png', 't-SNE ' + args.layer)
		utils.plot_annotated_embedding(X_tsne_pca, label_map, index_map, args.file_name + '_' + args.layer + '_tsne_pca.png', 't-SNE (PCA Input) ' + args.layer)
	else:
Example #21
### BEST MODEL: RF n = 1000, max_depth = 5, no PCA


df = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

y = df['y']
df = df.drop(['ID', 'y'], axis=1)
df, df_test = utils.label_encode_categorical(df, df_test)

test_ids = df_test['ID']
df_test = df_test.drop('ID', axis=1)

### PCA TEST ###
df_pca, df_test_pca = utils.pca(df, df_test, 0.99)
plt.scatter(df_pca['PCA_0'], df_pca['PCA_1'], s=1)
plt.xlabel('PCA_0')
plt.ylabel('PCA_1')
plt.show()
plt.scatter(df_pca['PCA_0'], y, s=1)
plt.xlabel('PCA_0')
plt.ylabel('y')
plt.show()
plt.scatter(df_pca['PCA_1'], y, s=1)
plt.xlabel('PCA_1')
plt.ylabel('y')
plt.show()

### RANDOM FOREST TEST ###
rf = RandomForestRegressor(n_estimators=2000, n_jobs=-1, max_depth=3)