def InitialAlignment(self, scale=0.15):
    """
    Compute SVD and align object to be in a certain coordinate frame.

    Usage: model.InitialAlignment(scale)

    Input:
        scale - Desired scale for object. Scale is defined as the length
            along the leading eigenvector, in meters.
    """
    pts3D = self.pts3D

    # Compute eigenvecs and rotate according to them
    pc, evals, mean = utils.pca(pts3D, remove_mean=True)
    pts3D_rot = np.dot(pc.T, pts3D)

    # Find length along the leading eigenvector
    mins = np.min(pts3D_rot, axis=1)
    maxs = np.max(pts3D_rot, axis=1)
    max_length = maxs[0] - mins[0]

    # Build the rotation from the principal axes, but with Z as the leading
    # eigenvector:
    rot = np.c_[-pc[2], pc[1], pc[0]]

    # Transform model to have zero mean, reasonable scale and rotation.
    self.transform(rot, np.dot(rot, -mean), float(scale) / max_length)
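# A minimal sketch of the utils.pca() helper assumed above: for a d x N point
# matrix it would return the principal axes (columns, sorted by descending
# eigenvalue), the eigenvalues, and the removed mean. The column-eigenvector
# convention and exact return shapes are assumptions, not confirmed by the
# source.
import numpy as np

def pca(pts, remove_mean=True):
    mean = (pts.mean(axis=1, keepdims=True) if remove_mean
            else np.zeros((pts.shape[0], 1)))
    centered = pts - mean
    evals, evecs = np.linalg.eigh(np.cov(centered))  # symmetric covariance
    order = np.argsort(evals)[::-1]                  # leading eigenvector first
    return evecs[:, order], evals[order], mean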
def exercicio1():
    utils.print_header(1)
    x, y, labels = load_iris(os.path.join(constants.DATA_DIR,
                                          constants.FILENAME_IRIS_DATABASE))
    n, d = x.shape  # n samples, d attributes

    print('a)')
    for i in range(d):
        print('\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
            i, utils.mean(x[:, i]), utils.variance(x[:, i])))

    print('b)')
    for i in range(labels.shape[0]):
        print('\tClass {}: {}'.format(i, labels[i]))
        for j in range(d):
            print('\t\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
                j, utils.mean(x[(y == i)[:, 0], j]),
                utils.variance(x[(y == i)[:, 0], j])))

    print('c)')
    print('\tThe histograms will be displayed')
    f, ax = plt.subplots(1, d, sharex=False, sharey=True)
    for j in range(d):
        ax[j].set_title('Attribute {}'.format(j))  # show title only in the top
        hist_bins = np.linspace(x[:, j].min(), x[:, j].max(), num=16)
        ax[j].hist(np.vstack([x[(y == i)[:, 0], j]
                              for i in range(labels.shape[0])]).T,
                   bins=hist_bins, linewidth=0, color=['r', 'b', 'g'])
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-c.pdf')
    plt.legend(labels, loc='upper center', bbox_to_anchor=(0.5, 0.07),
               ncol=3, bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)
    f.set_figheight(3)
    f.set_figwidth(8)
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('d)')
    print('\tA plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    # Format the plot to mimic Slide 21 of Aula 3
    x_pca[:, 1] *= -1
    a = plt.scatter(x_pca[np.where(y == 0)[0], 1], x_pca[np.where(y == 0)[0], 0],
                    c='r', marker='^', lw=0, s=100)
    b = plt.scatter(x_pca[np.where(y == 1)[0], 1], x_pca[np.where(y == 1)[0], 0],
                    c='b', marker='o', lw=0, s=100)
    c = plt.scatter(x_pca[np.where(y == 2)[0], 1], x_pca[np.where(y == 2)[0], 0],
                    c='g', marker='s', lw=0, s=100)
    plt.xlim([-1.5, 1.5])
    plt.ylim([-4, 4])
    plt.legend((a, b, c), tuple(labels), loc='upper left', fontsize=10)
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-d.pdf')
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))
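# A plausible sketch of the utils.pca() variant used by exercicio1 above and
# exercicio2 below: a projection onto the first n_components principal axes.
# The sklearn backing is an assumption; only the signature is visible in the
# callers.
from sklearn.decomposition import PCA

def pca(x, n_components=2, whiten=False):
    # x: (n_samples, n_features); returns the projected data
    return PCA(n_components=n_components, whiten=whiten).fit_transform(x)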
def extract_feature(train_img, test_img, path=None):
    """
    Compute features for kNN from a pretrained network.
    :param train_img: training images
    :param test_img: test images
    :param path: optional checkpoint path
    :return: (train_features, test_features)
    """
    if config.extract_feature == 'feature':
        # Load the student checkpoint and use the network as a feature
        # extractor.
        dir_path = os.path.join(config.save_model, config.dataset)
        dir_path = os.path.join(dir_path,
                                'knn_num_neighbor_' + str(config.nb_teachers))
        filename = str(config.nb_teachers) + '_stdnt_resnet.checkpoint.pth.tar'
        filename = os.path.join(dir_path, filename)
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        print('shape of extracted feature', train_feature.shape)
        return train_feature, test_feature

    if config.extract_feature == 'hog':
        # The file holding all HOG features is usually too large, so it is
        # decomposed into 10 pieces.
        train_data = None
        each_length = int((9 + len(train_img)) / 10)
        for idx in range(10):  # save the pkl in several small pieces
            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval {} : {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)
            with open(train_hog_path, 'rb') as f:
                if train_data is not None:
                    train_data = np.vstack((train_data, pickle.load(f)))
                else:
                    train_data = pickle.load(f)
        print('load hog feature shape', train_data.shape)
        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)
        return train_data, test_data

    if config.extract_feature == 'pca':
        return utils.pca(test_img, train_img)
def extract_feature(train_img, test_img, path=None):
    """
    Compute features for kNN from a pretrained network.
    :param train_img: training images
    :param test_img: test images
    :param path: optional checkpoint path
    :return: (train_features, test_features)
    """
    if config.extract_feature == 'feature':
        # Update the feature extractor using the student model (filename)
        # from the last iteration. Replace `filename` with your saved student
        # model; the following is an example checkpoint path.
        filename = 'save_model/svhn/knn_num_neighbor_800/800_stdnt_.checkpoint.pth.tar'
        train_feature = network.pred(train_img, filename, return_feature=True)
        test_feature = network.pred(test_img, filename, return_feature=True)
        return train_feature, test_feature

    train_img = [np.asarray(data) for data in train_img]
    test_img = [np.asarray(data) for data in test_img]

    if config.extract_feature == 'hog':
        # The file holding all HOG features is usually too large, so it is
        # decomposed into 10 pieces.
        train_data = None
        each_length = int((9 + len(train_img)) / 10)
        for idx in range(10):
            # Save the pkl in several small pieces, in case the private
            # dataset is too large.
            train_hog_path = os.path.join(
                config.hog_path, config.dataset + str(idx) + '_train_hog.pkl')
            if not os.path.exists(train_hog_path):
                p1 = idx * each_length
                p2 = min((idx + 1) * each_length, len(train_img))
                print('save_hog_pkl for interval {} : {}'.format(p1, p2))
                utils.save_hog(train_img[p1:p2], train_hog_path)
            with open(train_hog_path, 'rb') as f:
                if train_data is not None:
                    train_data = np.vstack((train_data, pickle.load(f)))
                else:
                    train_data = pickle.load(f)
        print('load hog feature shape', train_data.shape)
        test_hog_path = os.path.join(config.hog_path,
                                     config.dataset + '_test_hog.pkl')
        if not os.path.exists(test_hog_path):
            utils.save_hog(test_img, test_hog_path)
        with open(test_hog_path, 'rb') as f:
            test_data = pickle.load(f)
        return train_data, test_data

    if config.extract_feature == 'pca':
        return utils.pca(test_img, train_img)
def exercicio2():
    utils.print_header(2)
    x, y, labels = load_cnae9_reduzido(os.path.join(
        constants.DATA_DIR, constants.FILENAME_CNAE_DATABASE))

    def display_plot(_x, _labels, fname, is_1d=False):
        plt_axes = []
        colors = 'bgrcm'
        hist_bins = np.linspace(_x.min(), _x.max(), num=16)
        if is_1d:
            plt.hist(np.vstack([_x[np.where(y == label)[0], 0]
                                for label in _labels]).T,
                     bins=hist_bins, linewidth=0, color=colors)
        for i, label in enumerate(_labels):
            x2 = _x[np.where(y == label)[0], 0]
            y2 = (_x[np.where(y == label)[0], 1] if not is_1d
                  else -1 * np.ones(np.where(y == label)[0].shape[0]))
            plt_axes.append(plt.scatter(x2, y2, c=colors[i], lw=0))
        plt.legend(tuple(plt_axes), list(_labels), loc='upper left', fontsize=10)
        fig_fname = os.path.join(constants.OUTPUT_DIR, fname)
        plt.savefig(fig_fname, bbox_inches='tight')
        plt.show()
        return fig_fname

    print('a) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-a.pdf')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('b) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=2, whiten=True)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-b.pdf')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('c) a plot will be displayed...')
    x_pca = utils.pca(x, n_components=1, whiten=True)
    plot_fname = display_plot(x_pca, labels, 'exercicio2-c.pdf', is_1d=True)
    print('\tThis plot was saved: {}'.format(plot_fname))
def generate_data(num_points, seed):
    scale = np.diag(np.sqrt(np.array([0.01, 0.1, 1][::-1])))
    rotate1 = R.from_rotvec(np.array([np.deg2rad(45.), 0, 0])).as_matrix()
    rotate2 = R.from_rotvec(np.array([0, np.deg2rad(45.), 0])).as_matrix()
    rotate3 = R.from_rotvec(np.array([0, 0, np.deg2rad(45.)])).as_matrix()
    chol = rotate3 @ rotate2 @ rotate1 @ scale
    cov = chol @ chol.T

    pca_w, _ = pca(cov)
    assert np.allclose(pca_w, np.array([1., 0.1, 0.01]))

    rs = np.random.RandomState(seed=seed)
    samples = rs.randn(3, num_points)
    data = (chol @ samples).T
    return data
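# Hypothetical sketch of the pca() used here and in the evaluation script
# below: a plain eigendecomposition of a covariance matrix with eigenvalues
# sorted in descending order, which matches the assertion above (eigenvalues
# [1., 0.1, 0.01]) and the column-wise truncation pca_v[:, :keep] used later.
import numpy as np

def pca(cov):
    w, v = np.linalg.eigh(cov)   # cov is symmetric
    order = np.argsort(w)[::-1]  # descending eigenvalues
    return w[order], v[:, order]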
def transform(self, X):
    """Reduce the dimensionality of X with t-SNE via gradient descent."""
    print("Start transforming X...")
    if self.random_state is not None:
        print("transforming X with random state: " + str(self.random_state))
        np.random.seed(self.random_state)
    else:
        print("No random state specified...")

    if self.initialization == "PCA":
        print("First reducing dimensions of X with PCA to %d dimensions"
              % self.initial_dims)
        X, _ = pca(X, self.initial_dims)

    (n, d) = X.shape
    Y = np.random.randn(n, self.d_components)  # initialize a random solution
    cond_P, _ = cond_probs(X, perplexity=self.perplexity)
    P = joint_average_P(cond_P)

    print("Start gradient descent...")
    t0 = time()
    if self.grad_method == 'ADAM':
        Y, cost, grad_value = self.grad_descent_ADAM(X, Y, P)
    elif self.grad_method == 'gains':
        Y, cost, grad_value = self.grad_descent_gains(X, Y, P)
    elif self.grad_method == 'SGD':
        Y, cost, grad_value = self.grad_descent(X, Y, P)
    print("Gradient descent took %.4f seconds" % (time() - t0))
    return Y, cost, grad_value
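# The pca() called in transform() returns a pair (the caller unpacks X, _).
# A sketch in the style of the classic t-SNE reference code, assuming the
# second return value is the projection basis:
import numpy as np

def pca(X, no_dims=50):
    X = X - np.mean(X, axis=0)                 # center the data
    evals, M = np.linalg.eigh(np.dot(X.T, X))  # eigenvectors of the Gram matrix
    order = np.argsort(evals)[::-1][:no_dims]  # keep the top no_dims axes
    M = M[:, order]
    return np.dot(X, M), M                     # projected data + basis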
def generate_work_data(dataset, labels, colors, parameters, pca_enabled=False):
    X_img = np.load('./data/' + dataset + '.npy')
    y_img = np.load('./data/' + dataset + '_labels.npy')
    save_image(color_true_map(y_img, labels_colors=colors), dataset + "_labels")
    X = utils.flat(X_img)
    y = utils.flat(y_img)

    # Balanced train/validation/test split
    train_ratio, val_ratio = 0.1, 0.1
    test_ratio = 1 - (train_ratio + val_ratio)
    tv_mask, test_mask = utils.balanced_train_test_mask(
        y, np.isin(y, labels), test_ratio)
    train_mask, val_mask = utils.balanced_train_test_mask(
        y, tv_mask, val_ratio / (val_ratio + train_ratio))
    np.save("./data/" + dataset + "_train_mask.npy", train_mask)
    np.save("./data/" + dataset + "_val_mask.npy", val_mask)
    np.save("./data/" + dataset + "_test_mask.npy", test_mask)

    if pca_enabled:
        pca = utils.pca(X[tv_mask, :], 0.99)
        utils.save_model(pca, dataset + '_pca')
        train = pca.transform(X[train_mask, :])
        test = pca.transform(X[test_mask, :])
        flat = pca.transform(X)
    else:
        train = X[train_mask, :]
        test = X[test_mask, :]
        flat = X

    svc = utils.svc(train, y[train_mask], parameters["C"], parameters["gamma"])
    utils.save_model(svc, dataset + '_svc')
    test_pred = svc.predict(test)
    np.save("./data/" + dataset + "_test_pred.npy", test_pred)
    classification = svc.predict(flat).reshape(y_img.shape).astype(np.uint8)
    np.save("./data/" + dataset + "_clasification.npy", classification)
    save_image(color_true_map(classification, labels_colors=colors),
               dataset + "_clasification")
    score = utils.balanced_score(y[test_mask], test_pred)
    utils.save_json({"original": score}, dataset + "_original_score")
    print("Test Score:", score)
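# Here utils.pca(X, 0.99) clearly returns a fitted model: it is saved with
# save_model and exposes .transform. A minimal sketch assuming sklearn, where
# a float n_components in (0, 1) keeps enough components to explain that
# fraction of the variance:
from sklearn.decomposition import PCA

def pca(x, variance_ratio):
    return PCA(n_components=variance_ratio).fit(x)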
def generate_raw_image_pixels(list_of_demonstrations):
    """PCA and t-SNE on raw image pixels."""
    # Design matrix of raw image pixels
    X = None
    for demonstration in list_of_demonstrations:
        print("Raw image pixels", demonstration)
        PATH_TO_ANNOTATION = (constants.PATH_TO_DATA + constants.ANNOTATIONS_FOLDER
                              + demonstration + "_" + str(constants.CAMERA) + ".p")
        start, end = utils.get_start_end_annotations(PATH_TO_ANNOTATION)
        for frm in range(start, end + 1):
            if (frm % 6) == 0:  # sample every 6th frame
                PATH_TO_IMAGE = utils.get_full_image_path(
                    constants.PATH_TO_DATA + constants.NEW_FRAMES_FOLDER
                    + demonstration + "_" + constants.CAMERA + "/", frm)
                print(demonstration, str(frm))
                img = utils.reshape(cv2.imread(PATH_TO_IMAGE).flatten())
                X = utils.safe_concatenate(X, img)

    X_pca = utils.pca(X, PC=2)
    X_tsne = utils.tsne(X)
    data_dimred = [X_pca, X_tsne]
    # Note: `demonstration` here is the last demonstration in the loop.
    pickle.dump(data_dimred,
                open("raw_pixel_" + demonstration + "_dimred.p", "wb"))
def generate_SIFT():
    data = pickle.load(open("sift_features/SIFT_plane_9_1.p", "rb"))
    X_pca = utils.pca(data, PC=2)
    X_tsne = utils.tsne(data)
    data_dimred = [X_pca, X_tsne]
    pickle.dump(data_dimred, open("SIFT_plane_9_dimred.p", "wb"))
    print('Time:', time() - t)
else:
    kernel = {'linear': linear, 'rbf': rbf, 'linearbf': linearbf}[KERNEL]

    print('Generating L ...')
    t = time()
    N = len(table)
    W = [[kernel(i, j, table) for j in range(N)] for i in range(N)]
    D = [[sum(W[i]) if i == j else 0 for j in range(N)] for i in range(N)]
    L = np.array(D) - np.array(W)
    print('Time:', time() - t)

    print('Calculating eigenvector ...')
    t = time()
    # Use the second method provided on the handout for normalized cut.
    w, v = LA.eig(L) if TYPE == 'ratio' else LA.eig(np.dot(LA.inv(D), L))
    np.save(open('w.npy', 'wb'), w)
    np.save(open('v.npy', 'wb'), v)
    print('Time:', time() - t)

    idx = np.argsort(w)
    w = w[idx]
    v = v[:, idx]
    print('Eigenvalue:', w[:4])
    U = np.array([v[:, u] for u in range(1, K + 1)]).T

    print('Kmean ...')
    labels = kmean(U, K)

pca(X_train, labels, FILENAME)
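# In this script (and the libsvm one further down) pca() is a plotting helper
# rather than a transform: pca(X_train, labels, FILENAME) and
# pca(images, labels, special=n). A hedged sketch; the marker and color
# choices are assumptions:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

def pca(X, labels, filename=None, special=None):
    X2 = PCA(n_components=2).fit_transform(X)
    plt.scatter(X2[:, 0], X2[:, 1], c=labels, s=8, cmap='tab10')
    if special is not None:  # e.g. highlight support vector indices
        plt.scatter(X2[special, 0], X2[special, 1], facecolors='none',
                    edgecolors='k', s=40)
    if filename is not None:
        plt.savefig(filename, bbox_inches='tight')
    plt.show()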
feat_imp.plot(kind='bar', title='Feature Importances')
plt.ylabel('Feature Importance Score')

train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

y = train['y']
test_ids = test['ID']

train, test = utils.label_encode_categorical(train, test)
train = train.drop(['ID', 'y'], axis=1)
test = test.drop('ID', axis=1)

### PCA ###
df_pca, df_test_pca = utils.pca(train, test, 0.99)

### ICA ###
columns = ['ICA_{}'.format(i) for i in range(10)]
ica = FastICA(n_components=10, random_state=42)
df_ica = pd.DataFrame(ica.fit_transform(train), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(test), columns=columns)

train = pd.concat([train, df_pca, df_ica], axis=1)
test = pd.concat([test, df_test_pca, df_test_ica], axis=1)

predictors = [x for x in train.columns if x not in ['ID', 'y']]

xgb1 = XGBRegressor(learning_rate=0.1, n_estimators=1000, max_depth=5,
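# The train/test scripts here and below call utils.pca(train, test, n) where
# n is either a component count or a variance fraction, and index the result
# with 'PCA_i' columns. A sketch consistent with that usage (sklearn assumed):
import pandas as pd
from sklearn.decomposition import PCA

def pca(train, test, n_components):
    # A float n_components keeps that fraction of the explained variance.
    model = PCA(n_components=n_components).fit(train)
    columns = ['PCA_{}'.format(i) for i in range(model.n_components_)]
    return (pd.DataFrame(model.transform(train), columns=columns),
            pd.DataFrame(model.transform(test), columns=columns))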
import argparse

import numpy as np

from synthetic.data import generate_data, TRAIN_SEED, VALIDATION_SEED
from utils import pca

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_train_points', type=int, default=10000)
    parser.add_argument('--num_eval_points', type=int, default=10000)
    args = parser.parse_args()

    train_data = generate_data(num_points=args.num_train_points, seed=TRAIN_SEED)
    eval_data = generate_data(num_points=args.num_eval_points, seed=VALIDATION_SEED)

    estimated_cov = np.cov(train_data, rowvar=False)
    pca_w, pca_v = pca(estimated_cov)

    for keep in [3, 2, 1]:
        pca_v = pca_v[:, :keep]
        # Project and reconstruct
        val_data_proj = eval_data @ pca_v
        val_data_reconstr = val_data_proj @ pca_v.T
        mse = np.mean((eval_data - val_data_reconstr) ** 2)
        print(f"mse ({keep}) {mse:.3f}")
    model.add(Dense(units=num_classes, kernel_initializer='glorot_uniform',
                    bias_initializer='zeros', activation='softmax'))
    return model

if __name__ == "__main__":
    num_train = 50000
    num_test = 10000
    feature_preserve_ratio = .95

    x_train, y_train, x_test, y_test = utils.load_data()
    x_train = x_train[0:num_train, :]
    y_train = y_train[0:num_train]
    x_test = x_test[0:num_test, :]
    y_test = y_test[0:num_test]
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    x_train_pca, x_test_pca = utils.pca(x_train, x_test, feature_preserve_ratio)
    # Alternatively, reuse a saved PCA/scaler:
    # x_train_pca, x_test_pca = utils.pca_with_model(
    #     pca_model_name='pca_model.sav', scaler_model_name='scaler_model.sav',
    #     x_train=x_train, x_test=x_test)

    y_train = utils.convert_to_one_hot(y_train, 10)
    y_test = utils.convert_to_one_hot(y_test, 10)

    model = get_model(x_train_pca.shape[1:], 10)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    tic = time.time()
    history = model.fit(x=x_train_pca, y=y_train, epochs=20, batch_size=256,
                        validation_data=(x_test_pca, y_test),
                        callbacks=[TensorBoard(log_dir='./logs')])
    toc = time.time()
    print("train time: " + str(1000 * (toc - tic)) + "ms")
    utils.plot_history(history)
train, test = utils.label_encode_categorical(train, test)

# Remove constant features
desc = train.describe()
feat_to_drop = [c for c in desc.columns if desc[c][2] == 0]  # row 2 of describe() is std
train.drop(feat_to_drop, axis=1, inplace=True)
test.drop(feat_to_drop, axis=1, inplace=True)

y = train['y']
test_ids = test['ID']
test.drop('ID', axis=1, inplace=True)
train.drop(['ID', 'y'], axis=1, inplace=True)

n_components = 12

# PCA
df_pca, df_test_pca = utils.pca(train, test, n_components)

# ICA
columns = ['ICA_{}'.format(i) for i in range(n_components)]
ica = FastICA(n_components=n_components, random_state=420,
              max_iter=10000, tol=0.001)
df_ica = pd.DataFrame(ica.fit_transform(train), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(test), columns=columns)

# Truncated SVD
columns = ['TSVD_{}'.format(i) for i in range(n_components)]
tsvd = TruncatedSVD(n_components=n_components, random_state=420)
df_tsvd = pd.DataFrame(tsvd.fit_transform(train), columns=columns)
df_test_tsvd = pd.DataFrame(tsvd.transform(test), columns=columns)
def xgb_r2_score(preds, dtrain):
    labels = dtrain.get_label()
    return 'r2', r2_score(labels, preds)

df = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

y = df['y']
df = df.drop(['ID', 'y'], axis=1)
test_ids = df_test['ID']
df_test = df_test.drop('ID', axis=1)

df, df_test = utils.label_encode_categorical(df, df_test)

### PCA ###
df_pca, df_test_pca = utils.pca(df, df_test, 10)

### ICA ###
columns = ['ICA_{}'.format(i) for i in range(10)]
ica = FastICA(n_components=10, random_state=42)
df_ica = pd.DataFrame(ica.fit_transform(df), columns=columns)
df_test_ica = pd.DataFrame(ica.transform(df_test), columns=columns)

### XGBOOST ###
y_mean = y.mean()

# prepare dict of params for xgboost to run with
xgb_params = {
    'eta': 0.05,
    'max_depth': 4,
    'subsample': 0.9,
    'objective': 'reg:linear',
''' setting parameters according to the README '''
prob = svm_problem(train_labels, train_images)
param = svm_parameter('-q')
param_best = svm_parameter('-c 32 -g 0.0078125 -q')
param_linear = svm_parameter('-t 0 -q')
param_poly = svm_parameter('-t 1 -g 1 -q')
param_rbf = svm_parameter('-g 0.0078125 -q')
model = svm_train(prob, param)

"""
''' precomputed kernel generated by precompute-kernel.py '''
pre_train_labels, pre_train_images = svm_read_problem('../../../lab5/data/precompute-kernel-train')
pre_test_labels, pre_test_images = svm_read_problem('../../../lab5/data/precompute-kernel-test')
print('File loaded')
prob_pre = svm_problem(pre_train_labels, pre_train_images, isKernel=True)
param_pre = svm_parameter('-t 4')
model = svm_train(prob_pre, param_pre)
"""

''' get support vectors (libsvm indices are 1-based) '''
n = model.get_sv_indices()
n = [i - 1 for i in n]

''' draw support vectors and dots in 2D space with PCA '''
images, labels = preprocess(path='../../../lab5/data/')
pca(images, labels, special=n)
    return X_tsne_scaled, X_tsne_norm, X_tsne_pca, X_tsne_zca

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("file_name", help="Please specify MAIN file name")
    parser.add_argument("layer", help="Please specify layer")
    parser.add_argument("PATH_TO_DATA", help="Please specify the path to the feature data")
    parser.add_argument("--a", help="Annotated frames")
    parser.add_argument("--PATH_TO_DATA_2", help="Please specify the path to 2nd set of feature data")
    parser.add_argument("--a_2", help="Annotated frames for 2nd set of data")
    parser.add_argument("--image", help="Parse image mode", default=None)
    args = parser.parse_args()

    if args.a_2 and args.PATH_TO_DATA_2 and not args.image:
        X1, label_map_1, index_map_1 = parse_annotations_pickle(
            args.a, args.PATH_TO_DATA, args.layer)
        X2, label_map_2, index_map_2 = parse_annotations_pickle(
            args.a_2, args.PATH_TO_DATA_2, args.layer)
        X1_pca = utils.pca(X1)
        X2_pca = utils.pca(X2)
        plot_annotated_joint(X1_pca, X2_pca, label_map_1, index_map_1,
                             label_map_2, index_map_2,
                             figure_name=args.file_name + ".png",
                             title="PCA " + args.layer)
    elif args.image and not args.PATH_TO_DATA_2:
        X, label_map, index_map = utils.parse_annotations_images(args.a, args.PATH_TO_DATA)
        pickle.dump(X, open(args.file_name + "_allimages.p", "wb"))
        pickle.dump(label_map, open(args.file_name + "_labelmap.p", "wb"))
        pickle.dump(index_map, open(args.file_name + "_indexmap.p", "wb"))
        IPython.embed()
        X_pca = utils.pca(X)
        X_tsne = utils.tsne(X)
        X_tsne_pca = utils.tsne_pca(X)
        utils.plot_annotated_embedding(X_pca, label_map, index_map,
                                       args.file_name + '_' + args.layer + '_pca.png',
                                       'PCA ' + args.layer)
        utils.plot_annotated_embedding(X_tsne, label_map, index_map,
                                       args.file_name + '_' + args.layer + '_tsne.png',
                                       't-SNE ' + args.layer)
        utils.plot_annotated_embedding(X_tsne_pca, label_map, index_map,
                                       args.file_name + '_' + args.layer + '_tsne_pca.png',
                                       't-SNE (PCA Input) ' + args.layer)
    else:
### BEST MODEL: RF n = 1000, max_depth = 5, no PCA

df = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

y = df['y']
df = df.drop(['ID', 'y'], axis=1)
df, df_test = utils.label_encode_categorical(df, df_test)
test_ids = df_test['ID']
df_test = df_test.drop('ID', axis=1)

### PCA TRIAL ###
df_pca, df_test_pca = utils.pca(df, df_test, 0.99)

plt.scatter(df_pca['PCA_0'], df_pca['PCA_1'], s=1)
plt.xlabel('PCA_0')
plt.ylabel('PCA_1')
plt.show()

plt.scatter(df_pca['PCA_0'], y, s=1)
plt.xlabel('PCA_0')
plt.ylabel('y')
plt.show()

plt.scatter(df_pca['PCA_1'], y, s=1)
plt.xlabel('PCA_1')
plt.ylabel('y')
plt.show()

### RANDOM FOREST TRIAL ###
rf = RandomForestRegressor(n_estimators=2000, n_jobs=-1, max_depth=3)