def backward(self, X, **params):
    try:
        self.lossy = params["lossy"]
    except KeyError:
        pass
    try:
        self.n_components = params["n_components"]
    except KeyError:
        pass
    pca = PCA(n_components=self.n_components)
    tiles = []
    for x in X:
        component, cov, mean = x
        component0, component1, component2 = dcps_array_3d(component)
        cov0, cov1, cov2 = dcps_array_3d(cov)
        mean0, mean1, mean2 = mean[0], mean[1], mean[2]

        pca.components_ = cov0[:self.n_components, :self.n_components]
        pca.mean_ = mean0[:self.n_components]
        channel0 = pca.inverse_transform(component0[:, :self.n_components])

        pca.components_ = cov1[:self.n_components, :self.n_components]
        pca.mean_ = mean1[:self.n_components]
        channel1 = pca.inverse_transform(component1[:, :self.n_components])

        pca.components_ = cov2[:self.n_components, :self.n_components]
        pca.mean_ = mean2[:self.n_components]
        channel2 = pca.inverse_transform(component2[:, :self.n_components])

        tiles.append(cat_arrays_2d([channel0, channel1, channel2]))
    return tiles
def naive_bayes(images, size, labels, color_labels):
    mean = numpy.mean(images)
    std = numpy.std(images)
    scaled = preprocessing.scale(images, with_mean=True, with_std=True)

    pca = PCA(4)
    pca.fit(scaled)
    one_two = pca.components_[:2]
    three_four = pca.components_[2:4]

    lab = numpy.copy(labels)
    for i, elem in enumerate(labels):
        if elem == 'dog':
            labels[i] = 0
        if elem == 'house':
            labels[i] = 1
        if elem == 'guitar':
            labels[i] = 2
        if elem == 'person':
            labels[i] = 3

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        scaled, labels, train_size=0.7, random_state=1)

    model = nb.GaussianNB()
    model.fit(X_train, y_train)
    score_original = model.score(X_test, y_test)
    print(f"Score without pca decomposition is: {score_original}")

    pca.components_ = one_two
    X_train_transf = pca.transform(X_train)
    X_test_transf = pca.transform(X_test)
    model.fit(X_train_transf, y_train)
    score_first = model.score(X_test_transf, y_test)
    print(f"Score with first two pcs is: {score_first}")
    title = "Decision boundaries for NB with first two pca components"
    plot_boundaries(X_train_transf, y_train, X_test_transf, y_test, model, title, lab)

    pca.components_ = three_four
    X_train_transf = pca.transform(X_train)
    X_test_transf = pca.transform(X_test)
    model.fit(X_train_transf, y_train)
    score_second = model.score(X_test_transf, y_test)
    print(f"Score with third and fourth pcs is: {score_second}")
    title = "Decision boundaries for NB with 3rd and 4th pca components"
    plot_boundaries(X_train_transf, y_train, X_test_transf, y_test, model, title, lab)
    return
def showImageWithPcaLast(n_pca, input_matrix, nImg, show):
    # PCA keeping the last n components
    pca_last_n = PCA()
    scaled = scaler.fit_transform(input_matrix)
    pca_last_n = pca_last_n.fit(scaled)
    components = pca_last_n.components_[-n_pca:]
    pca_last_n.components_ = components
    x_last_n = pca_last_n.transform(scaled)
    x_inv_last_n = pca_last_n.inverse_transform(x_last_n)
    x_inv_last_n = scaler.inverse_transform(x_inv_last_n)

    # print variance ratio covered by the retained (last n_pca) components
    val = np.sum(pca_last_n.explained_variance_ratio_[-n_pca:])
    print("Variance coverage for the last " + str(n_pca) + ": " + str(val))

    if show:
        fig_last_n = plt.figure()
        fig_last_n.add_subplot(1, 2, 1)
        plt.imshow(np.reshape(input_matrix[nImg, :] / 255.0, (227, 227, 3)))
        fig_last_n.add_subplot(1, 2, 2)
        plt.imshow(np.reshape(x_inv_last_n[nImg, :] / 255.0, (227, 227, 3)))
        plt.show()
        # plt.clf()
    else:
        return x_inv_last_n
def test_init(self, df_norm, n_components):
    from flotilla.compute.decomposition import DataFramePCA
    test_pca = DataFramePCA(df_norm, n_components=n_components)

    true_pca = PCA(n_components=n_components)
    true_pca.fit(df_norm.values)
    pc_names = ['pc_{}'.format(i + 1)
                for i in range(true_pca.components_.shape[0])]
    true_pca.components_ = pd.DataFrame(true_pca.components_,
                                        index=pc_names,
                                        columns=df_norm.columns)
    true_pca.explained_variance_ = pd.Series(
        true_pca.explained_variance_, index=pc_names)
    true_pca.explained_variance_ratio_ = pd.Series(
        true_pca.explained_variance_ratio_, index=pc_names)
    true_pca.reduced_space = true_pca.transform(df_norm.values)
    true_pca.reduced_space = pd.DataFrame(true_pca.reduced_space,
                                          index=df_norm.index,
                                          columns=pc_names)

    npt.assert_array_equal(test_pca.X, df_norm.values)
    pdt.assert_frame_equal(test_pca.components_, true_pca.components_)
    pdt.assert_series_equal(test_pca.explained_variance_,
                            true_pca.explained_variance_)
    pdt.assert_series_equal(test_pca.explained_variance_ratio_,
                            true_pca.explained_variance_ratio_)
    pdt.assert_frame_equal(test_pca.reduced_space, true_pca.reduced_space)
def load(self, filename='pca.nc'):
    """ Read sklearn PCA parameters from a netcdf file """
    infile = netCDF4.Dataset(filename, 'r')
    self.locations = [json.loads(string)
                      for string in list(infile.variables['location'])]
    self.pcas = []
    id = 0
    for location in self.locations:
        n_components = infile.variables['n_components'][id]
        components = infile.variables['components'][id]
        mean = infile.variables['means'][id]
        explained_variance_ratio = infile.variables['explained_variance_ratio'][id]
        noise_variance = infile.variables['noise_variance'][id]
        pca = PCA(n_components=n_components)
        pca.components_ = components
        pca.mean_ = mean
        pca.explained_variance_ratio_ = explained_variance_ratio
        pca.noise_variance_ = noise_variance
        self.pcas.append(pca)
        id += 1
def principalComponent(X, n_components, isPrint=False, isShow=False, n_show=None):
    pca = PCA(n_components=n_components)
    pca.fit(X)
    X_pca = pca.transform(X)
    if isPrint:
        print("Shape of the original array: {}".format(str(X.shape)))
        print("Shape of the array after dimensionality reduction: {}".format(
            str(X_pca.shape)))
        print("Shape of the principal components: {}".format(pca.components_.shape))
        print("PCA components:\n{}".format(pca.components_))
    if isShow:
        if n_show is not None:
            pca.components_ = pca.components_[0:n_show[0], 0:n_show[1]]
        plt.matshow(pca.components_, cmap='seismic')
        # plt.yticks([0, 1], ["First component", "Second component"])
        plt.colorbar()
        # plt.xticks(range(len(X.columns)),
        #            X.columns, rotation=85, ha='left')
        plt.xlabel("Feature")
        plt.ylabel("Principal components")
        plt.show()
    return X_pca
def E(A, B, n_components=20):
    pca_A = PCA(n_components)
    pca_A.fit(np.array(A))
    pca_B = PCA(n_components)
    pca_B.fit(np.array(B))

    # This approach uses the eigenvectors and eigenvalues from B
    pca_A.components_ = pca_B.components_
    pca_A.explained_variance_ = pca_B.explained_variance_
    transformed = pca_A.transform(A)
    inverse_transformed = pca_A.inverse_transform(transformed)

    # This approach tries to use pca_B with mean_A
    # pca_B.mean_ = pca_A.mean_
    # transformed = pca_B.transform(A)
    # inverse_transformed = pca_B.inverse_transform(transformed)

    error_vector = np.array(A) - np.array(inverse_transformed)
    N = len(error_vector)
    squares = []
    for i in range(0, len(error_vector)):
        squares.append(list(map(lambda x: x * x, error_vector[i])))
    sums = list(map(lambda x: sum(x), squares))
    error = sum(sums) / N
    return error
def _initialize_with_pca(self, datas, inputs=None, masks=None, tags=None, num_iters=20):
    for data in datas:
        assert data.shape[1] == self.N
    N_offsets = np.cumsum(self.N_vec)[:-1]
    pcas = []

    split_datas = list(
        zip(*[np.split(data, N_offsets, axis=1) for data in datas]))
    split_masks = list(
        zip(*[np.split(mask, N_offsets, axis=1) for mask in masks]))
    assert len(split_masks) == len(split_datas) == self.P

    for em, dps, mps in zip(self.emissions_models, split_datas, split_masks):
        pcas.append(em._initialize_with_pca(dps, inputs, mps, tags))

    # Combine the PCA objects
    from sklearn.decomposition import PCA
    pca = PCA(self.D)
    pca.components_ = block_diag(*[p.components_ for p in pcas])
    pca.mean_ = np.concatenate([p.mean_ for p in pcas])

    # Not super pleased with this, but it should work...
    pca.noise_variance_ = np.concatenate(
        [p.noise_variance_ * np.ones(n) for p, n in zip(pcas, self.N_vec)])
    return pca
def from_tuple(tuple):
    # Create PCA object from its stored attributes
    components, explained_variance, mean, whiten = tuple
    pca = PCA(whiten=whiten)
    pca.components_ = components
    pca.explained_variance_ = explained_variance
    pca.mean_ = mean
    return pca
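# Added illustrative sketch (not from any of the projects above) of the round trip
# that from_tuple() relies on. In the scikit-learn versions these snippets target,
# transform()/inverse_transform() only read the learned attributes, so a PCA
# rebuilt from its stored components_, explained_variance_, mean_ and whiten flag
# should behave like the original fitted object.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)

fitted = PCA(n_components=3, whiten=True).fit(X)
# the same four attributes that from_tuple() expects
stored = (fitted.components_, fitted.explained_variance_, fitted.mean_, fitted.whiten)

restored = PCA(whiten=stored[3])
restored.components_, restored.explained_variance_, restored.mean_ = stored[:3]

# The restored object should reproduce the original projection on the same data.
np.testing.assert_allclose(fitted.transform(X), restored.transform(X))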
def get_components(data):
    pca = PCA(n_components=COMPONENTS)
    pca.fit(data)
    new_components = np.array([np.dot(component, ortho_rotation(pca.components_))
                               for component in pca.components_])
    pca.components_ = new_components
    print(pca.components_, pca.explained_variance_ratio_)
    transformed = pca.transform(data)
    df_transformed = pd.DataFrame(data=transformed, index=data.index)
    return df_transformed
def applyPCA(sampleData, mean, components):
    pca = PCA(n_components=components.shape[0])
    pca.components_ = components
    pca.mean_ = mean
    transform = pca.transform(np.array([sampleData]))
    reconstructed = fast_dot(transform, pca.components_) + pca.mean_
    reconstructed = reconstructed[0]
    return sampleData / reconstructed
def project_pc(sample_data, ref_file, ap):
    pca = PCA(n_components=ref_file['pca_components{}'.format(ap)].shape[0])
    pca.components_ = ref_file['pca_components{}'.format(ap)]
    pca.mean_ = ref_file['pca_mean{}'.format(ap)]
    transform = pca.transform(np.array([sample_data]))
    reconstructed = np.dot(transform, pca.components_) + pca.mean_
    reconstructed = reconstructed[0]
    return sample_data / reconstructed
def test_init(self, df_norm, n_components):
    from flotilla.compute.decomposition import DataFramePCA
    test_pca = DataFramePCA(df_norm, n_components=n_components)

    true_pca = PCA(n_components=n_components)
    true_pca.fit(df_norm.values)
    pc_names = ['pc_{}'.format(i + 1)
                for i in range(true_pca.components_.shape[0])]
    true_pca.components_ = pd.DataFrame(true_pca.components_,
                                        index=pc_names,
                                        columns=df_norm.columns)
    true_pca.explained_variance_ = pd.Series(
        true_pca.explained_variance_, index=pc_names)
    true_pca.explained_variance_ratio_ = pd.Series(
        true_pca.explained_variance_ratio_, index=pc_names)
    true_pca.reduced_space = true_pca.transform(df_norm.values)
    true_pca.reduced_space = pd.DataFrame(true_pca.reduced_space,
                                          index=df_norm.index,
                                          columns=pc_names)

    npt.assert_array_equal(test_pca.X, df_norm.values)
    pdt.assert_frame_equal(test_pca.components_, true_pca.components_)
    pdt.assert_series_equal(test_pca.explained_variance_,
                            true_pca.explained_variance_)
    pdt.assert_series_equal(test_pca.explained_variance_ratio_,
                            true_pca.explained_variance_ratio_)
    pdt.assert_frame_equal(test_pca.reduced_space, true_pca.reduced_space)

# class TestDataFrameNMF():
#     def test_init(self, df_nonneg, n_components, RANDOM_STATE):
#         from flotilla.compute.decomposition import DataFrameNMF
#
#         test_nmf = DataFrameNMF(df_nonneg, n_components=n_components,
#                                 random_state=RANDOM_STATE)
#
#         true_nmf = NMF(n_components=n_components, random_state=RANDOM_STATE)
#         true_nmf.reduced_space = true_nmf.fit_transform(df_nonneg.values)
#         pc_names = ['pc_{}'.format(i + 1) for i in
#                     range(true_nmf.components_.shape[0])]
#         true_nmf.reduced_space = pd.DataFrame(true_nmf.reduced_space,
#                                               index=df_nonneg.index,
#                                               columns=pc_names)
#         true_nmf.components_ = pd.DataFrame(true_nmf.components_,
#                                             index=pc_names,
#                                             columns=df_nonneg.columns)
#
#         npt.assert_almost_equal(test_nmf.X, df_nonneg.values, decimal=4)
#         pdt.assert_frame_equal(test_nmf.components_,
#                                true_nmf.components_)
#         pdt.assert_frame_equal(test_nmf.reduced_space,
#                                true_nmf.reduced_space)
def cal_pca(point_cloud, is_show=False, desired_num_of_feature=3, title="pca demo"):
    pca = PCA(n_components=desired_num_of_feature)
    pca.fit(point_cloud)
    print("*" * 30)
    print("z vector %f, %f, %f" % (pca.components_[2, 0], pca.components_[2, 1], pca.components_[2, 2]))
    if np.inner(pca.components_[2, :], [0, 0, 1]) > 0:
        print("PCA z vector points along +z; rotate 180 degrees about the x axis")
        pca.components_[2, :] = -pca.components_[2, :]
        # r = R.from_euler('x', 180, degrees=True)
        # r_b_o = R.from_dcm(pca.components_.T)
        # r3 = r_b_o * r
        # pca.components_ = r3.as_dcm().T

    # Take the cross product of the y and z components (it should give x) and check
    # it against the remaining PCA axis to confirm the frame is correct
    x_axis_matrix = np.outer(pca.components_[1, :], pca.components_[2, :])
    x_axis = np.asarray([x_axis_matrix[1, 2] - x_axis_matrix[2, 1],
                         x_axis_matrix[2, 0] - x_axis_matrix[0, 2],
                         x_axis_matrix[0, 1] - x_axis_matrix[1, 0]])
    print("*" * 30)
    print("x axis from the cross product:")
    print(x_axis)

    # Check that pca_x points the same way as the x computed from cross(y, z)
    if np.allclose(pca.components_[0, :], x_axis):
        print("pca_x points the same way as x from cross(y, z)")
    else:
        # Opposite direction: flip the (less important) x axis
        print("x direction is wrong; replace it with the correct vector")
        pca.components_[0, :] = x_axis

    if np.inner(pca.components_[0, :], [1, 0, 0]) < 0:
        # We want the gripper to face forward so the end effector does not have to rotate much
        print("PCA x vector points opposite to +x; rotate 180 degrees about the z axis")
        r = R.from_euler('z', 180, degrees=True)
        # r_b_o = R.from_dcm(pca.components_.T)
        r3 = np.dot(pca.components_.transpose(), r.as_dcm().astype(int))
        pca.components_ = r3.transpose()

    if is_show:
        fig = plt.figure(figsize=(3, 3))
        ax = fig.add_subplot(111, projection='3d')
        # ax.set_xlabel('X')
        # ax.set_ylabel('Y')
        # ax.set_zlabel('Z')
        # ax.set_xlim3d(-1, 1)
        # ax.set_ylim3d(-1, 1)
        # ax.set_zlim3d(-1, 1)
        plt.title(title)
        ax.scatter(point_cloud[:, 0], point_cloud[:, 1], point_cloud[:, 2], c='y', s=1)
        xm, ym, zm = get_centroid_from_pc(point_cloud)
        ax.scatter(xm, ym, zm, c='r', s=10)
        discount = 1
        print("*" * 30)
        for length, vector in zip(pca.explained_variance_, pca.components_):
            ax.quiver(xm, ym, zm, vector[0], vector[1], vector[2], length=discount)
            discount /= 3
        plt.tight_layout()
        plt.show()
    return pca.components_, pca.explained_variance_
def showColorClasses(input_matrix):
    cvec = []
    # fig = plt.figure(figsize=(6, 6))
    plt.subplots_adjust(hspace=0.4, wspace=1)
    scaled = scaler.fit_transform(input_matrix)
    x_t = PCA(2).fit_transform(scaled)

    for k in y:
        # z = labels[k]
        cvec.append(label_color[labels[k]])
    cvec = [label_color[labels[k]] for k in y]

    # plt.subplot(1, 3, 1)
    plt.scatter(x_t[:, 0], x_t[:, 1], c=cvec, s=4)
    plt.title("Scatter plot using PC(1,2)")
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.show()

    pca_tot = PCA()
    pca_tot = pca_tot.fit(x)
    components3_4 = pca_tot.components_[3:5]
    components10_11 = pca_tot.components_[10:12]

    pca_tot.components_ = components3_4
    x_3_4 = pca_tot.transform(x)
    # plt.subplot(1, 3, 2)
    plt.scatter(x_3_4[:, 0], x_3_4[:, 1], c=cvec, s=4)
    plt.title("Scatter plot using PC(3,4)")
    plt.xlabel("PC3")
    plt.ylabel("PC4")
    plt.show()

    pca_tot.components_ = components10_11
    x_10_11 = pca_tot.transform(x)
    # plt.subplot(1, 3, 3)
    plt.scatter(x_10_11[:, 0], x_10_11[:, 1], c=cvec, s=4)
    plt.title("Scatter plot using PC(10,11)")
    plt.xlabel("PC10")
    plt.ylabel("PC11")
    plt.show()
def manual_fit(data, from_pc, to_pc=None):
    if to_pc is None:
        # performs PCA with all the PCs
        pca = PCA()
        # fit the PCA on the data (computes the PCs on the data)
        pca.fit(data)
        to_pc = len(pca.components_)
    else:
        to_pc += 1
        pca = PCA(to_pc)
        pca.fit(data)
    # extract the requested PCs (rows of components_; the corresponding columns come along automatically)
    pca.components_ = pca.components_[from_pc:to_pc]
    return pca
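# Added illustrative sketch (synthetic data, names invented) of the pattern used by
# manual_fit() above: once components_ is overwritten with a row slice, transform()
# projects onto just that band of principal components.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
data = rng.randn(200, 10)

pca = PCA().fit(data)
pca.components_ = pca.components_[2:4]   # keep only the 3rd and 4th PCs
band_scores = pca.transform(data)        # shape (200, 2): scores on those two PCs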
def extract_pca_component(img, mask):
    # extract ROI
    data = apply_mask(img, image.index_img(mask, 0))

    # normalize data
    scaler = StandardScaler()
    normalized = scaler.fit_transform(data)

    # pca
    pca = PCA(n_components=1)
    pca.fit(normalized)

    # force the leading component (eigenvector) to have a positive sign
    if np.all(pca.components_ < 0):
        pca.components_ = -1 * pca.components_
    projected = pca.transform(data)

    # variance
    var_projected = np.sum(np.var(projected, axis=0))
    var_original = np.sum(np.var(data, axis=0))
    return projected, var_projected / var_original
def run_sample_weighting_and_pca_transformation(
        training_taskset, validation_taskset, testing_taskset,
        should_standardise, pca_transform, training_event_stats):
    training_responses = training_taskset[2]
    validation_responses = validation_taskset[2]
    testing_responses = testing_taskset[2]

    kde_params = {}
    kde_params["normalised"] = should_standardise
    kde_params["normalisation_statistic"] = training_event_stats[-1][1]
    kde_params["values"] = training_responses
    kde, _, _ = run_kernel_density_estimation(**kde_params)
    kde_params["kde"] = kde
    training_sample_weights = compute_sample_weights_from_kde(**kde_params)
    kde_params["values"] = validation_responses
    validation_sample_weights = compute_sample_weights_from_kde(**kde_params)
    kde_params["values"] = testing_responses
    testing_sample_weights = compute_sample_weights_from_kde(**kde_params)

    num_input_events = len(training_taskset[1][0])
    if pca_transform == True:
        from sklearn.decomposition import PCA
        pca = PCA(n_components=num_input_events)
        pca.fit_transform(training_taskset[1])
    else:
        pca = pca_struct()
        pca.components_ = np.identity(num_input_events)

    run_pca_transformation(training_taskset, pca)
    run_pca_transformation(validation_taskset, pca)
    run_pca_transformation(testing_taskset, pca)

    return (training_taskset, validation_taskset, testing_taskset,
            training_sample_weights, validation_sample_weights,
            testing_sample_weights, pca)
def fit(self, predictors, locations, **kwargs):
    self.locations = locations
    self.pcas = []
    self.n = predictors['n']
    for location in locations:
        raw = extract_n_by_n(predictors, location, **kwargs)
        # pca = PCA(n_components='mle', whiten=True)
        # pca = PCA(n_components=0.95, whiten=True)
        pca = PCA(n_components=2)
        pca = pca.fit(raw)
        components = pca.components_
        pca.components_ = components
        self.pcas.append(pca.fit(raw))
        print "pca: ", location, pca.n_components_, pca.explained_variance_ratio_
def fit(self, X, y=None):
    max_possible_comp = min(X.shape)
    self.max_n_components = int(min([self.max_n_components, max_possible_comp]))
    pca = PCA(n_components=self.max_n_components)
    pca.fit(X)
    exp_var_rat = pca.explained_variance_ratio_
    sum_exp_var_rat = cumsum(exp_var_rat)
    n_comp_to_retain = 0
    for n_comp_to_retain in range(self.min_n_components, self.max_n_components + 1):
        if sum_exp_var_rat[n_comp_to_retain - 1] >= self.target_variance:
            break
    pca.components_ = pca.components_[:n_comp_to_retain, :]
    # Note: pca not needed for the functioning of the class, but keeping around for debug reasons
    self._pca = pca
    self.scalings_ = pca.components_.T
    return self
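# Added illustrative sketch: the cumulative-explained-variance rule used by fit()
# above, written as a standalone helper. The helper name and defaults are invented
# for illustration and are not part of the class above.
import numpy as np
from sklearn.decomposition import PCA

def components_for_variance(X, target_variance=0.95, min_components=1):
    """Return the smallest leading block of principal components whose
    cumulative explained-variance ratio reaches target_variance."""
    pca = PCA().fit(X)
    cumulative = np.cumsum(pca.explained_variance_ratio_)
    n_keep = max(min_components, int(np.searchsorted(cumulative, target_variance)) + 1)
    return pca.components_[:n_keep, :]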
def naiveBayesClassifier(input_matrix, classes, firstPC=0, lastPC=0):
    scaled = scaler.fit_transform(input_matrix)
    if firstPC == 0 and lastPC == 0:
        x_train, x_test, y_train, y_test = train_test_split(scaled, classes, test_size=0.1)
    else:
        # Select only PCs in the requested range
        pca_tot = PCA()
        pca_tot = pca_tot.fit(scaled)
        twoComponents = pca_tot.components_[firstPC:lastPC]
        pca_tot.components_ = twoComponents
        x = pca_tot.transform(scaled)
        # Train and use the model
        x_train, x_test, y_train, y_test = train_test_split(x, classes, test_size=0.1)
    clf = GaussianNB()
    clf.fit(x_train, y_train)
    prediction = clf.predict(x_test)
    accuracy = accuracy_score(y_test, prediction)
    print('Accuracy score: ' + str(accuracy))
def cal_pca_for_pose_data_generator(point_cloud, desired_num_of_feature=3):
    pca = PCA(n_components=desired_num_of_feature)
    pca.fit(point_cloud)
    # print("z vector %f, %f, %f" % (pca.components_[2, 0], pca.components_[2, 1], pca.components_[2, 2]))
    if np.inner(pca.components_[2, :], [0, 0, 1]) > 0:
        # print("PCA z vector points along +z; rotate 180 degrees about the x axis")
        pca.components_[2, :] = -pca.components_[2, :]
        # r = R.from_euler('x', 180, degrees=True)
        # r_b_o = R.from_dcm(pca.components_.T)
        # r3 = r_b_o * r
        # pca.components_ = r3.as_dcm().T

    # Take the cross product of the y and z components (it should give x) and check
    # it against the remaining PCA axis to confirm the frame is correct
    x_axis_matrix = np.outer(pca.components_[1, :], pca.components_[2, :])
    x_axis = np.asarray([x_axis_matrix[1, 2] - x_axis_matrix[2, 1],
                         x_axis_matrix[2, 0] - x_axis_matrix[0, 2],
                         x_axis_matrix[0, 1] - x_axis_matrix[1, 0]])
    # print("*" * 30)
    # print("x axis from the cross product:")
    # print(x_axis)

    # Check that pca_x points the same way as the x computed from cross(y, z)
    if np.allclose(pca.components_[0, :], x_axis):
        # print("pca_x points the same way as x from cross(y, z)")
        pass
    else:
        # Opposite direction: flip the (less important) x axis
        # print("x direction is wrong; replace it with the correct vector")
        pca.components_[0, :] = x_axis

    if np.inner(pca.components_[0, :], [1, 0, 0]) < 0:
        # We want the gripper to face forward so the end effector does not have to rotate much
        # print("PCA x vector points opposite to +x; rotate 180 degrees about the z axis")
        r = R.from_euler('z', 180, degrees=True)
        # r_b_o = R.from_dcm(pca.components_.T)
        r3 = np.dot(pca.components_.transpose(), r.as_dcm().astype(int))
        pca.components_ = r3.transpose()

    return pca.components_, pca.explained_variance_
def apply_cv(epochs): count = 1 confusion_matrixes = [] confusion_matrixes_percent = [] predicted = '' test_label = '' firstIterCV = True probabilities = np.array([[]], ndmin=2) predictions = np.array([]) best_threshold = [] cv_probabilities = [] cv_probabilities_label = [] for train, test in cv: ## Train Data processing ## train_data = epochs._data[train] train_label = label[train] # Online simulation flag if FILTER_METHOD is 'WINDOWED': # epochs should have one epoch only train_bp = mne.filter.band_pass_filter( train_data, sfreq, Fp1=2, Fp2=h_freq, copy=True, filter_length=None, method='fft', iir_params=None) # bandpass on one epoch if FILTER_METHOD is 'NC' or FILTER_METHOD is 'LFILT': train_bp = train_data train_bp = train_bp[:, :, paddingIdx:paddingIdx + (int((tmax - tmin) * sfreq))] for trial in range(train_bp.shape[0]): for ch in range(train_bp.shape[1]): train_bp[trial, ch, :] = train_bp[trial, ch, :] - np.mean( train_bp[trial, ch, :]) # plt.figure() # plt.plot(train_bp[7,:].T) # plt.savefig(str(FILTER_METHOD)+'.png') # Normalization (train_normalized, trainShiftFactor, trainScaleFactor) = normalizeAcrossEpoch(train_bp, 'MinMax') # Downsampling train_downsampling = train_normalized[:, :, ::decim_factor] # Merge (reshape) channel and time for the PCA train_reshaped = train_downsampling.reshape( train_downsampling.shape[0], -1) # PCA initialisation if APPLY_PCA is False: pca = None train_pcaed = train_reshaped else: pca = PCA(0.95) pca.fit(train_reshaped) pca.components_ = -pca.components_ # inversion of vector to be constistant with Inaki's code train_pcaed = pca.transform(train_reshaped) # PCA # train_pcaed = train_reshaped ## Test data processing ## test_data = epochs._data[test] test_label = label[test] # Compute_feature does the same steps as for train, but requires a computed PCA (that we got from train) # (bandpass, norm, ds, and merge channel and time) test_pcaed = compute_features(test_data, sfreq, l_freq, h_freq, decim_factor, trainShiftFactor, trainScaleFactor, pca, FILTER_METHOD, tmin, tmax, paddingIdx, iir_params=dict(a=a, b=b)) # test_pcaed = compute_features(test_data,sfreq,l_freq,h_freq,decim_factor,trainShiftFactor,trainScaleFactor,pca=None) ## Test ## train_x = train_pcaed test_x = test_pcaed # Classifier init # RF = dict(trees=100, maxdepth=None) # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], n_jobs=n_jobs) # cls = RandomForestClassifier(n_estimators=RF['trees'], max_features='auto', max_depth=RF['maxdepth'], class_weight="balanced", n_jobs=n_jobs) # cls = LDA(solver='eigen') # cls = QDA(reg_param=0.3) # regularized LDA # cls.fit( train_x, train_label ) # Y_pred= cls.predict( test_x ) # prediction = Y_pred # Fitting cls = rLDA(regcoeff) cls.fit(train_x, train_label) predicted = cls.predict(test_x) probs = cls.predict_proba(test_x) prediction = np.array(predicted) if useLeaveOneOut is True: if firstIterCV is True: probabilities = np.append(probabilities, probs, axis=1) firstIterCV = False predictions = np.append(predictions, prediction) else: probabilities = np.append(probabilities, probs, axis=0) predictions = np.append(predictions, prediction) else: predictions = np.append(predictions, prediction) probabilities = np.append(probabilities, probs) # Performance if useLeaveOneOut is not True: cm = np.array(confusion_matrix(test_label, prediction)) cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] confusion_matrixes.append(cm) confusion_matrixes_percent.append(cm_normalized) avg_confusion_matrixes 
= np.mean(confusion_matrixes_percent, axis=0) print('CV #' + str(count)) print('Prediction: ' + str(prediction)) print(' Actual: ' + str(test_label)) # Append probs to the global list probs_np = np.array(probs) cv_probabilities.append(probs_np[:, 0]) cv_probabilities_label.append(test_label) # if useLeaveOneOut is not True: # print('Confusion matrix') # print(cm) # print('Confusion matrix (normalized)') # print(cm_normalized) # print('---') # print('True positive rate: '+str(cm_normalized[0][0])) # print('True negative rate: '+str(cm_normalized[1][1])) print('===================') ## One CV done, go to the next one count += 1 best_threshold = None cv_prob_linear = np.ravel(cv_probabilities) cv_prob_label_np = np.array(cv_probabilities_label) cv_prob_label_linear = np.ravel(cv_prob_label_np) threshold_list = np.linspace(0, 1, 100) biglist_fpr = [] biglist_tpr = [] biglist_thresh = [] biglist_cms = [] for thresh in threshold_list: biglist_pred = [ 4 if x < thresh else 3 for x in cv_prob_linear ] # list comprehension to quickly go through the list. biglist_cm = confusion_matrix(cv_prob_label_linear, biglist_pred) biglist_cm_norm = biglist_cm.astype('float') / biglist_cm.sum( axis=1)[:, np.newaxis] biglist_cms.append(biglist_cm_norm) biglist_tpr.append(biglist_cm_norm[0][0]) biglist_fpr.append(biglist_cm_norm[1][0]) biglist_thresh.append(thresh) biglist_auc = auc(biglist_fpr, biglist_tpr) # Make a subset of data where FPR < MAX_FPR idx_below_maxfpr = np.where(np.array(biglist_fpr) < MAX_FPR) fpr_below_maxfpr = np.array(biglist_fpr)[idx_below_maxfpr[0]] tpr_below_maxfpr = np.array(biglist_tpr)[idx_below_maxfpr[0]] # Look for the best (max value) FPR in that subset best_tpr_below_maxfpr = np.max(tpr_below_maxfpr) best_tpr_below_maxfpr_idx = np.array( np.where( biglist_tpr == best_tpr_below_maxfpr)).ravel() # get its idx # Get the associated TPRs best_tpr_below_maxfpr_associated_fpr = np.array( biglist_fpr)[best_tpr_below_maxfpr_idx] # Get the best (min value) in that subset best_associated_fpr = np.min(best_tpr_below_maxfpr_associated_fpr) # ... get its idx best_associated_fpr_idx = np.array( np.where(biglist_fpr == best_associated_fpr)).ravel() # The best idx is the one that is on both set best_idx = best_tpr_below_maxfpr_idx[np.in1d(best_tpr_below_maxfpr_idx, best_associated_fpr_idx)] plt.plot(biglist_fpr, biglist_tpr) plt.xlabel('False positive rate') plt.ylabel('True positive rate') best_threshold = threshold_list[best_idx] print('#################################') print('Best treshold:' + str(best_threshold)) print('Gives a TPR of ' + str(best_tpr_below_maxfpr)) print('And a FPR of ' + str(best_associated_fpr)) print('CM') print(biglist_cms[best_idx[0]]) return (biglist_auc, best_threshold)
def main(filename, xtrains_percent=0.8, maxfeature=1, fit_ylabel=False, nn_estimator=100, sepaLabel=True, treeLabel=False, seed=42, pcaLabel=False, n_comp=2, sepa2=False, time_label=False, stream=False, sfl=False): mugen = float("inf") all_start = time.time() rng = np.random.RandomState(seed) # httpとsmtpは別の方法でデータ取得 if filename == '/home/anegawa/Dropbox/http.mat' or filename == '/home/anegawa/Dropbox/smtp.mat': mat = {} f = h5py.File(filename) for k, v in f.items(): mat[k] = np.array(v) X = mat['X'].T y2 = mat['y'][0] y3 = [] for i in range(len(y2)): y3.append(int(y2[i])) y = np.reshape(y3, [len(y3), 1]) else: mat = scipy.io.loadmat(filename) X = mat['X'] y = mat['y'] rate = xtrains_percent max_feat = int(maxfeature) if max_feat == 3: max_feat = X.shape[1] if treeLabel: anegawa = 0 else: print('X_train\'s rate : ' + str(rate)) print('max_features : ' + str(max_feat)) print('fit_ylabel : ' + str(fit_ylabel)) print('nn_estimator : ' + str(nn_estimator)) print('sepaLabel : ' + str(sepaLabel)) clf = IsolationForest(random_state=rng) clf.n_estimators = nn_estimator clf.verbose = 0 clf.max_features = max_feat if (str(filename) == '/home/anegawa/Dropbox/shuttle.mat'): clf.contamination = 0.07 elif (str(filename) == '/home/anegawa/Dropbox/http.mat'): clf.contamination = 0.004 elif (str(filename) == '/home/anegawa/Dropbox/pima.mat'): clf.contamination = 0.35 elif (str(filename) == '/home/anegawa/Dropbox/mammography.mat'): clf.contamination = 0.02 elif (str(filename) == '/home/anegawa/Dropbox/cover.mat'): clf.contamination = 0.009 elif (str(filename) == '/home/anegawa/Dropbox/breastw.mat'): clf.contamination = 0.35 elif (str(filename) == '/home/anegawa/Dropbox/arrhythmia.mat'): clf.contamination = 0.15 elif (str(filename) == '/home/anegawa/Dropbox/ionosphere.mat'): clf.contamination = 0.36 elif (str(filename) == '/home/anegawa/Dropbox/satellite.mat'): clf.contamination = 0.32 elif (str(filename) == '/home/anegawa/Dropbox/annthyroid.mat'): clf.contamination = 0.07 elif (str(filename) == '/home/anegawa/Dropbox/smtp.mat'): clf.contamination = 0.03 / 100 else: print('cannot file it.') # Generate train data a = rng.randn(400, 2) X = 0.3 * a X_train = np.r_[X + 2, X - 2] # X_train = np.ones([400, 2]) # Generate some regular novel observations X = 0.3 * rng.randn(400, 2) X_test = np.r_[X + 2, X - 2] # X_test = np.ones([400, 2]) # Generate some abnormal novel observations X_outliers = np.random.exponential(1. 
/ 0.001, size=[20, 2]) # X_outliers = rng.uniform(low=-4, high=4, size=(20, 2)) # X_outliers = np.zeros([20, 2]) X_test = np.r_[X_test, X_outliers] X_train_correct = np.ones([X_train.shape]) hoge = 1 / (1 - rate) cross_count = int(np.ceil(hoge)) if cross_count > hoge: cross_count = cross_count - 1 sum_auc = 0 sum_accuracy = 0 pca_fit_time = 0 pca_transform_train_time = 0 pca_transform_test_time = 0 test_time = 0 fit_time = 0 sum_train_time = 0 # for count in range(cross_count): if sepaLabel == True: # separated # data cut X_anomaly = [] X_normal = [] for i in range(len(X)): if y[i] == 1: X_anomaly.append(X[i]) else: X_normal.append(X[i]) cutter_anomaly = int(np.ceil(len(X_anomaly) * rate)) cutter_normal = int(np.ceil(len(X_normal) * rate)) for count in range(cross_count): part_anomaly = int(np.ceil(cutter_anomaly * count)) part_normal = int(np.ceil(cutter_normal * count)) X_train = [] X_train_correct = [] X_test = [] X_test_correct = [] for i, k in zip(range(len(X_anomaly)), range(part_anomaly, part_anomaly + len(X_anomaly))): while k >= len(X_anomaly): k = k - len(X_anomaly) if i < cutter_anomaly: X_train.append(X_anomaly[k]) X_train_correct.append(-1) else: X_test.append(X_anomaly[k]) X_test_correct.append(-1) for i, k in zip(range(len(X_normal)), range(part_normal, part_normal + len(X_normal))): while k >= len(X_normal): k = k - len(X_normal) if i < cutter_normal: X_train.append(X_normal[k]) X_train_correct.append(1) else: X_test.append(X_normal[k]) X_test_correct.append(1) if sfl: X_train_set = [] X_test_set = [] for i in range(len(X_train)): buf = [] buf.append(X_train[i]) buf.append(X_train_correct[i]) X_train_set.append(buf) for i in range(len(X_test)): buf = [] buf.append(X_test[i]) buf.append(X_test_correct[i]) X_test_set.append(buf) random.shuffle(X_train_set) random.shuffle(X_test_set) X_train = [] X_test = [] X_train_correct = [] X_test_correct = [] for i in range(len(X_train_set)): X_train.append(X_train_set[i][0]) X_train_correct.append(X_train_set[i][1]) for i in range(len(X_test_set)): X_test.append(X_test_set[i][0]) X_test_correct.append(X_test_set[i][1]) else: # mixed cutter = len(X) * rate # test start this index at the first time for count in range(cross_count): part = int(np.ceil(cutter * count)) # while part >= len(X): # part = part - len(X) X_train = [] X_train_correct = [] X_test = [] X_test_correct = [] for i, k in zip(range(len(X)), range(part, part + len(X))): while k >= len(X): k = k - len(X) if i < len(X) * rate: X_train.append(X[k]) X_train_correct.append(y[k]) else: X_test.append(X[k]) X_test_correct.append(y[k]) for q in range(len(X_train_correct)): j = X_train_correct[q] if (j == 1): X_train_correct[q] = -1 else: X_train_correct[q] = 1 for w in range(len(X_test_correct)): j = X_test_correct[w] if (j == 1): X_test_correct[w] = -1 else: X_test_correct[w] = 1 # owari # finished cutting data if pcaLabel: pca_fit_start = time.time() pca = PCA(copy=True, iterated_power='auto', n_components=n_comp, random_state=None, svd_solver='auto', tol=0.0, whiten=False) pca2 = PCA(copy=True, iterated_power='auto', random_state=None, svd_solver='auto', tol=0.0, whiten=False) if sepa2: # if False: print("こっち入ってるけどええんか!?") pca2.fit(X_train_normal) component = pca2.components_ component2 = np.sort(pca2.components_) if n_comp < len(component2): pca2.components_ = component2[0:n_comp] # print(pca2.components_.shape) X_train = pca2.transform(X_train) X_test = pca2.transform(X_test) else: pca.fit(X_train) pca_fit_finish = time.time() pca_transform_train_start = time.time() X_train = 
pca.transform(X_train) pca_transform_train_finish = time.time() # a = X_test[0] # X_test = pca.transform(a) # if not stream: # pca_transform_test_start = time.time() # X_test = pca.transform(X_test) #stream version # pca_transform_test_finish = time.time() # pca_transform_test_time += (pca_transform_test_finish - pca_transform_test_start) clf.max_features = n_comp pca_fit_time += (pca_fit_finish - pca_fit_start) pca_transform_train_time += (pca_transform_train_finish - pca_transform_train_start) fit_start = time.time() if fit_ylabel: clf.fit(X_train, X_train_correct, sample_weight=None) else: clf.fit(X_train, y=None, sample_weight=None) fit_finish = time.time() fit_time += (fit_finish - fit_start) # if pcaLabel and stream: if stream: sum_score_auc = [] sum_score_acc = [] # print(X_test[0:1]) for i in range(len(X_test)): if pcaLabel: pca_transform_test_start = time.time() a = [X_test[i]] X_test_pca = pca.transform(a) pca_transform_test_finish = time.time() pca_transform_test_time += (pca_transform_test_finish - pca_transform_test_start) else: X_test_pca = [X_test[i]] test_start = time.time() y_pred_test, a_score = clf.predict(X_test_pca) test_finish = time.time() test_time += (test_finish - test_start) sum_score_auc.append(a_score) sum_score_acc.append(y_pred_test) a_score = sum_score_auc y_pred_test = sum_score_acc else: if pcaLabel: pca_transform_test_start = time.time() X_test = pca.transform(X_test) # stream version pca_transform_test_finish = time.time() pca_transform_test_time += (pca_transform_test_finish - pca_transform_test_start) test_start = time.time() y_pred_test, a_score = clf.predict(X_test) test_finish = time.time() test_time += (test_finish - test_start) # a_score = clf.decision_function(X_test) acc = calc_accuracy(X_test_correct, y_pred_test, treeLabel) AUC = calc_AUC(X_test_correct, a_score, treeLabel) sum_auc += AUC sum_accuracy = acc # return AUC # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # plot the line, the samples, and the nearest vectors to the plane # xx, yy = np.meshgrid(np.linspace(-200, 200, 1000), np.linspace(-200, 200, 1000)) # # clf.max_features = 2 # # print(yy.ravel()) # # Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) # # # Z = Z.reshape(xx.shape) # # plt.figure(figsize=(100, 200)) # plt.suptitle("satellite") # # plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r) # # X_train = np.array(X_train) # X_test = np.array(X_test) # # lim = True # x = (-200, 200) # y = (-200, 300) # # for i,j in zip(range(2), [True, False]): # small = j # trueがsmallestね # # plt.subplot(2, 2, i+1) # if small: # plt.title("smallest") # else: # plt.title("largest") # # if small: # # b1 = plt.scat # # plot the line, the samples, and the nearest vectors to the plane # xx, yy = np.meshgrid(np.linspace(-200, 200, 1000), np.linspace(-200, 200, 1000)) # # clf.max_features = 2 # # print(yy.ravel()) # # Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) # # # Z = Z.reshape(xx.shape) # # plt.figure(figsize=(100, 200)) # plt.suptitle("satellite") # # plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r) # # X_train = np.array(X_train) # X_test = np.array(X_test) # # lim = True # x = (-200, 200) # y = (-200, 300) # # for i,j in zip(range(2), [True, False]): # small = j # trueがsmallestね # # plt.subplot(2, 2, i+1) # if small: # plt.title("smallest") # else: # plt.title("largest") # # if small: # # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, 
X_test.shape[1]-1], X_test[:, X_test.shape[1]-2], c='green', s=20, edgecolor='k') # else: # # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # plt.legend([b2],["testing observations"], # loc="upper left") # # plt.legend([b1], ["training observations"], # # loc="upper left") # # # # plt.subplot(2, 2, i+3) # if small: # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, X_test.shape[1] - 1], X_test[:, X_test.shape[1] - 2], c='green', s=20, edgecolor='k') # else: # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # # plt.legend([b2], ["testing observations"], # # loc="upper left") # plt.legend([b1], ["training observations"], # loc="upper left") # plt.show()ter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, X_test.shape[1]-1], X_test[:, X_test.shape[1]-2], c='green', s=20, edgecolor='k') # else: # # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # plt.legend([b2],["testing observations"], # loc="upper left") # # plt.legend([b1], ["training observations"], # # loc="upper left") # # # # plt.subplot(2, 2, i+3) # if small: # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, X_test.shape[1] - 1], X_test[:, X_test.shape[1] - 2], c='green', s=20, edgecolor='k') # else: # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # # plt.legend([b2], ["testing observations"], # # loc="upper left") # plt.legend([b1], ["training observations"], # loc="upper left") # plt.show() # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ auc2 = sum_auc / cross_count # print(sum_accuracy) acc2 = sum_accuracy / cross_count # calc time all_finish = time.time() all_time = all_finish - all_start pca_fit_time = pca_fit_time / cross_count pca_transform_train_time = pca_transform_train_time / cross_count pca_transform_test_time = pca_transform_test_time / cross_count test_time = test_time / cross_count fit_time = fit_time 
/ cross_count sum_train_time = fit_time + pca_fit_time + pca_transform_train_time sum_test_time = pca_transform_test_time + test_time # print("sum_train_time : " + str(sum_train_time)) # print("pca_transform_train_time : " + str(pca_transform_train_time)) # print("pca_fit_time : " + str(pca_fit_time)) # print("test_time : " + str(test_time)) # print("fit_time : " + str(fit_time)) # print("all_time : " + str(all_time)) # return if time_label: return all_time, pca_fit_time + pca_transform_train_time, fit_time, pca_transform_test_time, test_time, sum_train_time, sum_test_time elif treeLabel: if math.isnan(auc2): majikayo = True return auc2 else: return auc2, acc2
# MAKE DF from BIOM table
col = ['sample']
for i in list(range(8)):
    col.append('OTU' + str(i))  # list of column names
table = pd.read_csv('{}/{}'.format(cwd, inputfile), delim_whitespace=True, header=None)
table.columns = col  # name columns
table = table.set_index('sample')  # index by sample names

null_data = table[table.isnull().any(axis=1)]
print '**Warning: the following lines are missing data. All nulls will be filled with zeros'
print null_data
table = table.fillna(0)  # replace any missing values with zeros
table.to_csv(r'pd_df.csv')
print 'pandas dataframe saved as: pd_df.csv'

## REMOVING PC1
pca = PCA(n_components=8)  # keeps 8 components? was 100 in original script - not sure why
# X = pca.fit_transform(table.apply(np.log(table)))  # fit df into model
X = pca.fit_transform(table)
# print X[1]
Y = X[:, 1:]  # Y = every value in list X except the first one
# print Y[1]  # just showing that the first value is removed
untrans = pca.inverse_transform(X)  # get original data matrix back (w/out PC1 variance)
pca.components_ = pca.components_[1:]  # remove the first PCA vector
trans = pca.inverse_transform(Y)
print 'new pca vectors saved as: transformed_pca.txt'
with open('transformed_pca.txt', 'w') as f:
    count = 0
    while count < len(trans):
        f.write(str(trans[count]) + '\n')  # write each reconstructed row as text
        count += 1
def query(query_list, feature_list, out_dir, top=200, pca_thresh=0.9, out_dim=None, pca_file='-1', qe_fn=None, mask_pred=False, euclidean_dist=False, rmac=False, mac=False, aml=False): """Query by list.""" print(Notify.INFO, 'Read feature', Notify.ENDC) print(Notify.INFO, 'Use R-MAC: ', rmac, Notify.ENDC) num_regions, db_feat, image_names = read_feature(feature_list, euclidean_dist, rmac, mac) # below codes are for predicted mask visualization. itv = 10 while mask_pred: idx0 = randint(itv, len(feature_list) - 1 - itv) idx1 = randint(idx0 - itv, idx0 + itv) print(Notify.INFO, 'Pair idx', (idx0, idx1), Notify.ENDC) # FIXME: adapt the image ext. image_path0 = feature_list[idx0].replace('npy', 'JPG') image_path1 = feature_list[idx1].replace('npy', 'JPG') # some images end with '.jpg' if not os.path.exists(image_path0): image_path0 = feature_list[idx0].replace('npy', 'jpg') if not os.path.exists(image_path1): image_path1 = feature_list[idx1].replace('npy', 'jpg') rv0 = db_feat[idx0 * num_regions:(idx0 + 1) * num_regions] rv1 = db_feat[idx1 * num_regions:(idx1 + 1) * num_regions] mask_prediction(rv0, rv1, image_path0, image_path1) print(Notify.INFO, '# Feature', len(feature_list), Notify.ENDC) print(Notify.INFO, '# Dim', db_feat.shape[-1], Notify.ENDC) print(Notify.INFO, '# Reginal vector', num_regions, Notify.ENDC) # perform PCA whitening. use_pca = (pca_thresh is not None or out_dim is not None or pca_file != '-1') and len(image_names) > out_dim if pca_file != '-1': pca_data = np.load(pca_file).item() pca = PCA(whiten=True, copy=True, random_state=0) pca.mean_ = pca_data['mean'] pca.components_ = pca_data['eigvec'] pca.explained_variance_ = pca_data['eigval'] else: pca = None if use_pca: db_trans_feat, pca = whitening(db_feat, num_regions, pca_thresh, out_dim, pca=pca) print(Notify.INFO, 'PCA-ed feature dim', db_trans_feat.shape[1], Notify.ENDC) else: print(Notify.WARNING, 'No whitening', Notify.ENDC) db_trans_feat = db_feat if query_list is not None: query_num_regions, query_feat, query_names = read_feature( query_list, euclidean_dist, rmac, mac) assert (num_regions == query_num_regions) if use_pca: query_trans_feat, _ = whitening(query_feat, num_regions, pca=pca) else: query_trans_feat = query_feat query_num = len(query_list) else: query_trans_feat = db_trans_feat query_num = len(feature_list) # output path name if not os.path.exists(out_dir): os.makedirs(out_dir) match_index_file = os.path.join(out_dir, 'match_pairs') print(Notify.INFO, 'Compute nn distance', Notify.ENDC) start = time.time() query_result = match_gpu(query_trans_feat, db_trans_feat, num_regions, top, euclidean_dist=euclidean_dist, aml=aml) end = time.time() print(Notify.INFO, 'Time cost in matching', end - start, 's', Notify.ENDC) if qe_fn is not None: for _ in range(ARGS.et): print(Notify.INFO, 'Expand queries and re-match', Notify.ENDC) qe_feature = qe_fn(query_trans_feat, db_trans_feat, query_result, num_regions) query_result = match_gpu(qe_feature, db_trans_feat, num_regions, top, aml=aml) content = [] aps = [] for i in range(query_num): inds = query_result[i][0] dists = query_result[i][1] content.extend([ ' '.join([str(i), str(inds[j]), str(dists[j] / num_regions)]) for j in range(len(inds)) ]) write_list(content, match_index_file) return 0
def run_to_task(task_to): import general_utils from general_utils import RuntimeDeterminedEnviromentVars import models.architectures as architectures from data.load_ops import resize_rescale_image from data.load_ops import rescale_image import utils from data.task_data_loading import load_and_specify_preprocessors_for_representation_extraction from data.task_data_loading import load_and_specify_preprocessors_for_input_depends_on_target import lib.data.load_ops as load_ops tf.logging.set_verbosity(tf.logging.ERROR) args = parser.parse_args() cfg, is_transfer, task, config_name = generate_cfg(args.config, args.vid, args) if task == 'class_places' or task == 'class_1000': synset = get_synset(task) if task == 'jigsaw': cfg['preprocess_fn'] = load_and_specify_preprocessors_for_input_depends_on_target print("Doing {task}".format(task=task)) general_utils = importlib.reload(general_utils) tf.reset_default_graph() training_runners = { 'sess': tf.InteractiveSession(), 'coord': tf.train.Coordinator() } ############## Start dataloading workers ############## if is_transfer: get_data_prefetch_threads_init_fn = utils.get_data_prefetch_threads_init_fn_transfer setup_input_fn = utils.setup_input_transfer else: setup_input_fn = utils.setup_input get_data_prefetch_threads_init_fn = utils.get_data_prefetch_threads_init_fn ############## Set Up Inputs ############## # tf.logging.set_verbosity( tf.logging.INFO ) inputs = setup_input_fn(cfg, is_training=False, use_filename_queue=False) RuntimeDeterminedEnviromentVars.load_dynamic_variables(inputs, cfg) RuntimeDeterminedEnviromentVars.populate_registered_variables() start_time = time.time() ############## Set Up Model ############## model = utils.setup_model(inputs, cfg, is_training=IN_TRAIN_MODE) m = model['model'] model['saver_op'].restore(training_runners['sess'], cfg['model_path']) data_prefetch_init_fn = get_data_prefetch_threads_init_fn( inputs, cfg, is_training=False, use_filename_queue=False) prefetch_threads = threading.Thread(target=data_prefetch_init_fn, args=(training_runners['sess'], training_runners['coord'])) prefetch_threads.start() list_of_fname = np.load( '/home/ubuntu/task-taxonomy-331b/assets/aws_data/video{}_fname.npy'. format(args.vid)) import errno try: os.mkdir('/home/ubuntu/{}'.format(task)) os.mkdir('/home/ubuntu/{}/vid1'.format(task)) os.mkdir('/home/ubuntu/{}/vid2'.format(task)) os.mkdir('/home/ubuntu/{}/vid3'.format(task)) os.mkdir('/home/ubuntu/{}/vid4'.format(task)) except OSError as e: if e.errno != errno.EEXIST: raise curr_comp = np.zeros((3, 64)) curr_fit_img = np.zeros((256, 256, 3)) embeddings = [] curr_vp = [] curr_layout = [] ############## Run First Batch ############## def rescale_l_for_display(batch, rescale=True): ''' Prepares network output for display by optionally rescaling from [-1,1], and by setting some pixels to the min/max of 0/1. This prevents matplotlib from rescaling the images. 
''' if rescale: display_batch = [ rescale_image(im.copy(), new_scale=[0, 100], current_scale=[-1, 1]) for im in batch ] else: display_batch = batch.copy() for im in display_batch: im[0, 0, 0] = 1.0 # Adjust some values so that matplotlib doesn't rescale im[0, 1, 0] = 0.0 # Now adjust the min return display_batch for step_num in range(inputs['max_steps'] - 1): #for step_num in range(20): #if step_num > 0 and step_num % 20 == 0: print(step_num) if is_transfer: (input_batch, target_batch, data_idx, predicted) = training_runners['sess'].run([ m.input_images, m.target_images, model['data_idxs'], m.decoder.decoder_output ]) else: (input_batch, target_batch, data_idx, predicted) = training_runners['sess'].run([ m.input_images, m.targets, model['data_idxs'], m.decoder_output ]) if task == 'segment2d' or task == 'segment25d': from sklearn.decomposition import PCA x = np.zeros((32, 256, 256, 3), dtype='float') k_embed = 8 for i in range(predicted.shape[0]): embedding_flattened = np.squeeze(predicted[i]).reshape( (-1, 64)) embeddings.append(embedding_flattened) if len(embeddings) > k_embed: embeddings.pop(0) pca = PCA(n_components=3) pca.fit(np.vstack(embeddings)) min_order = None min_dist = float('inf') copy_of_comp = np.copy(pca.components_) for order in itertools.permutations([0, 1, 2]): #reordered = pca.components_[list(order), :] #dist = np.linalg.norm(curr_comp-reordered) pca.components_ = copy_of_comp[order, :] lower_dim = pca.transform(embedding_flattened).reshape( (256, 256, -1)) lower_dim = (lower_dim - lower_dim.min()) / ( lower_dim.max() - lower_dim.min()) dist = np.linalg.norm(lower_dim - curr_fit_img) if dist < min_dist: min_order = order min_dist = dist pca.components_ = copy_of_comp[min_order, :] lower_dim = pca.transform(embedding_flattened).reshape( (256, 256, -1)) lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min()) curr_fit_img = np.copy(lower_dim) x[i] = lower_dim predicted = x if task == 'curvature': std = [31.922, 21.658] mean = [123.572, 120.1] predicted = (predicted * std) + mean predicted[:, 0, 0, :] = 0. predicted[:, 1, 0, :] = 1. predicted = np.squeeze( np.clip(predicted.astype(int) / 255., 0., 1.)[:, :, :, 0]) if task == 'colorization': maxs = np.amax(predicted, axis=-1) softmax = np.exp(predicted - np.expand_dims(maxs, axis=-1)) sums = np.sum(softmax, axis=-1) softmax = softmax / np.expand_dims(sums, -1) kernel = np.load( '/home/ubuntu/task-taxonomy-331b/lib/data/pts_in_hull.npy') gen_target_no_temp = np.dot(softmax, kernel) images_resized = np.zeros([0, 256, 256, 2], dtype=np.float32) for image in range(gen_target_no_temp.shape[0]): temp = scipy.ndimage.zoom(np.squeeze( gen_target_no_temp[image]), (4, 4, 1), mode='nearest') images_resized = np.append(images_resized, np.expand_dims(temp, axis=0), axis=0) inp_rescale = rescale_l_for_display(input_batch) output_lab_no_temp = np.concatenate((inp_rescale, images_resized), axis=3).astype(np.float64) for i in range(input_batch.shape[0]): output_lab_no_temp[i, :, :, :] = skimage.color.lab2rgb( output_lab_no_temp[i, :, :, :]) predicted = output_lab_no_temp just_rescale = [ 'autoencoder', 'denoise', 'edge2d', 'edge3d', 'keypoint2d', 'keypoint3d', 'reshade', 'rgb2sfnorm', 'impainting_whole' ] if task in just_rescale: predicted = (predicted + 1.) / 2. predicted = np.clip(predicted, 0., 1.) predicted[:, 0, 0, :] = 0. predicted[:, 1, 0, :] = 1. 
just_clip = ['rgb2depth', 'rgb2mist'] if task in just_clip: predicted = np.exp(predicted * np.log(2.0**16.0)) - 1.0 predicted = np.log(predicted) / 11.09 predicted = (predicted - 0.64) / 0.18 predicted = (predicted + 1.) / 2 predicted[:, 0, 0, :] = 0. predicted[:, 1, 0, :] = 1. if task == 'segmentsemantic_rb': label = np.argmax(predicted, axis=-1) COLORS = ('white', 'red', 'blue', 'yellow', 'magenta', 'green', 'indigo', 'darkorange', 'cyan', 'pink', 'yellowgreen', 'black', 'darkgreen', 'brown', 'gray', 'purple', 'darkviolet') rgb = (input_batch + 1.) / 2. preds = [ color.label2rgb(np.squeeze(x), np.squeeze(y), colors=COLORS, kind='overlay')[np.newaxis, :, :, :] for x, y in zip(label, rgb) ] predicted = np.vstack(preds) if task in ['class_1000', 'class_places']: for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) sorted_pred = np.argsort(predict_output)[::-1] top_5_pred = [synset[sorted_pred[i]] for i in range(5)] to_print_pred = "Top 5 prediction: \n {}\n {}\n {}\n {} \n {}".format( *top_5_pred) img = Image.new('RGBA', (400, 200), (255, 255, 255)) d = ImageDraw.Draw(img) fnt = ImageFont.truetype( '/usr/share/fonts/truetype/dejavu/DejaVuSerifCondensed.ttf', 25) d.text((20, 5), to_print_pred, fill=(255, 0, 0), font=fnt) img.save(to_store_name, 'PNG') elif task == 'vanishing_point_well_defined': counter = 0 for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) curr_vp.append( plot_vanishing_point_smoothed( predict_output, (input_batch[counter] + 1.) / 2., to_store_name, curr_vp)) if len(curr_vp) > 5: curr_vp.pop(0) counter += 1 #scipy.misc.toimage(result, cmin=0.0, cmax=1.0).save(to_store_name) elif task == 'room_layout': mean = np.array([ 0.006072743318127848, 0.010272365569691076, -3.135909774145468, 1.5603802322235532, 5.6228218371102496e-05, -1.5669352793761442, 5.622875878174759, 4.082800262277375, 2.7713941642895956 ]) std = np.array([ 0.8669452525283652, 0.687915294956501, 2.080513632043758, 0.19627420479282623, 0.014680602791251812, 0.4183827359302299, 3.991778013006544, 2.703495278378409, 1.2269185938626304 ]) predicted = predicted * std + mean counter = 0 for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) plot_room_layout(predict_output, (input_batch[counter] + 1.) / 2., to_store_name, curr_layout, cube_only=True) curr_layout.append(predict_output) if len(curr_layout) > 5: curr_layout.pop(0) #scipy.misc.toimage(result, cmin=0.0, cmax=1.0).save(to_store_name) counter += 1 elif task == 'segmentsemantic_rb': for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) process_semseg_frame(predict_output, to_store_name) elif task == 'jigsaw': predicted = np.argmax(predicted, axis=1) counter = 0 for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) perm = cfg['target_dict'][predict_output] show_jigsaw((input_batch[counter] + 1.) 
/ 2., perm, to_store_name) counter += 1 else: for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode( 'utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) scipy.misc.toimage(np.squeeze(predict_output), cmin=0.0, cmax=1.0).save(to_store_name) # subprocess.call('tar -czvf /home/ubuntu/{c}_{vid_id}.tar.gz /home/ubuntu/{t}/vid{vid_id}'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) # subprocess.call('ffmpeg -r 29.97 -f image2 -s 256x256 -i /home/ubuntu/{t}/vid{vid_id}/0{vid_id}0%04d.png -vcodec libx264 -crf 15 {c}_{vid_id}.mp4'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) subprocess.call( 'ffmpeg -r 29.97 -f image2 -s 256x256 -i /home/ubuntu/{t}/vid{vid_id}/0{vid_id}0%04d.png -ss 00:01:54 -t 00:00:40 -c:v libvpx-vp9 -crf 10 -b:v 128k {c}_{vid_id}.webm' .format(c=config_name, t=task, vid_id=args.vid), shell=True) # subprocess.call('ffmpeg -r 29.97 -f image2 -s 256x256 -i /home/ubuntu/{t}/vid{vid_id}/0{vid_id}0%04d.png -vcodec libx264 -crf 15 -pix_fmt yuv420p {c}_{vid_id}.mp4'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) subprocess.call( 'sudo mkdir -p /home/ubuntu/s3/video_new/{t}'.format(t=task), shell=True) #subprocess.call('sudo mkdir -p /home/ubuntu/s3/video_new_all/{t}'.format(t=task), shell=True) # subprocess.call('aws s3 cp /home/ubuntu/{c}_{vid_id}.tar.gz s3://task-preprocessing-512-oregon/video_new_all/{t}/'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) subprocess.call( 'aws s3 cp {c}_{vid_id}.webm s3://task-preprocessing-512-oregon/video_new/{t}/' .format(c=config_name, t=task, vid_id=args.vid), shell=True) # subprocess.call('aws s3 cp /home/ubuntu/{c}_{vid_id}.tar.gz s3://taskonomy-unpacked-oregon/video_tar_all/{t}/'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) # subprocess.call('aws s3 cp {c}_{vid_id}.mp4 s3://taskonomy-unpacked-oregon/video_all/{t}/'.format( # c=config_name, t=task, vid_id=args.vid), shell=True) ############## Clean Up ############## training_runners['coord'].request_stop() training_runners['coord'].join() print("Done: {}".format(config_name)) ############## Reset graph and paths ############## tf.reset_default_graph() training_runners['sess'].close() return
def makePCA(fn, validExcept, rows, ftype, fcols, n_cols, isTrain, target, exceptCols, comp, exva, mean, exceptTargetForPCA, useLog, logConstant): print('') print('+=======================+') print('| Function : makePCA |') print('+=======================+') # get dataFrame (dataSetDF, targetCol) = _MDF.makeDataFrame(fn, validExcept, rows, ftype, fcols, isTrain, target, exceptCols, useLog, logConstant) DFtoFindPCA = dataSetDF # dataFrame to find PCA # remove target column when exceptTargetForPCA is True if exceptTargetForPCA == True: newDataSetDF = dataSetDF.drop([target], axis='columns') # print newDataSetDF print('\n<<< [8] newDataSetDF.columns >>>') print(newDataSetDF.columns) print('\n<<< [9] newDataSetDF >>>') print(newDataSetDF) DFtoFindPCA = newDataSetDF # display correlation # https://seaborn.pydata.org/generated/seaborn.clustermap.html df = DFtoFindPCA.corr() # get correlation seab.clustermap(df, annot=True, cmap='RdYlBu_r', vmin=-1, vmax=1) plt.show() # to standard normal distribution scaled = StandardScaler().fit_transform(DFtoFindPCA) # PCA # https://medium.com/@john_analyst/pca-%EC%B0%A8%EC%9B%90-%EC%B6%95%EC%86%8C-%EB%9E%80-3339aed5afa1 initializePCA = False # initializing PCA? if str(comp) == 'None' or str(exva) == 'None' or str( mean) == 'None': # create PCA if does not exist pca = PCA(n_components=n_cols) pca.fit(scaled) # get components and explained variances of PCA comp = pca.components_ exva = pca.explained_variance_ mean = pca.mean_ initializePCA = True # https://machinelearningmastery.com/calculate-principal-component-analysis-scratch-python/ # print pca.components_ and pca.explained_variance_ print('\n<<< [10] pca.components_ >>>\n' + str(comp)) print('\n<<< [11] pca.explained_variance_ >>>\n' + str(exva)) print('\n<<< [12] pca.mean_ >>>\n' + str(mean)) # create PCA using comp and exva if initializePCA == False: pca = PCA(n_components=n_cols) pca.components_ = comp pca.explained_variance_ = exva pca.mean_ = mean # apply PCA to the data scaledPCA = pca.transform(scaled) print('\n<<< [13] scaledPCA.shape >>>\n' + str(scaledPCA.shape)) print('\n<<< [14] scaledPCA.data.shape >>>\n' + str(scaledPCA.data.shape)) print('\n<<< [15] scaledPCA >>>') print(scaledPCA) # for training data # (ID : ~original train data) -> (ID : ~except for validation data) if isTrain == True: print('\n<<< [15-1] dataSetDF[target] before >>>') print(dataSetDF[target]) # dataFrame -> list -> dataFrame targetList = list(dataSetDF[target]) targetListCopy = [] for i in range(len(targetList)): targetListCopy.append(targetList[i]) targetDF = pd.DataFrame(targetListCopy) print('\n<<< [15-2] dataSetDF[target] after : targetDF >>>') print(targetDF) # name each column for PCA transformed data pca_cols = [] for i in range(n_cols): pca_cols.append('pca' + str(i)) df_pca = pd.DataFrame(scaledPCA, columns=pca_cols) if isTrain == True: df_pca['target'] = targetDF print('\n<<< [16] df_pca >>>') print(df_pca) df_pcaCorr = df_pca.corr() seab.clustermap(df_pcaCorr, annot=True, cmap='RdYlBu_r', vmin=-1, vmax=1) plt.show() # immediately return the pca if testing if isTrain == False: print('') print('+=======================+') print('| Exit : makePCA |') print('+=======================+') return (df_pca, comp, exva, mean, targetCol) # print data as 2d or 3d space (run only on training data) _PD.printDataAsSpace(n_cols, df_pca, '(PCA) training data') print('') print('+=======================+') print('| Exit : makePCA |') print('+=======================+') return (df_pca, comp, exva, mean, targetCol)
def main(filename, xtrains_percent=0.8, maxfeature=3, fit_ylabel=False, nn_estimator=100, sepaLabel=True, treeLabel=False, seed=42, pcaLabel=False, n_comp=2, sepa2=False, time_label=False, stream=False, sfl=False): inf = float("inf") all_start = time.time() rng = np.random.RandomState(seed) # httpとsmtpのみ別の方法でデータ取得 if filename == '/home/anegawa/Dropbox/http.mat' or filename == '/home/anegawa/Dropbox/smtp.mat': mat = {} f = h5py.File(filename) for k, v in f.items(): mat[k] = np.array(v) X = mat['X'].T y2 = mat['y'][0] y3 = [] for i in range(len(y2)): y3.append(int(y2[i])) y = np.reshape(y3, [len(y3), 1]) else: mat = scipy.io.loadmat(filename) X = mat['X'] y = mat['y'] rate = xtrains_percent max_feat = int(maxfeature) if max_feat == 3: max_feat = X.shape[1] if not treeLabel: print('X_train\'s rate : ' + str(rate)) print('max_features : ' + str(max_feat)) print('fit_ylabel : ' + str(fit_ylabel)) print('nn_estimator : ' + str(nn_estimator)) print('sepaLabel : ' + str(sepaLabel)) clf = IsolationForest(random_state=rng) clf.n_estimators = nn_estimator clf.verbose = 0 clf.max_features = max_feat if (str(filename) == '/home/anegawa/Dropbox/shuttle.mat'): clf.contamination = 0.07 elif (str(filename) == '/home/anegawa/Dropbox/http.mat'): clf.contamination = 0.004 elif (str(filename) == '/home/anegawa/Dropbox/pima.mat'): clf.contamination = 0.35 elif (str(filename) == '/home/anegawa/Dropbox/mammography.mat'): clf.contamination = 0.02 elif (str(filename) == '/home/anegawa/Dropbox/cover.mat'): clf.contamination = 0.009 elif (str(filename) == '/home/anegawa/Dropbox/breastw.mat'): clf.contamination = 0.35 elif (str(filename) == '/home/anegawa/Dropbox/arrhythmia.mat'): clf.contamination = 0.15 elif (str(filename) == '/home/anegawa/Dropbox/ionosphere.mat'): clf.contamination = 0.36 elif (str(filename) == '/home/anegawa/Dropbox/satellite.mat'): clf.contamination = 0.32 elif (str(filename) == '/home/anegawa/Dropbox/annthyroid.mat'): clf.contamination = 0.07 elif (str(filename) == '/home/anegawa/Dropbox/smtp.mat'): clf.contamination = 0.03 / 100 else: raise Exception("error! 
cannot file it.") # 交差検証を何回行うか(例:8:2なら5回) # もっとうまい方法ありそう hoge = 1 / (1 - rate) cross_count = int(np.ceil(hoge)) if cross_count > hoge: cross_count = cross_count - 1 # cross_count分のauc,acc合計 sum_auc = 0 sum_accuracy = 0 pca_fit_time = 0 pca_transform_train_time = 0 pca_transform_test_time = 0 test_time = 0 fit_time = 0 if sepaLabel == True: # separated # data cut X_anomaly = [] X_normal = [] for i in range(len(X)): if y[i] == 1: X_anomaly.append(X[i]) else: X_normal.append(X[i]) cutter_anomaly = int(np.ceil(len(X_anomaly) * rate)) cutter_normal = int(np.ceil(len(X_normal) * rate)) for count in range(cross_count): if sepaLabel: part_anomaly = int(np.ceil(cutter_anomaly * count)) part_normal = int(np.ceil(cutter_normal * count)) X_train = [] X_train_correct = [] X_test = [] X_test_correct = [] for i, k in zip(range(len(X_anomaly)), range(part_anomaly, part_anomaly + len(X_anomaly))): while k >= len(X_anomaly): k = k - len(X_anomaly) if i < cutter_anomaly: X_train.append(X_anomaly[k]) X_train_correct.append(-1) else: X_test.append(X_anomaly[k]) X_test_correct.append(-1) for i, k in zip(range(len(X_normal)), range(part_normal, part_normal + len(X_normal))): while k >= len(X_normal): k = k - len(X_normal) if i < cutter_normal: X_train.append(X_normal[k]) X_train_correct.append(1) else: X_test.append(X_normal[k]) X_test_correct.append(1) # シャッフルするかどうか if sfl: X_train_set = [] X_test_set = [] for i in range(len(X_train)): buf = [] buf.append(X_train[i]) buf.append(X_train_correct[i]) X_train_set.append(buf) for i in range(len(X_test)): buf = [] buf.append(X_test[i]) buf.append(X_test_correct[i]) X_test_set.append(buf) random.shuffle(X_train_set) random.shuffle(X_test_set) X_train = [] X_test = [] X_train_correct = [] X_test_correct = [] for i in range(len(X_train_set)): X_train.append(X_train_set[i][0]) X_train_correct.append(X_train_set[i][1]) for i in range(len(X_test_set)): X_test.append(X_test_set[i][0]) X_test_correct.append(X_test_set[i][1]) else: # mixed cutter = len(X) * rate part = int(np.ceil(cutter * count)) X_train = [] X_train_correct = [] X_test = [] X_test_correct = [] for i, k in zip(range(len(X)), range(part, part + len(X))): while k >= len(X): k = k - len(X) if i < len(X) * rate: X_train.append(X[k]) X_train_correct.append(y[k]) else: X_test.append(X[k]) X_test_correct.append(y[k]) for q in range(len(X_train_correct)): j = X_train_correct[q] if (j == 1): X_train_correct[q] = -1 else: X_train_correct[q] = 1 for w in range(len(X_test_correct)): j = X_test_correct[w] if (j == 1): X_test_correct[w] = -1 else: X_test_correct[w] = 1 # owari # finished cutting data if pcaLabel: if sepa2: # if False: pca2 = PCA(copy=True, iterated_power='auto', random_state=None, svd_solver='auto', tol=0.0, whiten=False) pca2.fit(X_train_normal) component = pca2.components_ component2 = np.sort(pca2.components_) if n_comp < len(component2): pca2.components_ = component2[0:n_comp] X_train = pca2.transform(X_train) X_test = pca2.transform(X_test) else: pca_fit_start = time.time() pca = PCA(copy=True, iterated_power='auto', n_components=n_comp, random_state=None, svd_solver='auto', tol=0.0, whiten=False) pca.fit(X_train) pca_fit_finish = time.time() pca_transform_train_start = time.time() X_train = pca.transform(X_train) pca_transform_train_finish = time.time() clf.max_features = n_comp pca_fit_time += (pca_fit_finish - pca_fit_start) pca_transform_train_time += (pca_transform_train_finish - pca_transform_train_start) fit_start = time.time() # fit_ylabelはFalseで固定 if fit_ylabel: clf.fit(X_train, 
X_train_correct, sample_weight=None) else: clf.fit(X_train, y=None, sample_weight=None) fit_finish = time.time() fit_time += (fit_finish - fit_start) if stream: sum_score_auc = [] sum_score_acc = [] for i in range(len(X_test)): if pcaLabel: pca_transform_test_start = time.time() a = [X_test[i]] X_test_pca = pca.transform(a) pca_transform_test_finish = time.time() pca_transform_test_time += (pca_transform_test_finish - pca_transform_test_start) else: X_test_pca = [X_test[i]] test_start = time.time() y_pred_test, a_score = clf.predict(X_test_pca) test_finish = time.time() test_time += (test_finish - test_start) sum_score_auc.append(a_score) sum_score_acc.append(y_pred_test) a_score = sum_score_auc y_pred_test = sum_score_acc else: # batch if pcaLabel: pca_transform_test_start = time.time() X_test = pca.transform(X_test) # stream version pca_transform_test_finish = time.time() pca_transform_test_time += (pca_transform_test_finish - pca_transform_test_start) test_start = time.time() y_pred_test, a_score = clf.predict(X_test) # a_score = clf.decision_function(X_test) test_finish = time.time() test_time += (test_finish - test_start) acc = calc_accuracy(X_test_correct, y_pred_test, treeLabel) AUC = calc_AUC(X_test_correct, a_score, treeLabel) sum_auc += AUC sum_accuracy += acc # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # plot the line, the samples, and the nearest vectors to the plane # # X_train = np.array(X_train) # X_test = np.array(X_test) # # lim = True # x = (-200, 200) # y = (-200, 300) # # for i,j in zip(range(2), [True, False]): # small = j # trueがsmallestね # # plt.subplot(2, 2, i+1) # if small: # plt.title("smallest") # else: # plt.title("largest") # # if small: # # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, X_test.shape[1]-1], X_test[:, X_test.shape[1]-2], c='green', s=20, edgecolor='k') # else: # # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # plt.legend([b2],["testing observations"], # loc="upper left") # # plt.legend([b1], ["training observations"], # # loc="upper left") # # # # plt.subplot(2, 2, i+3) # if small: # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, X_test.shape[1] - 1], X_test[:, X_test.shape[1] - 2], c='green', s=20, edgecolor='k') # else: # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # # plt.legend([b2], ["testing observations"], # # loc="upper left") # plt.legend([b1], ["training observations"], # loc="upper left") # plt.show()ter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, X_test.shape[1]-1], X_test[:, X_test.shape[1]-2], c='green', 
s=20, edgecolor='k') # else: # # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # plt.legend([b2],["testing observations"], # loc="upper left") # # plt.legend([b1], ["training observations"], # # loc="upper left") # # # # plt.subplot(2, 2, i+3) # if small: # b1 = plt.scatter(X_train[:, X_train.shape[1]-1], X_train[:, X_train.shape[1]-2], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, X_test.shape[1] - 1], X_test[:, X_test.shape[1] - 2], c='green', s=20, edgecolor='k') # else: # b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=20, edgecolor='k') # # b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green', s=20, edgecolor='k') # # c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red', s=20, edgecolor='k') # plt.axis('tight') # if lim: # plt.xlim(x) # plt.ylim(y) # # plt.legend([b1, b2], # # ["training observations", # # "testing observations"], # # loc="upper left") # # plt.legend([b2], ["testing observations"], # # loc="upper left") # plt.legend([b1], ["training observations"], # loc="upper left") # plt.show() # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ auc2 = sum_auc / cross_count acc2 = sum_accuracy / cross_count # calc time all_finish = time.time() all_time = all_finish - all_start pca_fit_time = pca_fit_time / cross_count pca_transform_train_time = pca_transform_train_time / cross_count pca_transform_test_time = pca_transform_test_time / cross_count test_time = test_time / cross_count fit_time = fit_time / cross_count sum_train_time = fit_time + pca_fit_time + pca_transform_train_time sum_test_time = pca_transform_test_time + test_time # print("sum_train_time : " + str(sum_train_time)) # print("pca_transform_train_time : " + str(pca_transform_train_time)) # print("pca_fit_time : " + str(pca_fit_time)) # print("test_time : " + str(test_time)) # print("fit_time : " + str(fit_time)) # print("all_time : " + str(all_time)) if time_label: return all_time, pca_fit_time + pca_transform_train_time, fit_time, pca_transform_test_time, test_time, sum_train_time, sum_test_time elif treeLabel: if math.isnan(auc2): raise Exception("error! auc is NaN!.") return auc2 else: return auc2, acc2
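# The loop above hand-rolls the cross-validation splits, fits the PCA on the
# training partition only, and scores an IsolationForest on the transformed test
# partition. A sketch of the same fit-on-train-only rule using a sklearn
# Pipeline on synthetic data follows; note that the stock IsolationForest.predict
# returns only +-1 labels (the two-value clf.predict above suggests a modified
# fork), so the anomaly score here comes from decision_function instead.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

rng = np.random.RandomState(42)
X = rng.rand(500, 10)                             # hypothetical feature matrix
y = np.where(rng.rand(500) < 0.05, -1, 1)         # 1 = normal, -1 = anomaly

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

model = Pipeline([
    ('pca', PCA(n_components=2)),                 # fitted on X_train only
    ('iforest', IsolationForest(n_estimators=100, contamination=0.05, random_state=rng)),
])
model.fit(X_train)

pred = model.predict(X_test)                      # +1 inlier, -1 outlier
scores = model.decision_function(X_test)          # higher = more normal
print('accuracy:', np.mean(pred == y_test))
print('AUC:', roc_auc_score(y_test, scores))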
train_data_downsampled = train_data_normalized[:, :, ::decim_factor]
test_data_downsampled = test_data_normalized[:, :, ::decim_factor]

# put the last dimension into the preceding one
train_x = train_data_downsampled.reshape(train_data_downsampled.shape[0], -1)
test_x = test_data_downsampled.reshape(test_data_downsampled.shape[0], -1)

# next: apply PCA
if True:
    pca = PCA(0.95)
    pca.fit(train_x)
    pca.components_ = -pca.components_
    # train_x = pca.transform(train_x)
    test_x = pca.transform(test_x)

# oversampling the least present sample
if False:
    idx_offset = balance_idx(train_label)
    oversampled_train_label = np.append(train_label, train_label[idx_offset])
    oversampled_train_x = np.concatenate((train_x, train_x[idx_offset]), 0)
    train_label = oversampled_train_label
    train_x = oversampled_train_x

cls.fit(train_x, np.unique(train_label))
# cls.fit( oversampled_train_x, oversampled_train_label )
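# The sign flip pca.components_ = -pca.components_ above only changes the sign
# convention of the scores; distances between projected samples and any
# reconstruction are unaffected. A small sketch on synthetic data makes that
# invariance explicit.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(50, 8)

pca = PCA(n_components=3).fit(X)
scores = pca.transform(X)
recon_before = pca.inverse_transform(scores)

pca.components_ = -pca.components_       # same flip as above
flipped_scores = pca.transform(X)
recon_after = pca.inverse_transform(flipped_scores)

assert np.allclose(flipped_scores, -scores)      # scores change sign only
assert np.allclose(recon_before, recon_after)    # reconstructions are identical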
def run(self) -> None: """ Run method of the module. The position and contrast of a planet is measured by injecting negative copies of the PSF template and applying a simplex method (Nelder-Mead) for minimization of a figure of merit at the planet location. Returns ------- NoneType None """ for item in self.m_res_out_port: item.del_all_data() item.del_all_attributes() for item in self.m_flux_pos_port: item.del_all_data() item.del_all_attributes() parang = self.m_image_in_port.get_attribute('PARANG') pixscale = self.m_image_in_port.get_attribute('PIXSCALE') aperture = (self.m_position[1], self.m_position[0], self.m_aperture/pixscale) self.m_sigma /= pixscale if self.m_cent_size is not None: self.m_cent_size /= pixscale if self.m_edge_size is not None: self.m_edge_size /= pixscale psf = self.m_psf_in_port.get_all() images = self.m_image_in_port.get_all() if psf.shape[0] != 1 and psf.shape[0] != images.shape[0]: raise ValueError('The number of frames in psf_in_tag does not match with the number ' 'of frames in image_in_tag. The DerotateAndStackModule can be ' 'used to average the PSF frames (without derotating) before applying ' 'the SimplexMinimizationModule.') center = center_subpixel(psf) if self.m_reference_in_port is not None and self.m_merit != 'poisson': raise NotImplementedError('The reference_in_tag can only be used in combination with ' 'the \'poisson\' figure of merit.') def _objective(arg, count, n_components, sklearn_pca): pos_y = arg[0] pos_x = arg[1] mag = arg[2] sep_ang = cartesian_to_polar(center, pos_y, pos_x) fake = fake_planet(images=images, psf=psf, parang=parang, position=(sep_ang[0], sep_ang[1]), magnitude=mag, psf_scaling=self.m_psf_scaling) mask = create_mask(fake.shape[-2:], (self.m_cent_size, self.m_edge_size)) if self.m_reference_in_port is None: im_res_rot, im_res_derot = pca_psf_subtraction(images=fake*mask, angles=-1.*parang+self.m_extra_rot, pca_number=n_components, pca_sklearn=sklearn_pca, im_shape=None, indices=None) else: im_reshape = np.reshape(fake*mask, (im_shape[0], im_shape[1]*im_shape[2])) im_res_rot, im_res_derot = pca_psf_subtraction(images=im_reshape, angles=-1.*parang+self.m_extra_rot, pca_number=n_components, pca_sklearn=sklearn_pca, im_shape=im_shape, indices=None) res_stack = combine_residuals(method=self.m_residuals, res_rot=im_res_derot, residuals=im_res_rot, angles=parang) self.m_res_out_port[count].append(res_stack, data_dim=3) chi_square = merit_function(residuals=res_stack[0, ], merit=self.m_merit, aperture=aperture, sigma=self.m_sigma) position = rotate_coordinates(center, (pos_y, pos_x), -self.m_extra_rot) res = np.asarray([position[1], position[0], sep_ang[0]*pixscale, (sep_ang[1]-self.m_extra_rot) % 360., mag, chi_square]) self.m_flux_pos_port[count].append(res, data_dim=2) sys.stdout.write('\rSimplex minimization... ') sys.stdout.write(f'{n_components} PC - chi^2 = {chi_square:.8E}') sys.stdout.flush() return chi_square pos_init = rotate_coordinates(center, (self.m_position[1], self.m_position[0]), # (y, x) self.m_extra_rot) for i, n_components in enumerate(self.m_pca_number): sys.stdout.write(f'\rSimplex minimization... 
{n_components} PC ') sys.stdout.flush() if self.m_reference_in_port is None: sklearn_pca = None else: ref_data = self.m_reference_in_port.get_all() im_shape = images.shape ref_shape = ref_data.shape if ref_shape[1:] != im_shape[1:]: raise ValueError('The image size of the science data and the reference data ' 'should be identical.') # reshape reference data and select the unmasked pixels ref_reshape = ref_data.reshape(ref_shape[0], ref_shape[1]*ref_shape[2]) mean_ref = np.mean(ref_reshape, axis=0) ref_reshape -= mean_ref # create the PCA basis sklearn_pca = PCA(n_components=n_components, svd_solver='arpack') sklearn_pca.fit(ref_reshape) # add mean of reference array as 1st PC and orthogonalize it to the PCA basis mean_ref_reshape = mean_ref.reshape((1, mean_ref.shape[0])) q_ortho, _ = np.linalg.qr(np.vstack((mean_ref_reshape, sklearn_pca.components_[:-1, ])).T) sklearn_pca.components_ = q_ortho.T minimize(fun=_objective, x0=[pos_init[0], pos_init[1], self.m_magnitude], args=(i, n_components, sklearn_pca), method='Nelder-Mead', tol=None, options={'xatol': self.m_tolerance, 'fatol': float('inf')}) sys.stdout.write(' [DONE]\n') sys.stdout.flush() history = f'merit = {self.m_merit}' for item in self.m_flux_pos_port: item.copy_attributes(self.m_image_in_port) item.add_history('SimplexMinimizationModule', history) for item in self.m_res_out_port: item.copy_attributes(self.m_image_in_port) item.add_history('SimplexMinimizationModule', history) self.m_res_out_port[0].close_port()
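# The reference-PCA branch above replaces the last principal component with the
# (orthogonalized) mean of the reference frames. A standalone sketch of that QR
# trick, with a random stand-in for the flattened reference cube, shows the
# resulting basis is still orthonormal and can be assigned back to components_.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(1)
ref = rng.rand(40, 25)                   # hypothetical reference frames, flattened

mean_ref = ref.mean(axis=0)
ref_centered = ref - mean_ref

n_components = 5
sklearn_pca = PCA(n_components=n_components, svd_solver='arpack')
sklearn_pca.fit(ref_centered)

# stack the mean on top of the first n_components-1 PCs and orthonormalize
basis = np.vstack((mean_ref[np.newaxis, :], sklearn_pca.components_[:-1]))
q_ortho, _ = np.linalg.qr(basis.T)
sklearn_pca.components_ = q_ortho.T

# the modified basis is orthonormal, so transform/inverse_transform still
# behave like a projection onto this adjusted subspace
assert np.allclose(sklearn_pca.components_ @ sklearn_pca.components_.T,
                   np.eye(n_components), atol=1e-10)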
def run_to_task(task_to): import general_utils from general_utils import RuntimeDeterminedEnviromentVars import models.architectures as architectures from data.load_ops import resize_rescale_image import utils from data.task_data_loading import load_and_specify_preprocessors_for_representation_extraction import lib.data.load_ops as load_ops import pdb global synset synset_1000 = [" ".join(i.split(" ")[1:]) for i in synset] select = np.asarray([ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.]) with open('/home/ubuntu/task-taxonomy-331b/lib/data/places_class_names.txt', 'r') as fp: synset_places = [x.rstrip()[4:-1] for x,y in zip(fp.readlines(), select) if y == 1.] 
tf.logging.set_verbosity(tf.logging.ERROR) args = parser.parse_args() if args.task is not 'NONE': args.idx = list_of_tasks.index(args.task) for idx, task in enumerate(list_of_tasks): if idx != args.idx and args.idx != -1: continue if task == 'class_places': synset = synset_places elif task == 'class_1000': synset = synset_1000 print("Doing {task}".format(task=task)) general_utils = importlib.reload(general_utils) tf.reset_default_graph() training_runners = { 'sess': tf.InteractiveSession(), 'coord': tf.train.Coordinator() } # task = '{f}__{t}__{hs}'.format(f=task_from, t=task_to, hs=args.hs) CONFIG_DIR = '/home/ubuntu/task-taxonomy-331b/experiments/final/{TASK}'.format(TASK=task) ############## Load Configs ############## cfg = utils.load_config( CONFIG_DIR, nopause=True ) RuntimeDeterminedEnviromentVars.register_dict( cfg ) split_file = os.path.join('/home/ubuntu/task-taxonomy-331b/assets/aws_data/', 'video2_info.pkl') cfg['train_filenames'] = split_file cfg['val_filenames'] = split_file cfg['test_filenames'] = split_file cfg['num_epochs'] = 2 cfg['randomize'] = False root_dir = cfg['root_dir'] cfg['num_read_threads'] = 1 print(cfg['log_root']) cfg['model_path'] = os.path.join( cfg['log_root'], task, 'model.permanent-ckpt' ) print( cfg['model_path']) if cfg['model_path'] is None: continue cfg['dataset_dir'] = '/home/ubuntu' cfg['preprocess_fn'] = load_and_specify_preprocessors_for_representation_extraction ############## Set Up Inputs ############## # tf.logging.set_verbosity( tf.logging.INFO ) inputs = utils.setup_input( cfg, is_training=ON_TEST_SET, use_filename_queue=False ) # is_training determines whether to use train/validaiton RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg ) RuntimeDeterminedEnviromentVars.populate_registered_variables() start_time = time.time() # utils.print_start_info( cfg, inputs[ 'max_steps' ], is_training=False ) ############## Set Up Model ############## model = utils.setup_model( inputs, cfg, is_training=IN_TRAIN_MODE ) m = model[ 'model' ] model[ 'saver_op' ].restore( training_runners[ 'sess' ], cfg[ 'model_path' ] ) ############## Start dataloading workers ############## data_prefetch_init_fn = utils.get_data_prefetch_threads_init_fn( inputs, cfg, is_training=ON_TEST_SET, use_filename_queue=False ) prefetch_threads = threading.Thread( target=data_prefetch_init_fn, args=( training_runners[ 'sess' ], training_runners[ 'coord' ] )) prefetch_threads.start() list_of_fname = np.load('/home/ubuntu/task-taxonomy-331b/assets/aws_data/video2_fname.npy') import errno try: os.mkdir('/home/ubuntu/{}'.format(task)) os.mkdir('/home/ubuntu/{}/vid1'.format(task)) os.mkdir('/home/ubuntu/{}/vid2'.format(task)) os.mkdir('/home/ubuntu/{}/vid3'.format(task)) os.mkdir('/home/ubuntu/{}/vid4'.format(task)) except OSError as e: if e.errno != errno.EEXIST: raise curr_comp = np.zeros((3,64)) curr_fit_img = np.zeros((256,256,3)) embeddings = [] ############## Run First Batch ############## for step_num in range(inputs['max_steps'] - 1): #for step_num in range(1): #if step_num > 0 and step_num % 20 == 0: print(step_num) if not hasattr(m, 'masks'): ( input_batch, target_batch, data_idx, predicted, loss, ) = training_runners['sess'].run( [ m.input_images, m.targets, model[ 'data_idxs' ], m.decoder_output, m.total_loss] ) mask_batch = 1. 
else: ( input_batch, target_batch, mask_batch, data_idx, predicted, loss, ) = training_runners['sess'].run( [ m.input_images, m.targets, m.masks, model[ 'data_idxs' ], m.decoder_output, m.total_loss] ) if task == 'segment2d' or task == 'segment25d': from sklearn.decomposition import PCA x = np.zeros((32,256,256,3), dtype='float') k_embed = 8 # for i in range(predicted.shape[0]): # embedding_flattened = np.squeeze(predicted[i]).reshape((-1,64)) # pca = PCA(n_components=3) # pca.fit(embedding_flattened) # min_order = None # min_dist = float('inf') # for order in itertools.permutations([0,1,2]): # reordered = pca.components_[list(order), :] # dist = np.linalg.norm(curr_comp-reordered) # if dist < min_dist: # min_order = list(order) # min_dist = dist # print(min_order) # pca.components_ = pca.components_[min_order, :] # curr_comp = pca.components_ # lower_dim = pca.transform(embedding_flattened).reshape((256,256,-1)) # lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min()) # x[i] = lower_dim for i in range(predicted.shape[0]): embedding_flattened = np.squeeze(predicted[i]).reshape((-1,64)) embeddings.append(embedding_flattened) if len(embeddings) > k_embed: embeddings.pop(0) pca = PCA(n_components=3) pca.fit(np.vstack(embeddings)) min_order = None min_dist = float('inf') copy_of_comp = np.copy(pca.components_) for order in itertools.permutations([0,1,2]): #reordered = pca.components_[list(order), :] #dist = np.linalg.norm(curr_comp-reordered) pca.components_ = copy_of_comp[order, :] lower_dim = pca.transform(embedding_flattened).reshape((256,256,-1)) lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min()) dist = np.linalg.norm(lower_dim - curr_fit_img) if dist < min_dist: min_order = order min_dist = dist pca.components_ = copy_of_comp[min_order, :] lower_dim = pca.transform(embedding_flattened).reshape((256,256,-1)) lower_dim = (lower_dim - lower_dim.min()) / (lower_dim.max() - lower_dim.min()) curr_fit_img = np.copy(lower_dim) x[i] = lower_dim predicted = x if task == 'curvature': std = [31.922, 21.658] mean = [123.572, 120.1] predicted = (predicted * std) + mean predicted[:,0,0,:] = 0. predicted[:,1,0,:] = 1. predicted = np.squeeze(np.clip(predicted.astype(int) / 255., 0., 1. )[:,:,:,0]) just_rescale = ['autoencoder', 'denoise', 'edge2d', 'edge3d', 'keypoint2d', 'keypoint3d', 'reshade', 'rgb2sfnorm'] if task in just_rescale: predicted = (predicted + 1.) / 2. predicted = np.clip(predicted, 0., 1.) predicted[:,0,0,:] = 0. predicted[:,1,0,:] = 1. just_clip = ['rgb2depth', 'rgb2mist'] if task in just_clip: predicted[:,0,0,:] = 0. predicted[:,1,0,:] = 1. if task == 'segmentsemantic_rb': label = np.argmax(predicted, axis=-1) COLORS = ('white','red', 'blue', 'yellow', 'magenta', 'green', 'indigo', 'darkorange', 'cyan', 'pink', 'yellowgreen', 'black', 'darkgreen', 'brown', 'gray', 'purple', 'darkviolet') rgb = (input_batch + 1.) / 2. 
preds = [color.label2rgb(np.squeeze(x), np.squeeze(y), colors=COLORS, kind='overlay')[np.newaxis,:,:,:] for x,y in zip(label, rgb)] predicted = np.vstack(preds) if task in ['class_1000', 'class_places']: for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode('utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) sorted_pred = np.argsort(predict_output)[::-1] top_5_pred = [synset[sorted_pred[i]] for i in range(5)] to_print_pred = "Top 5 prediction: \n {}\n {}\n {}\n {} \n {}".format(*top_5_pred) img = Image.new('RGBA', (400, 200), (255, 255, 255)) d = ImageDraw.Draw(img) fnt = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSerifCondensed.ttf', 25) d.text((20, 5), to_print_pred, fill=(255, 0, 0), font=fnt) img.save(to_store_name, 'PNG') else: for file_idx, predict_output in zip(data_idx, predicted): to_store_name = list_of_fname[file_idx].decode('utf-8').replace('video', task) to_store_name = os.path.join('/home/ubuntu', to_store_name) scipy.misc.toimage(np.squeeze(predict_output), cmin=0.0, cmax=1.0).save(to_store_name) subprocess.call('tar -czvf /home/ubuntu/{t}.tar.gz /home/ubuntu/{t}'.format(t=task), shell=True) subprocess.call('aws s3 cp /home/ubuntu/{t}.tar.gz s3://task-preprocessing-512-oregon/video2/'.format(t=task), shell=True) subprocess.call('ffmpeg -r 29.97 -f image2 -s 256x256 -i /home/ubuntu/{t}/vid2/020%04d.png -vcodec libx264 -crf 15 -pix_fmt yuv420p {t}_2.mp4'.format(t=task), shell=True) subprocess.call('aws s3 cp {t}_2.mp4 s3://task-preprocessing-512-oregon/video2/'.format(t=task), shell=True) ############## Clean Up ############## training_runners[ 'coord' ].request_stop() training_runners[ 'coord' ].join() # if os.path.isfile(pickle_dir): # with open(pickle_dir, 'rb') as fp: # all_outputs = pickle.load(fp) ############## Store to dict ############## print("Done: {}".format(task)) # os.system("sudo cp {d} /home/ubuntu/s3/model_log".format(d=pickle_dir)) ############## Reset graph and paths ############## tf.reset_default_graph() training_runners['sess'].close() return
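# For the segment2d/segment25d branch, each 64-dimensional pixel embedding is
# projected onto three principal components and min-max scaled so it can be
# written out as an RGB image. A stripped-down sketch of that step (the helper
# name is hypothetical, and the permutation matching that keeps colours stable
# across frames is omitted):
import numpy as np
from sklearn.decomposition import PCA

def embedding_to_rgb(embedding, n_channels=3):
    """Project an (H, W, C) embedding onto n_channels PCs and scale to [0, 1]."""
    h, w, c = embedding.shape
    flat = embedding.reshape(-1, c)
    pca = PCA(n_components=n_channels)
    lower = pca.fit_transform(flat).reshape(h, w, n_channels)
    return (lower - lower.min()) / (lower.max() - lower.min())

# hypothetical 256x256x64 embedding, as produced by the decoder above
rgb = embedding_to_rgb(np.random.rand(256, 256, 64))
print(rgb.shape, rgb.min(), rgb.max())   # (256, 256, 3) 0.0 1.0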
def grid_search_approach(technique, n, clf, parameters, X, y, test_size, random_state,
                         cv=7, iid=False, n_jobs=-1, sss_flag=False, type_classifier=None):
    '''Performs a grid search against a given classifier or pipeline object
    and a dictionary of hyper-parameters.

    Params:
    -------
    - n: number or list of numbers of principal components to be retained and
      exploited in order to improve the overall performance.
    - clf: scikit-learn Pipeline object, made up of all the operations to be
      performed in a given order.
    - cv: integer, default=7, number of cross-validation folds; the resulting
      cv models are averaged.
    - iid: boolean, default=False, whether input data should be treated as
      independent and identically distributed samples.
    - n_jobs: integer, default=-1, lets the workstation on which the training
      script is launched discover and exploit a bunch of CPUs to speed up the
      training phase.
    '''
    # === Splitting dataset into training and test sets, respectively, both features and labels === #
    if sss_flag is False:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)
    else:
        sss = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
        sss.get_n_splits(X, y)
        for train_index, test_index in sss.split(X, y):
            # print("TRAIN:", train_index, "TEST:", test_index)
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
        # sss = StratifiedShuffleSplit(n_splits=cv, test_size=test_size * 2, random_state=random_state)
        # cv = sss.split(X_train, y_train)

    # === Performing PCA only once for all Principal Components === #
    n_components = X.shape[1]
    pca = PCA(n_components=n_components)
    pca = pca.fit(X_train)
    backup_pcs_ = copy.copy(pca.components_)
    # print(f"Shape principal components: {backup_pcs_.shape}")

    print(f'==== GRID SEARCH METHOD APPLIED ON: {technique.split(",")[0]} Technique ====')
    print(f'==== PREPROCESSING METHOD: {technique.split(",")[1]} Technique ====')

    for pos, n_components in enumerate(n):
        print('\n', "*" * 20, sep='')
        print(f"Grid Search attempt no.: {pos + 1}")
        tmp_cv = cv
        if True:
            # === Preparing Feature Space by means of retained Principal Components === #
            n = len(pca.components_[n_components:])
            pca.components_[n_components:] = [[0] * X.shape[1]] * n
            X_train_pca = pca.transform(X_train)

            if sss_flag is True:
                sss = StratifiedShuffleSplit(n_splits=cv, test_size=test_size * 0.5,
                                             random_state=random_state)
                cv = sss.split(X_train, y_train)

            # === Performing training phase === #
            gs_clf = GridSearchCV(clf, parameters, cv=cv, iid=iid, n_jobs=n_jobs)
            gs_clf = gs_clf.fit(X_train_pca, y_train)

            # === Evaluating performances === #
            predicted = gs_clf.predict(pca.transform(X_test))
            print("--- Classification Report ---")
            print(metrics.classification_report(y_test, predicted,
                                                target_names=['negative', 'positive']))
            print("--- Confusion Matrix ---")
            print(metrics.confusion_matrix(y_test, predicted))
            print(f"{np.mean(predicted == y_test)}")
            print(f"Best Score: {gs_clf.best_score_}")
            print("--- Best Params ---")
            print(f"n_components: {n_components}")
            for param_name in sorted(parameters.keys()):
                print("%s: %r" % (param_name, gs_clf.best_params_[param_name]))

            try:
                evaluate_best_current_model_(X, y, pca, gs_clf, test_size, random_state, type_classifier)
                # raise Exception('Ok')
            except Exception as err:
                print(err)
                # evaluate_best_current_model_(X, y, pca, gs_clf, test_size, random_state, type_classifier)

            # === Restoring overall pcs for further, subsequent evaluation === #
            pca.components_ = copy.copy(backup_pcs_)
            cv = tmp_cv
        # except Exception as err:
        else:
            pass
            # print(err)
            pass
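# Zeroing the trailing rows of pca.components_, as done above, keeps the
# transformed feature matrix at its full width while discarding the variance
# carried by the dropped directions; slicing the basis instead shrinks the
# output to k columns. A short sketch on synthetic data compares the two.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(200, 10)
k = 3                                    # number of PCs to retain

pca = PCA(n_components=X.shape[1]).fit(X)
backup = pca.components_.copy()

pca.components_[k:] = 0.0                # variant used above: zero the tail
zeroed = pca.transform(X)                # shape (200, 10), last columns are 0

pca.components_ = backup[:k]             # alternative: truncate the basis
truncated = pca.transform(X)             # shape (200, 3)

assert np.allclose(zeroed[:, :k], truncated)
assert np.allclose(zeroed[:, k:], 0.0)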
# find correspondence between the skirt and the SMPL body
import pickle
import os.path as osp
import numpy as np
import cv2
from utils.rotation import get_Apose
from global_var import ROOT
from smpl_torch import SMPLNP, TorchSMPL4Garment
from sklearn.decomposition import PCA
from utils.renderer import Renderer

# load template skirt
style_model = np.load(osp.join(ROOT, 'skirt_female', 'style_model.npz'))
pca = PCA(n_components=4)
pca.components_ = style_model['pca_w']
pca.mean_ = style_model['mean']
skirt_v = pca.inverse_transform(np.zeros([1, 4])).reshape([-1, 3])

# move the skirt to the right position
with open(osp.join(ROOT, 'garment_class_info.pkl'), 'rb') as f:
    garment_meta = pickle.load(f)
skirt_f = garment_meta['skirt']['f']
vert_indices = garment_meta['pant']['vert_indices']
up_bnd_inds = np.load(osp.join(ROOT, 'skirt_upper_boundary.npy'))
pant_up_bnd_inds = np.load(osp.join(ROOT, 'pant_upper_boundary.npy'))
waist_body_inds = vert_indices[pant_up_bnd_inds]
smpl = SMPLNP(gender='female')
apose = get_Apose()
body_v, _ = smpl(np.zeros([300]), apose, None, None)
trans = np.mean(body_v[waist_body_inds], 0, keepdims=True) - np.mean(
def compute_features(signals, dataset_type, sfreq, l_freq, h_freq, decim_factor,
                     shiftFactor, scaleFactor, pca, tmin, tmax, tlow, thigh,
                     filter_method, verbose=False):
    '''
    Compute the features fed into the classifier for training or testing purposes.
    It does filtering, cropping, DC removal, normalization, downsampling, and finally PCA.

    Parameters
    ----------
    signals : 3D numpy array
        Signal to be computed. Dimension is (trial x channel x sample)
    dataset_type : string
        Either 'train' or 'test'
    l_freq, h_freq : float
        Frequencies for bandpass filtering
    decim_factor : int
        Decimation factor for downsampling (e.g. 4 -> takes one sample every 4)
    shiftFactor, scaleFactor : 2D array or None
        Normalization factors. Dimension is (channel x sample)
    pca : PCA object
    tmin, tmax, tlow, thigh : int
        Timing parameters relative to onset (-> t=0)

                 tlow     tmin     tmax      thigh
            ---0-------|--------|---------|---------|-------
              onset
                       <-------> cue feature window
                       <--------------------------> total window

    filter_method : string
        Either 'WINDOWED' or something else ('NC', 'LFILT')
    verbose : bool
        Verbosity level
    '''
    if filter_method == 'WINDOWED':
        signals_bp = mne.filter.band_pass_filter(signals, sfreq, l_freq, h_freq,
                                                 method='fft', copy=True, iir_params=None)
        if verbose:
            print('Compute Features: window based filtering')
    else:  # if FILTER_METHOD = 'NC' or FILTER_METHOD = 'LFILT'
        signals_bp = signals
        if verbose:
            print('Compute Features: no filtering')

    tlow_idx = int(sfreq * tlow)
    thigh_idx = int(sfreq * thigh)
    signals_bp = signals_bp[:, :, tlow_idx:thigh_idx]

    # Crop the padding area for bp
    paddingBefore_idx = int(round(sfreq * (tmin - tlow)))
    paddingAfter_idx = int(round(sfreq * (thigh - tmax)))
    tmin_idx = int(sfreq * tmin)
    tmax_idx = int(sfreq * tmax)
    duration_idxs = tmax_idx - tmin_idx
    # signals_bp = signals_bp[:, :, paddingIdx:(signals_bp.shape[2] - paddingIdx)]
    signals_bp = signals_bp[:, :, paddingBefore_idx:paddingBefore_idx + duration_idxs]
    if verbose:
        print('Compute Features: Crop the padding area for BP')

    # Remove DC offset due to filtering
    for trial in range(signals_bp.shape[0]):
        for ch in range(signals_bp.shape[1]):
            signals_bp[trial, ch, :] = signals_bp[trial, ch, :] - np.mean(signals_bp[trial, ch, :])
    if verbose:
        print('Compute Features: Removed DC offset')

    # Normalization
    if dataset_type == 'train':
        (signals_normalized, trainShiftFactor, trainScaleFactor) = normalizeAcrossEpoch(signals_bp, 'MinMax')
    elif dataset_type == 'test':
        # TODO: make sure shift and scale factor actually exist
        signals_normalized = (signals_bp - shiftFactor) / scaleFactor
        trainShiftFactor = shiftFactor
        trainScaleFactor = scaleFactor
        if verbose:
            print('Compute Features: Normalized according to given shift and scale factor')

    # Downsample
    signals_downsampling = signals_normalized[:, :, ::decim_factor]
    if verbose:
        print('Compute Features: Downsampled')

    # Merge channel and time dimensions
    signals_reshaped = signals_downsampling.reshape(signals_downsampling.shape[0], -1)

    if dataset_type == 'train':
        pca = PCA(0.95)
        pca.fit(signals_reshaped)
        pca.components_ = -pca.components_  # invert the vectors to be consistent with Inaki's code
        signals_pcaed = pca.transform(signals_reshaped)
    elif dataset_type == 'test':
        # PCA switch
        if pca is not None:
            signals_pcaed = pca.transform(signals_reshaped)
            if verbose:
                print('Compute Features: PCA according to given PCA factor')
        else:
            signals_pcaed = signals_reshaped

    return (signals_pcaed, pca, trainShiftFactor, trainScaleFactor)
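# compute_features relies on PCA(0.95) choosing however many components are
# needed to explain 95% of the training variance, and on reusing that fitted
# object for the test set. A minimal sketch of the pattern with made-up feature
# matrices (the sign flip mirrors the convention used above):
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
train_feat = rng.rand(120, 64)           # hypothetical (trial, channel*sample) features
test_feat = rng.rand(30, 64)

pca = PCA(0.95)
pca.fit(train_feat)
pca.components_ = -pca.components_       # sign convention flip, as above
train_pcaed = pca.transform(train_feat)
test_pcaed = pca.transform(test_feat)

print(pca.n_components_, train_pcaed.shape, test_pcaed.shape)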