def test_additive_chi2_sampler():
    """Check AdditiveChi2Sampler against the exact additive chi2 kernel.

    Covers the dense/sparse agreement of the feature map, the errors raised
    for negative input and for an unsupported ``sample_steps``, and the
    handling of ``sample_interval`` / ``sample_interval_``.
    """
    # Exact kernel: broadcast X against Y, then sum the per-feature terms so
    # the result is (n_samples_x, n_samples_y).
    x_bcast = X[:, np.newaxis, :]
    y_bcast = Y[np.newaxis, :, :]
    per_feature = 2 * x_bcast * y_bcast / (x_bcast + y_bcast)
    exact = per_feature.sum(axis=2)

    # Approximate kernel from the explicit feature map.
    sampler = AdditiveChi2Sampler(sample_steps=3)
    x_mapped = sampler.fit_transform(X)
    y_mapped = sampler.transform(Y)
    assert_array_almost_equal(exact, np.dot(x_mapped, y_mapped.T), 1)

    # Sparse input must produce exactly the same features as dense input.
    x_mapped_sparse = sampler.fit_transform(csr_matrix(X))
    y_mapped_sparse = sampler.transform(csr_matrix(Y))
    assert_array_equal(x_mapped, x_mapped_sparse.A)
    assert_array_equal(y_mapped, y_mapped_sparse.A)

    # Negative input is rejected.
    y_negative = Y.copy()
    y_negative[0, 0] = -1
    assert_raises(ValueError, sampler.transform, y_negative)

    # sample_steps=4 without an explicit interval is rejected at fit time.
    sampler = AdditiveChi2Sampler(sample_steps=4)
    assert_raises(ValueError, sampler.fit, X)

    # For the supported step counts the interval starts as None on the
    # estimator and is filled in (as sample_interval_) by fit.
    for n_steps in [1, 2, 3]:
        sampler = AdditiveChi2Sampler(sample_steps=n_steps)
        assert sampler.sample_interval is None
        sampler.fit(X)
        assert sampler.sample_interval_ is not None

    # An explicit interval is stored as given and used unchanged by fit.
    sample_interval = 0.3
    sampler = AdditiveChi2Sampler(sample_steps=4,
                                  sample_interval=sample_interval)
    assert sampler.sample_interval == sample_interval
    sampler.fit(X)
    assert sampler.sample_interval_ == sample_interval
class _AdditiveChi2SamplerImpl:
    """Adapter that forwards ``fit``/``transform`` to a wrapped ``Op`` instance."""

    def __init__(self, **hyperparams):
        """Store the hyperparameters and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it was supplied.

        Returns ``self`` so calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        return self._wrapped_model.transform(X)
def test_additive_chi2_sampler():
    """test that AdditiveChi2Sampler approximates kernel on random data"""
    # Exact kernel: broadcast the two sample sets against each other and sum
    # the per-feature terms, giving an n_samples_x x n_samples_y matrix.
    x_bcast = X[:, np.newaxis, :]
    y_bcast = Y[np.newaxis, :, :]
    per_feature = 2 * x_bcast * y_bcast / (x_bcast + y_bcast)
    exact = per_feature.sum(axis=2)

    # Approximate kernel from the explicit feature map.
    sampler = AdditiveChi2Sampler(sample_steps=3)
    x_mapped = sampler.fit_transform(X)
    y_mapped = sampler.transform(Y)
    assert_array_almost_equal(exact, np.dot(x_mapped, y_mapped.T), 1)

    # Sparse input must give exactly the same features as dense input.
    x_mapped_sparse = sampler.fit_transform(csr_matrix(X))
    y_mapped_sparse = sampler.transform(csr_matrix(Y))
    assert_array_equal(x_mapped, x_mapped_sparse.A)
    assert_array_equal(y_mapped, y_mapped_sparse.A)
def fit(self, X, y=None):
    """Create a fresh ``SKLModel`` from the stored hyperparameters and fit it.

    ``y`` is forwarded only when it is not None. Returns ``self``.
    """
    model = SKLModel(**self._hyperparams)
    # Publish the new model before fitting, so the attribute is replaced
    # even if fitting raises.
    self._sklearn_model = model
    fit_args = (X,) if y is None else (X, y)
    model.fit(*fit_args)
    return self
def test_additivechi2sampler_get_feature_names_out():
    """Check get_feature_names_out for AdditiveChi2Sampler."""
    rng = np.random.RandomState(0)
    X = rng.random_sample(size=(300, 3))
    fitted = AdditiveChi2Sampler(sample_steps=3).fit(X)

    input_names = ["f0", "f1", "f2"]
    # Expected layout: one sqrt block, then a cos block and a sin block for
    # each of the two further sampling steps, each block covering every
    # input feature in order.
    suffixes = [f"{name}_sqrt" for name in input_names]
    for step in (1, 2):
        for func in ("cos", "sin"):
            suffixes.extend(f"{name}_{func}{step}" for name in input_names)

    expected_names = ["additivechi2sampler_" + suffix for suffix in suffixes]
    names_out = fitted.get_feature_names_out(input_features=input_names)
    assert_array_equal(names_out, expected_names)
class AdditiveChi2SamplerImpl():
    """Adapter that forwards ``fit``/``transform`` to a wrapped ``Op`` instance."""

    def __init__(self, sample_steps=2, sample_interval=None):
        """Record the two hyperparameters and build the wrapped ``Op``."""
        self._hyperparams = dict(
            sample_steps=sample_steps,
            sample_interval=sample_interval,
        )
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it was supplied.

        Returns ``self`` so calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        return self._wrapped_model.transform(X)
def test_additive_chi2_sampler_exceptions():
    """Ensures correct error message"""
    sampler = AdditiveChi2Sampler()

    # Same data as X, with a single negative entry.
    negative_X = X.copy()
    negative_X[0, 0] = -1

    # Negative data passed to fit names ``fit`` in the message.
    fit_message = "X in AdditiveChi2Sampler.fit"
    with pytest.raises(ValueError, match=fit_message):
        sampler.fit(negative_X)

    # Negative data passed to transform (after a valid fit) names
    # ``transform`` in the message.
    transform_message = "X in AdditiveChi2Sampler.transform"
    with pytest.raises(ValueError, match=transform_message):
        sampler.fit(X)
        sampler.transform(negative_X)
def test_input_validation():
    """Regression test: kernel approximation transformers accept plain lists.

    There are no assertions; the old versions would simply crash.
    """
    X = [[1, 2], [3, 4], [5, 6]]
    for sampler_cls in (AdditiveChi2Sampler, SkewedChi2Sampler, RBFSampler):
        sampler_cls().fit(X).transform(X)

    # RBFSampler must also accept sparse input.
    X = csr_matrix(X)
    RBFSampler().fit(X).transform(X)
def find_chord(model, file, code):
    """Predict the chord for a WAV file and return ``NtoC`` of the prediction.

    The signal is band-pass filtered (20-7000, order 5), turned into a
    single-row feature matrix via ``mPCP``, and passed through an additive
    chi2 feature map when ``code == 1`` before ``model.predict`` is called.
    """
    sample_rate, signal = scipy.io.wavfile.read(file)
    filtered = bandpass_filter(signal, 20, 7000, sample_rate, order=5)
    features = np.array([mPCP(filtered, sample_rate)])

    sampler = AdditiveChi2Sampler()
    if code == 1:
        features = sampler.fit_transform(features)

    prediction = model.predict(features)
    return NtoC(prediction[0])
def approx_kernel(kernel_structure, data_x, data_y):
    """Map ``data_x`` into an approximate kernel feature space.

    Parameters
    ----------
    kernel_structure : object supporting ``.iloc[0].loc['kernel_type']``
        Only the ``'kernel_type'`` entry of the first row is read.
    data_x : array-like
        Samples to transform.
    data_y : array-like
        Targets; forwarded to ``fit_transform`` for the ``'ACHI2'`` kernel.

    Returns
    -------
    The transformed features produced by the selected sampler.

    Raises
    ------
    NotImplementedError
        If the kernel type is neither ``'RBF'`` nor ``'ACHI2'``.
    """
    kernel_type = kernel_structure.iloc[0].loc['kernel_type']

    if kernel_type == 'RBF':
        rbf_feature = RBFSampler(gamma=1, n_components=10, random_state=1)
        return rbf_feature.fit_transform(data_x)

    if kernel_type == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        # Use the function's own arguments; the previous code referenced
        # ``X`` and ``y``, which are not parameters of this function.
        return chi2sampler.fit_transform(data_x, data_y)

    # todo implement the other methods
    # Previously this fell through and failed with an UnboundLocalError on
    # the return statement; fail with an explicit message instead.
    raise NotImplementedError(
        "kernel_type %r is not supported (expected 'RBF' or 'ACHI2')"
        % (kernel_type,))
def transform_chi2(data):
    """Apply an additive chi2 feature map to every sample in ``data.X``.

    The layout of ``data.X`` is decided from its first element:

    * an ndarray: each element is transformed and cast to float32;
    * a length-2 or length-3 sequence: only the first item of each element
      is transformed, the remaining items are carried over unchanged.

    Returns a new ``DataBunch`` with the transformed samples and the
    original ``Y``, ``file_names`` and ``superpixels``.

    Raises ValueError for any other first-element length.
    """
    chi2 = AdditiveChi2Sampler(sample_steps=2)
    first = data.X[0]

    if isinstance(first, np.ndarray):
        X_new = []
        for sample in data.X:
            X_new.append(chi2.fit_transform(sample).astype(np.float32))
    elif len(first) == 2:
        X_new = []
        for sample in data.X:
            X_new.append((chi2.fit_transform(sample[0]), sample[1]))
    elif len(first) == 3:
        X_new = []
        for sample in data.X:
            X_new.append(
                (chi2.fit_transform(sample[0]), sample[1], sample[2]))
    else:
        raise ValueError("len(x) is weird: %d" % len(first))

    return DataBunch(X_new, data.Y, data.file_names, data.superpixels)
def find_chord(model, file, code):
    """Predict the chord for a WAV file and return ``NtoC`` of the prediction.

    The signal is band-pass filtered (20-7000, order 5) and converted by
    ``mPCP`` into a single-row feature matrix. When the features sum to
    zero the function returns ``'__'`` without calling the model. When
    ``code == 1`` the features go through an additive chi2 feature map
    before ``model.predict`` is called.
    """
    sample_rate, signal = scipy.io.wavfile.read(file)
    filtered = bandpass_filter(signal, 20, 7000, sample_rate, order=5)
    features = mPCP(filtered, sample_rate).reshape(1, -1)
    sampler = AdditiveChi2Sampler()

    # All-zero features: nothing to classify.
    if sum(features.ravel()) == 0:
        return '__'

    if code == 1:
        features = sampler.fit_transform(features)

    prediction = model.predict(features)
    return NtoC(prediction[0])
def approx_kernel(kernel_structure, data_x, data_y):
    """Map ``data_x`` into an approximate kernel feature space.

    Parameters
    ----------
    kernel_structure : object supporting ``.iloc[0].loc['kernel_type']``
        Only the ``'kernel_type'`` entry of the first row is read.
    data_x : array-like
        Samples to transform.
    data_y : array-like
        Targets; forwarded to ``fit_transform`` for the ``'ACHI2'`` kernel.

    Returns
    -------
    The transformed features produced by the selected sampler (also
    printed before returning).

    Raises
    ------
    NotImplementedError
        If the kernel type is neither ``'RBF'`` nor ``'ACHI2'``.
    """
    # The pdb.set_trace() breakpoints that used to sit here and in the RBF
    # branch were removed: they block any non-interactive run.
    print("A")
    kernel_type = kernel_structure.iloc[0].loc['kernel_type']

    if kernel_type == 'RBF':
        rbf_feature = RBFSampler(gamma=1, random_state=1)
        X_features = rbf_feature.fit_transform(data_x)
    elif kernel_type == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        # Use the function's own arguments; the previous code referenced
        # ``X`` and ``y``, which are not parameters of this function.
        X_features = chi2sampler.fit_transform(data_x, data_y)
    else:
        # Previously this fell through and failed with an UnboundLocalError
        # when X_features was printed; fail with an explicit message.
        raise NotImplementedError(
            "kernel_type %r is not supported (expected 'RBF' or 'ACHI2')"
            % (kernel_type,))

    print(X_features)
    return X_features
def test_additive_chi2_sampler():
    """test that AdditiveChi2Sampler approximates kernel on random data"""
    # Exact kernel: broadcast the two sample sets against each other and sum
    # the per-feature terms, giving an n_samples_x x n_samples_y matrix.
    x_bcast = X[:, np.newaxis, :]
    y_bcast = Y[np.newaxis, :, :]
    per_feature = 2 * x_bcast * y_bcast / (x_bcast + y_bcast)
    exact = per_feature.sum(axis=2)

    # Approximate kernel from the explicit feature map.
    sampler = AdditiveChi2Sampler(sample_steps=3)
    x_mapped = sampler.fit_transform(X)
    y_mapped = sampler.transform(Y)
    assert_array_almost_equal(exact, np.dot(x_mapped, y_mapped.T), 1)

    # Sparse input must give exactly the same features as dense input.
    x_mapped_sparse = sampler.fit_transform(csr_matrix(X))
    y_mapped_sparse = sampler.transform(csr_matrix(Y))
    assert_array_equal(x_mapped, x_mapped_sparse.A)
    assert_array_equal(y_mapped, y_mapped_sparse.A)

    # Negative input is rejected.
    y_negative = Y.copy()
    y_negative[0, 0] = -1
    assert_raises(ValueError, sampler.transform, y_negative)

    # sample_steps=4 without an explicit interval is rejected at fit time.
    sampler = AdditiveChi2Sampler(sample_steps=4)
    assert_raises(ValueError, sampler.fit, X)
def generate_data_transformers(self):
    """Build the optional preprocessing objects selected on ``self``.

    Reads ``self.data_transform``, ``self.feature_selection`` and
    ``self.approximation_kernel``; each falsy setting is skipped. For every
    implemented choice the created object gets ``name`` and ``params``
    attributes and is stored on ``self`` (``self.selector``,
    ``self.approx_kernel_map``).

    Raises
    ------
    NotImplementedError
        For a data transform (none is implemented yet) or for a feature
        selection other than ``'VAR'``.
    ValueError
        For an approximation kernel other than ``'RBF'`` or ``'CHI2'``.
    """
    # Data Transformation (Scaling, Normalization)
    if self.data_transform:
        # The 'EXP' and 'NORM' branches were placeholders that could only
        # crash (attribute assignment on a str, a NameError on the bare
        # name ``data_transform``, or an unbound ``transformer``). Fail
        # explicitly until a real transformer exists. When implementing
        # one, set its ``name``/``params`` and assign ``self.transformer``
        # the same way the selector below does, using
        # ``self.data_transform_params``.
        raise NotImplementedError(
            "data_transform %r is not implemented yet"
            % (self.data_transform,))

    # Feature Selection (Var, Chi^2)
    if self.feature_selection:
        if self.feature_selection == 'VAR':
            selector = VarianceThreshold(**self.feature_selection_params)
            selector.name = 'VarianceThreshold'
        else:
            # 'CHI2' was an empty placeholder that left ``selector``
            # unbound; any other value did the same.
            raise NotImplementedError(
                "feature_selection %r is not implemented yet"
                % (self.feature_selection,))
        selector.params = utils.get_params_string(
            self.feature_selection_params)
        self.selector = selector

    # Kernel Approximation (RBF, Chi^2)
    if self.approximation_kernel:
        if self.approximation_kernel == 'RBF':
            approx_kernel_map = RBFSampler(
                **self.kernel_approximation_params)
            approx_kernel_map.name = 'RBFSampler'
        elif self.approximation_kernel == 'CHI2':
            approx_kernel_map = AdditiveChi2Sampler(
                **self.kernel_approximation_params)
            approx_kernel_map.name = 'AdditiveChi2Sampler'
        else:
            raise ValueError(
                "unknown approximation_kernel %r (expected 'RBF' or 'CHI2')"
                % (self.approximation_kernel,))
        # Same attribute the samplers above were built from; the previous
        # spelling ``kernelapproximation_params`` does not match it.
        approx_kernel_map.params = utils.get_params_string(
            self.kernel_approximation_params)
        self.approx_kernel_map = approx_kernel_map
def a_chi(df, drop=None, lags=1, sample_steps=2):
    """Append additive-chi2 features of the lagged frame to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a ``"Close"`` column (passed as ``y`` to
        the sampler's ``fit_transform``).
    drop : column label, optional
        Column excluded from the transform and re-attached, unchanged, as
        the first column of the result.
    lags : int
        Number of rows the frame is shifted by before being transformed.
    sample_steps : int
        Forwarded to ``AdditiveChi2Sampler``.

    Returns
    -------
    pandas.DataFrame
        The original columns (minus the first ``lags`` rows) followed by
        the chi2 features, whose columns are named ``achi_0``, ``achi_1``...
    """
    if drop:
        keep = df[drop]
        df = df.drop([drop], axis=1)

    lagged = df.shift(lags)
    df = df.iloc[lags:, :]
    lagged = lagged.dropna().reset_index(drop=True)

    chi2sampler = AdditiveChi2Sampler(sample_steps=sample_steps)
    features = chi2sampler.fit_transform(lagged, df["Close"])
    df_2 = pd.DataFrame(features, index=df.index)
    # Prefix the chi2 feature columns themselves. The previous code called
    # ``df.add_prefix`` here, which threw the transformed features away and
    # concatenated a renamed copy of the input columns instead.
    df_2 = df_2.add_prefix('achi_')

    # NOTE(review): ``keep`` still has the first ``lags`` rows that were cut
    # from ``df``; concat aligns on the index, so those rows come back with
    # NaN in every other column. Confirm whether that is intended.
    if drop:
        df = pd.concat([keep, df, df_2], axis=1)
    else:
        df = pd.concat([df, df_2], axis=1)
    return df
def train_svm(C=0.1, grid=False):
    """Train a linear SVM on chi2-mapped SIFT bag-of-words superpixel features.

    Fits the bag-of-words model and the SVM on the ``"kTrain"`` split,
    prints the training score, evaluates per-superpixel predictions on the
    training and ``"kVal"`` splits, and finally calls ``tracer()``.

    ``C`` is the SVM regularisation parameter. ``grid`` is accepted but not
    used by this function.
    """
    pascal = PascalSegmentation()

    def compute_superpixels(files):
        # One SLIC segmentation per image.
        return [slic_n(pascal.get_image(name), n_superpixels=100,
                       compactness=10)
                for name in files]

    # --- training split ---
    files_train = pascal.get_split("kTrain")
    superpixels = compute_superpixels(files_train)
    bow = SiftBOW(pascal, n_words=1000, color_sift=True)
    data_train = add_global_descriptor(
        bow.fit_transform(files_train, superpixels))

    svm = LinearSVC(C=C, dual=False, class_weight='auto')
    chi2 = AdditiveChi2Sampler()

    def predict_per_image(bunch):
        # Predict each image's superpixels separately with the fitted map.
        return [svm.predict(chi2.transform(x)) for x in bunch.X]

    X = np.vstack(data_train.X)
    y = np.hstack(data_train.Y)
    X = chi2.fit_transform(X)
    svm.fit(X, y)
    print(svm.score(X, y))
    eval_on_sp(pascal, data_train, predict_per_image(data_train),
               print_results=True)

    # --- validation split ---
    files_val = pascal.get_split("kVal")
    superpixels_val = compute_superpixels(files_val)
    data_val = add_global_descriptor(
        bow.transform(files_val, superpixels_val))
    eval_on_sp(pascal, data_val, predict_per_image(data_val),
               print_results=True)

    tracer()
def chi_squared_projection(features):
    """Return the additive chi2 feature map of ``features`` in CSR format.

    Parameters
    ----------
    features : array-like or scipy sparse matrix
        Input accepted by ``AdditiveChi2Sampler.fit_transform``.

    Returns
    -------
    scipy sparse matrix in CSR format
    """
    # Local import so the block does not depend on a file-level import.
    from scipy import sparse

    chi2_feature = AdditiveChi2Sampler()
    X_transformed = chi2_feature.fit_transform(features)

    if sparse.issparse(X_transformed):
        return X_transformed.tocsr()
    # Dense input yields a dense ndarray, which has no ``tocsr`` method;
    # convert it explicitly instead of failing with AttributeError.
    return sparse.csr_matrix(X_transformed)
# Copy the remaining feature columns (up to index 11) from the data set,
# which is indexed by the column number as a string.
while i < 12:
    X[:, i] = data_set[str(i)]
    i += 1

# Manually creating label values according to data per chord
# It is assumed that the chords are listed in the order
# A, Am, Bm, C, D, Dm, E, Em, F, G in the dataset
y = np.zeros((X.shape)[0])
counter = 0
value = 1
data_per_chord = 200
for i in range(0, (X.shape)[0]):
    # Start a new label after every ``data_per_chord`` rows.
    if counter == data_per_chord:
        value += 1
        counter = 0
    y[i] = value
    counter += 1

sampler = AdditiveChi2Sampler()
# Comment the above sampler and uncomment the lower one to change kernels
#sampler = RBFSampler(gamma=1, random_state=1)
X = sampler.fit_transform(X)
model.fit(X, y)

filename = 'trained_ML_model_ver3.sav'
# Fit and save the model with filename. The file is opened in a ``with``
# block so the handle is flushed and closed before it is read back.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load back the model to test for training accuracy
with open(filename, 'rb') as model_file:
    myModel = pickle.load(model_file)
pred = myModel.predict(X)
print(accuracy_score(pred, y))
def gen_pipeline(args):
    """Build the pipeline and grid-search parameter grid for a classifier.

    Parameters
    ----------
    args : object
        Must provide ``classifier`` (one of ``'log_reg'``, ``'svm_nystrom'``,
        ``'svm_linear'``, ``'svm_chi'``, case-insensitive) and
        ``cls_weights_bool``. When ``cls_weights_bool == True`` a
        ``class_weight=['balanced']`` entry is added to every grid.

    Returns
    -------
    (OVR_pipe, param_grid)
        The pipeline and its parameter grid: a list of dicts, or a single
        dict for ``'svm_linear'``.

    Raises
    ------
    Exception
        If ``args.classifier`` is not one of the supported names.
    """
    #TODO include argument for paramgrid as json for further use and refactor code into a simplified loop.
    classifier = args.classifier.lower()
    # Grid key that reaches ``class_weight`` on the final estimator; the
    # svm_linear branch overrides it because its SVC is nested deeper.
    class_weight_key = 'ovr__class_weight'

    if classifier == 'log_reg':
        param_grid = [{
            'ovr__solver': ['saga'],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial']
        }, {
            'ovr__solver': ['saga'],
            'ovr__penalty': ['elasticnet'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial'],
            'ovr__l1_ratio': np.array([0.1, 0.3, 0.5, 0.9])
        }, {
            'ovr__solver': ['sag'],
            'ovr__penalty': ['l2'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial']
        }]
        OVR_pipe = Pipeline([
            ('ovr', LogisticRegression(random_state=0, max_iter=1000)),
        ])
    elif classifier == 'svm_nystrom':
        param_grid = [{
            'nystreum__gamma': [100, 10, 1, 0.1],
            'nystreum__n_components': [300, 60, 11],
            'nystreum__kernel': ['rbf'],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }, {
            'nystreum__gamma': [100, 10, 1, 0.1],
            'nystreum__n_components': [300, 60, 11],
            'nystreum__kernel': ['sigmoid', 'polynomial'],
            'ovr__penalty': ['l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }]
        OVR_pipe = Pipeline([
            ('nystreum', Nystroem(random_state=1)),
            ('ovr', SGDClassifier(max_iter=5000, tol=1e-3)),
        ])
        #BaggingClassifier(SVC(random_state=0,max_iter=1000),n_estimators=50)
    elif classifier == 'svm_linear':
        # The bagged estimator is itself a Pipeline with a single 'svc'
        # step, so SVC parameters must be addressed through that step name;
        # 'ovr__base_estimator__C' is not a parameter of a Pipeline.
        param_grid = {
            'ovr__base_estimator__svc__C': [10, 100, 1000],
            'ovr__base_estimator__svc__kernel': ['linear']
        }
        svc_pipe = Pipeline([
            ('svc', SVC()),
        ], verbose=True)
        OVR_pipe = Pipeline([
            ('ovr', BaggingClassifier(svc_pipe)),
        ], verbose=True)
        # BaggingClassifier has no class_weight; the nested SVC does.
        class_weight_key = 'ovr__base_estimator__svc__class_weight'
    elif classifier == 'svm_chi':
        param_grid = [{
            'chi_sqr__sample_steps': [1, 2, 3],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }]
        OVR_pipe = Pipeline([
            ('chi_sqr', AdditiveChi2Sampler()),
            ('ovr', SGDClassifier(max_iter=5000, tol=1e-3)),
        ])
    else:
        raise Exception(
            "Grid seach is only possible for SVM and Logistic regression classifiers."
        )

    #ipdb.set_trace()
    # ``== True`` is kept on purpose: a non-empty string such as "False"
    # coming from the command line must not enable class weights.
    if args.cls_weights_bool == True:  # noqa: E712
        # param_grid may be a single dict (svm_linear) or a list of dicts;
        # iterating a dict directly would yield its string keys.
        grids = param_grid if isinstance(param_grid, list) else [param_grid]
        for grid in grids:
            grid[class_weight_key] = ['balanced']

    return OVR_pipe, param_grid
else: # soundnet feature # X = numpy.loadtxt(os.path.join('soundnetfeat', 'result_{}.csv'.format(feat_type.split('_')[1])), delimiter=',') X = numpy.loadtxt(os.path.join('soundnetfeat', 'result_08.csv'), delimiter=',') # for c in ['04', '06', '08']: # X_cur = numpy.loadtxt(os.path.join('soundnetfeat', 'result_{}.csv'.format(c)), delimiter=',') # X = numpy.concatenate((X, X_cur), axis=1) # val_file = [line.strip() for line in open('../all_val.lst', 'r')] test_file = [line.strip() for line in open('../all_test_fake.lst', 'r')] val_X = X[-len(val_file) - len(test_file):-len(test_file)] test_X = X[-len(test_file):] clf = pickle.load(open(model_file, 'rb')) if feat_type == 'mfcc': chi_feature = AdditiveChi2Sampler(sample_steps=2) val_X = chi_feature.fit_transform(val_X) test_X = chi_feature.fit_transform(test_X) val_conf = clf.decision_function(val_X) test_conf = clf.decision_function(test_X) output_dir = output_file.split('/')[0] if not os.path.exists(output_dir): os.mkdir(output_dir) numpy.savetxt(output_file, val_conf, fmt='%2.4f') test_file_name = '_'.join(output_file.split('/')[1].split('_')[:2]).upper() test_output_file = os.path.join(output_dir,
def __init__(self, **hyperparams):
    """Keep the given hyperparameters and build the wrapped ``Op`` from them."""
    self._hyperparams = hyperparams
    # Same dict object as the one just stored on the instance.
    wrapped = Op(**hyperparams)
    self._wrapped_model = wrapped
from sklearn.cluster.bicluster import SpectralCoclustering from sklearn.manifold.spectral_embedding_ import SpectralEmbedding from sklearn.preprocessing.data import StandardScaler from sklearn.manifold.t_sne import TSNE from sklearn.linear_model.theil_sen import TheilSenRegressor from sklearn.mixture.dpgmm import VBGMM from sklearn.feature_selection.variance_threshold import VarianceThreshold import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) clf_dict = {'ARDRegression':ARDRegression(), 'AdaBoostClassifier':AdaBoostClassifier(), 'AdaBoostRegressor':AdaBoostRegressor(), 'AdditiveChi2Sampler':AdditiveChi2Sampler(), 'AffinityPropagation':AffinityPropagation(), 'AgglomerativeClustering':AgglomerativeClustering(), 'BaggingClassifier':BaggingClassifier(), 'BaggingRegressor':BaggingRegressor(), 'BayesianGaussianMixture':BayesianGaussianMixture(), 'BayesianRidge':BayesianRidge(), 'BernoulliNB':BernoulliNB(), 'BernoulliRBM':BernoulliRBM(), 'Binarizer':Binarizer(), 'Birch':Birch(), 'CCA':CCA(), 'CalibratedClassifierCV':CalibratedClassifierCV(), 'DBSCAN':DBSCAN(), 'DPGMM':DPGMM(), 'DecisionTreeClassifier':DecisionTreeClassifier(),
############################ # Compute spatial histograms ############################ if VERBOSE: print str(datetime.now()) + ' start computing hists' if (not exists(conf.histPath)) | OVERWRITE: hists = birdid_utils.computeHistograms(all_images, model, conf) savemat(conf.histPath, {'hists': hists}) else: if VERBOSE: print 'using old hists from ' + conf.histPath hists = loadmat(conf.histPath)['hists'] ##################### # Compute feature map ##################### if VERBOSE: print str(datetime.now()) + ' start computing feature map' transformer = AdditiveChi2Sampler() histst = transformer.fit_transform(hists) train_data = histst[selTrain] test_data = histst[selTest] ########### # Train SVM ########### if (not exists(conf.modelPath)) | OVERWRITE: if VERBOSE: print str(datetime.now()) + ' training liblinear svm' if VERBOSE == 'SVM': verbose = True else: verbose = False clf = svm.LinearSVC(C=conf.svm.C) if VERBOSE: print clf
valid_datagen = ImageDataGenerator() train_data_dir = cwd + '/data/sorted/train' valid_data_dir = cwd + '/data/sorted/valid' test_data_dir = cwd + '/data/sorted/test' train_generator = train_datagen.flow_from_directory( train_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='sparse' ) train_data_n = len(os.listdir(train_data_dir + '/1')) + len(os.listdir(train_data_dir + '/0')) + len(os.listdir(train_data_dir + '/2')) chi_feature = AdditiveChi2Sampler() clf = SGDClassifier(class_weight={0:1.0, 1:1.2, 2:1.0}) classes_ = np.array([0, 1, 2]) rbf_feature = RBFSampler(gamma=4.0, n_components=3000) #rbf_feature = Nystroem(n_components=100, gamma=1.0, random_state=1) """ feature_train_stack = np.zeros((100, 2048)) - 1 label_train_stack = np.zeros((100, 1)) - 1 for i in range(train_data_n // batch_size): #for i in range(2): print("======= data reading! =======") print("batch No." + str(i) )
def main():
    """Evaluate a bag-of-words + linear SVM classifier on UECFOOD256 crops.

    For every class directory, reads ``bb_info.txt``, cuts each annotated
    bounding box out of its image, resizes it and extracts a bag-of-words
    feature. The features are post-processed, mapped with an additive chi2
    feature map, scaled, and scored with 10-fold cross-validation; the
    resulting confusion matrix is saved under the name ``"cm_bow"``.
    """
    VOCABULARY_SIZE = 1000
    STEP_SIZE = 4
    bbox_columns = ["_x1", "_y1", "_x2", "_y2"]

    bow = BagOfWordsDescriptor(const.IMAGE_SIZE, VOCABULARY_SIZE, STEP_SIZE,
                               scale_data=False)
    data = []
    target = []

    # for entry in list(os.scandir(const.PATH_TO_ROOT_UECFOOD256))[0:4]:
    for entry in os.scandir(const.PATH_TO_ROOT_UECFOOD256):
        if entry.is_dir(follow_symlinks=False):
            bb_info = []
            read_bb_info_txt(entry.path + "/bb_info.txt", bb_info)
            df = pd.DataFrame(bb_info,
                              columns=['_img_name', '_x1', '_y1', '_x2',
                                       '_y2', '_cat', '_abs_path'])
            # The directory name is the integer class label.
            label = int(entry.name)
            print(label)
            # for image_path in list(glob.iglob(entry.path + '/*.jpg', recursive=False))[0:25]:
            for image_path in glob.iglob(entry.path + '/*.jpg',
                                         recursive=False):
                filename_without_jpg = int(
                    os.path.basename(image_path).replace(".jpg", ''))
                # DataFrame.as_matrix was removed in pandas 1.0; selecting
                # the columns and taking ``.values`` yields the same array
                # on old and new pandas versions.
                gt_bboxes = df.loc[df._img_name == filename_without_jpg,
                                   bbox_columns].values
                image = imread(image_path)
                for bbox in gt_bboxes:
                    # print(bbox)
                    sub_image = get_sub_image_from_rectangle(image, bbox,
                                                             True)
                    sub_image = resize(sub_image, const.IMAGE_SIZE)
                    data.append(bow.get_feature(sub_image))
                    target.append(label)

    print(len(data), len(target))
    X, y = bow.post_process_data(data, target)
    print("X (type: %s) shape: %s || target (type: %s) shape: %s" %
          (X.dtype, X.shape, y.dtype, y.shape))

    # "Free memory" to avoid MemoryError
    data = []
    bow = []
    target = []
    print("gc.collect() = ", gc.collect())

    chi2 = AdditiveChi2Sampler(sample_steps=2)
    X = chi2.fit_transform(X)
    X = scale(X)
    print("X (type: %s) shape: %s || target (type: %s) shape: %s" %
          (X.dtype, X.shape, y.dtype, y.shape))

    classifier = LinearSVC(fit_intercept=False, dual=False)
    print(classifier)
    cv_scores = cross_val_multiple_scores(classifier, X=X, y=y, n_folds=10,
                                          n_jobs=1)
    print(cv_scores)
    save_object(cv_scores['cv_confusion_matrix'], "cm_bow", overwrite=True)
def __init__(self, sample_steps=2, sample_interval=None):
    """Record the two hyperparameters and build the wrapped ``Op`` from them."""
    hyperparams = dict(
        sample_steps=sample_steps,
        sample_interval=sample_interval,
    )
    self._hyperparams = hyperparams
    self._wrapped_model = Op(**hyperparams)
def main(visualize=False, learn=False, actions=None, subjects=None, n_frames=220):
    """Run per-frame pose estimation against a pose database and report accuracy.

    For every frame returned by the player the function masks the person,
    runs the face/hand detectors, computes geodesic extrema, and then either
    adds the ground-truth pose to the pose database (``learn`` truthy) or
    queries the database for the closest pose and accumulates its accuracy.
    After the loop it prints the averaged results, saves the database when
    learning, calls ``embed()`` and returns.

    Parameters:
        visualize: not read anywhere in the body.
        learn: truthy to update the pose database, falsy to query it.
        actions, subjects: forwarded to ``MHADPlayer``; element 0 of each is
            used in the final results message.
        n_frames: only referenced from a commented-out loop condition.

    NOTE(review): this is Python 2 code (print statements, ``xrange``).
    NOTE(review): the source was whitespace-collapsed; the block nesting
    below is reconstructed from syntax and should be confirmed against the
    original file.
    """
    # learn = True
    # learn = False

    # NOTE(review): ``x is []`` compares identity with a brand-new list and
    # is therefore always False, so these defaults are never applied.
    if actions is []:
        actions = [2]
    if subjects is []:
        subjects = [2]
    # actions = [1]
    # actions = [1, 2, 3, 4, 5]
    # subjects = [1]

    # Hard-wired switch: the MHAD player branch is always taken.
    if 1:
        MHAD = True
        cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/',
                         kinect=1,
                         actions=actions,
                         subjects=subjects,
                         reps=[1],
                         get_depth=True,
                         get_color=True,
                         get_skeleton=True,
                         fill_images=False)
    else:
        MHAD = False
        cam = KinectPlayer(base_dir='./',
                           device=2,
                           bg_subtraction=True,
                           get_depth=True,
                           get_color=True,
                           get_skeleton=True,
                           fill_images=False)
        # The second assignment overwrites the first; only background B is used.
        bg = Image.open(
            '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif')
        bg = Image.open(
            '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif')
        cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape(
            [240, 320]).clip(0, 4500)
    height, width = cam.depthIm.shape
    skel_previous = None

    # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl'))
    # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl'))
    # clf_lbp,lbp_approx = pickle.load(open('lbp_histogram_approx_svm_5class.pkl'))

    face_detector = FaceDetector()
    hand_detector = HandDetector(cam.depthIm.shape)
    curve_detector = CurveDetector(cam.depthIm.shape)

    # Video writer
    # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240))

    # Save Background model
    # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I')
    # im.save("/Users/Colin/Desktop/k2.png")

    # Setup pose database
    # The second assignment wins: the database is opened with append=False.
    append = True
    append = False
    pose_database = PoseDatabase("PoseDatabase.pkl",
                                 learn=learn,
                                 search_joints=[0, 4, 7, 10, 13],
                                 append=append)

    # Per-joint classification
    head_features = []
    hand_features = []
    feet_features = []
    # One slot per joint (14 slots) for each descriptor type; a slot stays
    # None until the first sample for that joint is stored.
    joint_features = {
        'geodesic': [None] * 14,
        'color_histograms': [None] * 14,
        'lbp': [None] * 14
    }

    # Evaluation
    accuracy_all = []
    joint_accuracy_all = []
    geo_accuracy = []
    color_accuracy = []
    lbp_accuracy = []

    frame_count = 0
    frame_rate = 2
    if not MHAD:
        cam.next(350)
    frame_prev = 0
    # NOTE(review): the whole frame loop sits in a bare try/except that
    # passes, so any error inside it silently ends processing.
    try:
        # if 1:
        while cam.next(frame_rate):  # and frame_count < n_frames:
            # Progress message once more than 100 frames have passed since
            # the previous one.
            if frame_count - frame_prev > 100:
                print ""
                print "Frame #{0:d}".format(frame_count)
                frame_prev = frame_count

            if not MHAD:
                if len(cam.users) == 0:
                    continue
                else:
                    # cam.users = [np.array(cam.users[0]['jointPositions'].values())]
                    if np.any(cam.users[0][0] == -1):
                        continue
                    cam.users[0][:, 1] *= -1
                    cam.users_uv_msr = [
                        cam.camera_model.world2im(cam.users[0], [240, 320])
                    ]

            # Apply mask to image
            if MHAD:
                mask = cam.get_person(2) > 0
            else:
                mask = cam.get_person() > 0
            # Skip frames in which the person mask is empty.
            if np.all(mask == False):
                continue

            im_depth = cam.depthIm
            cam.depthIm[cam.depthIm > 3000] = 0
            im_color = cam.colorIm * mask[:, :, None]
            cam.colorIm *= mask[:, :, None]

            pose_truth = cam.users[0]
            pose_truth_uv = cam.users_uv_msr[0]

            # Get bounding box around person
            box = nd.find_objects(mask)[0]
            d = 20
            # Widen box by d on each side, clamped to the image extent
            box = (slice(np.maximum(box[0].start-d, 0), \
                         np.minimum(box[0].stop+d, height-1)), \
                   slice(np.maximum(box[1].start-d, 0), \
                         np.minimum(box[1].stop+d, width-1)))
            box_corner = [box[0].start, box[1].start]
            ''' ---------- ----------------------------------- --------'''
            ''' ----------- Feature Detector centric approach ---------'''
            ''' ---------- ----------------------------------- --------'''
            ''' ---- Calculate Detectors ---- '''
            # Face detection
            face_detector.run(im_color[box])
            # Skin detection
            hand_markers = hand_detector.run(im_color[box], n_peaks=3)
            # curve detection
            # curve_markers = curve_detector.run((im_depth*mask)[box], n_peaks=3)
            # Calculate LBPs ##Max P=31 for LBPs becuase of datatype
            # x = local_occupancy_pattern(cam.depthIm[box]*mask[box], [5,5,5],[3,3,3])
            # lop_texture = local_binary_pattern_depth(cam.depthIm[box]*mask[box], 10, 20, px_diff_thresh=100)*mask[box]
            # lop_markers = []#peak_local_max(lop_texture, min_distance=20, num_peaks=5, exclude_border=False)
            # lbp_texture = local_binary_pattern(cam.depthIm[box]*mask[box], 6, 20)*mask[box]
            # Calculate Geodesic Extrema
            im_pos = cam.camera_model.im2PosIm(
                cam.depthIm * mask)[box] * mask[box][:, :, None]
            geodesic_markers = geodesic_extrema_MPI(im_pos,
                                                    iterations=5,
                                                    visualize=False)
            # geodesic_markers, geo_map = geodesic_extrema_MPI(im_pos, iterations=5, visualize=True)
            geodesic_markers_pos = im_pos[geodesic_markers[:, 0],
                                          geodesic_markers[:, 1]]

            markers = list(geodesic_markers) + list(
                hand_markers)  #+ list(lop_markers) + curve_markers
            markers = np.array([list(x) for x in markers])

            # Hard-wired switch: the database approach is always run.
            if 1:
                ''' ---- Database lookup ---- '''
                # Mean over the entries of im_pos whose third channel is non-zero
                pts_mean = im_pos[(im_pos != 0)[:, :, 2]].mean(0)
                if learn:
                    # Normalize pose
                    pose_uv = cam.users_uv[0]
                    # Frames with any zero entry in pose_uv are skipped
                    if np.any(pose_uv == 0):
                        print "skip"
                        frame_count += frame_rate
                        continue
                    # print pose_truth[2], pts_mean
                    pose_database.update(pose_truth - pts_mean)
                else:
                    # Concatenate markers
                    markers = list(geodesic_markers) + hand_markers
                    # markers = list(geodesic_markers) + list(lop_markers) + curve_markers + hand_markers
                    markers = np.array([list(x) for x in markers])
                    # Normalize pose
                    pts = im_pos[markers[:, 0], markers[:, 1]]
                    pts = np.array([x for x in pts if x[0] != 0])
                    pts -= pts_mean
                    # Get closest pose
                    pose = pose_database.query(pts, knn=1)
                    # pose = pose_database.weighted_query(pts, knn=1)
                    # pose = pose_database.reverse_query(pts[:,[1,0,2]])

                    # im_pos -= pts_mean
                    # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000)
                    # pose = np.dot(R.T, pose.T).T - t
                    # pose = np.dot(R, pose.T).T + t

                    pose += pts_mean
                    pose_uv = cam.camera_model.world2im(
                        pose, cam.depthIm.shape)

                    # Constrain (disabled by the constant-false condition)
                    if 0:
                        try:
                            ''' This does worse because the joint may fall to a different part of the body (e.g. 
hand to torso) which throws the error upward '''
                            surface_map = nd.distance_transform_edt(
                                im_pos[:, :, 2] == 0,
                                return_distances=False,
                                return_indices=True)
                            pose_uv[:, :2] = surface_map[:, pose_uv[:, 0] -
                                                         box_corner[0],
                                                         pose_uv[:, 1] -
                                                         box_corner[1]].T + [
                                                             box_corner[0],
                                                             box_corner[1]
                                                         ]
                            pose = cam.camera_model.im2world(pose_uv)

                            # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5)
                            # skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5)
                            # skel_current = collision_constraints(skel_current, constraint_links)
                            # embed()
                            # pose_uv_box = pose_uv - [box_corner[0], box_corner[1], 0]
                            # pose_uv_box = pose_uv_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999])
                            # joint_size = np.array([75]*14)
                            # pose_n, pose_uv_n = ray_cast_constraints(pose, pose_uv_box, im_pos, surface_map, joint_size)
                            # print 'Pose',pose,pose_n
                            # pose = pose_n
                            # pose_uv = pose_uv_n + [box_corner[0], box_corner[1], 0]

                        except:
                            print 'error constraining'

                    # skel_previous = np.array(pose, copy=True)

                display_markers(cam.colorIm,
                                hand_markers[:2],
                                box,
                                color=(0, 250, 0))
                if len(hand_markers) > 2:
                    display_markers(cam.colorIm, [hand_markers[2]],
                                    box,
                                    color=(0, 200, 0))
                display_markers(cam.colorIm,
                                geodesic_markers,
                                box,
                                color=(200, 0, 0))
                # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100))
                # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200))

            # Descriptor-based approach: disabled by the constant-false
            # condition, so nothing in this block runs.
            # NOTE(review): the extent of this block is reconstructed; the
            # two pose assignments at its end are assumed to belong to it.
            if 0:
                ''' ---------- ----------------------------------- --------'''
                ''' ---------- Feature Descriptor centric approach --------'''
                ''' ---------- ----------------------------------- --------'''
                ''' ---- Calculate Descriptors ---- '''
                hand_markers = np.array(hand_markers)
                # Geodesics
                geodesic_features = relative_marker_positions(
                    im_pos, geodesic_markers_pos[:, [1, 0, 2]])
                geodesic_features = np.sort(geodesic_features)
                # Color Histogram
                skin = skimage.exposure.rescale_intensity(
                    hand_detector.im_skin,
                    out_range=[0, 255]).astype(np.uint8)
                color_histograms = local_histograms(
                    skin, n_bins=5, max_bound=255,
                    patch_size=11) * mask[box][:, :, None]
                # LBP Histogram
                lbp_texture = local_binary_pattern(
                    cam.depthIm[box] * mask[box], 6, 5) * mask[box]
                lbp_histograms = local_histograms(
                    lbp_texture.astype(np.uint8),
                    n_bins=10,
                    max_bound=2**6,
                    patch_size=11) * mask[box][:, :, None]
                # for i in range(10):
                # 	subplot(2,5,i+1)
                # 	imshow(lbp_histograms[:,:,i])
                ''' ---- Per Joint Learning ---- '''
                if learn:
                    # For each ground-truth joint with a non-zero first
                    # coordinate, store (first sample) or stack (later
                    # samples) the descriptor found at the joint's position
                    # relative to the box corner.
                    for ii, i in enumerate(pose_truth_uv):
                        if i[0] != 0:
                            try:
                                if joint_features['geodesic'][ii] is None:
                                    joint_features['geodesic'][
                                        ii] = geodesic_features[i[1] -
                                                                box_corner[0],
                                                                i[0] -
                                                                box_corner[1]]
                                else:
                                    joint_features['geodesic'][ii] = np.vstack(
                                        [
                                            joint_features['geodesic'][ii],
                                            (geodesic_features[i[1] -
                                                               box_corner[0],
                                                               i[0] -
                                                               box_corner[1]])
                                        ])

                                if joint_features['color_histograms'][
                                        ii] is None:
                                    joint_features['color_histograms'][
                                        ii] = color_histograms[i[1] -
                                                               box_corner[0],
                                                               i[0] -
                                                               box_corner[1]]
                                else:
                                    joint_features['color_histograms'][
                                        ii] = np.vstack([
                                            joint_features['color_histograms']
                                            [ii],
                                            deepcopy(color_histograms[
                                                i[1] - box_corner[0],
                                                i[0] - box_corner[1]])
                                        ])

                                if joint_features['lbp'][ii] is None:
                                    joint_features['lbp'][ii] = lbp_histograms[
                                        i[1] - box_corner[0],
                                        i[0] - box_corner[1]]
                                else:
                                    joint_features['lbp'][ii] = np.vstack([
                                        joint_features['lbp'][ii],
                                        deepcopy(lbp_histograms[i[1] -
                                                                box_corner[0],
                                                                i[0] -
                                                                box_corner[1]])
                                    ])

                            except:
                                print "error"
                ''' ---- Per Joint Classification ---- '''
                if not learn:
                    # NOTE(review): clf_geo, clf_color, clf_lbp and
                    # color_approx are only assigned in commented-out loads
                    # above or in the code after the final ``return``; each
                    # bare except below would hide the resulting errors.
                    try:
                        # Geodesic clasification
                        tmp = geodesic_features.reshape([-1, 6])
                        tmp = np.array([x / x[-1] for x in tmp])
                        tmp = np.nan_to_num(tmp)
                        geo_clf_map = clf_geo.predict(tmp).reshape(
                            im_pos.shape[:2]) * mask[box]
                        geo_clf_labels = geo_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        geo_accuracy += [
                            geo_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'G', np.mean(
                            geo_accuracy,
                            0), geo_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('Geo',
                                   geo_clf_map / float(geo_clf_map.max()))
                    except:
                        pass

                    try:
                        # Color histogram classification
                        color_test = color_approx.transform(
                            color_histograms.reshape([-1, 5]))
                        color_clf_map = clf_color.predict(color_test).reshape(
                            im_pos.shape[:2]) * mask[box]
                        color_clf_labels = color_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        color_accuracy += [
                            color_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'C', np.mean(
                            color_accuracy,
                            0), color_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('Col',
                                   color_clf_map / float(color_clf_map.max()))
                    except:
                        pass

                    try:
                        # lbp histogram classification
                        # NOTE(review): this uses color_approx on the LBP
                        # histograms; the commented-out load above names a
                        # separate lbp_approx. Confirm which is intended.
                        lbp_test = color_approx.transform(
                            lbp_histograms.reshape([-1, 10]))
                        lbp_clf_map = clf_lbp.predict(lbp_test).reshape(
                            im_pos.shape[:2]) * mask[box]
                        lbp_clf_labels = lbp_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        lbp_accuracy += [
                            lbp_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'L', np.mean(
                            lbp_accuracy,
                            0), lbp_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('LBP',
                                   lbp_clf_map / float(lbp_clf_map.max()))
                    except:
                        pass

                pose_uv = pose_truth_uv
                pose = pose_truth

            # ''' ---- Accuracy ---- '''
            if 1 and not learn:
                # pose_truth = cam.users[0]
                error = pose_truth - pose
                # print "Error", error
                # Euclidean distance per joint between truth and estimate
                error_l2 = np.sqrt(np.sum(error**2, 1))
                # error_l2 = np.sqrt(np.sum(error[:,:2]**2, 1))
                joint_accuracy_all += [error_l2]
                # Fraction of the 14 joints whose error is below 150
                accuracy = np.sum(error_l2 < 150) / 14.
                accuracy_all += [accuracy]
                print "Current", accuracy
                # print "Running avg:", np.mean(accuracy_all)
                # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1)
                # print "Joint avg (overall):", np.mean(joint_accuracy_all)
            ''' --- Visualization --- '''
            # Ground-truth skeleton is drawn with an explicit colour, the
            # estimated one with display_skeletons' default.
            cam.colorIm = display_skeletons(cam.colorIm,
                                            pose_truth_uv,
                                            skel_type='Kinect',
                                            color=(0, 255, 0))
            cam.colorIm = display_skeletons(cam.colorIm,
                                            pose_uv,
                                            skel_type='Kinect')
            cam.visualize()

            # print "Extrema:", geo_clf_map[geodesic_markers[:,0], geodesic_markers[:,1]]
            # print "Skin:", geo_clf_map[hand_markers[:,0], hand_markers[:,1]]
            # print "Skin val:", hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]]
            # hand_data += [[x[0] for x in hand_markers],
            # 				[x[1] for x in hand_markers],
            # 				list(hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]])]

            # ------------------------------------------------------------

            # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8))
            # video_writer.write(cam.colorIm[:,:,[2,1,0]])

            frame_count += frame_rate
    except:
        pass

    # NOTE(review): indexing subjects/actions here fails when they are left
    # at their None defaults (see the ``is []`` checks at the top).
    print "-- Results for subject {:d} action {:d}".format(
        subjects[0], actions[0])
    print "Running avg:", np.mean(accuracy_all)
    print "Joint avg (overall):", np.mean(joint_accuracy_all)
    # print 'Done'

    if learn:
        pose_database.save()
        print 'Pose database saved'

    # NOTE(review): assuming these two statements are at function level,
    # everything after the ``return`` is unreachable. It is kept verbatim.
    embed()
    return
    ''' --- Format Geodesic features ---'''
    geodesics_train = []
    geodesics_labels = []
    for i in xrange(len(joint_features['geodesic'])):
        # joint_features['geodesic'][i] = np.array([np.sort(x) for x in joint_features['geodesic'][i] if x[0] != 0])
        # Scale each kept sample by its own maximum
        joint_features['geodesic'][i] = np.array(
            [x / x.max() for x in joint_features['geodesic'][i] if x[0] != 0])
        ii = i
        # Labels are only collected for joints 0, 1, 4, 7, 10 and 13
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            geodesics_labels += [
                i * np.ones(len(joint_features['geodesic'][i]))
            ]
    geodesics_train = np.vstack(
        [joint_features['geodesic'][x] for x in [0, 1, 4, 7, 10, 13]])
    # geodesics_train = np.vstack(joint_features['geodesic'])
    geodesics_labels = np.hstack(geodesics_labels)

    figure(1)
    title('Distances of each joint to first 6 geodesic extrema')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Distance')
        xlabel('Sample')
        plot(joint_features['geodesic'][i])
        axis([0, 400, 0, 1600])

    # Learn geodesic classifier
    clf_geo = SGDClassifier(n_iter=10000,
                            alpha=.01,
                            n_jobs=-1,
                            class_weight='auto')
    clf_geo.fit(geodesics_train, geodesics_labels)
    print clf_geo.score(geodesics_train, geodesics_labels)

    geodesic_features = np.sort(geodesic_features)
    sgd_map = clf_geo.predict(geodesic_features.reshape([-1, 6])).reshape(
        im_pos.shape[:2])

    # NOTE(review): the file is opened in text mode ('w') while a binary
    # pickle protocol is requested, and the handle is never closed.
    pickle.dump(clf_geo, open('geodesic_svm_sorted_scaled_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
    # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl'))
    ''' --- Color Histogram features ---'''
    color_train = []
    color_labels = []
    for i in xrange(len(joint_features['color_histograms'])):
        ii = i
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            color_labels += [
                i * np.ones(len(joint_features['color_histograms'][i]))
            ]
        # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))]
    # color_train = np.vstack(joint_features['color_histograms'])
    color_train = np.vstack(
        [joint_features['color_histograms'][x] for x in [0, 1, 4, 7, 10, 13]])
    color_labels = np.hstack(color_labels)

    # Additive chi2 feature map followed by a linear SGD classifier
    color_approx = AdditiveChi2Sampler()
    color_approx_train = color_approx.fit_transform(color_train)
    clf = SGDClassifier(n_iter=10000,
                        alpha=.01,
                        n_jobs=-1,
                        class_weight='auto')
    clf.fit(color_approx_train, color_labels)
    print clf.score(color_approx_train, color_labels)

    color_test = color_approx.transform(color_histograms.reshape([-1, 5]))
    sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box]

    figure(1)
    title('Color Histograms per Joint')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Count')
        xlabel('Sample')
        plot(joint_features['color_histograms'][i])
        axis([0, 10, 0, 30])

    for i in range(5):
        subplot(1, 5, i + 1)
        imshow(color_histograms[:, :, i])

    # NOTE(review): same text-mode / unclosed-handle concern as above.
    pickle.dump([clf, color_approx],
                open('color_histogram_approx_svm_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
    # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl'))
    ''' --- LBP Histogram features ---'''
    # The color_* names are reused here for the LBP data.
    color_train = []
    color_labels = []
    for i in xrange(len(joint_features['lbp'])):
        ii = i
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            color_labels += [i * np.ones(len(joint_features['lbp'][i]))]
        # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))]
    # color_train = np.vstack(joint_features['color_histograms'])
    color_train = np.vstack(
        [joint_features['lbp'][x] for x in [0, 1, 4, 7, 10, 13]])
    color_labels = np.hstack(color_labels)

    color_approx = AdditiveChi2Sampler()
    color_approx_train = color_approx.fit_transform(color_train)
    clf = SGDClassifier(n_iter=10000,
                        alpha=.01,
                        n_jobs=-1,
                        class_weight='auto')
    clf.fit(color_approx_train, color_labels)
    print clf.score(color_approx_train, color_labels)

    color_test = color_approx.transform(lbp_histograms.reshape([-1, 10]))
    sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box]

    figure(1)
    title('LBP Histograms per Joint')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Count')
        xlabel('Sample')
        plot(joint_features['lbp'][i])
        axis([0, 10, 0, 30])

    # NOTE(review): this plots color_histograms, not lbp_histograms, in the
    # LBP section. Confirm whether that is intended.
    for i in range(5):
        subplot(1, 5, i + 1)
        imshow(color_histograms[:, :, i])

    # NOTE(review): same text-mode / unclosed-handle concern as above.
    pickle.dump([clf, color_approx],
                open('lbp_histogram_approx_svm_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
def cross_validate_bow(filename):
    '''
    Grid-search SVC hyper-parameters on additive-chi2-mapped features and
    print a report for each scoring function (precision, then recall).

    Adapted from this example:
    http://scikit-learn.org/stable/auto_examples/grid_search_digits.html#example-grid-search-digits-py

    The feature matrix ``hogsH`` and the target vector ``labels`` are not
    defined in this function; they must be available from an enclosing scope.

    NOTE(review): ``filename`` is never read here -- presumably the features
    were meant to be loaded from it; confirm against the callers.

    Returns None; all results are written to stdout.
    '''
    from sklearn.cross_validation import train_test_split
    from sklearn.grid_search import GridSearchCV
    from sklearn.metrics import classification_report
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.svm import SVC
    from sklearn.kernel_approximation import AdditiveChi2Sampler

    # fit_transform() fits the sampler itself, so a separate fit() call
    # beforehand only repeats the same work.
    chi = AdditiveChi2Sampler()
    X = chi.fit_transform(hogsH, labels)

    scorers = [
        ('precision', precision_score),
        ('recall', recall_score),
    ]

    # The parameter grid is constant across scoring functions.
    tuned_parameters = [
        {
            'kernel': ['rbf'],
            'gamma': [1e-3, 1e-4],
            'C': [1, 10, 100, 1000],
        },
        {
            'kernel': ['linear'],
            'C': [1, 10, 100, 1000],
        },
    ]

    # random_state is fixed, so the split is identical for every scoring
    # function; compute it once instead of once per loop iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, labels, test_size=0.5, random_state=0)

    for _score_name, score_func in scorers:
        clf = GridSearchCV(SVC(C=1), tuned_parameters, score_func=score_func)
        clf.fit(X_train, y_train, cv=5)

        # Single-argument print(...) behaves the same as the print statement
        # under Python 2; print("") emits the same blank line as a bare print.
        print("Best parameters set found on development set:")
        print("")
        print(clf.best_estimator_)
        print("")
        print("Grid scores on development set:")
        print("")
        # Use a distinct name for the per-fold scores so the list of scoring
        # functions being iterated above is not shadowed.
        for params, mean_score, fold_scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r" % (mean_score,
                                                 fold_scores.std() / 2,
                                                 params))
        print("")

        print("Detailed classification report:")
        print("")
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print("")
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print("")
        print("Best score: %f" % clf.best_score_)