from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import RBF


def TrainMyClassifierGPR(X_train, y_train, **kwargs):
    # Despite the "GPR" in the name, this trains a one-vs-rest Gaussian
    # process *classifier*; kwargs (e.g. kernel) are passed through.
    if 'kernel' in kwargs:
        gpc = GPC(multi_class='one_vs_rest', **kwargs)
    else:
        kern = RBF(length_scale=0.4)
        gpc = GPC(kernel=kern, multi_class='one_vs_rest')
    gpc.fit(X_train, y_train)
    return gpc
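# A minimal usage sketch (the demo data below is hypothetical, not part of the
# original code): a kernel passed via kwargs overrides the default
# RBF(length_scale=0.4).
from sklearn.datasets import make_blobs

X_demo, y_demo = make_blobs(n_samples=120, centers=3, random_state=0)
clf_default = TrainMyClassifierGPR(X_demo, y_demo)
clf_custom = TrainMyClassifierGPR(X_demo, y_demo, kernel=RBF(length_scale=1.0))
print(clf_default.score(X_demo, y_demo), clf_custom.score(X_demo, y_demo))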
import numpy as np
import scipy as sp
import scipy.stats  # makes sp.stats available
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import DotProduct


def GPAL(X, Y, train_ind, candidate_ind, test_ind,
         sample='En', kernel='rbf', Niter=500, eta=10):
    """Gaussian-process active learning: each iteration fits a GPC on the
    current training set, scores it on the test set, and moves the
    highest-scoring candidate into the training set."""
    ourRes = []
    train_index = train_ind.copy()
    test_index = test_ind.copy()
    candidate_index = candidate_ind.copy()
    varRes = []
    enRes = []
    for i in range(Niter):
        print(i)
        if kernel == 'linear':
            dotkernel = DotProduct(sigma_0=1)
            model = GPC(kernel=dotkernel)
        else:
            model = GPC()
        model.fit(X[train_index], Y[train_index])
        ourRes.append(model.score(X[test_index, :], Y[test_index]))
        print(ourRes[-1])
        if sample == 'rand':
            # random sampling baseline
            sampleIndex = np.random.randint(len(candidate_index))
        elif sample == 'En':
            # maximum predictive-entropy sampling
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            sampleScore = en
            sampleIndex = np.argmax(sampleScore)
        elif sample == 'var':
            # maximum latent-variance sampling; assumes a GPC variant whose
            # binary estimators store per-candidate variance in `.var` after
            # predict_proba (not stock scikit-learn)
            model.predict_proba(X[candidate_index, :])
            meanVar = np.zeros(len(candidate_index))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar)
        elif sample == 'varEN':
            # weighted combination of variance and entropy
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            meanVar = np.zeros(len(candidate_index))
            enRes.append(np.mean(en))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar / len(np.unique(Y)) * eta + en)
            varRes.append(np.mean(meanVar))
            print('max var %f----selected var %f-----selected en %f '
                  % (np.max(meanVar), meanVar[sampleIndex], en[sampleIndex]))
        sampleIndex = candidate_index[sampleIndex]
        train_index = train_index + [sampleIndex]
        candidate_index = [x for x in candidate_index if x != sampleIndex]
    return [ourRes, varRes, enRes]
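# A hedged driver sketch for GPAL, assuming integer index lists into X/Y; the
# demo split below is illustrative only. The 'var' and 'varEN' strategies
# additionally require the `.var`-exposing GPC variant noted above.
from sklearn.datasets import make_classification

X_al, Y_al = make_classification(n_samples=200, n_features=10, n_informative=5,
                                 n_classes=3, random_state=0)
idx = list(range(len(Y_al)))
acc_curve, _, _ = GPAL(X_al, Y_al, idx[:10], idx[10:150], idx[150:],
                       sample='En', Niter=20)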
def __init__(self, kernel=None, optimizer='fmin_l_bfgs_b',
             n_restarts_optimizer=0, max_iter_predict=100, warm_start=False,
             copy_X_train=True, random_state=None, multi_class='one_vs_rest',
             n_jobs=None):
    self.kernel = kernel
    self.n_restarts_optimizer = n_restarts_optimizer
    self.random_state = random_state
    self.multi_class = multi_class
    self.n_jobs = n_jobs
    self.max_iter_predict = max_iter_predict
    self.warm_start = warm_start
    self.copy_X_train = copy_X_train
    self.optimizer = optimizer
    self.model = GPC(copy_X_train=self.copy_X_train,
                     n_jobs=self.n_jobs,
                     max_iter_predict=self.max_iter_predict,
                     n_restarts_optimizer=self.n_restarts_optimizer,
                     optimizer=self.optimizer,
                     warm_start=self.warm_start,
                     multi_class=self.multi_class,
                     kernel=self.kernel,
                     random_state=self.random_state)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import RBF


def task3(feature_sets, label_sets):
    sets = ["A", "B", "crashes", "diabetes", "ionosphere"]
    kernel = 1.0 * RBF(1.0)
    for i in range(5):
        # 60/40 train/test split; training-set sizes swept from 10 to 0.6 * n
        n = len(label_sets[i])
        m = np.linspace(10, .6 * n, num=10, dtype=int)
        div = int(n * .4)
        x_train = feature_sets[i][div:]
        x_test = feature_sets[i][:div]
        y_train = label_sets[i][div:]
        y_test = label_sets[i][:div]
        gpc_errors = []
        for j in range(10):
            gpc = GPC(kernel=kernel, random_state=0)
            # train on the first m[j] samples, matching the plotted M value
            gpc.fit(x_train[:m[j]], np.ravel(y_train[:m[j]]))
            gpc_errors.append(1 - gpc.score(x_test, np.ravel(y_test)))
        # plot before legend() so the "GPC" label is registered
        plt.plot(m, gpc_errors, label="GPC")
        plt.legend()
        plt.ylabel("Error")
        plt.xlabel("M value")
        plt.title(sets[i])
        plt.show()
    return
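# Hypothetical invocation of task3 (placeholder names, not from the original
# code): feature_sets and label_sets are parallel lists of five datasets
# matching the names in `sets`.
# task3([X_A, X_B, X_crashes, X_diabetes, X_ionosphere],
#       [y_A, y_B, y_crashes, y_diabetes, y_ionosphere])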
def compute_per_gaussian(self, max_iter=100):
    """Fit one Gaussian process classifier per feature."""
    # per feature (45 features per neuron block)
    for feature_index in range(int(len(self.X_train[0]) / 45)):
        X_train_mod = []  # define training dataset
        for example in range(len(self.X_train)):  # for each example (469)
            X_train_mod.append(
                [self.X_train[example][self.epoch * self.neuron_num + self.counter]])
        X_test_mod = []  # define testing dataset
        for example in range(len(self.X_test)):  # for each example (469)
            X_test_mod.append(
                [self.X_test[example][self.epoch * self.neuron_num + self.counter]])
        clf = GPC(max_iter_predict=max_iter)  # GPC model
        clf.fit(X_train_mod, self.y_train)  # fit with only one feature
        score = clf.score(X_test_mod, self.y_test)
        self.features_accuracy.append(score)
        self.counter += 1
def compute_per_gaussian(self, max_iter=100):
    """Fit one Gaussian process classifier per feature."""
    print(len(self.X_train))
    print(len(self.X_train[0]))
    # per feature
    for feature_index in range(int(len(self.X[0]))):
        X_train_mod = []  # define training dataset
        for example in range(len(self.X_train)):  # for each example (469)
            X_train_mod.append([self.X_train[example][self.counter]])
        X_test_mod = []  # define testing dataset
        for example in range(len(self.X_test)):  # for each example (469)
            X_test_mod.append([self.X_test[example][self.counter]])
        clf = GPC(max_iter_predict=max_iter)  # GPC model
        clf.fit(X_train_mod, self.y_train)  # fit with only one feature
        score = clf.score(X_test_mod, self.y_test)
        self.features_accuracy.append(score)
        self.counter += 1
def __init__(self, **kwargs):
    r"""Initialize GaussianProcess instance."""
    # silence scikit-learn warning categories (these classes live in
    # sklearn.exceptions; some were removed in newer scikit-learn releases)
    warnings.filterwarnings(action='ignore', category=ChangedBehaviorWarning)
    warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
    warnings.filterwarnings(action='ignore', category=DataConversionWarning)
    warnings.filterwarnings(action='ignore', category=DataDimensionalityWarning)
    warnings.filterwarnings(action='ignore', category=EfficiencyWarning)
    warnings.filterwarnings(action='ignore', category=FitFailedWarning)
    warnings.filterwarnings(action='ignore', category=NonBLASDotWarning)
    warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

    self._params = dict(
        max_iter_predict=ParameterDefinition(MinMax(50, 200), np.uint),
        warm_start=ParameterDefinition([True, False]),
        multi_class=ParameterDefinition(['one_vs_rest', 'one_vs_one']))
    self.__gaussian_process = GPC()
# words = get_dict(smiles, save_path='D:/工作文件/work.Data/CNN/dict.json')
features = []
for i, smi in enumerate(tqdm(smiles)):
    xi = one_hot_coding(smi, words, max_len=600)
    if xi is not None:
        features.append(xi.todense())
features = np.asarray(features)
targets = np.asarray(targets)

X_test = features
Y_test = targets

# kernel = 1.0 * RBF(0.8)
# model = RandomForestClassifier(n_estimators=10, max_features='auto',
#                                max_depth=None, min_samples_split=2,
#                                bootstrap=True)
model = GPC(random_state=111)

# leftover Keras callbacks from an earlier CNN experiment:
# earlyStopping = EarlyStopping(monitor='val_loss', patience=0.05, verbose=0, mode='min')
# mcp_save = ModelCheckpoint('C:/Users/sunjinyu/Desktop/FingerID Reference/drug-likeness/CNN/single_model.h5', save_best_only=True, monitor='accuracy', mode='auto')
# reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-4, mode='min')

from tensorflow.keras import backend as K

# flatten the one-hot encoded sequences to 2-D for the sklearn classifier
X_train = K.cast_to_floatx(X_train).reshape(
    (np.size(X_train, 0), np.size(X_train, 1) * np.size(X_train, 2)))
Y_train = K.cast_to_floatx(Y_train)
# X_train, Y_train = make_blobs(n_samples=300, n_features=n_features, centers=6)

model.fit(X_train, Y_train)
# model = load_model('C:/Users/sunjinyu/Desktop/FingerID Reference/drug-likeness/CNN/single_model.h5')
Y_predict = model.predict(
    K.cast_to_floatx(X_test).reshape(
        (np.size(X_test, 0), np.size(X_test, 1) * np.size(X_test, 2))))
                    default=os.environ['SM_CHANNEL_TRAIN'])

# args holds all passed-in arguments
args = parser.parse_args()

# Read in csv training file
training_dir = args.data_dir
train_data = pd.read_csv(os.path.join(training_dir, "train.csv"),
                         header=None, names=None)

# Labels are in the first column
train_y = train_data.iloc[:, 0]
train_x = train_data.iloc[:, 1:]

# Define Gaussian Process Classifier and hyperparameter tuner
gpc = GPC()
model = GridSearchCV(
    estimator=gpc,
    n_jobs=3,
    verbose=10,
    param_grid={'kernel': [1.0 * RBF([1.0]), 1.0 * RBF([1.0, 1.0, 1.0])]})

# Train model and select best performing set of hyperparameters by default
model.fit(train_x, train_y)
print('Best Parameters: ', model.best_params_)
print('Best Estimator: ', model.best_estimator_)

# Save the trained model
joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
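# Inference-side counterpart (a sketch, not from the original script): the
# serialized GridSearchCV object can be reloaded with joblib and used directly
# for prediction. `load_trained_model` is a hypothetical helper name.
import joblib
import os

def load_trained_model(model_dir):
    # predictions made with the loaded object use the best estimator found
    # during the grid search
    return joblib.load(os.path.join(model_dir, "model.joblib"))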
X_train, X_test, y_train, y_test = train_test_split(select_X, y1,
                                                    test_size=0.2,
                                                    random_state=0)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# cross-validation over the number of optimizer restarts
param_dist = {'n_restarts_optimizer': range(0, 10)}
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
grid = GridSearchCV(GPC(random_state=0), param_grid=param_dist, cv=cv)
grid.fit(X_train, y_train.values.ravel())
best_estimator = grid.best_estimator_
print(best_estimator)

# after cross-validation we obtain best_estimator
clf = best_estimator
print('the accuracy for all is:')
print(clf.score(X_test, y_test.values.ravel()))
prediction = clf.predict(X_test)
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, prediction))
print("Classification report:\n %s\n" %
      metrics.classification_report(y_test, prediction))
def voting_svm(self):
    """Voting classifier for a unique epoch: one GPC per key neuron,
    majority vote across neurons."""
    per_neuron_prediction = []
    # STRUCTURE:
    #   key neurons -> each epoch -> number of tasks (~100) -> results per neuron

    # Choosing features
    # train data
    print("test")
    for neuron in self.features_index:  # for good neurons
        neuron_votes = []
        X_for_neuron = []
        for example in range(len(self.X_train)):  # for each of the tasks
            X_for_neuron.append(
                [self.X_train[example][self.epoch * self.neuron_num + neuron]])
        X_test = []
        for example in range(len(self.X_test)):  # for each of the tasks
            X_test.append(
                [self.X_test[example][self.epoch * self.neuron_num + neuron]])
        clf = GPC()  # prediction on individual neuron
        clf.fit(X_for_neuron, self.y_train)
        # add predictions to data for each sample
        pred = clf.predict(X_test)
        neuron_votes.append(pred)
        per_neuron_prediction.append(neuron_votes)

    # test data
    accuracy = 0
    print(per_neuron_prediction[0])
    print(len(self.X_test))
    features_num = len(self.features_index)
    # check if voting length is even (disabled):
    # if len(per_neuron_prediction) % 2 == 0:
    #     del per_neuron_prediction[-1]
    #     features_num -= 1
    print(per_neuron_prediction)
    # for each testing task per session per epoch
    for test_task in range(len(self.X_test)):
        # take the most common vote among the per-neuron GPC predictions
        temp_task = []
        for neuron in range(features_num):
            temp_task.append(per_neuron_prediction[neuron][0][test_task])
        vote_result = mode(temp_task)
        if vote_result == self.y_test[test_task]:
            accuracy += 1
        print("ACCURACY {}".format(accuracy / (test_task + 1)))
    accuracy = accuracy / len(self.X_test)
    return accuracy
import numpy as np
from PIL import Image
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.preprocessing import LabelEncoder


def GenerateImage(center, img_pth, cat, prob, r=70.0):
    """Use Gaussian processes to visualize the classification and tumor
    extraction.

    Arguments:
    - center : (numpy array) pixel-center coordinates
    - img_pth : (str) path to HE-image
    - cat : (numpy array) class assigned to each spot, same order as center
    - prob : (numpy array) probability vectors/matrix from classification
    - r : (float) radius of spot

    Output:
    - simg : (PIL image) image with colors corresponding to each subtype
    """
    img = Image.open(img_pth)
    # downscale spot centers and image grid by the spot radius
    center = np.floor(center / r)
    center = center.astype(int)
    new_size = [np.round(x / r, 0).astype(int) for x in img.size]
    scaling_factor = [float(img.size[0]) / float(new_size[0]),
                      float(img.size[1]) / float(new_size[1])]
    rgb_values = dict(luma=(73, 216, 166),
                      lumb=(12, 138, 153),
                      her2lum=(255, 124, 148),
                      tnbc=(122, 172, 73),
                      her2nonlum=(255, 202, 79),
                      nontumor=(66, 66, 66))
    simg = np.ones(shape=(new_size[0], new_size[1], 3), dtype=np.uint8) * 255
    # keep only grid points within 3 units of some spot center
    coordinates = []
    tmp_coordinates = np.array([[x, y]
                                for x in range(0, simg.shape[0])
                                for y in range(0, simg.shape[1])])
    del img
    for ii in range(tmp_coordinates.shape[0]):
        if np.min(np.linalg.norm(center - tmp_coordinates[ii, :], axis=1)) <= 3.0:
            coordinates.append(tmp_coordinates[ii, :])
    coordinates = np.array(coordinates)

    gpc = GPC(kernel=None, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=3)
    # append a 'nontumor' column and zero out subtype probabilities for
    # non-tumor spots
    prob_adj = np.zeros((prob.shape[0], prob.shape[1] + 1))
    prob_adj[:, 0:prob.shape[1]] = prob
    prob_adj[cat == 'nontumor', 0:prob.shape[1]] = 0.0
    prob_adj[cat == 'nontumor', -1] = 1.0
    # sklearn's GPC expects 1-D class labels, so the probability matrix is
    # collapsed to its most likely class (column index) per spot
    labels = np.argmax(prob_adj, axis=1)
    fitted_gpc = gpc.fit(center, labels)
    res = fitted_gpc.predict(coordinates)

    subtypes = np.array(['luma', 'lumb', 'her2nonlum', 'her2lum', 'tnbc'])
    l1 = LabelEncoder()
    l1.fit(subtypes)
    # map column indices back to subtype names; the last column is 'nontumor'
    backmap = np.append(l1.transform(subtypes), subtypes.shape[0])
    subtypes = np.append(subtypes, 'nontumor')
    prob_to_cat = {backmap[ii]: subtypes[ii]
                   for ii in range(subtypes.shape[0])}
    res = np.array(list(map(lambda x: prob_to_cat[x], res)))
    for ii in range(res.shape[0]):
        simg[coordinates[ii, 0], coordinates[ii, 1], :] = rgb_values[res[ii]]
    simg = Image.fromarray(simg, mode='RGB')
    simg = simg.transpose(method=Image.ROTATE_90)
    simg = simg.transpose(method=Image.FLIP_TOP_BOTTOM)
    simg = simg.resize((int(scaling_factor[0] * new_size[0]),
                        int(scaling_factor[1] * new_size[1])),
                       Image.ANTIALIAS)
    return simg
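# Hypothetical call sketch for GenerateImage (the path and arrays below are
# placeholders, not from the original code): spot centers in pixel
# coordinates, one probability row per spot over the five subtype columns,
# and cat marking non-tumor spots.
center_px = np.array([[350.0, 700.0], [420.0, 770.0], [490.0, 840.0]])
probs = np.array([[0.70, 0.10, 0.10, 0.05, 0.05],
                  [0.20, 0.50, 0.10, 0.10, 0.10],
                  [0.10, 0.10, 0.10, 0.10, 0.60]])
cats = np.array(['luma', 'lumb', 'nontumor'])
simg = GenerateImage(center_px, 'he_section.jpg', cats, probs, r=70.0)
simg.save('subtype_map.png')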
import numpy as np
import sklearn.gaussian_process as gp
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from pyDOE import lhs  # Latin hypercube sampling (criterion='cm' = center-maximin)


def bo_c(func, n_eval, n_init_eval, n_candidates, bounds, alpha=1e-4,
         save_dir=None):
    """Constrained Bayesian optimization: a GP regressor models the objective
    and a GP classifier models the feasibility constraint. Helper functions
    (normalize, expected_improvement, constraint_proba,
    constraint_weighted_acquisition, sample_next_point) are assumed defined
    elsewhere in the module."""
    # kernel = gp.kernels.Matern()
    kernel = gp.kernels.ConstantKernel(1.0, (1., 1.)) * gp.kernels.RBF(
        1.0, (1e-5, 1e5))
    gp_model = GPR(kernel=kernel, alpha=alpha, n_restarts_optimizer=100,
                   normalize_y=False)
    gpc_model = GPC(kernel=kernel, n_restarts_optimizer=100)
    dim = func.dim

    # Initial evaluations
    xs = lhs(dim, samples=n_init_eval, criterion='cm')
    xs = xs * (bounds[:, 1] - bounds[:, 0]) + bounds[:, 0]
    ys = func(xs)
    vs = func.is_feasible(xs)
    opt_idx = np.argmax(ys[vs])
    opt_x = xs[vs][opt_idx]
    opt_y = ys[vs][opt_idx]
    opt_ys = [opt_y]

    for i in range(n_init_eval, n_eval):
        ys_normalized = normalize(ys[vs])
        gp_model.fit(xs[vs], ys_normalized)
        f_prime = ys_normalized[opt_idx]
        acquisition_func = lambda x: expected_improvement(x, gp_model, f_prime)
        if np.any(vs) and np.any(np.logical_not(vs)):
            # both feasible and infeasible points seen: weight the acquisition
            # by the predicted feasibility probability
            gpc_model.fit(xs, vs)
            constraint_proba_func = lambda x: constraint_proba(x, gpc_model)
            constraint_weighted_acquisition_func = (
                lambda x: constraint_weighted_acquisition(
                    x, acquisition_func, constraint_proba_func))
        else:
            constraint_weighted_acquisition_func = acquisition_func

        # Decide point to evaluate next
        n_candidates = 1000 * dim  # NB: overrides the n_candidates argument
        x = sample_next_point(dim, constraint_weighted_acquisition_func,
                              bounds=bounds, strict_bounds=True,
                              n_candidates=n_candidates)
        y = func(x)
        v = func.is_feasible(x)
        xs = np.append(xs, np.array(x, ndmin=2), axis=0)
        ys = np.append(ys, y)
        vs = np.append(vs, v)
        if v and y > opt_y:
            opt_idx = sum(vs) - 1
            opt_x = xs[vs][opt_idx]
            opt_y = ys[vs][opt_idx]
        opt_ys.append(opt_y)  # best performance so far
        print('{}: x {} y {} v {} Best-so-far {}'.format(i + 1, x, y, v, opt_y))
    return opt_x, opt_ys
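# Toy usage of bo_c under the interface it assumes: `func` is callable on an
# (n, dim) array or a single point and carries `dim` and `is_feasible`
# attributes. Everything below is illustrative only, and the helper functions
# referenced inside bo_c must be in scope for the loop to run.
class ToyProblem:
    dim = 2

    def __call__(self, x):
        x = np.atleast_2d(x)
        return -np.sum(x ** 2, axis=1)  # maximized at the origin

    def is_feasible(self, x):
        x = np.atleast_2d(x)
        return np.sum(x, axis=1) <= 1.0  # simple linear constraint

toy_bounds = np.array([[-2.0, 2.0], [-2.0, 2.0]])
best_x, best_ys = bo_c(ToyProblem(), n_eval=30, n_init_eval=10,
                       n_candidates=500, bounds=toy_bounds)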
def __init__(self, **kwargs):
    super().__init__()
    self.classifier = GPC()
    self.kwargs = kwargs