Code example #1
File: GPR.py  Project: mdcarr941/CAP6610Project2
# Assumed imports for this snippet:
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import RBF

def TrainMyClassifierGPR(X_train, y_train, **kwargs):
    # despite the "GPR" in the name, this trains a classifier (GPC)
    if 'kernel' in kwargs:
        gpc = GPC(multi_class='one_vs_rest', **kwargs)
    else:
        kern = RBF(length_scale=0.4)
        gpc = GPC(kernel=kern, multi_class='one_vs_rest')
    gpc.fit(X_train, y_train)
    return gpc
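A minimal usage sketch (hypothetical toy data; only the imports above plus
sklearn.datasets are assumed):

from sklearn.datasets import make_classification
from sklearn.gaussian_process.kernels import Matern

X, y = make_classification(n_samples=120, n_features=6, n_informative=3,
                           n_classes=3, random_state=0)
clf = TrainMyClassifierGPR(X, y)                    # default RBF(0.4) kernel
clf2 = TrainMyClassifierGPR(X, y, kernel=Matern())  # custom kernel via kwargs
print(clf.score(X, y), clf2.score(X, y))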
# Assumed imports for this snippet:
import numpy as np
import scipy as sp
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import DotProduct

def GPAL(X,
         Y,
         train_ind,
         candidate_ind,
         test_ind,
         sample='En',
         kernel='rbf',
         Niter=500,
         eta=10):
    ourRes = []
    train_index = train_ind.copy()
    test_index = test_ind.copy()
    candidate_index = candidate_ind.copy()
    varRes = []
    enRes = []
    for i in range(Niter):
        print(i)
        if (kernel == 'linear'):
            dotkernel = DotProduct(sigma_0=1)
            model = GPC(kernel=dotkernel)
        else:
            model = GPC()
        model.fit(X[train_index], Y[train_index])
        ourRes.append(model.score(X[test_index, :], Y[test_index]))
        print(ourRes[-1])
        if (sample == 'rand'):
            sampleIndex = np.random.randint(len(candidate_index))
        elif (sample == 'En'):
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            sampleScore = en
            sampleIndex = np.argmax(sampleScore)
        elif (sample == 'var'):
            # predict_proba is called for its side effect here: the project
            # apparently relies on a patched scikit-learn that stores a `var`
            # attribute on each one-vs-rest sub-estimator
            model.predict_proba(X[candidate_index, :])
            meanVar = np.zeros(len(candidate_index))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar)
        elif (sample == 'varEN'):
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            meanVar = np.zeros(len(candidate_index))
            enRes.append(np.mean(en))

            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar / len(np.unique(Y)) * eta + en)
            varRes.append(np.mean(meanVar))
            print('max var %f----selected var %f-----selected en %f ' %
                  (np.max(meanVar), meanVar[sampleIndex], en[sampleIndex]))
        sampleIndex = candidate_index[sampleIndex]
        train_index = train_index + [sampleIndex]
        candidate_index = [
            x for x in candidate_index if x not in [sampleIndex]
        ]
    return [ourRes, varRes, enRes]
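A hedged usage sketch. The 'var' and 'varEN' strategies read a `var` attribute
off each one-vs-rest sub-estimator, which stock scikit-learn does not expose
(the project presumably patches it), so the sketch sticks to 'En':

from sklearn.datasets import make_classification

X, Y = make_classification(n_samples=200, n_features=6, random_state=0)
idx = list(range(200))
train_ind, candidate_ind, test_ind = idx[:10], idx[10:150], idx[150:]
acc, _, _ = GPAL(X, Y, train_ind, candidate_ind, test_ind, sample='En', Niter=20)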
Code example #3
    # __init__ of a thin wrapper class (definition not shown) around sklearn's
    # GaussianProcessClassifier; it stores every constructor argument and
    # forwards them all to the inner GPC.
    def __init__(self,
                 kernel=None,
                 optimizer='fmin_l_bfgs_b',
                 n_restarts_optimizer=0,
                 max_iter_predict=100,
                 warm_start=False,
                 copy_X_train=True,
                 random_state=None,
                 multi_class='one_vs_rest',
                 n_jobs=None):
        self.kernel = kernel
        self.n_restarts_optimizer = n_restarts_optimizer
        self.random_state = random_state
        self.multi_class = multi_class
        self.n_jobs = n_jobs
        self.max_iter_predict = max_iter_predict
        self.warm_start = warm_start
        self.copy_X_train = copy_X_train
        self.optimizer = optimizer
        self.model = GPC(copy_X_train=self.copy_X_train,
                         n_jobs=self.n_jobs,
                         max_iter_predict=self.max_iter_predict,
                         n_restarts_optimizer=self.n_restarts_optimizer,
                         optimizer=self.optimizer,
                         warm_start=self.warm_start,
                         multi_class=self.multi_class,
                         kernel=self.kernel,
                         random_state=self.random_state)
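The rest of the class is not shown; assuming the usual wrapper pattern, the
remaining methods would simply delegate to self.model, roughly:

    def fit(self, X, y):
        self.model.fit(X, y)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)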
Code example #4
# Assumed imports for this snippet:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import RBF

def task3(feature_sets, label_sets):
    sets = ["A", "B", "crashes", "diabetes", "ionosphere"]
    kernel = 1.0 * RBF(1.0)
    for i in range(5):
        n = len(label_sets[i])
        m = np.linspace(10, .6 * n, num=10, dtype=int)
        div = int(n * .4)
        x_train = feature_sets[i][div:]
        x_test = feature_sets[i][:div]
        y_train = label_sets[i][div:]
        y_test = label_sets[i][:div]
        gpc_errors = []
        for j in range(10):
            gpc = GPC(kernel=kernel, random_state=0)
            # note: trains on m[j]-1 samples while plotting against m[j]
            gpc.fit(x_train[:m[j] - 1], np.ravel(y_train[:m[j] - 1]))
            gpc_errors.append(1 - gpc.score(x_test, np.ravel(y_test)))

        # plot before calling legend() so the "GPC" label is registered
        plt.plot(m, gpc_errors, label="GPC")
        plt.legend()
        plt.ylabel("Error")
        plt.xlabel("M value")
        plt.title(sets[i])
        plt.show()

    return
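A hedged driver for task3 (the project presumably loads the five named
datasets from files; synthetic stand-ins of compatible shape are used here,
with make_classification from sklearn.datasets):

from sklearn.datasets import make_classification

feature_sets, label_sets = [], []
for seed in range(5):
    X, y = make_classification(n_samples=300, n_features=6, random_state=seed)
    feature_sets.append(X)
    label_sets.append(y.reshape(-1, 1))
task3(feature_sets, label_sets)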
Code example #5
    def compute_per_gaussian(self, max_iter=100):
        """Fit a GPC per feature (the original docstring said "SVM")."""
        # per feature; `X` was undefined here -- self.X is assumed, matching
        # the variant in code example #6
        for feature_index in range(int(len(self.X[0]) / 45)):
            X_train_mod = []
            # define training dataset
            for example in range(len(self.X_train)):   # for each example (469)
                X_train_mod.append([self.X_train[example][self.epoch*self.neuron_num + self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):   # for each example (469)
                X_test_mod.append([self.X_test[example][self.epoch*self.neuron_num + self.counter]])

            # gamma, c and kernel were leftover SVM settings, unused by the GPC
            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train) # compute with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
Code example #6
    def compute_per_gaussian(self, max_iter=100):
        """Fit a GPC per feature (the original docstring said "SVM")."""

        print(len(self.X_train))
        print(len(self.X_train[0]))

        # per feature
        for feature_index in range(int(len(self.X[0]))):
            X_train_mod = []
            # define training dataset
            for example in range(len(self.X_train)):  # for each example (469)
                X_train_mod.append([self.X_train[example][self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):  # for each example (469)
                X_test_mod.append([self.X_test[example][self.counter]])

            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train)  # compute with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
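Once features_accuracy has been filled, the per-feature scores can be turned
into a ranking (a usage sketch; `obj` stands for an instance of the class):

import numpy as np
top10 = np.argsort(obj.features_accuracy)[::-1][:10]  # indices of the ten best single features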
Code example #7
    # Assumed imports: `warnings`, the warning classes from sklearn.exceptions,
    # and ParameterDefinition/MinMax from the host framework.
    def __init__(self, **kwargs):
        r"""Initialize GaussianProcess instance."""
        warnings.filterwarnings(action='ignore',
                                category=ChangedBehaviorWarning)
        warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataConversionWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataDimensionalityWarning)
        warnings.filterwarnings(action='ignore', category=EfficiencyWarning)
        warnings.filterwarnings(action='ignore', category=FitFailedWarning)
        warnings.filterwarnings(action='ignore', category=NonBLASDotWarning)
        warnings.filterwarnings(action='ignore',
                                category=UndefinedMetricWarning)

        self._params = dict(
            max_iter_predict=ParameterDefinition(MinMax(50, 200), np.uint),
            warm_start=ParameterDefinition([True, False]),
            multi_class=ParameterDefinition(['one_vs_rest', 'one_vs_one']))
        self.__gaussian_process = GPC()
Code example #8
            
    # (Snippet begins mid-function; smiles, targets, words, X_train and Y_train
    # come from earlier, unshown code.)
    # words = get_dict(smiles, save_path='D:/工作文件/work.Data/CNN/dict.json')

    features = []
    for i, smi in enumerate(tqdm(smiles)):
        xi = one_hot_coding(smi, words, max_len=600)
        if xi is not None:
            features.append(xi.todense())
    features = np.asarray(features)
    targets = np.asarray(targets)
    X_test = features
    Y_test = targets

    # kernel = 1.0 * RBF(0.8)
    # model = RandomForestClassifier(n_estimators=10, max_features='auto', max_depth=None, min_samples_split=2, bootstrap=True)
    model = GPC(random_state=111)

    # earlyStopping = EarlyStopping(monitor='val_loss', patience=0.05, verbose=0, mode='min')
    # mcp_save = ModelCheckpoint('C:/Users/sunjinyu/Desktop/FingerID Reference/drug-likeness/CNN/single_model.h5', save_best_only=True, monitor='accuracy', mode='auto')
    # reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-4, mode='min')
    from tensorflow.keras import backend as K

    # flatten the (samples, length, alphabet) one-hot tensors into 2-D matrices
    X_train = K.cast_to_floatx(X_train).reshape((np.size(X_train, 0), np.size(X_train, 1) * np.size(X_train, 2)))
    Y_train = K.cast_to_floatx(Y_train)

    # X_train, Y_train = make_blobs(n_samples=300, n_features=n_features, centers=6)
    model.fit(X_train, Y_train)

    # model = load_model('C:/Users/sunjinyu/Desktop/FingerID Reference/drug-likeness/CNN/single_model.h5')
    Y_predict = model.predict(K.cast_to_floatx(X_test).reshape((np.size(X_test, 0), np.size(X_test, 1) * np.size(X_test, 2))))
Code example #9
                        default=os.environ['SM_CHANNEL_TRAIN'])  # snippet begins mid-way through the argparse setup

    # args holds all passed-in arguments
    args = parser.parse_args()

    # Read in csv training file
    training_dir = args.data_dir
    train_data = pd.read_csv(os.path.join(training_dir, "train.csv"),
                             header=None,
                             names=None)

    # Labels are in the first column
    train_y = train_data.iloc[:, 0]
    train_x = train_data.iloc[:, 1:]

    # Define Gaussian Process Classifier and hyperparameter tuner
    gpc = GPC()
    model = GridSearchCV(
        estimator=gpc,
        n_jobs=3,
        verbose=10,
        param_grid={'kernel': [1.0 * RBF([1.0]), 1.0 * RBF([1.0, 1.0, 1.0])]})

    # Train model and select best performing set of hyperparameters by default
    model.fit(train_x, train_y)

    print('Best Parameters: ', model.best_params_)
    print('Best Estimator: ', model.best_estimator_)

    # Save the trained model
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
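The saved estimator can be restored later for inference; because GridSearchCV
refits on the best parameter set by default, predict() is served by the best
estimator automatically:

model = joblib.load(os.path.join(args.model_dir, "model.joblib"))
predictions = model.predict(train_x)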
Code example #10
# Assumed imports: train_test_split / StratifiedShuffleSplit / GridSearchCV
# from sklearn.model_selection, metrics from sklearn, and GPC as an alias for
# GaussianProcessClassifier; select_X and y1 come from earlier, unshown code.
X_train, X_test, y_train, y_test = train_test_split(select_X,
                                                    y1,
                                                    test_size=0.2,
                                                    random_state=0)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# cross-validation
param_dist = {'n_restarts_optimizer': range(0, 10)}  # original key had a stray trailing space, which GridSearchCV rejects

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
grid = GridSearchCV(GPC(random_state=0), param_grid=param_dist, cv=cv)
grid.fit(X_train, y_train.values.ravel())

best_estimator = grid.best_estimator_
print(best_estimator)

# after cross-validation we obtain best_estimator.
clf = best_estimator

print('the accuracy for all is:')
print(clf.score(X_test, y_test.values.ravel()))

prediction = clf.predict(X_test)
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, prediction))

print("Classification report:\n %s\n" %
Code example #11
    def voting_svm(self):
        """Voting ensemble over per-neuron classifiers for a single epoch
        (despite the name, each per-neuron model is a GaussianProcessClassifier)."""
        per_neuron_prediction = []

        """
        STRUCTURE:
        -> Key neurons
            -> Each epoch
                -> Number of tasks (~100)
                    -> Results for each neuron
        """

        # Choosing features
        # train data
        print("test")
        for neuron in self.features_index:    # for good neurons
            neuron_votes = []
            X_for_neuron = []
            for example in range(len(self.X_train)):     # for each of tasks
                X_for_neuron.append([self.X_train[example][self.epoch*self.neuron_num + neuron]])

            X_test = []
            for example in range(len(self.X_test)):     # for each of tasks
                X_test.append([self.X_test[example][self.epoch*self.neuron_num + neuron]])

            clf = GPC()
            # prediction on individual neuron
            clf.fit(X_for_neuron, self.y_train)
            # add predictions to data for each sample
            pred = clf.predict(X_test)


            neuron_votes.append(pred)
            per_neuron_prediction.append(neuron_votes)


        # test data
        accuracy = 0
        print(per_neuron_prediction[0])
        print(len(self.X_test))

        features_num = len(self.features_index)

        # check if voting length is even
        """
        if len(per_neuron_prediction)%2==0:
            del per_neuron_prediction[-1]
            features_num =- 1
        """

        print(per_neuron_prediction)

        # for each testing task per session per epoch
        for test_task in range(len(self.X_test)):
            # tally the votes cast by the per-neuron GPC classifiers
            temp_task = []
            for neuron in range(features_num):
                temp_task.append(per_neuron_prediction[neuron][0][test_task])
            vote_result = mode(temp_task)
            if vote_result == self.y_test[test_task]:
                accuracy += 1
            print("ACCURACY {}".format(accuracy/(test_task+1)))

        accuracy = accuracy/len(self.X_test)

        return accuracy
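A note on `mode`: comparing the result directly against a single label
suggests the standard-library statistics.mode, which returns the most common
value itself (scipy.stats.mode would return a ModeResult object instead):

from statistics import mode
assert mode([1, 1, 2]) == 1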
Code example #12
def GenerateImage(center, img_pth, cat, prob, r=70.0):
    """
    use gaussian processes to visualize the classification and tumor extraction.
    
    Arguments: 
        - center : (numpy array) pixel-center coordinates
        - img_pth : (str) path to HE-image
        - cat : (numpy array) class assigned to each spot, same order as center
        - prob : (numpy array) probability vectors/matrix from classification
        - r: (float) radius of spot
        
    Output: 
        
        - simg : (PIL image) image with colors corresponding to each subtype
    
    """

    img = Image.open(img_pth)

    center = np.floor(center / r)
    center = center.astype(int)

    new_size = [np.round(x / r, 0).astype(int) for x in img.size]
    scaling_factor = [
        float(img.size[0]) / float(new_size[0]),
        float(img.size[1]) / float(new_size[1])
    ]

    rgb_values = dict(luma=(73, 216, 166),
                      lumb=(12, 138, 153),
                      her2lum=(255, 124, 148),
                      tnbc=(122, 172, 73),
                      her2nonlum=(255, 202, 79),
                      nontumor=(66, 66, 66))

    simg = np.ones(shape=(new_size[0], new_size[1], 3), dtype=np.uint8) * 255
    coordinates = []
    tmp_coordinates = np.array([[x, y] for x in range(0, simg.shape[0])
                                for y in range(0, simg.shape[1])])

    del img

    for ii in range(tmp_coordinates.shape[0]):
        if np.min(np.linalg.norm(center - tmp_coordinates[ii, :],
                                 axis=1)) <= 3.0:
            coordinates.append(tmp_coordinates[ii, :])
    coordinates = np.array(coordinates)

    gpc = GPC(kernel=None, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=3)

    prob_adj = np.zeros((prob.shape[0], prob.shape[1] + 1))
    prob_adj[:, 0:prob.shape[1]] = prob
    prob_adj[cat == 'nontumor', 0:prob.shape[1]] = 0.0
    prob_adj[cat == 'nontumor', -1] = 1.0

    # NOTE: stock scikit-learn's GPC expects 1-D class labels in fit() and
    # returns labels from predict(); passing a probability matrix as done here
    # presumably relies on a customized estimator (see sketch after this example).
    fitted_gpc = gpc.fit(center, prob_adj)
    res = fitted_gpc.predict(coordinates)
    res = np.argmax(res, axis=1)

    subtypes = np.array(['luma', 'lumb', 'her2nonlum', 'her2lum', 'tnbc'])
    l1 = LabelEncoder()
    l1.fit(subtypes)

    backmap = np.append(l1.transform(subtypes), subtypes.shape[0])
    subtypes = np.append(subtypes, 'nontumor')

    prob_to_cat = {
        backmap[ii]: subtypes[ii]
        for ii in range(subtypes.shape[0])
    }
    res = np.array(list(map(lambda x: prob_to_cat[x], res)))

    for ii in range(res.shape[0]):
        simg[coordinates[ii, 0], coordinates[ii, 1], :] = rgb_values[res[ii]]

    simg = Image.fromarray(simg, mode='RGB')
    simg = simg.transpose(method=Image.ROTATE_90)
    simg = simg.transpose(method=Image.FLIP_TOP_BOTTOM)
    # Image.ANTIALIAS was removed in Pillow 10; use Image.Resampling.LANCZOS there
    simg = simg.resize((int(scaling_factor[0] * new_size[0]),
                        int(scaling_factor[1] * new_size[1])), Image.ANTIALIAS)

    return simg
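For reference, stock scikit-learn's GaussianProcessClassifier is fitted on
class labels rather than a probability matrix; the equivalent stock pattern
would look roughly like this (a sketch, not the project's exact behavior):

labels = cat                              # 1-D array of subtype strings
gpc.fit(center, labels)
proba = gpc.predict_proba(coordinates)    # shape (n_points, n_classes)
res = gpc.classes_[np.argmax(proba, axis=1)]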
Code example #13
# Assumed imports: numpy as np, sklearn.gaussian_process as gp (with GPR and
# GPC aliases), lhs from pyDOE, plus project helpers (normalize,
# sample_next_point, and the acquisition functions sketched after this example).
def bo_c(func,
         n_eval,
         n_init_eval,
         n_candidates,
         bounds,
         alpha=1e-4,
         save_dir=None):

    #    kernel = gp.kernels.Matern()
    kernel = gp.kernels.ConstantKernel(1.0, (1., 1.)) * gp.kernels.RBF(
        1.0, (1e-5, 1e5))
    gp_model = GPR(kernel=kernel,
                   alpha=alpha,
                   n_restarts_optimizer=100,
                   normalize_y=False)
    gpc_model = GPC(kernel=kernel, n_restarts_optimizer=100)

    dim = func.dim

    # Initial evaluations
    xs = lhs(dim, samples=n_init_eval, criterion='cm')
    xs = xs * (bounds[:, 1] - bounds[:, 0]) + bounds[:, 0]
    ys = func(xs)
    vs = func.is_feasible(xs)

    opt_idx = np.argmax(ys[vs])
    opt_x = xs[vs][opt_idx]
    opt_y = ys[vs][opt_idx]

    opt_ys = [opt_y]

    for i in range(n_init_eval, n_eval):

        ys_normalized = normalize(ys[vs])
        gp_model.fit(xs[vs], ys_normalized)
        f_prime = ys_normalized[opt_idx]
        acquisition_func = lambda x: expected_improvement(x, gp_model, f_prime)

        if np.any(vs) and np.any(np.logical_not(vs)):
            gpc_model.fit(xs, vs)
            constraint_proba_func = lambda x: constraint_proba(x, gpc_model)
            constraint_weighted_acquisition_func = lambda x: constraint_weighted_acquisition(
                x, acquisition_func, constraint_proba_func)
        else:
            constraint_weighted_acquisition_func = acquisition_func

        # Decide point to evaluate next
        n_candidates = 1000 * dim  # note: overrides the n_candidates argument
        x = sample_next_point(dim,
                              constraint_weighted_acquisition_func,
                              bounds=bounds,
                              strict_bounds=True,
                              n_candidates=n_candidates)

        y = func(x)
        v = func.is_feasible(x)
        xs = np.append(xs, np.array(x, ndmin=2), axis=0)
        ys = np.append(ys, y)
        vs = np.append(vs, v)

        if v and y > opt_y:
            opt_idx = sum(vs) - 1

        opt_x = xs[vs][opt_idx]
        opt_y = ys[vs][opt_idx]
        opt_ys.append(opt_y)  # Best performance so far
        print('{}: x {} y {} v {} Best-so-far {}'.format(
            i + 1, x, y, v, opt_y))

    return opt_x, opt_ys
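The helper functions referenced above (normalize, sample_next_point,
expected_improvement, constraint_proba, constraint_weighted_acquisition) are
defined elsewhere in the project; standard forms consistent with the calls in
bo_c look roughly like this:

import numpy as np
from scipy.stats import norm

def expected_improvement(x, gp_model, f_prime):
    # EI for maximization: (mu - f') * Phi(z) + sigma * phi(z)
    mu, sigma = gp_model.predict(np.atleast_2d(x), return_std=True)
    sigma = np.maximum(sigma, 1e-12)
    z = (mu - f_prime) / sigma
    return (mu - f_prime) * norm.cdf(z) + sigma * norm.pdf(z)

def constraint_proba(x, gpc_model):
    # probability that x is feasible, i.e. the classifier's True class
    proba = gpc_model.predict_proba(np.atleast_2d(x))
    return proba[:, list(gpc_model.classes_).index(True)]

def constraint_weighted_acquisition(x, acquisition_func, constraint_proba_func):
    # scale expected improvement by the probability of feasibility
    return acquisition_func(x) * constraint_proba_func(x)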
Code example #14
    def __init__(self, **kwargs):
        super().__init__()
        self.classifier = GPC()
        self.kwargs = kwargs