def learn(X, Y):
    pca = None
    dictionary = None
    model = None

    # Data augmentation
    if DO_DATA_AUGMENTATION:
        print("Augmenting data")
        X, Y = transform_T(X, Y)
        print("Number of samples augmented to {}".format(X.shape[0]))

    # Dictionary learning
    if DO_DICTIONARY_LEARNING:
        dictionary = Dictionary(n_atoms=128, atom_width=16)
        if dictionary.weights_available:
            print("Loading dictionary")
            dictionary.load()
        else:
            print("Learning dictionary")
            tic = time.time()
            dictionary.fit(X)
            dictionary.save()
            print("Dictionary learned in {0:.1f}s".format(time.time() - tic))
        print("Getting dictionary representation")
        X = dictionary.get_representation(X)

    # PCA
    if DO_PCA:
        tic = time.time()
        print("Applying PCA")
        n_components = 100
        pca = PCA(n_components=n_components)
        X = pca.fit(X, scale=False)
        print("Variance explained: {:.2f}".format(np.sum(pca.e_values_ratio_)))
        print("PCA applied in {0:.1f}s".format(time.time() - tic))

    # Training
    print("Starting training")
    tic = time.time()
    model = KernelSVM(C=1, kernel='rbf')
    model.train(X, Y)
    print("Model trained in {0:.1f}s".format(time.time() - tic))

    return pca, dictionary, model
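
# A minimal usage sketch of the objects returned by learn() at prediction time.
# It assumes the custom PCA exposes a transform() method and KernelSVM exposes
# predict(); neither name appears above, so treat both as assumptions.
def predict_with_pipeline(X_new, pca, dictionary, model):
    if dictionary is not None:
        # map raw samples onto the learned dictionary atoms, as in training
        X_new = dictionary.get_representation(X_new)
    if pca is not None:
        # project onto the components kept during training (assumed API)
        X_new = pca.transform(X_new)
    return model.predict(X_new)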
    def create(data_manager, components_limit=0):
        '''
        Creates a statistical shape model by aligning the training shapes and performing PCA
        :param data_manager: data manager providing the training data
        :param components_limit: limit on how many PCA components to keep (0 == all)
        :return: resulting PCA
        '''
        teeth = data_manager.get_all_teeth(True)
        mean_shape = deepcopy(teeth[0])
        assert isinstance(mean_shape, Tooth)
        mean_shape.move_to_origin()
        mean_shape.normalize_shape()

        error = float("inf")
        while error > 0.05:
            meanAcum = np.zeros(mean_shape.landmarks.shape)
            for i in range(0, len(teeth)):
                teeth[i].align(mean_shape)
                meanAcum += teeth[i].landmarks

            new_mean_shape = Tooth(meanAcum / len(teeth))
            new_mean_shape.align(mean_shape)
            error = new_mean_shape.sum_of_squared_distances(mean_shape)

            mean_shape = new_mean_shape

        # Realign all teeth with the final mean shape
        for tooth in teeth:
            tooth.align(mean_shape)

        data = np.zeros((len(teeth), teeth[0].landmarks.size))
        for i, tooth in enumerate(teeth):
            data[i, :] = tooth.landmarks.flatten()

        pca = PCA()
        pca.train(deepcopy(data), components_limit)
        return pca
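
# A minimal sketch of the similarity alignment that Tooth.align() presumably performs:
# an orthogonal-Procrustes fit of one centred landmark matrix onto another. The real
# method's normalisation details are not shown above, so this is illustrative only.
import numpy as np

def align_landmarks(source, target):
    # source, target: (n_points, 2) arrays, both already centred on the origin
    u, s, vt = np.linalg.svd(target.T @ source)
    rotation = u @ vt                      # optimal rotation
    scale = s.sum() / (source ** 2).sum()  # optimal isotropic scale
    return scale * source @ rotation.T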
Example 3
    def phase1(self, data, trueLabels):
        step1ResultsFolder = Path(self.config["resultsDir"]) / "phase1"
        step1ResultsFolder.mkdir(exist_ok=True, parents=True)

        # step 2: plot the original dataset (pick the N_COMPONENTS dims with the highest variance)
        stds = data.std(axis=0)
        dims = np.argsort(stds)[::-1][:N_COMPONENTS]
        Visualizer.labeledScatter3D(data[:, dims], trueLabels, path=step1ResultsFolder / f"{N_COMPONENTS}_dims_originalScatter.png")

        # step 3
        dataMean = np.mean(data, axis=0)
        print(f"Original data mean: {dataMean}")

        # step 4, 5, 6, 7
        pca = PCA(n_components=N_COMPONENTS, print_=True)
        reducedData = pca.fit_transform(data)

        # step 8
        Visualizer.labeledScatter3D(reducedData, trueLabels, path=step1ResultsFolder / f"{N_COMPONENTS}_dims_pcaScatter.png")

        # step 9
        reconstructedData = pca.inverse_transform(reducedData)
        Visualizer.labeledScatter3D(reconstructedData[:, dims], trueLabels, path=step1ResultsFolder / f"{N_COMPONENTS}_dims_reconstructedScatter.png")
        return reducedData
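
# A minimal sketch of quantifying what the projection in phase1 discards: the mean
# squared reconstruction error between the original data and its back-projection.
# It reuses the fit_transform / inverse_transform interface shown above.
import numpy as np

def reconstruction_error(data, pca):
    # re-fit and back-project with the same interface used in phase1 above
    reconstructed = pca.inverse_transform(pca.fit_transform(data))
    return np.mean((data - reconstructed) ** 2)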
Example 4
from src.pca import PCA
# custom One-Versus-Rest SVM
from src.ovr import OVR

SHAPE = (46, 56)

M = 121
standard = False

data = fetch_data(ratio=0.8)

X_train, y_train = data['train']

D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)

W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape

W_test = pca.transform(X_test)

params = {'C': 1, 'gamma': 2e-4, 'kernel': 'linear'}

ovr = OVR(**params)
ovr.fit(W_train, y_train)

y_hat = ovr.predict(W_test).ravel()
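
# A minimal sketch of scoring the predictions above against the held-out labels;
# it assumes y_test is a flat array of class labels aligned with the rows of X_test.
import numpy as np

accuracy = np.mean(y_hat == np.asarray(y_test).ravel())
print("OVR test accuracy: {:.3f}".format(accuracy))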
Example 5
    # return features and labels
    return unsupervisedFeatures, y


def parseArguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--jsonConfig", type=Path, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = parseArguments()
    with open(args.jsonConfig, 'r') as f:
        config = json.load(f)

    step1ResultsFolder = Path(config["resultsDir"]) / "phase2"
    step1ResultsFolder.mkdir(exist_ok=True, parents=True)
    data, trueLabels = loadArffFile(Path(config["path"]))
    data = data.to_numpy()

    pca = PCA(data.shape[1])
    reducedData = pca.fit_transform(data)
    print(len(pca.varianceRatios))
    print(f"Explained Ratio Variance {np.round(pca.varianceRatios, 2)}")
    #print(f"Explained Ratio Variance {np.round(pca.explained_variance_ratio_, 2)}")
    #print(f"Sum {np.sum(pca.explained_variance_ratio_)}")
    #print(f"Mean {pca.mean_}")
    #print(f"Variance {ipca.var_}")
    #0print(f"Noise Variance {pca.noise_variance_}")
    #Visualizer.labeledScatter3D(reducedData, trueLabels, path=step1ResultsFolder / f"caca2_dims_pcaScatter.png")
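
    # A minimal sketch of picking how many components to keep from the ratios printed
    # above: the smallest k whose cumulative explained variance reaches a target (0.9
    # here is an arbitrary choice). varianceRatios is the attribute of the custom PCA.
    cumulative = np.cumsum(pca.varianceRatios)
    k = int(np.searchsorted(cumulative, 0.9)) + 1  # first index reaching the target
    k = min(k, len(cumulative))                    # guard if the target is never reached
    print(f"{k} components explain {cumulative[k - 1]:.2f} of the total variance")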
Example 6
# prettify plots
plt.rcParams['figure.figsize'] = [12.0, 9.0]
sns.set_palette(sns.color_palette("muted"))
_palette = sns.color_palette("muted")
sns.set_style("ticks")

M = 121
standard = True

data = fetch_data(ratio=0.8)

X_train, y_train = data['train']

D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)

W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape

W_test = pca.transform(X_test)

params = {'gamma': 2e-4, 'kernel': 'linear'}

fine = 5

# validate OVR
C_ovr = np.logspace(-5, 10, fine)
accuracy_ovr = []
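
# The listing is cut off here; a minimal sketch of how the sweep over C_ovr might
# continue, refitting the OVR classifier for each C on the PCA projections and
# recording test accuracy. The scoring rule below is an assumption, not the original code.
for C in C_ovr:
    ovr = OVR(C=C, **params)
    ovr.fit(W_train, y_train)
    y_hat = ovr.predict(W_test).ravel()
    accuracy_ovr.append(np.mean(y_hat == np.asarray(y_test).ravel()))

best = int(np.argmax(accuracy_ovr))
print("best C: {:.1e}, accuracy: {:.3f}".format(C_ovr[best], accuracy_ovr[best]))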