Example #1
    def _pca_legislaturas(self):
        """Wheel analysis by principal components legislature.

        Returns a dictionary where the keys are the ids of the legislaturas and the value
        of each key is a vector with n dimensions of the pca analysis"""

        if not self.pca_legislaturas:
            if not self.vetores_votacao:
                self._inicializa_vetores()
            ilnn = self._lista_de_indices_de_legislaturas_nao_nulas()
            matriz = self.vetores_votacao

            # Exclude legislaturas absent from every vote in the period
            matriz = matriz[ilnn, :]

            # Center the data
            matriz = matriz - matriz.mean(axis=0)

            # Run the PCA
            self.pca = pca.PCA(matriz, fraction=1)
            self._preenche_pca_de_legislaturas_nulas(ilnn)
            logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                        (str(self.ini), str(self.fim)))

        # Create dictionary to be returned:
        dicionario = {}
        for legislatura, vetor in zip(self.legislaturas, self.pca.U):
            dicionario[legislatura.id] = vetor
        return dicionario
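
Several of these snippets (Examples #1, #11, #12, #14, #20, #22) pass an already-centered matrix to pca.PCA(matriz, fraction=1) and then read one projected row per sample from the .U attribute. A minimal sketch of such an SVD-based PCA class, written as an assumption about the interface these projects use rather than a copy of their actual pca module:

import numpy as np

class PCA:
    """Minimal SVD-based PCA sketch; assumes A is already centered."""

    def __init__(self, A, fraction=0.90):
        # rows of U are the samples' coordinates in the singular basis
        self.U, self.d, self.Vt = np.linalg.svd(A, full_matrices=False)
        # keep enough components to explain `fraction` of the variance
        var = self.d ** 2
        self.npc = int(np.sum(np.cumsum(var) / var.sum() < fraction)) + 1

    def pc(self):
        # samples projected onto the kept principal axes
        return self.U[:, :self.npc] * self.d[:self.npc]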
Example #2
def train_model(slice_data, k, lr, random=True):
    global test_set
    global holdout_set
    global train_set
    global epochs
    percent_correct = []
    slice_data = bucket(slice_data, 6)
    for i in range(5):
        test_start_i = 2*i
        test_end_i = test_start_i + 2
        test_set = unbucket(slice_data[test_start_i:test_end_i], 6)
        remaining = slice_data[:test_start_i] + slice_data[test_end_i:]
        rand.shuffle(remaining)
        train_set = unbucket(remaining[:6], 6)
        holdout_set = unbucket(remaining[6:], 6)
        

        p_c_a = pca.PCA(k)
        p_c_a.fit(np.array([image for image,_ in train_set]))
        train_set = append_one([(p_c_a.transform(np.array(image)), label) for image, label in train_set])
        holdout_set = append_one([(p_c_a.transform(np.array(image)), label) for image, label in holdout_set])
        test_set = append_one([(p_c_a.transform(np.array(image)), label) for image, label in test_set])

        w = batch_gd_sm(epochs, lr, p_c_a.k + 1)
        visualize(p_c_a, np.array(w))
        percent_correct.append(sum([correct_category_sm(x, w) for x in test_set])/len(test_set))
    return percent_correct
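
The fold logic above fits PCA on the training split only and reuses the fitted transform for the holdout and test splits, which avoids information leakage. A minimal sketch of that pattern with a fit/transform-style, k-component PCA (this mirrors the pca.PCA(k) calls above but is not the project's actual module):

import numpy as np

class KComponentPCA:
    def __init__(self, k):
        self.k = k

    def fit(self, X):
        self.mean_ = X.mean(axis=0)
        # principal axes = top-k right singular vectors of the centered data
        _, _, Vt = np.linalg.svd(X - self.mean_, full_matrices=False)
        self.components_ = Vt[:self.k]
        return self

    def transform(self, X):
        return (X - self.mean_) @ self.components_.T

# fit on train only; apply the same projection to held-out data
# (train_X and test_X are assumed (n_samples, n_features) arrays)
# p = KComponentPCA(k=10).fit(train_X)
# train_Z, test_Z = p.transform(train_X), p.transform(test_X)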
Example #3
    def pca(self, keep=None, center=False, weight=True):
        '''
        Performs principal component analysis on the data field and stores
        a PCA object. Remove the climatology, detrend the data, etc. before
        calling this method.

        If center=True, the PCA object will center the data using its mean
        and standard deviation.
        If weight=True, the data are multiplied by area weights.
        '''

        nt, km, jm, im = self.data.shape

        # multiply data by the area factor and reshape into a (time, space) matrix
        if weight:
            factor = sp.cos(sp.deg2rad(self.grid['lat']))
            factor[factor < 0.] = 0.
            factor = sp.sqrt(factor)
        else:
            factor = sp.ones(self.grid['lat'].shape)
        mask = sp.ma.getmaskarray(self.data).copy()
        self.data[mask] = 0.0
        self.data *= factor[sp.newaxis, sp.newaxis]
        X = self.data.reshape((nt, km * jm * im)).view(sp.ndarray)

        self._pc = pca.PCA(X, center=center, keep=keep)

        self.data /= factor[sp.newaxis, sp.newaxis]
        self.data[mask] = self.data.fill_value
        self.data.mask = mask
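
The sqrt(cos(latitude)) factor above is the standard area weighting for data on a regular latitude-longitude grid: covariance entries are products of two data values, so weighting each value by the square root of the grid-cell area makes the covariance itself area-weighted. A small plain-numpy sketch of the factor (the latitude grid here is hypothetical):

import numpy as np

lat = np.linspace(-90.0, 90.0, 73)   # hypothetical latitude grid, in degrees
factor = np.cos(np.deg2rad(lat))     # grid-cell area is proportional to cos(lat)
factor[factor < 0.0] = 0.0           # guard against rounding below zero
factor = np.sqrt(factor)             # sqrt, so the covariance scales by cos(lat)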
Example #4
    def fit_transform(self, X, epochs, optimizer):
        '''
        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data
        epochs : The number of training epochs
        optimizer : Optimization algorithm; see optimizer.py

        Returns
        -------
        s : shape (n_samples, n_features)
            Predicted source per sample.
        '''
        n_samples, n_features = X.shape

        pca_model = pca.PCA(n_features, True)
        X_whiten = pca_model.fit_transform(X)

        self.__W = np.random.rand(n_features, n_features)

        for _ in range(epochs):
            g_W = np.zeros_like(self.__W)
            for x in X_whiten:
                # ML gradient: W^{-T} + (1 - 2 g(Wx)) x^T; np.outer makes the
                # rank-one term explicit for plain 1-D ndarray rows of X_whiten
                g_W += np.outer(
                    1 - 2 * scipy.special.expit(self.__W.dot(x)),
                    x) + np.linalg.inv(self.__W.T)
            g_W /= n_samples

            g_W = optimizer.optimize([g_W])[0]
            self.__W += g_W

        return X_whiten.dot(self.__W)
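
This fit_transform implements a maximum-likelihood (Infomax-style) ICA update on PCA-whitened data: each epoch ascends the average of W^{-T} + (1 - 2g(Wx))x^T over the samples. The project's optimizer.py is not shown, so the optimizer below is a hypothetical stand-in that matches the optimizer.optimize([grad])[0] call:

import numpy as np

class GradientAscent:
    """Hypothetical optimizer with the optimize([grad]) interface used above."""

    def __init__(self, learning_rate=0.1):
        self.learning_rate = learning_rate

    def optimize(self, grads):
        # scale each gradient by the learning rate; the caller adds it to W
        return [self.learning_rate * g for g in grads]

# usage sketch (X is an assumed (n_samples, n_features) array of mixed signals,
# and ICA is the class that owns the fit_transform above):
# sources = ICA().fit_transform(X, epochs=100, optimizer=GradientAscent(0.1))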
Example #5
def TestLaws(mgnames, NJ=100):

    # create laws filters
    filts = texture.BuildLawsFilters()
    # allocate for jets
    NI = len(mgnames)  # number of images
    jets = np.zeros((NJ * NI, 25))
    # for each image
    for i in range(NI):
        # load the image and correlate it with the filters
        #corrs = BruteCorrelate( data, filts )
        data = mgnames[i] + 0  # copy
        corrs = [correlate2d(data, f) for f in filts]
        # smooth the absolute response of each of the 25 correlations
        for j in range(25):
            corrs[j] = cspline2d(abs(corrs[j]), 200)
        corrs = np.array(corrs)
        # extract random jets
        V, H = data.shape
        vs = list(range(V))
        hs = list(range(H))
        np.random.shuffle(vs)
        np.random.shuffle(hs)
        for j in range(NJ):
            jets[i * NJ + j] = corrs[:, vs[j], hs[j]]
    # k-means clustering
    clust, mmb = kmeans.KMeans(NI, jets)
    #return jets
    cffs, evecs = pca.PCA(clust, 3)
    cffs = pca.Map2PCA(clust, evecs)
    gnu.Save('Laws_results.txt', cffs)
    return clust, cffs
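
Examples #5, #6, and #30 use a function-style interface instead: cffs, evecs = pca.PCA(data, n) followed by pca.Map2PCA(data, evecs). A plausible sketch of that pair, assuming PCA returns the projected coefficients plus the top-n covariance eigenvectors and Map2PCA projects data onto previously computed eigenvectors (the real module is not shown):

import numpy as np

def PCA(data, n):
    # center, then take the top-n eigenvectors of the covariance matrix
    centered = data - data.mean(axis=0)
    evals, evecs = np.linalg.eigh(np.cov(centered, rowvar=False))
    evecs = evecs[:, ::-1][:, :n]   # eigh sorts ascending; reverse to descending
    return centered @ evecs, evecs

def Map2PCA(data, evecs):
    # project (centered) data onto the given eigenvectors
    return (data - data.mean(axis=0)) @ evecs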
Example #6
def Train(brodatz_f, brodatz_256_mgs):

    # A container function that can be imported or run with user-created
    # arrays and filenames. Written purely for convenience when testing.

    qvecs = [basis.GaborCorr(m) for m in brodatz_256_mgs]
    qvecs = np.array(qvecs)
    cffs, evecs = pca.PCA(qvecs, 3)
    cffs = pca.Map2PCA(qvecs, evecs)
    vecs_num = 0
    for name in brodatz_f:
        plot_vecs = cffs[vecs_num:vecs_num + 4]
        vecs_num = vecs_num + 4
        gnu.Save('graph_output\\' + name[0:-4] + '.txt', plot_vecs)

    for i in brodatz_f:
        print(i + '\n')

    ss = 'unset key\n'
    ss += 'splot for [i=1:112] \'graph_output\\D\'.i.\'.txt\'\n'

    fp1 = open('plot_brodatz_256_textures.txt', 'w')
    fp1.write(ss)
    fp1.flush()
    return brodatz_f, cffs
Example #7
def use_pca(dataset):
    dataset_rest = []
    dataset_main = []
    for road in dataset:
        pca_obj = pca.PCA(road, 2)
        dataset_rest.append(pca_obj.rest_x)
        dataset_main.append(pca_obj.main_x)
    return dataset_main, dataset_rest
Example #8
    def train(self):
        train_pca = pca.PCA(self.pcs)
        train_pca.center(self.train_data)
        train_pca.svd()
        self.train_features = np.transpose(train_pca.compress(self.train_data))
        LREG = l_reg.linear_regression(self.train_features)
        self.w_opt = LREG.optimize()
        self.targets = LREG.targets
Example #9
def train_model(slice_data, k, lr, random=True):
    """
    Performs 5 runs to create 5 different models, using a different train/holdout/test split each time
    param: slice_data: data to slice up
    param: k: number of principal components
    param: lr: learning rate
    param: random: boolean that determines if holdout set should be chosen randomly, default is True
    return: average of all 5 runs percent correct based on test set
    """
    global test_set
    global holdout_set
    global train_set
    percent_correct = []
    for i in range(5):
        holdout_set = []
        test_starting_index = (i * 4)
        test_ending_index = test_starting_index + 4
        test_set = slice_data[test_starting_index:test_ending_index]
        remaining = slice_data[:test_starting_index] + slice_data[
            test_ending_index:]
        for _ in range(2):
            # Either chooses a random index or a fixed index.
            # A fixed index is used so the data split itself is not a factor
            # when testing only the effect of changing the learning rate.
            rand_index = rand.randint(0,
                                      len(remaining) - 1) if random else i * 2
            holdout_set += [remaining[::2][rand_index // 2]
                            ] + [remaining[1::2][rand_index // 2]]
            del remaining[(rand_index if rand_index % 2 == 0 else rand_index -
                           1):(rand_index + 2 if rand_index %
                               2 == 0 else rand_index + 1)]
        train_set = remaining

        p_c_a = pca.PCA(k)
        p_c_a.fit(np.array([image for image, _ in train_set]))
        train_set = append_one([(p_c_a.transform(np.array(image)), label)
                                for image, label in train_set])
        holdout_set = append_one([(p_c_a.transform(np.array(image)), label)
                                  for image, label in holdout_set])
        test_set = append_one([(p_c_a.transform(np.array(image)), label)
                               for image, label in test_set])
        # Only shuffles if order of images can be a factor
        # Turned off random when only testing learning rate effect
        if random:
            rand.shuffle(train_set)

        w = batch_gradient_descent(epochs, lr, p_c_a.k + 1)
        percent_correct.append(
            sum([
                correct_category(w, image, 1 if c1 in label else 0)
                for image, label in test_set
            ]) / len(test_set))
    return percent_correct
Example #10
File: tools.py Project: zzygyx9119/glint
def low_rank_approximation(O, k):
    """
    O dimensions are n X m
    """
    import time
    a = time.time()
    pca_out = pca.PCA(O)
    b = time.time()
    res = dot(pca_out.P[:, 0:k], pca_out.U[:, 0:k].transpose())
    c = time.time()
    logging.debug("PCA TOOK %s SECONDS AND DOT(MULTI) TOOK %s SECONDS" %
                  (b - a, c - b))
    return res
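
Reconstructing from the leading k principal components is the classic rank-k approximation; up to whatever centering the PCA implementation applies, it matches truncated SVD (Eckart-Young). An equivalent sketch in plain numpy:

import numpy as np

def low_rank_approximation_svd(O, k):
    # best rank-k approximation of O in the least-squares sense
    U, s, Vt = np.linalg.svd(O, full_matrices=False)
    return np.dot(U[:, :k] * s[:k], Vt[:k])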
Example #11
    def _pca_partido(self):
        """Runs the principal component analysis per party.
        Stores the result in self.pca
        Returns a dictionary whose keys are the party acronyms
        and whose values are n-dimensional vectors from the PCA analysis"""
        if not bool(self.pca_partido):
            if self.vetores_votacao == []:
                self._inicializa_vetores()
            matriz = self.vetores_votacao - self.vetores_votacao.mean(axis=0)
            self.pca_partido = pca.PCA(matriz)
        dicionario = {}
        for partido, vetor in zip(self.partidos, self.pca_partido.U):
            dicionario[partido.nome] = vetor
        return dicionario
Example #12
    def _pca_partido(self):
        """Roda a análise de componentes principais por partido.

        Guarda o resultado em self.pca
        Retorna um dicionário no qual as chaves são as siglas dos partidos
        e o valor de cada chave é um vetor com as n dimensões da análise pca
        """
        # Run the PCA if it has not been done yet:
        if not self.pca_partido:
            if self.vetores_votacao is None or len(self.vetores_votacao) == 0:
                self._inicializa_vetores()
            # Parties of size zero must be excluded from the PCA:
            ipnn = []  # list of indices of the non-null parties
            ip = -1
            for p in self.partidos:
                ip += 1
                if self.tamanhos_partidos[p.nome] != 0:
                    ipnn.append(ip)

            matriz = self.vetores_votacao
            matriz = matriz[ipnn, :]  # exclude parties of size zero
            # Center the data:
            matriz = matriz - matriz.mean(axis=0)
            # Run the PCA:
            self.pca_partido = pca.PCA(matriz, fraction=1)
            # Restore the null parties, assigning zero to every dimension
            # in the principal component space:
            U2 = self.pca_partido.U.copy()  # save the PCA result in U2
            self.pca_partido.U = numpy.zeros(
                (len(self.partidos), self.num_votacoes))
            ip = -1
            ipnn2 = -1
            for p in self.partidos:
                ip += 1
                if ip in ipnn:  # if this party is non-null
                    ipnn2 += 1
                    cpmaximo = U2.shape[1]
                    # put the values saved earlier in U2 into this row
                    self.pca_partido.U[ip, 0:cpmaximo] = U2[ipnn2, :]
                else:
                    self.pca_partido.U[ip, :] = numpy.zeros(
                        (1, self.num_votacoes))
            logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                        (str(self.ini), str(self.fim)))

        # Build the dictionary to be returned:
        dicionario = {}
        for partido, vetor in zip(self.partidos, self.pca_partido.U):
            dicionario[partido.nome] = vetor
        return dicionario
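
The refill loop above can be written more compactly with numpy fancy indexing; a sketch assuming the same names and shapes (U2 holds one PCA row per non-null party):

import numpy

# scatter the non-null PCA rows back into a zero-filled full matrix
U_full = numpy.zeros((len(partidos), num_votacoes))
U_full[ipnn, :U2.shape[1]] = U2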
Example #13
    def classify(self):
        test_pca = pca.PCA(self.pcs)
        test_pca.center(self.test_data)
        test_pca.svd()
        self.test_features = np.transpose(test_pca.compress(self.test_data))
        self.train_classifications = []
        self.test_classifications = []

        # loop over the feature rows, classifying test and train examples
        for i in range(0, self.test_features.shape[0]):
            self.test_classifications.append(
                np.matmul(self.w_opt, self.test_features[i]))
            self.train_classifications.append(
                np.matmul(self.w_opt, self.train_features[i]))
Example #14
    def _pca_uf(self):
        """Runs the principal component analysis per UF (state).
        Stores the result in self.pca
        Returns a dictionary whose keys are the UF acronyms
        and whose values are n-dimensional vectors from the PCA analysis"""
        if not bool(self.pca_uf):
            if self.vetores_votacao_uf == []:
                self._inicializa_vetores_uf()
            matriz = self.vetores_votacao_uf - self.vetores_votacao_uf.mean(
                axis=0)
            self.pca_uf = pca.PCA(matriz)
        dicionario = {}
        for uf, vetor in zip(self.lista_ufs, self.pca_uf.U):
            dicionario[uf] = vetor
        return dicionario
Example #15
File: serve.py Project: m4burns/glove
def getpcanns(word):
    response.content_type = 'application/json'
    nnv = n.nnv(n.vec(word), k=getk(request))
    words = n.words(nnv.indices)
    vecs = n.index_w(nnv.indices)
    pca, eigval, eigvec = p.PCA(vecs)
    pca -= pca.mean(axis=0)
    pca /= np.abs(pca).max(axis=0)
    # TODO why are there NaNs in the word list??
    res = [{
        'w': word if word == word else '',  # word == word is False for NaN
        'p': pca_vec.tolist(),
        'd': float(val)
    } for word, pca_vec, val in zip(words, pca, nnv.values)]
    return json.dumps(res)
Example #16
    def test_init(self):
        testDataArray = np.array([[1, 4.5, 4, 7],
                                  [2, 7.3, 5, 8],
                                  [3, 1.2, 9, 9]])
        testK = 2

        testP = pca.PCA(testDataArray, testK)
        #testPB = pca.PCABig("testDataFile.csv", "transformed.bin", testK)

        # Test Array Equality
        npt.assert_array_max_ulp(testP.data, testDataArray, maxulp = 0.)
        #npt.assert_array_max_ulp(testPB.data, "testDataFile.csv", maxulp = 0.)

        # Test requested dimension equality
        self.assertTrue(testP.k == testK)
Example #17
    def __init__(self, d, pop, numOfGenerations, a, r, q_min, q_max, lower_bound, upper_bound, function, use_pca=True, levy=False, seed=0, alpha=1, gamma=1):
        # Number of dimensions
        self.d = d
        # Population size
        self.pop = pop
        # Generations
        self.numOfGenerations = numOfGenerations
        # Loudness and alpha parameter (0 < a < 1)
        self.A = np.array([a] * pop)
        self.alpha = alpha
        # Pulse rate and gamma parameter (gamma > 0)
        self.R = np.array([r] * pop)
        self.gamma = gamma
        # (Min/Max) frequency
        self.Q = np.zeros(self.pop)
        self.q_min = q_min
        self.q_max = q_max
        # Domain bounds
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

        self.levy = levy
        self.use_pca = use_pca
        if use_pca:
            self.PCA = pca.PCA()

        # Initialise fitness and solutions
        self.f_min = np.inf
        self.solutions = np.zeros((self.pop, self.d))
        self.pop_fitness = np.zeros(self.pop)  # fitness of population
        self.best = np.zeros(self.d)  # best solution

        # Random number generator
        self.rng = np.random.default_rng(seed)

        # Velocity
        self.V = np.zeros((self.pop, self.d))

        # Optimisation/fitness function
        self.func = function

        # History (for plots)
        self.best_history = []
        self.min_val_history = []
        self.loudness_history = []
        self.pulse_rate_history = []
        self.frequency_history = []
Example #18
def semelhanca_pca(vetores):
    """Computes party similarities, generating a two-dimensional plot.
    This is done with Principal Component Analysis (PCA).
    Arguments:
    vetores -- a list of lists, where each list is a party's voting vector
    Returns:
    A list whose i-th position is the two-dimensional coordinate of the party
    whose voting vector was the i-th list in the vetores argument
    """
    # PCA: rows are samples and columns are variables,
    # so rows = parties and columns = votes.
    # The values must also be centered.
    # Since all values are in [0, 1], no rescaling is needed.
    matriz = numpy.array(vetores)
    matriz -= matriz.mean(axis=0)  # centering
    p = pca.PCA(matriz)
    return p
Example #19
def main():
    # prepare sample data and target variable
    wine_data = WineData()
    X = wine_data.X
    y = wine_data.y

    # split sample data into training data and test data and standardize them
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=0,
                                                        stratify=y)
    sc = StandardScaler().fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    pca_transformers = [pca.PCA(n_components=2), PCA(n_components=2)]
    for pca_transformer in pca_transformers:
        # execute PCA
        X_train_pca = pca_transformer.fit_transform(X_train_std)

        # show principal components and explained variance
        print('principal components:\n', pca_transformer.components_)
        print('explained variance:', pca_transformer.explained_variance_)
        plot_features(X_train_pca, y_train, xlabel='PC1', ylabel='PC2')

        # fit classifier and plot decision regions
        classifier = LogisticRegression(C=100.0,
                                        random_state=1,
                                        solver='liblinear',
                                        multi_class='ovr').fit(
                                            X_train_pca, y_train)
        X_test_pca = pca_transformer.transform(X_test_std)
        print('score: ', classifier.score(X_test_pca, y_test))
        plot_decision_regions(X_test_pca,
                              y_test,
                              classifier=classifier,
                              xlabel='PC1',
                              ylabel='PC2')
Example #20
    def _pca_legislaturas(self):
        """Roda a análise de componentes principais por legislatura.

        Retorna um dicionário no qual as chaves são os ids das legislaturas
        e o valor de cada chave é um vetor com as n dimensões da análise pca
        """
        if not self.pca_legislaturas:
            if not self.vetores_votacao:
                self._inicializa_vetores()
            ilnn = self._lista_de_indices_de_legislaturas_nao_nulas()
            matriz = self.vetores_votacao
            # exclude legislaturas absent from every vote in the period
            matriz = matriz[ilnn, :]
            matriz = matriz - matriz.mean(axis=0)  # center the data
            self.pca = pca.PCA(matriz, fraction=1)  # run the PCA
            self._preenche_pca_de_legislaturas_nulas(ilnn)
            logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                        (str(self.ini), str(self.fim)))
        # Build the dictionary to be returned:
        dicionario = {}
        for legislatura, vetor in zip(self.legislaturas, self.pca.U):
            dicionario[legislatura.id] = vetor
        return dicionario
Example #21
    def compute(self, waveforms, sampling_rate=None, n_pca_comp=8, sign='-'):
        """Return concatenation of PCA and OnlyMax features.
        
        OnlyMax is first component.
        """
        # shape (N_spikes, n_pca_comp)
        a1 = pca.PCA().compute(waveforms, sampling_rate, output_dim=n_pca_comp)

        # shape (N_spikes, trodness)
        #a2 = onlymax.OnlyMax().compute(waveforms, sampling_rate)
        print(sign)
        if sign == '-':
            a2 = np.min(waveforms, axis=2)
        elif sign == '+':
            a2 = np.max(waveforms, axis=2)
        elif sign == 'either':
            minny = np.min(waveforms, axis=2)
            maxxy = np.max(waveforms, axis=2)
            a2 = np.where(maxxy > np.abs(minny), maxxy, minny)
        elif sign == 'abs(either)':
            a2 = np.max(np.abs(waveforms), axis=2)
        else:
            raise ValueError("unknown sign: %r" % (sign,))

        return np.concatenate([a2, a1], axis=1)
Example #22
    def _pca_partido(self):
        """Roda a análise de componentes principais por partido.

        Guarda o resultado em self.pca
        Retorna um dicionário no qual as chaves são as siglas dos partidos
        e o valor de cada chave é um vetor com as n dimensões da análise pca
        """
        if not self.pca_partido:
            if self.vetores_votacao == None or len(self.vetores_votacao) == 0:
                self._inicializa_vetores()
            ipnn = self._lista_de_indices_de_partidos_naos_nulos()
            matriz = self.vetores_votacao
            matriz = matriz[ipnn, :]  # exclui partidos de tamanho zero
            matriz = matriz - matriz.mean(axis=0)  # centraliza dados
            self.pca_partido = pca.PCA(matriz, fraction=1)  # faz o pca
            self._preenche_pca_de_partidos_nulos(ipnn)
            logger.info("PCA terminada com sucesso. ini=%s, fim=%s" %
                        (str(self.ini), str(self.fim)))
        # Criar dicionario a ser retornado:
        dicionario = {}
        for partido, vetor in zip(self.partidos, self.pca_partido.U):
            dicionario[partido.nome] = vetor
        return dicionario
Example #23
def similarity_pca(vetores):
    """Computes party similarities, generating a two-dimensional plot.
    This is done with Principal Component Analysis (PCA).
    Arguments:
    vetores -- a list of lists, where each list is a party's voting vector
    Returns:
    A list whose i-th position is the two-dimensional coordinate of the party
    whose voting vector was the i-th list in the vetores argument."""

    # PCA: rows are samples, columns are variables;
    # here rows = parties and columns = votes.
    # The values must be centered.
    # Since all values are in [0, 1], no rescaling is needed.
    matriz = numpy.array(vetores)

    # Centering:
    matriz -= matriz.mean(axis=0)

    # PCA on the centered matrix
    p = pca.PCA(matriz)

    return p
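
A short usage sketch, assuming vetores holds equal-length voting vectors and that, as in the other snippets here, the returned object exposes one projected row per party through a .U attribute (an assumption about this pca module):

import numpy

# three hypothetical parties, four votes each, values in [0, 1]
vetores = [[1.0, 0.0, 1.0, 1.0],
           [0.0, 1.0, 0.0, 0.0],
           [1.0, 0.0, 1.0, 0.0]]
p = similarity_pca(vetores)
coords_2d = p.U[:, :2]   # assumed: first two principal coordinates per party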
Example #24
def pca_model():
    return pca.PCA(faces.IMAGES).run()
Example #25
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Does principal component '
                                     'analysis on the penny colors')
    parser.add_argument('image',
                        type=str,
                        help='Image filename used to '
                        'generate histograms (not actually read)')
    args = parser.parse_args(argv)

    histogram_file_in = args.image + '_hist.csv'
    colors = np.array(penny_r.get_colors(histogram_file_in, rounded=False))

    pcao = pca.PCA(colors, fraction=1)

    #import pdb;pdb.set_trace()
    #print p
    # principal component vectors = pcao.Vt
    # RGB triplets in principal components = pcao.pc()
    penny_pc = pcao.pc()

    title_format = ':^10s'
    stat_format = ':10.2f'
    stat_titles = 'min', 'q1', 'median', 'mean', 'q3', 'max'
    stat_funcs = min, lambda a: sps.scoreatpercentile(
        a, 25), np.median, np.mean, lambda a: sps.scoreatpercentile(a, 75), max
    assert len(stat_titles) == len(stat_funcs)
    print(' | '.join(['{' + title_format + '}'] *
                     len(stat_titles)).format(*stat_titles))
    for pc_set in penny_pc.T:
        print(' | '.join(['{' + stat_format + '}'] * len(stat_funcs)).format(
            *[f(pc_set) for f in stat_funcs]))

    # find image bounds to synthesize
    im_pc1_bounds = [min(penny_pc.T[0]), max(penny_pc.T[0])]
    im_pc1_range = im_pc1_bounds[1] - im_pc1_bounds[0]
    im_pc1_bounds[0] = im_pc1_bounds[0] - 0.25 * im_pc1_range
    im_pc1_bounds[1] = im_pc1_bounds[1] + 0.25 * im_pc1_range

    im_pc2_bounds = [min(penny_pc.T[1]), max(penny_pc.T[1])]
    im_pc2_range = im_pc2_bounds[1] - im_pc2_bounds[0]
    im_pc2_bounds[0] = im_pc2_bounds[0] - 0.25 * im_pc2_range
    im_pc2_bounds[1] = im_pc2_bounds[1] + 0.25 * im_pc2_range

    im_pc1_ax = np.arange(*[round(x) for x in im_pc1_bounds])
    im_pc2_ax = np.arange(*[round(x) for x in im_pc2_bounds])
    im_pc1_AX, im_pc2_AX = np.meshgrid(im_pc1_ax, im_pc2_ax)

    im = np.empty(im_pc1_AX.shape + (3, ))
    for i, row in enumerate(im):
        for j, pxl in enumerate(row):
            im[i, j] = pcao.Vt[0] * im_pc1_ax[j] + pcao.Vt[1] * im_pc2_ax[i]
            im[i, j] = im[i, j] / 255

    fig, ax = plt.subplots()
    ax.plot(penny_pc.T[0],
            penny_pc.T[1],
            linestyle='none',
            marker='o',
            color='lime',
            markersize=10,
            alpha=0.5)
    ax.imshow(im,
              extent=[
                  min(im_pc1_ax),
                  max(im_pc1_ax),
                  min(im_pc2_ax),
                  max(im_pc2_ax)
              ])
    ax.set_aspect('auto')
    ax.set_title('Penny Color Space')
    ax.set_xlabel('PC1 (1 unit = {} RGB units, of 255)'.format(', '.join(
        '{:0.3f}'.format(v) for v in pcao.Vt[0])))
    ax.set_ylabel('PC2 (1 unit = {} RGB units, of 255)'.format(', '.join(
        '{:0.3f}'.format(v) for v in pcao.Vt[1])))

    f2, ax2 = plt.subplots()
    pc1 = penny_pc.T[0]
    ax2.hist(pc1, bins=10, density=True)
    norm_mean, norm_sd = sps.norm.fit(pc1)
    x = np.linspace(norm_mean - 4 * norm_sd, norm_mean + 4 * norm_sd, 100)
    y = sps.norm.pdf(x, norm_mean, norm_sd)
    ax2.autoscale(False, axis='x')
    ax2.plot(x, y, color='red', lw=5)
    ax2.set_title('Penny PC1 color distribution')
    ax2.set_xlabel('PC1')
    ax2.set_ylabel('Frequency')

    #print 'norm: ', sps.kstest(pc1, lambda x: sps.norm.cdf(x, *sps.norm.fit(pc1)))

    #spo.leastsq(lambda p, x: skew.skew(x, *p) - pc1, [0.5]*3, ())
    #print 'norm: ', sps.kstest(pc1, lambda x: sps.norm.cdf(x, *sps.norm.fit(pc1)))
    #print 'norm: ', sps.kstest(pc1, lambda x: sps.norm.cdf(x, *sps.norm.fit(pc1)))

    fig.savefig('fig.png')
    f2.savefig('fig2.png')
Example #26
holes_train, holes_test = utils.get_hole_features(
    digits_train), utils.get_hole_features(digits_test)
pix_train, pix_test = utils.get_pix_features(
    digits_train), utils.get_pix_features(digits_test)
X_train, X_test = np.hstack([pix_train,
                             holes_train]), np.hstack([pix_test, holes_test])

mean_normalizer = utils.normalization(X_train)
X_train = mean_normalizer.transform(X_train)
X_test = mean_normalizer.transform(X_test)

mx_score = 0
best = (-1, -1)
clf = knn.KNN(mode='weighted')
for n_component in range(3, 61, 3):
    for k in range(1, 11):
        _pca = pca.PCA(X_train)
        X_train_reduced = _pca.transform(X_train, n_component)
        X_test_reduced = _pca.transform(X_test, n_component)

        start_time = timeit.default_timer()
        validation_scores = []
        kf = KFold(n_splits=10)
        for t_idx, v_idx in kf.split(X_train_reduced):
            X_train_T, X_train_V = X_train_reduced[t_idx], X_train_reduced[
                v_idx]
            y_train_T, y_train_V = y_train[t_idx], y_train[v_idx]
            clf.fit(X_train_T, y_train_T)
            validation_score = clf.score(X_train_V, y_train_V, k)
            validation_scores.append(validation_score)
        avg_val_score = np.mean(validation_scores)
        if avg_val_score > mx_score:
            mx_score = avg_val_score
            best = (n_component, k)
Example #27
import numpy as np
import struct
import matplotlib.pyplot as plt
import pylab
import pca
import tools
import linear

train_data = tools.get_train_data()
train_label = tools.get_train_label()

dimension = 50

train_data, eigenVectors = pca.PCA(train_data, dimension)
print(np.array(train_data).shape)

num1 = 1
num2 = 9

# 10 classes
classified_data = []
for i in range(10):
    classified_data.append([])

for i in range(len(train_label)):
    # print(train_label[i][0])
    classified_data[train_label[i][0]].append(train_data[i])

train_data1 = classified_data[num1]
train_data2 = classified_data[num2]
Example #28
import csv

import matplotlib.pyplot as plt
import numpy as np

import pca


# Load the data
Xy = []
with open("winequality-red.csv") as fp:
    for row in csv.reader(fp, delimiter=";"):
        Xy.append(row)
Xy = np.array(Xy[1:], dtype=np.float64)
X = Xy[:, :-1]

# Fit the model
model = pca.PCA(n_components=2)
model.fit(X)

# Transform
Y = model.transform(X)

# Plot
plt.scatter(Y[:, 0], Y[:, 1], color="k")
plt.show()

Example #29
#               'i1171n','m1166r','r1192p','r1275q','t1151m','y1278s']
mutation_list = ['a1200v', 'd1270g', 'd1349h', 'g1286r', 'r1231q']
#mutation_list=['v1229m'] #fix this one
directory = '../40ns-dry/'
num_frames = 2000  #want 40 ns worth of trajectory

for mutation in mutation_list:
    print "\nanalyzing {0} system".format(mutation)
    psf = '{0}alk.dry.{1}.psf'.format(directory, mutation)
    dcd = '{0}alk.40ns.dry.{1}.dcd'.format(directory, mutation)
    #    psf="{0}alki.{1}.ions.psf".format(directory,mutation)
    #    dcd="{0}alki.{1}.nvt-constrain.dcd".format(directory,mutation)
    out_file_name = "{0}_peter".format(mutation)
    PCA = pca.PCA(
        psf,
        dcd,
        align_selection=
        "name CA and not ((resid 1125:1129) or (resid 1157:1174) or (resid 1271:1292))",
        pca_selection="name CA",
        outname=out_file_name,
        last_frame=num_frames)
    PCA.make_covariance()
    PCA.write_data()
"""
    analysis=jma.analyzer(psf,dcd,out_file=out_file_name,last_frame=num_frames)
    analysis.get_rms()
    analysis.get_sasa()
    analysis.get_salts()
    analysis.get_hbonds()
"""
Example #30
############Program Start#######################
fnames = ReadFiles('data')
mgs = [basis.LoadGray(f) for f in fnames]

nmgs = []
for i in range(len(mgs)):
    nm = ip.Plop(mgs[i], 512, 512)
    nmgs.append(nm)

fids = [basis.MakeGrid(m) for m in nmgs]
fids = np.array(fids)
morph = warp.Morph(nmgs[1], fids[1], nmgs[2], fids[2], NI=10)
Imageset = []
Imageset.append(nmgs[1])

for i in morph:
    Imageset.append(i)
Imageset.append(nmgs[2])

for i in range(len(Imageset)):
    nm = 'output\\morph_' + str(i) + '.png'
    smisc.imsave(nm, Imageset[i])

clust, cffs = TestLaws(Imageset)

qvecs, a = TestHaar(Imageset)
qvecs = qvecs.sum(3).sum(2)
cffs, evecs = pca.PCA(qvecs, 3)

cffs = pca.Map2PCA(qvecs, evecs)
gnu.Save('Haar_results.txt', cffs)