Example #1
    def doStuff(self):
        self.readWFDEIOutput()
        self.makeArray()

        pca = PCA(self.array)
        print(pca.mu)
        print(pca.fracs)

        out = pca.project(self.array, minfrac=0.1)
        print(out.shape)

        plt.subplot(1, 3, 1)
        plt.plot(out[:, 0], out[:, 1], 'k+')
        plt.subplot(1, 3, 2)
        plt.plot(out[:, 0], out[:, 2], 'k+')
        plt.subplot(1, 3, 3)
        plt.plot(out[:, 1], out[:, 2], 'k+')
        plt.show()
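Note: this example uses matplotlib.mlab.PCA, which has since been removed from Matplotlib (deprecated in 2.2, gone as of 3.1). A minimal sketch of the same steps with scikit-learn instead; the random array is a stand-in for self.array, and the attribute mapping (mu -> mean_, fracs -> explained_variance_ratio_, project -> transform) is a rough equivalence, not an exact drop-in:

import numpy as np
from sklearn.decomposition import PCA

array = np.random.rand(100, 5)               # stand-in for self.array
pca = PCA().fit(array)
print(pca.mean_)                             # mlab's pca.mu
print(pca.explained_variance_ratio_)         # mlab's pca.fracs
keep = pca.explained_variance_ratio_ > 0.1   # emulates minfrac=0.1
out = pca.transform(array)[:, keep]
print(out.shape)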
Example #2
def plot(val_fn, pts_fn, output_fn):
    points = []
    with open(pts_fn) as fp:
        for line in fp:
            points.append([float(x) for x in line.split()])

    values = []
    with open(val_fn) as fp:
        for line in fp:
            values.append(float(line.split()[1]))

    xx = [pt[0] for pt in points]
    yy = [pt[1] for pt in points]
    print("X:", min(xx), max(xx))
    print("Y:", min(yy), max(yy))

    m = min(values)
    values = [(v - m) % 1. for v in values]
    print("V:", min(values), max(values))
    # hsv()
    myData = numpy.array(points)
    # results = PCA(myData, 2)
    pca = PCA(n_components=2)
    results = pca.fit_transform(myData)
    fig = figure()
    scatter(results[:, 0], results[:, 1], s=10, c=values, cmap="Spectral")
    colorbar()

    # ax = fig.add_axes([-.05, -.1, 1.1, 1.1])
    ax = axes()
    ax.set_axis_off()
    ax.set_aspect('equal', 'box')
    # adjust(0, 0, 1, 1, 0, 0)

    fig.savefig(output_fn)
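This snippet depends on pylab-style global names (figure, scatter, colorbar, axes) plus scikit-learn's PCA; a guess at the header it assumes, not confirmed by the source:

import numpy
from pylab import figure, scatter, colorbar, axes
from sklearn.decomposition import PCA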
Example #3
def plotWords():
    # get the model; we use the word vectors (w2v) only
    w2v = gensim.models.Doc2Vec.load_word2vec_format(
        "C:/Users/ghfiy/PycharmProjects/TwitterProcess/trained.word2vec")
    words_np = []
    # a list of labels (words)
    words_label = []
    for word in w2v.vocab.keys():
        words_np.append(w2v[word])
        words_label.append(word)
    print('Added %s words. Shape %s' % (len(words_np), np.shape(words_np)))

    pca = PCA(n_components=2)
    pca.fit(words_np)
    reduced = pca.transform(words_np)

    # plt.plot(pca.explained_variance_ratio_)
    for index, vec in enumerate(reduced):
        # print('%s %s' % (words_label[index], vec))
        if index < 100:
            x, y = vec[0], vec[1]
            plt.scatter(x, y)
            plt.annotate(words_label[index], xy=(x, y))
    plt.show()
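The load_word2vec_format call above is the pre-1.0 gensim API. In current gensim (4.x), plain word vectors are loaded through KeyedVectors and the vocabulary is exposed as key_to_index; a sketch of the equivalent loading step, assuming the same file path:

import gensim

w2v = gensim.models.KeyedVectors.load_word2vec_format(
    "C:/Users/ghfiy/PycharmProjects/TwitterProcess/trained.word2vec")
words_label = list(w2v.key_to_index)
words_np = [w2v[word] for word in words_label]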
Example #4
def PCA_on_waveforms(waveforms, minfrac, location):
    """
	This function performs principal component analysis on the spike waveforms extracted and returns the
	projection of the waveforms on these principal component axes.

	Inputs:
		waveforms: Numpy array containing the waveforms; in the form of
			(N_events x N_electrodes x N_spike_time_range_steps)
		minfrac: Principal component axes that counts for the variance greater than this minfrac
			value will be taken into account.
		params: Dictionary containing the recording and analysis parameters. Following entries must be present:
			spike_timerange: List containing the time range of spike waveform as an array

	Outputs:
		projection: Waveforms projected on the principal component axes
	"""
    """peak_of_spike_time_range = (len(params['spike_timerange']) / 2) + 1
	peaks = waveforms[:,:,peak_of_spike_time_range]

	true_electrode_inds = np.where(peaks[0] != 0) #Eliminating the broken or absent electrodes on the grid (for which the voltage equals 0 all the time) in order to avoid their contamination on the PCA.
	waveforms_true = waveforms[:,true_electrode_inds] #Waveforms from absent electrodes eliminated
	n_dimensions = len(true_electrode_inds[0]) * len(params['spike_timerange']) #Number of dimensions before dimensionality reduction
	waveforms_true = waveforms_true.reshape(len(peaks),n_dimensions) #Reshaping the array with respect to initial number of dimensions
	results = PCA(waveforms_true)"""

    experiment = location.experiment
    n_dimensions = len(waveforms[0]) * (experiment.spike_samples_before +
                                        experiment.spike_samples_after)
    waveforms = waveforms.reshape(len(waveforms), n_dimensions)
    results = PCA(waveforms)
    projection = results.project(waveforms, minfrac)
    return projection
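The reshape above flattens (N_events, N_electrodes, N_samples) into (N_events, N_electrodes * N_samples), so each spike event becomes a single row for the PCA. A toy illustration with assumed sizes:

import numpy as np

w = np.random.randn(500, 4, 40)   # 500 events, 4 electrodes, 40 samples each
flat = w.reshape(len(w), 4 * 40)  # -> (500, 160), one event per row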
Example #5
def main():
    print("Loading Word2Vec model...")
    # 4 GB input file, uses about 20 GB of memory when loaded
    # Uses the model from: http://bio.nlplab.org/
    model = gensim.models.Word2Vec.load_word2vec_format(
        "../../PubMed/BioNLP/wikipedia-pubmed-and-PMC-w2v.bin", binary=True)
    model.init_sims(replace=True)
    vocab = model.index2word

    data_matrix = np.array([model[vocab[i]] for i in range(len(vocab))])

    print("Running PCA...")
    pca_results = PCA(data_matrix)
    
    seed_word_list = ["dopamine", "GABA", "serotonin", "5HT", "acetylcholine",
                      "glutamate", "electrode", "stimulator", "cognitive",
                      "behavioral", "ethological", "genetic", "biochemical",
                      "channel", "concentration", "dynamics", "receptor",
                      "antibody", "fMRI", "calcium", "nucleus", "axon", "soma",
                      "dendrite", "synapse", "fNIRS", "EEG"]

    # seed_word_list = [s.lower() for s in seed_word_list]

    classes = [[] for s in seed_word_list]
    for i in range(len(seed_word_list)):
        classes[i].append(model[seed_word_list[i]])
        for s in model.most_similar(seed_word_list[i]):
            classes[i].append(model[s[0]])

    classes_projected = [[] for s in seed_word_list]
    for i in range(len(seed_word_list)):
        for f in classes[i]:
            classes_projected[i].append(pca_results.project(f))

    print("Plotting PCA results...")
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title("Principal Components of Word Vectors")
    
    import itertools
    marker = itertools.cycle(['o', '^', '*', "s", "h", "8"])
    colorList = ["r", "b", "g", "y", "k", "c", "m", "w"]
    colors = itertools.cycle(colorList)

    m = next(marker)
    for i in range(len(seed_word_list)):
        col = next(colors)
        if i % len(colorList) == 0:
            m = next(marker)
        
        '''
        # plot the individual words
        ax.scatter([f[0] for f in classes_projected[i]],
                   [f[1] for f in classes_projected[i]],
                   [f[2] for f in classes_projected[i]],
                   marker=m, s=20, c=col)
        '''

        # plot the cluster means
        ax.plot([np.mean([f[0] for f in classes_projected[i]])],
                [np.mean([f[1] for f in classes_projected[i]])],
                [np.mean([f[2] for f in classes_projected[i]])],
                marker=m, markersize=21, color=col,
                label=seed_word_list[i], linestyle="none")
        
    
    ax.legend(numpoints = 1)
    plt.show()
Example #6
File: network.py  Project: rkass/Footballer
def pca(minfrac):
  matrix = []
  for vector in vects:
    matrix.append(vector[0])
  print("Matrix Built")
  training = numpy.array(matrix)
  print("Training...")
  results = PCA(training)
  ret = []
  print("Projecting...")
  for vector in vects:
    ret.append(results.project(vector[0], minfrac))
  return ret
Example #7
File: pca.py  Project: zhewang/lcvis
def calculate(ids, matrix, target=None):
    results = PCA(matrix)
    data = []
    for obj_id, row in zip(ids, matrix):
        projected = results.project(row)  # project each row once
        data.append([round(projected[0], 6),
                     round(projected[1], 6)])

    # target = []
    data = icp.align(data, target)

    # for obj_id, row in zip(ids, data):
    #     row.append(obj_id)
    return data.tolist()
Example #8
def calculate(ids, matrix, target=None):
    results = PCA(matrix)
    data = []
    for obj_id, row in zip(ids, matrix):
        projected = results.project(row)  # project each row once
        data.append([
            round(projected[0], 6),
            round(projected[1], 6)
        ])

    # target = []
    data = icp.align(data, target)

    # for obj_id, row in zip(ids, data):
    #     row.append(obj_id)
    return data.tolist()
Example #9
def PCA_for_spec_power(mat,
                       fs=300,
                       average_every_x_minutes=5,
                       smooth=True,
                       normalize=True,
                       z_score=True):
    new_mat = []
    for i, band in enumerate(['Alpha', 'Beta', 'Gamma', 'Delta', 'Theta']):
        new_mat.append(
            power_spec_on_mat(mat,
                              fs=fs,
                              average_every_x_minutes=average_every_x_minutes,
                              band=band,
                              smooth=smooth,
                              normalize=normalize,
                              z_score=z_score).mean(axis=0))
    new_mat = np.array(new_mat)
    print(new_mat.shape)
    pca = PCA(new_mat.T)
    fig = plt.figure(figsize=(20, 15), dpi=1000)
    fig.clf()
    ax = fig.add_subplot(111)
    ax.plot(pca.Y[:, 0], pca.Y[:, 1])
    for i in range(pca.Y.shape[0]):
        ax.text(pca.Y[i, 0], pca.Y[i, 1], '{}'.format(i))
    plt.show()
Example #10
def example_signaturesAndPCA():
    #     Calculate signatures of random+clique+lattices+circles mixture graph and plot their PCA
    crc = erdos_circles_cliques([1000, 5000],
                                nCliques=20,
                                cliqueSize=15,
                                nCircles=20,
                                circleSize=30,
                                save=False)
    gFinal = append_lattices(crc, nLatte=20, latticeSize=[5, 5])
    colors = colorNodes(gFinal, ["cliq", "circ", "late"])
    Lg = spectral.laplacian(gFinal)

    eigsToCompute = 100
    eigsWhere = "../Data/Spectrums/LM/mix2" + str(eigsToCompute + 1) + "_Eigs"
    evals, evecs = graph_analysis.IO.compute_or_load(gk.computeSpectrum,
                                                     eigsWhere,
                                                     False,
                                                     Lg,
                                                     eigsToCompute + 1,
                                                     small=True)
    timeSample = gk.heatTimeSample3(evals[0], evals[-1], 10)
    sig = gk.HKS(evals, evecs, eigsToCompute, timeSample)

    sigsPCA = PCA(sig)
    print "Fraction of Variance in the first three PCA dimensions: " + str(
        sum(sigsPCA.fracs[0:3]))
    saveAt = "gnp_cliques_circles__lattices_1K_5K_20_15_20_30_20_25.pdf"
    myPLT.plot_PCA_Res(
        sigsPCA,
        dim=3,
        legendLabel=["Gnp_Nodes", "Cliques", "Circles", "Lattices"],
        colorMap=[colors, palets("typeToColor")],
        save=False,
        saveAt=saveAt)
Example #11
    def apply_PCA_on(self, kdd_data_10percent):
        training_input_data = np.asarray(
            kdd_data_10percent[num_features])  # load this from KDD data
        myData = np.array(training_input_data)
        from matplotlib.mlab import PCA
        results = PCA(myData, standardize=False)
        return results.Y
Example #12
def plot_pca(data):
    clr1 = '#2026B2'
    fig = MPL.figure()
    ax1 = fig.add_subplot(111)
    data_resc, evals, evecs = PCA(data)
    ax1.plot(data_resc[:, 0], data_resc[:, 1], '.', mfc=clr1, mec=clr1)
    MPL.show()
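The PCA unpacked here is not matplotlib's class but a helper returning a (rescaled data, eigenvalues, eigenvectors) tuple; Example #24 below unpacks the same signature. A minimal covariance-eigendecomposition version of such a helper (a sketch, not the project's own code):

import numpy as np

def PCA(data, dims_rescaled_data=2):
    data = data - data.mean(axis=0)                # center the columns
    evals, evecs = np.linalg.eigh(np.cov(data.T))  # symmetric eigendecomposition
    idx = np.argsort(evals)[::-1]                  # sort by decreasing variance
    evals, evecs = evals[idx], evecs[:, idx[:dims_rescaled_data]]
    return np.dot(data, evecs), evals, evecs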
Example #13
def pca(ids, matrix):
    print("{}: Calculating PCA...".format(timestamp()))

    results = PCA(matrix)

    # pickle needs a binary-mode file handle
    with open('./pca_pickle.dat', 'wb') as f:
        pickle.dump(results, f)

    data = []

    for obj_id, row in zip(ids, matrix):
        projected = results.project(row)
        data.append([round(projected[0], 6),
                     round(projected[1], 6),
                     obj_id])

    print("{}: Done.".format(timestamp()))
    return data
Example #14
def calculate_article_metrics_pca(article_names):
    def make_article_exogenous_df(article_names):
        exogenous_arts = dict()
        for article_name in article_names:
            page = pywikibot.Page(enwp, article_name)
            try:
                page_text = page.get()
            except pywikibot.IsRedirectPage:
                redir_page = page.getRedirectTarget()
                page_text = redir_page.get()
            wikicode = pfh.parse(page_text)
            metrics = report_actionable_metrics(wikicode)
            exogenous_arts[article_name] = metrics

        exogenous_df = pd.DataFrame.from_dict(exogenous_arts, orient='index')
        return exogenous_df.apply(pd.to_numeric, errors='coerce')

    article_exogenous_df = make_article_exogenous_df(article_names)
    # print(article_exogenous_df)
    article_exogenous_matrix = article_exogenous_df.to_numpy()
    pca_obj = PCA(article_exogenous_matrix)
    print('PCA fractions: ', pca_obj.fracs)
    # take the first principal component: the z-scores in the PCA domain
    agg_metrics = pca_obj.Y[:, 0]
    named_aggregates = zip(list(article_exogenous_df.index), agg_metrics)
    #print named_aggregates
    aggregates_sorted = sorted(named_aggregates, key=operator.itemgetter(1))
    aggregates_ranks_sorted = [(identup[0], aggregates_sorted.index(identup))
                               for identup in aggregates_sorted]

    return aggregates_ranks_sorted
Example #15
def pca_plot(axis1, axis2, genres_to_keep):
    # Process data and compute PCA
    gtzan = pr.MusicDB(p2_train, p2_train_label, p2_test, p2_test_label)
    genres_to_remove = gmap(genres_to_keep, rest=True)
    gtzan.remove_genres(genres_to_remove)
    mfcc_pca = PCA(gtzan.train.music.T)
    genre = 10 - len(genres_to_remove)
    spg = mfcc_pca.Wt[0].shape[0] // genre  # samples per genre (integer division)
    # Make sure plots folder exists
    mkdir('plots')
    # Plot
    fig, ax = plt.subplots()
    rest = remaining(genres_to_remove)
    tag = ''
    for genre in rest:
        tag += str(genres.index(genre))
    for i, genre in enumerate(rest):
        color = colors[i]
        X = mfcc_pca.Wt[axis1 - 1][i * spg:(i + 1) * spg]
        Y = mfcc_pca.Wt[axis2 - 1][i * spg:(i + 1) * spg]
        plt.scatter(X, Y, c=color, label=genre)
    plt.xlabel('pca' + str(axis1))
    plt.ylabel('pca' + str(axis2))
    plt.legend()
    plt.savefig('plots/pca_' + str(axis1) + '_' + str(axis2) + '_' + tag +
                '.png')
Example #16
def module():
    import urllib
    from matplotlib.mlab import PCA
    import numpy as np
    import matplotlib.pyplot as plt
    pca = PCA(np.array([[2, 3], [4, 5]]))
    print(pca)
Example #17
def plotPCA_01():
    df = dfTrain.drop(['Category'], axis=1)
    df = normalizacionZscore(df)
    print(df)
    dfcov = df.cov()
    dfcov.to_csv('csv/trainCov.csv', sep=',', index=False, header=True)  # , encoding='UTF-8'
    plt.figure(figsize=(20, 16))
    ax = sns.heatmap(dfcov)
    # plt.setp(ax.get_xticklabels(), rotation=45)
    # plt.setp(ax.get_xticklabels(), fontsize=5)
    # plt.setp(ax.get_yticklabels(), fontsize=5)

    plt.savefig("Ploteos/Matplotlib/PCA-COV2.png")

    pca = PCA(df)
    print('fracs:', pca.fracs)

    dfTmp = pd.DataFrame(pca.fracs, columns=['Fracs'])
    for id in dfTmp.index.values:
        dfTmp.loc[id, 'Acumulado'] = dfTmp['Fracs'][0:id].sum()

    # dfTmp['Acumulado'] = 1
    print(dfTmp)
    dfTmp.plot(kind='line', title='PCA', ylim=(0, 1))
    plt.savefig("Ploteos/Matplotlib/PCA2.png")
    plt.cla()
    plt.clf()
    plt.close()
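The Acumulado loop above is a cumulative sum and can be done in one call (note that np.cumsum includes the current component, while the loop sums only the preceding ones); a sketch:

import numpy as np

dfTmp['Acumulado'] = np.cumsum(pca.fracs)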
Example #18
def pca(data):
    print ("In PCA")
    results = PCA(data)
    print (results)

    x = []
    y = []
    z = []

    for item in results.Y:
        x.append(item[0])
        y.append(item[1])
        z.append(item[2])
    plt.close('all')
    fig1 = plt.figure()
    ax = Axes3D(fig1)
    pltData = [x, y, z]
    ax.scatter(pltData[0], pltData[1], pltData[2], c='b', marker='o')
    # draw the coordinate axes through the origin
    xAxisLine = ((min(pltData[0]), max(pltData[0])), (0, 0), (0, 0))
    ax.plot(xAxisLine[0], xAxisLine[1], xAxisLine[2], 'r')
    yAxisLine = ((0, 0), (min(pltData[1]), max(pltData[1])), (0, 0))
    ax.plot(yAxisLine[0], yAxisLine[1], yAxisLine[2], 'r')
    zAxisLine = ((0, 0), (0, 0), (min(pltData[2]), max(pltData[2])))
    ax.plot(zAxisLine[0], zAxisLine[1], zAxisLine[2], 'r')

    ax.set_xlabel('Lot Size')
    ax.set_ylabel('Age')
    ax.set_zlabel('Sale Price')
    ax.set_title('PCA analysis')
    plt.show()
Example #19
    def pca_var(sub_dims):
        data = np.array([df[d] for d in sub_dims]).T
        try:
            pca = PCA(data, standardize=True)
        except Exception:
            return 0, 1, 0, 1, None, None, None, sub_dims

        classed_points = list(zip(classes, pca.Y))
        pos = [(it[0], it[1]) for c, it in classed_points if c]
        neg = [(it[0], it[1]) for c, it in classed_points if not c]
        P_hull = [pos[i] for i in ConvexHull(pos).vertices]
        P_hull.append(P_hull[0])
        N_hull = [neg[i] for i in ConvexHull(neg).vertices]
        N_hull.append(N_hull[0])
        P_hull = np.array(P_hull)
        N_hull = np.array(N_hull)
        P_path = Path(P_hull)
        N_path = Path(N_hull)

        N_sep = 0
        for it in neg:
            if not P_path.contains_point(it):
                N_sep += 1

        P_sep = 0
        for it in pos:
            if not N_path.contains_point(it):
                P_sep += 1

        return N_sep, float(len(neg)), P_sep, float(len(pos)), P_hull, N_hull, pca, sub_dims
Example #20
def best_elements_order_pca(relations, elements=None, filter_order=None):
    (present_elements, present_element_groups, properties, property_groups,
     element_2_property_2_relation,
     property_2_element_2_relation) = relations_2_model(relations)
    if not elements:
        elements = present_elements

    import numpy
    from matplotlib.mlab import PCA

    array = []
    for element in elements:
        array.append([])
        for property in properties:
            if property in element_2_property_2_relation[element]:
                array[-1].append(1.0)
            else:
                array[-1].append(0.0)
    array = numpy.array(array)

    pca = PCA(array)

    element_2_x = {elements[i]: pca.Y[i, 0] for i in range(len(elements))}

    orders = list(elements)
    orders.sort(key=lambda element: element_2_x[element])
    return orders
Example #21
    def main():
        print("add dataset into numpy array")
        train_dataset = append_feature(TRAIN_PATH)
        print("train set created successfully")
        test_dataset = append_feature(TEST_PATH)
        print("test set created successfully")

        n_samples, h, w = train_dataset.images.shape

        # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
        # X_train, X_test, y_train, y_test = train_test_split(image_dataset.data, image_dataset.target, test_size=0.1)
        X_train = train_dataset.data
        y_train = train_dataset.target

        X_test = test_dataset.data
        y_test = test_dataset.target

        # print(y_train)
        # print(y_test)

        n_components = 70
        pca = PCA(n_components=n_components).fit(X_train)
        eigenfaces = pca.components_.reshape((n_components, h, w))

        print("Projecting the input data on the eigenfaces orthonormal basis")
        X_train_pca = pca.transform(X_train)
        X_test_pca = pca.transform(X_test)

        eigenface_titles = [
            "eigenface %d" % i for i in range(eigenfaces.shape[0])
        ]
        # print(eigenfaces.shape[0])
        plot_gallery(eigenfaces, eigenface_titles, h, w)
        # plt.imshow(eigenfaces.shape[0])
        plt.show()

        k = 2
        knn_model = KNeighborsClassifier(n_neighbors=k)
        model_save = knn_model.fit(X_train_pca, y_train)
        saved_model = pickle.dumps(model_save)
        knn_from_pickle = pickle.loads(saved_model)

        # print(model_save)

        y_predict = knn_from_pickle.predict(X_test_pca)
        print(classification_report(y_test, y_predict))
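plot_gallery is an assumed helper here, mirroring the one in scikit-learn's eigenfaces demo; a minimal sketch:

import matplotlib.pyplot as plt

def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    for i in range(min(n_row * n_col, len(images))):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        plt.xticks(())
        plt.yticks(())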
Example #22
    def get_pca(array, type_name):
        pca = PCA(array)

        dim = int(pca_name.split("_")[1].split(".")[0])
        limit_pca = pca.Wt[:dim, :]
        if type_name == "cloud":
            limit_pca.dump(pca_cloud_name)
        elif type_name == "visible":
            limit_pca.dump(pca_visible_name)
Example #23
    def pca(self):
        if (self.inputDataUji.toPlainText() != ''):
            print("add dataset into numpy array")
            train_dataset = append_feature(TRAIN_PATH)
            print("train set created successfully")
            test_dataset = append_feature(TEST_PATH)
            print("test set created successfully")

            n_samples, h, w = train_dataset.images.shape

            X_train = train_dataset.data
            y_train = train_dataset.target

            X_test = test_dataset.data
            y_test = test_dataset.target

            n_components = 70
            pca = PCA(n_components=n_components).fit(X_train)
            eigenfaces = pca.components_.reshape((n_components, h, w))

            print(
                "Projecting the input data on the eigenfaces orthonormal basis"
            )
            X_train_pca = pca.transform(X_train)
            X_test_pca = pca.transform(X_test)

            eigenface_titles = [
                "eigenface %d" % i for i in range(eigenfaces.shape[0])
            ]
            plot_gallery(eigenfaces, eigenface_titles, h, w)
            plt.show()

            k = 2
            knn_model = KNeighborsClassifier(n_neighbors=k)
            model_save = knn_model.fit(X_train_pca, y_train)
            saved_model = pickle.dumps(model_save)
            knn_from_pickle = pickle.loads(saved_model)

            # print(model_save)

            y_predict = knn_from_pickle.predict(X_test_pca)
            self.RESULT_CLASSIFICATION = classification_report(
                y_test, y_predict)
Example #24
def test_PCA(data):
    '''
    test by attempting to recover the original data array from
    the eigenvectors of its covariance matrix & comparing that
    'recovered' array with the original data
    '''
    # keep all dimensions so the projection is exactly invertible
    data_rescaled, _, eigenvectors = PCA(data, dims_rescaled_data=data.shape[1])
    data_recovered = NP.dot(data_rescaled, eigenvectors.T)
    data_recovered += data.mean(axis=0)
    assert NP.allclose(data, data_recovered)
Example #25
def draw_pcca_memberships(original_data, pcca, discrete_trajectory, colormap_name="jet"):
    """
    Visualize the result of PCCA+ as colored plot of the PCA.
    """
    pca = PCA(original_data)

    cluster_ids = range(0, pcca.shape[1])
    colormap = matplotlib.cm.get_cmap(colormap_name, len(cluster_ids) + 1)

    membership = pcca > 0.5
    pcca_traj = np.where(membership[discrete_trajectory])[1]


    for index, cluster in enumerate(cluster_ids):
        datapoints = original_data[np.where(pcca_traj == cluster)]
        print('points in cluster ', cluster, ': ', len(datapoints))
        datapoints_transformed = pca.project(datapoints)
        plt.scatter(datapoints_transformed[:,0], datapoints_transformed[:,1], color=colormap(index), alpha=0.5)
    plt.title('pcca')
Example #26
def pca_dim_reduction(input_data, target_dim):
    reduced_dataset = []
    # pca_obj = PCA(np.array(input_data))
    pca_obj = PCA(np.array(input_data), standardize=False)
    projected_dataset = pca_obj.Y.tolist()
    for projected_data in projected_dataset:
        reduced_data = []  # one data point with reduced dim
        for col in range(0, target_dim):
            reduced_data.append(projected_data[col])
        reduced_dataset.append(reduced_data)
    return reduced_dataset
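The two inner loops amount to slicing the projected array; an equivalent one-liner using the names from the function above (a sketch):

import numpy as np

reduced_dataset = np.asarray(pca_obj.Y)[:, :target_dim].tolist()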
Example #27
    def draw(self):
        embeddings = self.embedding
        reversed_dictionary = self.doc_mapper.reversed_dictionary
        words_np = []
        words_label = []
        for i in range(0, len(embeddings)):
            words_np.append(embeddings[i])
            words_label.append(reversed_dictionary[i][0])

        pca = PCA(n_components=2)
        pca.fit(words_np)
        reduced = pca.transform(words_np)

        plt.rcParams["figure.figsize"] = (20, 20)
        for index, vec in enumerate(reduced):
            if index < 1000:
                x, y = vec[0], vec[1]
                plt.scatter(x, y)
                plt.annotate(words_label[index], xy=(x, y))
        plt.show()
Example #28
def main():
    workbook = xlrd.open_workbook('PB_CereRMANewCDF.xlsx')
    worksheet = workbook.sheet_by_name('Sheet1')
    num_rows = worksheet.nrows - 1
    num_cells = worksheet.ncols - 1
    curr_row = -1
    x = np.zeros((20292, 28))
    dicgenes = dict()
    i = 0
    while curr_row < num_rows:
        curr_row += 1
        if curr_row == 0:
            continue
        row = worksheet.row(curr_row)
        #print 'Row:', curr_row
        curr_cell = -1
        j = 0
        while curr_cell < num_cells:
            curr_cell += 1
            # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
            cell_type = worksheet.cell_type(curr_row, curr_cell)
            cell_value = worksheet.cell_value(curr_row, curr_cell)
            #print '	', cell_type, ':', cell_value
            if cell_type == 1:
                dicgenes[curr_row] = cell_value
            else:
                x[i][j] = cell_value
                j += 1
        i += 1
    # i = 0
    # while i < 20292:
    #     j = 0
    #     while j < 63:
    #         print(x[i][j], end=' ')
    #         j += 1
    #     print("\n")
    #     i += 1
    results = PCA(x)
    #print (results.Y)
    res, idx = kmeans(results.Y, 8)
    #print res
    i = 0
    dicname = dict()
    dicclust = np.zeros(20292)
    retgenname(dicname)
    while i < len(idx):
        f = open('./' + str(idx[i]) + '.txt', 'a')
        f.write(dicname[i])
        f.write("\n")
        f.close()
        print("%s belongs to %d" % (dicgenes[i + 1], idx[i]))
        dicclust[i] = int(idx[i])
        i += 1
    print("silhouette score", end=' ')
    print(silhouette_score(x, dicclust, metric='euclidean'))
Example #29
def draw_clusters(clusters, plotter=None, colormap_name="jet"):
    """
    Visualize clustered data and cluster membership in a new plot or with an existing axis object.
    """
    plotter = plotter or plt

    # use PCA to be able to visualize the data in two dimensions
    all_data = clusters.getOriginalData()
    pca = PCA(all_data)

    # for nicer visualization
    data_length = len(all_data)
    alpha = 1.0 / math.sqrt(data_length)
    if alpha < 0.05:
        alpha = 0.05
    elif alpha > 0.75:
        alpha = 0.75
    cluster_ids = clusters.getClusterIDs()
    colormap = matplotlib.cm.get_cmap(colormap_name, len(cluster_ids) + 1)
    for index, cluster in enumerate(cluster_ids):
        datapoints = all_data[clusters._map == cluster,:]
        datapoints_transformed = pca.project(datapoints)
        plotter.scatter(datapoints_transformed[:,0], datapoints_transformed[:,1], color=colormap(index), alpha=0.5)
Example #30
    def pcaTrafo(self):
        from dipy.tracking.streamline import set_number_of_points
        from matplotlib.mlab import PCA
        # use sklearn instead; matplotlib's mlab PCA is deprecated
        # from sklearn.decomposition import PCA

        #w = np.loadtxt( self.weights )

        #streams = nib.streamlines.load( self.tracks )
        #fibs = streams.streamlines#[w > 0]

        # load mrtrix streamlines ( subject space )
        streams = nib.streamlines.load(self.tracks)
        fibs = streams.streamlines

        fibs_resampled = self.resample(fibs)

        # calculate PCA transformation
        pcaResult = PCA(fibs_resampled, standardize=False)

        # trafo
        pcaTrafo = pcaResult.Wt

        # summed variance of dimensions 10 to 90
        sum(pcaResult.fracs[self.cutOff + 1:]) / sum(pcaResult.fracs)

        pca_trafo = self.pca_trafo

        # store pca-points for clustering
        np.savetxt(self.pca_pts, pcaResult.Y[:, :self.cutOff])
        #np.savetxt( pca_w, w[w>0] )

        # remove points for storage purposes
        pcaResult.a = []
        pcaResult.Y = []

        # save pca trafo to file
        with open(pca_trafo, 'wb+') as fid:
            pickle.dump(pcaResult, fid, -1)
Example #31
def pca(image):
    """
    Function to perform PCA on a passed in image
    :param image:
    :return:
    """
    # PCA using OpenCV
    # mean, eigenvectors = cv.PCACompute(image, np.mean(image, axis=0).reshape(1, -1))

    # PCA using matplotlib
    results = PCA(image)

    return results
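For comparison, the OpenCV route from the commented-out line; a sketch assuming image is a 2-D float32 array with one observation per row (maxComponents chosen arbitrarily):

import cv2
import numpy as np

image = np.random.rand(50, 10).astype(np.float32)
mean, eigenvectors = cv2.PCACompute(image, mean=None, maxComponents=3)
projected = cv2.PCAProject(image, mean, eigenvectors)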
Example #32
    def __init__(self, file_names, learning_rate=0.001):
        self.kmer = 2
        self.windowed = True
        self.windowsize = 50000
        self.boolean = True
        self.iterations = 200
        self.batch = True
        self.radiusfactor = 3
        [fv, self.trainHeaders] = self.generate_dataVectors(file_names)
        fv2 = np.array(fv)

        # fv2[:, 2:] drops the two leading header columns; the rest is the
        # data matrix (one data point per row).
        dataMatrix = fv2[:, 2:]

        # make a new PCA object from a numpy array object
        myPCA = PCA(dataMatrix)
        x_av = dataMatrix.mean(0)
        eigenvector_1 = myPCA.Wt[0]
        eigenvector_2 = myPCA.Wt[1]
        # standard deviations across the first and second PCs
        std1 = np.std(myPCA.Y[:, 0])
        std2 = np.std(myPCA.Y[:, 1])
        SOM_width = int(math.ceil(5 * std1))
        SOM_height = int(math.ceil((std2 / std1) * SOM_width))

        self.width = SOM_width
        self.height = SOM_height
        self.radius = max(self.height, self.width) / self.radiusfactor
        self.learning_rate = learning_rate
        self.FV_size = len(dataMatrix[0])
        self.trainV = fv2

        wt = np.zeros((self.height, self.width, self.FV_size))
        for i in range(SOM_height):
            for j in range(SOM_width):
                wt[i, j] = (x_av + ((eigenvector_1 * (j - (SOM_width / 2))) +
                                    (eigenvector_2 * (i - (SOM_height / 2)))))

        self.nodes = wt

        self.trainRecord = [[[-1 for i in range(0)] for x in range(self.width)]
                            for y in range(self.height)]
        self.colourFlags = [[0 for x in range(self.width)]
                            for y in range(self.height)]
        self.composition_map = [[-1 for x in range(self.width)]
                                for y in range(self.height)]
Example #33
def eigenFunc(ix, iy, iz, H):
    indices = np.where(H[ix:ix + nvox, iy:iy + nvox, iz:iz + nvox])
    arr = np.array((np.arange(ix, ix + nvox)[indices[0]],
                    np.arange(iy, iy + nvox)[indices[1]],
                    np.arange(iz, iz + nvox)[indices[2]]))
    rep = np.array(H[ix:ix + nvox, iy:iy + nvox, iz:iz + nvox][indices], dtype='int64')
    # 3xN matrix: each column one x,y,z coordinate, weighted by energy deposition
    xyz = np.repeat(arr, rep, axis=1)
    # effectively don't weight by the charge population created in this voxel by this gamma, after all
    xyz = arr
    pca = None
    try:
        pca = PCA(xyz.T)
    except Exception:
        pass
    return pca
Example #34
def make_pca(data):
    keys = sorted(list(data.keys()))
    data_np = np.concatenate([cuda.cupy.asnumpy(data[k]) for k in keys])
    mean = data_np.mean(axis=0)
    cleaned = np.delete(data_np, np.where(mean == 0), 1)
    pca = PCA(cleaned)
    index = 0
    result = {}
    for k in keys:
        k_samples = len(data[k])
        # result[k] = pca.Y[index:index+400]  # limit number of samples per key
        result[k] = pca.Y[index:index + k_samples]
        index += k_samples
    return result
Example #35
    def sample_cluster_2Dmap(self, **kwargs):
        defaults = dict(
            genelist=None,
            samplenames=None,
            size=50,)
        for key in defaults:
            kwargs.setdefault(key, defaults[key])
        genearray = self.array
        if type(kwargs['genelist']) == list:
            validatedlist = self.validate_genelist(kwargs['genelist'])
            genearray = self.array.take(validatedlist, axis=0)
        elif kwargs['genelist']:
            raise TypeError('genelist should be a list of genes')
        samplenames = [x for x in self.dataindexdic.keys()]
        if kwargs['samplenames']:
            if len(kwargs['samplenames']) != len(samplenames):
                raise ValueError('length of samplenames should be {}'.format(len(samplenames)))
            samplenames = kwargs['samplenames']
        covarray = numpy.cov(genearray.T)  # covariance array
        covPCA = PCA(covarray)  # matplotlib.mlab.PCA
        convertedcovs = covPCA.project(covarray)  # vectors projected onto the PCs
        data = numpy.array([[x[0] for x in convertedcovs],
                            [x[1] for x in convertedcovs]])
        # auto color picking with sample numbers
        color = []
        colorlist = cm.rainbow(numpy.linspace(0, 1, len(samplenames)))
        keys = [x for x in self.dataindexdic.keys()]
        for c, key in zip(colorlist, keys):
            color.extend([c] * len(self.dataindexdic[key]))
        sampleindex = 0
        for i in range(len(samplenames)):
            samplenumber = len(self.dataindexdic[keys[i]])
            subdata = numpy.take(
                data, range(sampleindex, sampleindex + samplenumber), axis=1)
            plt.scatter(
                subdata[0], subdata[1], color=colorlist[i], s=kwargs['size'],
                label=samplenames[i])
            sampleindex += samplenumber
        plt.legend(loc='upper left', fontsize=15, scatterpoints=1,
                   bbox_to_anchor=(1, 1))
Example #36
    def pca(self, Wt=None):
        if self.n_chan_in == 2:
            chans = [[0, self.m0], [1, self.m1]]
            Wt_ = [np.zeros((self.m0, self.m0)), np.zeros((self.m1, self.m1))]
        else:
            chans = [[0, self.m0]]
            Wt_ = [np.zeros((self.m0, self.m0)), []]

        if Wt is None:
            Wt = Wt_
            for [channel, m] in chans:
                if self.batch_size * self.n_steps > m:
                    mPCA = PCA(self.meas_in[channel].T)
                    self.meas_in[channel] = mPCA.Y.T
                else:
                    mPCA = PCA(self.meas_in[channel])
                    self.meas_in[channel] = mPCA.Y
                print('Wt shape: ', mPCA.Wt.shape)
                Wt[channel] = mPCA.Wt
        else:
            for [channel, m] in chans:
                if self.batch_size * self.n_steps > m:
                    mPCA = PCA(self.meas_in[channel].T)
                    mPCA.Wt = Wt[channel]
                    self.meas_in[channel] = mPCA.Y.T

                else:
                    mPCA = PCA(self.meas_in[channel])
                    mPCA.Wt = Wt[channel]
                    self.meas_in[channel] = mPCA.Y
                print('Wt shape: ', mPCA.Wt.shape)

        print('PCA MEG: ', self.meas_in[0].shape)
        print('PCA EEG: ', self.meas_in[1].shape)

        return Wt
Example #37
g = mixture.GMM(n_components=3, covariance_type="full")


kf = cross_validation.KFold(len(X), k=folds, shuffle=True)
for train_index, test_index in kf:
    # print("TRAIN: %s TEST: %s" % (train_index, test_index))
    X_train, X_test = X[train_index], X[test_index]

    # generate knn analysis
    fits.append(g.fit(X_train))
    scores.append(g.bic(X_test))
print(scores)

fig = Figure(figsize=(6, 6))
canvas = FigureCanvas(fig)
myPCA = PCA(X)
pcDataPoint = myPCA.project(X)
ax = fig.add_subplot(111)
ax.scatter(pcDataPoint[:, 1], pcDataPoint[:, 2])
canvas.print_figure("PCA12.png", dpi=500)
#  print(scores)
# avg = float(sum(scores)/len(scores))
# for k in range(0,len(scores)):
#    diffs.append((scores[k]-avg)*(scores[k]-avg))
# print diffs

# var = float(sum(diffs)/len(scores))
# scoresavg.append(avg)
# scoresvar.append(var)
# print(scoresavg)
# print(scoresvar)
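mixture.GMM and the cross_validation module were removed from scikit-learn in 0.20; a sketch of the same BIC-per-fold loop with the current API, using stand-in data and an assumed 10 folds:

import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import KFold

X = np.random.rand(200, 4)  # stand-in data
g = GaussianMixture(n_components=3, covariance_type="full")
scores = []
for train_index, test_index in KFold(n_splits=10, shuffle=True).split(X):
    g.fit(X[train_index])
    scores.append(g.bic(X[test_index]))
print(scores)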
Example #38
# Python script to perform principal component analysis on the face distance measures


import random, os
import numpy as np
from matplotlib.mlab import PCA

data = []
for line in open("emotions.train"):
    data.append([])
    for el in line[2:].strip().split(" "):
        data[-1].append(float(el[el.index(":") + 1:]))
    if len(data[-1]) != 86:
        data.remove(data[-1])

results = PCA(np.array(data))

archive = open("pca_archive_wt.txt", "w")
for v in results.Wt:
    archive.write(",".join([str(float(x)) for x in v]) + "\n")
archive.close()

archive = open("pca_archive_mu.txt", "w")
archive.write(",".join([str(float(x)) for x in results.mu]) + "\n")
archive.close()

archive = open("pca_archive_sigma.txt", "w")
archive.write(",".join([str(float(x)) for x in results.sigma]) + "\n")
archive.close()

fout = open("emotions.train.pca", "w")
for line in open("emotions.train"):
    temp = []
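Applying the archived transform to a new sample follows mlab's PCA semantics: standardize with mu and sigma, then project with Wt. A sketch, assuming Wt, mu, and sigma have been read back from the files written above:

import numpy as np

x = np.random.rand(86)                    # stand-in 86-dim feature vector
projected = np.dot(Wt, (x - mu) / sigma)  # equivalent to results.project(x)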
Example #39
from sklearn import datasets, decomposition
from sklearn.decomposition import PCA
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import Imputer

features = np.loadtxt("features.dat", unpack=True)
response = np.loadtxt("response.dat", unpack=True)

X = np.array(features)
Y = np.array(response)

# print banned_data
pca = PCA(n_components=2)
Y_r = pca.fit(response).transform(response)
X_r = pca.fit(features).transform(features)
print(Y_r)
plt.figure()
plt.scatter(X_r[:,  0], X_r[:, 1])
plt.title('PCA of dataset')
plt.show()

#
# np.random.seed(5)
#
# centers = [[1, 1], [-1, -1], [1, -1]]
# features = datasets.x()
# X = features.data
# y = features.target
Example #40
    matrix = list(x["splinedata"] for x in j)
    
    # metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
    # obj_ids = []

    # with open('{}/object_list.csv'.format(args.path)) as csvfile:
    #     objects = csv.reader(csvfile)
    #     next(objects, None)
    #     for row in objects:
    #         obj_id = int(row[0])
    #         period = float(row[1])
    #         if period > 0:
    #             v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
    #             for i in range(50 - len(v)):
    #                 v.append(v[0])
    #             matrix.append(v)
    #             obj_ids.append(obj_id)

    vec = np.array(matrix)
    vec.shape = (len(matrix), 20)
    results = PCA(vec)

    data = []

    for obj, row in zip(j, matrix):
        projected = results.project(row)
        data.append([projected[0], projected[1], obj])

    with open('pca_transients.json', 'w') as f_out:
        f_out.write(json.dumps(data))
Example #41
File: pca.py  Project: ycchui/uncertainty
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

#title = t1+t2
    
data = read_data(metrics[0],metrics_no[0])
#data = np.hstack((data, read_data('unit_shallowest')))
for metric in range(1,11,1):
    data = np.hstack((data, read_data(metrics[metric],metrics_no[metric])))
para = np.loadtxt('fold_parameter_change.txt',).reshape(100,6)
#print para.shape[-1]
combine = np.hstack((data[:,2],data[:,6],data[:,7],data[:,13])).reshape(100,4)

#print combine

results = PCA(data)
#print results.numcols
row_std = np.std(data, axis=0)
print(results.Wt.shape)
#print results.sigma
#print row_std
#print results.Wt[0]
#print results.Wt[1]
#print results.fracs #contribution of each axes
loading1 = results.Wt[0]/row_std
loading2 = results.Wt[1]/row_std
if not os.path.exists(os.path.join(os.getcwd(), 'pca')):
    os.makedirs(os.path.join(os.getcwd(),'pca'))
os.chdir(os.path.join(os.getcwd(),'pca'))
para_project = results.project(para)
print(para_project.shape)
Example #42
            for var_name in ['tmin', 'tmax']:
                print('PROCESSING VARIABLE ' + var_name)
                print('Extracting PCA data from index files')

                LIVNEH_PCA_data, LIVNEH_lons, LIVNEH_lats = get_LIVNEH_PCA_data(var_name, livneh_years, livneh_data_dir)
                LOCA_PCA_data, LOCA_lons, LOCA_lats = get_LOCA_PCA_data(rcp, var_name, loca_years, loca_data_dir)
                num_lons = LIVNEH_lons.shape[0]
                num_lats = LIVNEH_lats.shape[0]
                print('LIVNEH DATA MATRIX ' + str(LIVNEH_PCA_data.shape))
                print('LOCA DATA MATRIX ' + str(LOCA_PCA_data.shape))

                #print('Minutes elapsed: ' + str((dt.datetime.now() - start_time).total_seconds() / 60.0))
                print('Computing component matrix')
                num_comps = 6
                comp_indices = []
                pca = PCA(n_components=num_comps)
                X_pca = pca.fit_transform(LIVNEH_PCA_data) #(5490, 3)
                # Projection of original data onto component space
                corr_array = pca.inverse_transform(X_pca) #(5490, 103968)
                components = pca.components_.transpose() # (103968, 3)
                print('VARIANCE EXPLAINED: ')
                print (pca.explained_variance_ratio_)
                rotated_components = varimax(components).transpose() #(3, 103968)
                dates_dt = []
                dates_ts = []
                for year in loca_years:
                    dates_dt.append(dt.datetime(year,12,1))
                    dates_ts.append(datetime_to_date(dates_dt[-1], '-'))
                    for doy_idx in range(1,90):
                        dates_dt.append(advance_date(dates_dt[-1],1, 'forward'))
                        dates_ts.append(datetime_to_date(dates_dt[-1], '-'))
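varimax here is a project-local helper; a standard numpy implementation of the rotation (a sketch, not the project's own code):

import numpy as np

def varimax(Phi, gamma=1.0, q=20, tol=1e-6):
    p, k = Phi.shape
    R = np.eye(k)
    d = 0.0
    for _ in range(q):
        Lambda = Phi @ R
        u, s, vh = np.linalg.svd(Phi.T @ (Lambda ** 3 - (gamma / p) * Lambda
                                          @ np.diag(np.diag(Lambda.T @ Lambda))))
        R = u @ vh
        d_old, d = d, np.sum(s)
        if d_old != 0 and d / d_old < 1 + tol:
            break
    return Phi @ R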
Example #43
    pl.close('all')
    pl.xlim([-10, 20])
    pl.ylim([-15, 15])
    pl.scatter(pca.Y[:, 0], pca.Y[:, 1])
    pl.savefig("2D_" + rat + "_" + date + ".png")


    
        
for rat in file.keys():
    #dataProcessing(file, rat, file[rat][0])
    date1 = list(file[rat].keys())[0]
    object = file[rat][date1]["valueMatrix"]
    data = np.array(object)
    pca = PCA(data)
    print(pca.fracs[0], pca.fracs[1], pca.fracs[2], pca.fracs[3])

    pl.close('all')
    fig1 = pl.figure()
    ax = Axes3D(fig1)
    ax.scatter(pca.Y[:, 0], pca.Y[:, 1], pca.Y[:, 2], c='b', marker='o')
    ax.set_xlim([-10, 20])
    ax.set_ylim([-15, 20])
    ax.set_zlim([-15, 15])
    pl.savefig("3D_" + rat + "_" + date1 + ".png")

    pl.close('all')
    pl.xlim([-10, 20])
    pl.ylim([-15, 15])
    pl.scatter(pca.Y[:, 0], pca.Y[:, 1])
Example #44
print('numFeatures', numFeatures)
for foldid in range(10):
    controlTest = getFold(control, controlUserFoldDict, foldid, lambda x,y:x==y, useNgram)
    controlTrain = getFold(control, controlUserFoldDict, foldid, lambda x,y:x!=y, useNgram)
    schTest = getFold(sch, schUserFoldDict, foldid, lambda x,y:x==y, useNgram)
    schTrain = getFold(sch, schUserFoldDict, foldid, lambda x,y:x!=y, useNgram)


    XTrain, YTrain = randomShuffle(controlTrain + schTrain, [1]*len(controlTrain) + [0]*len(schTrain))

    #findCorrelation(XTrain)  #plots graph of feature correlations

    #[meanFt, varFt] = normFeatParams(XTrain)  #both meanFt and varFt are of length = numberoffeatures
    #XTrain = normFeat(XTrain, meanFt, varFt)

    PCAObject = PCA(np.asarray(XTrain))

    XTrain = PCAObject.center(XTrain)
    if doPCA:
        numFeatures =  retainPerc(PCAObject.fracs, 0.99)
        XTrain = PCAObject.project(XTrain)[:,0:numFeatures]
        [meanFt, varFt] = normFeatParams(XTrain)  #both meanFt and varFt are of length = numberoffeatures
        XTrain = np.asarray(normFeat(XTrain, meanFt, varFt))
        #print numFeatures, XTrain.shape

    #TODO: SHUFFLE UP THE INPUT

    clf = svm.SVC(kernel='rbf')
    clf.fit(XTrain, YTrain)

    XTest = controlTest + schTest
Example #45
        #matrix_with_id[i] = BandB_sampled[i]+BandR_sampled[i]+BandU_sampled[i]+BandV_sampled[i]


    obj_ids = []
    matrix = []
    row_length = 40
    for i in matrix_with_id:
        obj_ids.append(i)
        matrix.append(matrix_with_id[i])
        if len(matrix_with_id[i]) != row_length:
            print('row length is not {}'.format(row_length))

    # PCA calculating
    vec = np.array(matrix)
    vec.shape = (len(matrix), row_length)
    results = PCA(vec)

    data = []

    for obj_id, row in zip(obj_ids, matrix):
        obj_type = BandB_sampled[obj_id]["stype"]
        projected = results.project(row)
        data.append([projected[0], projected[1], obj_type, obj_id])

    with open(args.path + '/pca_supernova.json', 'w') as f_out:
        f_out.write(json.dumps(data))

    #matrix = []
    #j = json.load(open('{}/PLV_LINEAR.json'.format(args.path)))

    #metadata = dict((obj["LINEARobjectID"], obj) for obj in j["data"])
Example #46
File: pca.py  Project: hdc-arizona/lcvis
    with open('{}/object_list.csv'.format(args.path)) as csvfile:
        objects = csv.reader(csvfile)
        next(objects, None)
        for row in objects:
            obj_id = int(row[0])
            period = float(row[1])
            if period > 0:
                v = loadMagData(args.path+'/'+str(obj_id)+'.fit.json')
                for i in range(row_length - len(v)):
                    v.append(v[0])
                matrix.append(v)
                obj_ids.append(obj_id)

    vec = np.array(matrix)
    vec.shape = (len(matrix), row_length)
    results = PCA(vec)

    with open('pca_result.dat', 'wb') as f:
        pickle.dump(results, f)

    with open('pca_matrix.dat', 'wb') as f:
        pickle.dump(vec, f)

    data = []

    for obj_id, row in zip(obj_ids, matrix):
        projected = results.project(row)
        data.append([projected[0], projected[1], metadata[obj_id]["LCtype"], obj_id])

    with open(args.path + '/pca.json', 'w') as f_out:
        f_out.write(json.dumps(data))
Example #47
def pca(dim):
    pca = PCA(data[:, 0:9])
    return pca.project(data[:, 0:9])[:, 0:dim]