import math

import numpy as np
from skimage import io  # assumed: io refers to skimage.io, as in the other scripts


def displayData(X, example_width=None):
    """Display the rows of X as a grid of images (each row is one flattened image)."""
    # If no width is given, assume each example is roughly square.
    if example_width is None:
        example_width = int(round(math.sqrt(X.shape[1])))
    m = X.shape[0]
    n = X.shape[1]
    example_height = int(n / example_width)

    # Compute number of items to display
    display_rows = math.floor(math.sqrt(m))
    display_cols = math.ceil(m / display_rows)

    # Between-images padding
    pad = 1

    # Set up a blank display array
    w1 = pad + display_rows * (example_height + pad)
    h1 = int(pad + display_cols * (example_width + pad))
    display_array = -np.ones(shape=(w1, h1))

    # Copy each example into a patch on the display array
    curr_ex = 0
    for j in range(1, display_rows + 1):
        for i in range(1, display_cols + 1):
            if curr_ex >= m:
                break
            # Normalize each example by its maximum absolute value
            max_val = np.max(np.abs(X[curr_ex, :]))
            row0 = pad + (j - 1) * (example_height + pad)
            row1 = row0 + example_height
            col0 = pad + (i - 1) * (example_width + pad)
            col1 = col0 + example_width
            display_array[row0:row1, col0:col1] = X[curr_ex, :].reshape(
                example_height, example_width, order='F') / max_val
            curr_ex = curr_ex + 1
        if curr_ex >= m:
            break

    # Display the image
    h = io.imshow(display_array, cmap='gray')
    io.show()
    return h, display_array
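# A minimal usage sketch for displayData. The data file, dictionary key and image size below
# are assumptions in the style of the course exercises, not part of the function above.
import numpy as np
import scipy.io

mat = scipy.io.loadmat('ex3data1.mat')  # hypothetical file whose rows are flattened 20x20 images
X = mat['X']                            # assumed key; shape (m, 400)
sample = X[np.random.choice(X.shape[0], 100, replace=False), :]  # 100 random examples
displayData(sample, example_width=20)   # 20 columns per image, since 20*20 = 400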
def main(sc, outputDir, outputIndices, filename, replicates, minTime, maxTime, speciesToBin, outputFileNum, skipTime, sparse, interactive):

    global globalSpeciesToBin, globalSkipTime, globalSparse, globalReplicates, globalMinTime, globalMaxTime

    # Broadcast the global variables.
    globalSpeciesToBin = sc.broadcast(speciesToBin)
    globalReplicates = sc.broadcast(replicates)
    globalMinTime = sc.broadcast(minTime)
    globalMaxTime = sc.broadcast(maxTime)
    globalSkipTime = sc.broadcast(skipTime)
    globalSparse = sc.broadcast(sparse)

    # Load the records from the sfile.
    allRecords = sc.newAPIHadoopFile(
        filename,
        "robertslab.hadoop.io.SFileInputFormat",
        "robertslab.hadoop.io.SFileHeader",
        "robertslab.hadoop.io.SFileRecord",
        keyConverter="robertslab.spark.sfile.SFileHeaderToPythonConverter",
        valueConverter="robertslab.spark.sfile.SFileRecordToPythonConverter")

    # Bin the species counts records and sum across all of the bins.
    results = allRecords.filter(filterLatticeTimeSeries).map(
        binLatticeOccupancy).reduceByKey(addLatticeBins).values().collect()
    totalTimePoints = results[0][0]
    bins = results[0][1]
    bins[:, :, :, :, 0] += totalTimePoints
    print("Recovered bins from %d total time points" % (totalTimePoints))
    print(bins.shape)

    # Open the lattice file (hard-coded path).
    inputFile = "ltable/grad_bc/yeast_cell/molar/vol_full/data_rdme_Dkp15/cell_modelII_48reps_gradient_0_c1b_1.0e-6_c2b_2.0e-6_c3b_6.0e-5_c0a_5.0e-4_c4_2.0e-4_c5_2.0e-3_c6_2.0e-6_Dk_5.0e-12_Dkp_5.0e-15_Dr_5.0e-12_Drl_5.0e-15.lm"
    f = h5py.File(inputFile, 'r')
    print("Processing %s file." % (inputFile))

    # Get the membrane sites.
    lattice = f["/Model/Diffusion/LatticeSites"].value
    MembraneSites = (lattice == 1).astype(int)
    l, m, n = MembraneSites.nonzero()
    print("Length of membrane sites = " + str(len(l)))

    # Calculate the mean occupancy for each membrane site.
    particle = np.zeros((bins.shape[0], bins.shape[1], bins.shape[2], bins.shape[3], bins.shape[4]))
    data = np.zeros((bins.shape[0], bins.shape[1], bins.shape[2], bins.shape[3], bins.shape[4]))
    for p in range(0, bins.shape[4]):
        particle[:, :, :, :, p] = p

    counts = np.zeros((len(l), 5))
    for i in range(0, len(speciesToBin)):
        for mem in range(0, len(l)):
            data[i, l[mem], m[mem], n[mem], :] = particle[i, l[mem], m[mem], n[mem], :] * bins[i, l[mem], m[mem], n[mem], :]
        mean = np.sum(data, axis=4)
        for mem in range(0, len(l)):
            counts[mem, 0] = speciesToBin[i]
            counts[mem, 1] = l[mem]
            counts[mem, 2] = m[mem]
            counts[mem, 3] = n[mem]
            counts[mem, 4] = float(mean[i, l[mem], m[mem], n[mem]]) / float(totalTimePoints)

    # Save the counts into a .mat file in the output directory named according to the output file number.
    # cellio.cellsave(outputDir,counts,outputIndices);
    outputFile = 'counts_event_%s.mat' % (outputFileNum)
    scipy.io.savemat(outputDir + outputFile, dict(counts=counts))
    print("Binned species data into %s" % (outputDir))

    # # Save the pdfs into a .mat file in the output directory named according to the output indices.
    # pdfs=np.zeros((len(speciesToBin),),dtype=object)
    # for i in range(0,len(speciesToBin)):
    #     counts = sum(data)
    #     pdf=bins[i,:,:,:,:].astype(float)/float(totalTimePoints)  # /float(np.sum(bins[i,0,0,0,:]))
    #     pdfs[i] = pdf
    # cellio.cellsave(outputDir,pdfs,outputIndices);
    # print("Binned species data into %s"%(outputDir))

    # If interactive, show the pdf.
    if interactive:
        subvolumeCounts = bins.sum(axis=1).sum(axis=1).sum(axis=1)
        for i in range(0, len(speciesToBin)):
            print("Subvolume distribution for species %d" % (speciesToBin[i]))
            plt.figure()
            plt.subplot(1, 1, 1)
            plt.bar(np.arange(0, subvolumeCounts.shape[1]), np.log10(subvolumeCounts[i, :]))
        io.show()
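# Read-back sketch for the .mat written above. The path is hypothetical (it follows the
# outputDir + 'counts_event_<outputFileNum>.mat' pattern used in main()); each row of
# `counts` holds [species id, lattice index l, m, n, mean occupancy per time point],
# exactly as filled in by the loop above.
import scipy.io

loaded = scipy.io.loadmat('output/counts_event_0.mat')  # assumed output path
counts = loaded['counts']
print("counts shape:", counts.shape)  # (number of membrane sites, 5)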
def ex7_Kmeans():  # main function

    #################-1- Find the closest centroids
    print('Finding closest centroids.')
    mat = scipy.io.loadmat('ex7data2.mat')
    X = mat['X']
    K = 3  # 3 centroids
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Find the closest centroids for the examples using the initial_centroids
    idx = findClosestCentroids(X, initial_centroids)
    print('Closest centroids for the first 3 examples:', idx[0:3])
    print('(the closest centroids should be 1, 3, 2 respectively)')

    #################-2- Compute the centroid means
    print('Computing centroids means.')

    # Compute means based on the closest centroids found in the previous part.
    centroids = computeCentroids(X, idx, K)
    print('Centroids computed after initial finding of closest centroids:', centroids)
    print('(the centroids should be')
    print(' [ 2.428301 3.157924 ]\n')
    print(' [ 5.813503 2.633656 ]\n')
    print(' [ 7.119387 3.616684 ]\n\n')

    ################-3- Run K-means on the example dataset
    print('Running K-Means clustering on example dataset.')
    K = 3
    max_iters = 10
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
    centroids, idx = runkMeans(X, initial_centroids, max_iters, True)
    print('K-Means Done.')

    ###############-4- Run K-means on image pixels
    A = io.imread('bird_small.png')
    A = A / 255
    img_size = A.shape
    X = np.reshape(A, (img_size[0] * img_size[1], 3))
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    centroids, idx = runkMeans(X, initial_centroids, max_iters, False)
    io.imshow(A)

    ##############-5- Image compression
    print('Applying K-Means to compress an image.')
    idx1 = findClosestCentroids(X, centroids)
    X_recovered = []  # will be reshaped to (16384, 3)
    for i in range(0, idx1.shape[0]):  # 16384 pixels
        X_recovered.append(centroids[int(idx1[i][0])])
    X_recovered = np.reshape(X_recovered, (img_size[0], img_size[1], 3))

    # Display the original image
    io.imshow(A)
    io.show()

    # Display the compressed image side by side
    io.imshow(X_recovered)
    io.show()

    ###############-6- Outside the homework: the same job using scikit-learn's KMeans class
    print('Image compression by K-means (sklearn) python class..')
    kmeans = KMeans(init="random", n_clusters=K, n_init=10, max_iter=max_iters, random_state=42)
    kmeans.fit(X)  # X: the pixels of the image.
    print('kmeans.inertia_:', kmeans.inertia_)
    print('kmeans.cluster_centers_:', kmeans.cluster_centers_)  # plays the same role as the centroids variable above.
    print('kmeans.n_iter_:', kmeans.n_iter_)
    print('kmeans.labels_[:10]:', kmeans.labels_[:10])
    print('kmeans.labels_:', kmeans.labels_.shape)
    y_kmeans = kmeans.predict(X)  # plays the same role as idx1 above.
    print('y_kmeans:', y_kmeans.shape)
    print('y_kmeans 0-10:', y_kmeans[0:10])
    X_recovered2 = []
    for i in range(0, y_kmeans.shape[0]):  # 16384 pixels
        X_recovered2.append(kmeans.cluster_centers_[int(y_kmeans[i])])
    X_recovered2 = np.reshape(X_recovered2, (img_size[0], img_size[1], 3))
    io.imshow(X_recovered2)
    io.show()
def main(sc, outputDir, outputIndices, filename, replicates, trajectory, speciesToBin, outputFileNum, skipTime, sparse, interactive):

    global globalSpeciesToBin, globalSkipTime, globalSparse, globalReplicates, globalTrajectory

    # Broadcast the global variables.
    globalSpeciesToBin = sc.broadcast(speciesToBin)
    globalReplicates = sc.broadcast(replicates)
    globalTrajectory = sc.broadcast(trajectory)
    globalSkipTime = sc.broadcast(skipTime)
    globalSparse = sc.broadcast(sparse)

    # Load the records from the sfile.
    allRecords = sc.newAPIHadoopFile(
        filename,
        "robertslab.hadoop.io.SFileInputFormat",
        "robertslab.hadoop.io.SFileHeader",
        "robertslab.hadoop.io.SFileRecord",
        keyConverter="robertslab.spark.sfile.SFileHeaderToPythonConverter",
        valueConverter="robertslab.spark.sfile.SFileRecordToPythonConverter")

    # Bin the species counts records and sum across all of the bins.
    results = allRecords.filter(filterLatticeTimeSeries).map(
        binLatticeOccupancy).reduceByKey(addLatticeBins).values().collect()
    totalTimePoints = results[0][0]
    bins = results[0][1]
    # bins[:,:,:,:,0] += totalTimePoints
    print("Recovered bins from %d total time points" % (totalTimePoints))
    print(bins.shape)

    # # Get the file:
    # inputFile = "../../ltable/grad_bc/yeast_cell/molar/vol_full/data_rdme_Dkp15/cell_modelII_48reps_gradient_0_c1b_1.0e-6_c2b_2.0e-6_c3b_6.0e-5_c0a_5.0e-4_c4_2.0e-4_c5_2.0e-3_c6_2.0e-6_Dk_5.0e-12_Dkp_5.0e-15_Dr_5.0e-12_Drl_5.0e-15.lm"
    # f = h5py.File(inputFile,'r')
    # print("Processing %s file."%(inputFile))
    #
    # # Get the membrane sites
    # lattice = f["/Model/Diffusion/LatticeSites"].value
    # MembraneSites = (lattice==1).astype(int)
    # l, m, n = MembraneSites.nonzero()
    # print("Length of membrane sites = " + str(len(l)))
    #
    # # Calculating the mean:
    # particle = np.zeros((bins.shape[0],bins.shape[1],bins.shape[2],bins.shape[3],bins.shape[4]))
    # data = np.zeros((bins.shape[0],bins.shape[1],bins.shape[2],bins.shape[3],bins.shape[4]))
    # for p in range(0,bins.shape[4]):
    #     particle[:,:,:,:,p] = p
    #
    # counts = np.zeros((len(l),5))
    # for i in range(0,len(speciesToBin)):
    #     for mem in range(0,len(l)):
    #         data[i,l[mem],m[mem],n[mem],:] = particle[i,l[mem],m[mem],n[mem],:]*bins[i,l[mem],m[mem],n[mem],:]
    #     mean = np.sum(data,axis=4)
    #     for mem in range(0,len(l)):
    #         counts[mem,0] = speciesToBin[i]
    #         counts[mem,1] = l[mem]
    #         counts[mem,2] = m[mem]
    #         counts[mem,3] = n[mem]
    #         counts[mem,4] = float(mean[i,l[mem],m[mem],n[mem]])/float(totalTimePoints)

    # Save the binned results into a pickle file in the output directory named according to the trajectory range.
    # cellio.cellsave(outputDir,counts,outputIndices);
    outputFile = 'traj_%s_%s_%s.p' % (trajectory[0], trajectory[-1], outputFileNum)
    pickle.dump(results, open(outputDir + outputFile, "wb"))
    # scipy.io.savemat(outputDir+outputFile, dict(bins=bins))
    print("Binned species data into %s" % (outputDir))

    # # Save the pdfs into a .mat file in the output directory named according to the output indices.
    # pdfs=np.zeros((len(speciesToBin),),dtype=object)
    # for i in range(0,len(speciesToBin)):
    #     counts = sum(data)
    #     pdf=bins[i,:,:,:,:].astype(float)/float(totalTimePoints)  # /float(np.sum(bins[i,0,0,0,:]))
    #     pdfs[i] = pdf
    # cellio.cellsave(outputDir,pdfs,outputIndices);
    # print("Binned species data into %s"%(outputDir))

    # If interactive, show the pdf.
    if interactive:
        subvolumeCounts = bins.sum(axis=1).sum(axis=1).sum(axis=1)
        for i in range(0, len(speciesToBin)):
            print("Subvolume distribution for species %d" % (speciesToBin[i]))
            plt.figure()
            plt.subplot(1, 1, 1)
            plt.bar(np.arange(0, subvolumeCounts.shape[1]), np.log10(subvolumeCounts[i, :]))
        io.show()
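# Read-back sketch for the pickle written above. The file name is hypothetical (it follows
# the 'traj_<first>_<last>_<outputFileNum>.p' pattern used in main()); results[0] holds the
# (totalTimePoints, bins) pair produced by the Spark reduction.
import pickle

with open('output/traj_0_9_0.p', 'rb') as fh:  # assumed output path
    results = pickle.load(fh)
totalTimePoints, bins = results[0]
print("time points:", totalTimePoints, "bins shape:", bins.shape)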
# Plot the points along with the centroid coordinates of each cluster to see how the
# centroid positions affect the clustering.
print("\nCentroid position")
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='black')
plt.title('kmeans_cluster_centers')
plt.show()

print("\nDone applying the K-Means classifier to ex6data1.mat\n")

print('\nRunning K-Means clustering on pixels from an image.\n\n')
image = io.imread('bird_small.png')
io.imshow(image)
plt.title('original_bird_small_image')
io.show()

rows = image.shape[0]
cols = image.shape[1]
image = image.reshape(image.shape[0] * image.shape[1], 3)

# K-means with 128 cluster colors and at most 10 iterations
kmeans = KMeans(n_clusters=128, n_init=10, max_iter=10)
kmeans.fit(image)

clusters = np.asarray(kmeans.cluster_centers_, dtype=np.uint8)
labels = np.asarray(kmeans.labels_, dtype=np.uint8)
labels = labels.reshape(rows, cols)

# Save the codebook in NumPy's standard binary file format
np.save('codebook_tiger.npy', clusters)
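# Sketch, continuing the script above: rebuild the 128-color quantized image from the saved
# codebook and the `labels` array computed earlier (np.load simply round-trips what np.save wrote).
codebook = np.load('codebook_tiger.npy')  # shape (128, 3), dtype uint8
quantized = codebook[labels]              # labels has shape (rows, cols); result is (rows, cols, 3)
io.imshow(quantized)
io.show()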