def anaPsf(xaxis, yaxis, data):
    # centroids
    xCentroid = (xaxis*data).sum()/data.sum()
    yCentroid = (yaxis*data).sum()/data.sum()

    # radial sum -- around Centroid
    raxis = numpy.sqrt((xaxis-xCentroid)*(xaxis-xCentroid) + (yaxis-yCentroid)*(yaxis-yCentroid))

    # histogram
    nsumbin = 1000
    npix, bin_edges = scipy.histogram(raxis, nsumbin, (0., 100.))
    rsumpix, bin_edges = scipy.histogram(raxis, nsumbin, (0., 100.), weights=data)

    # calculate ee80
    rsumpixNorm = rsumpix/rsumpix.sum()
    rcumsum = numpy.cumsum(rsumpixNorm)
    # at this point rcumsum[0] is equal to rsumpixNorm[0], rcumsum[1] to the sum of rsumpixNorm[0] and [1], etc.
    # so rcumsum[0] is the integral for r<bin_edges[1] and in general rcumsum[i] is the integral of r<bin_edges[i+1]
    # thus icumsum gives the appropriate limits for each rcumsum bin
    icumsum = bin_edges[1:nsumbin+1]
    ee80 = numpy.interp(0.8, rcumsum, icumsum)

    # calculate polarization (w/o seeing, CCD diffusion)
    norm = data.sum()
    qxx = ((xaxis-xCentroid)*(xaxis-xCentroid)*data).sum() / norm
    qyy = ((yaxis-yCentroid)*(yaxis-yCentroid)*data).sum() / norm
    qyx = ((yaxis-yCentroid)*(xaxis-xCentroid)*data).sum() / norm
    e1 = (qxx-qyy)/(qxx+qyy)
    e2 = 2.0*qyx/(qxx+qyy)

    return ee80, qxx, qyy, qyx, e1, e2

def generate_scipy_comparison(csvPathname):
    # this is some hack code for reading the csv and doing some percentile stuff in scipy
    from numpy import loadtxt, genfromtxt, savetxt

    dataset = loadtxt(
        open(csvPathname, 'r'),
        delimiter=',',
        dtype='float64')

    print "csv read for training, done"

    # we're going to strip just the last column for percentile work
    # used below
    NUMCLASSES = 10
    print "csv read for training, done"

    # data is last column
    # drop the output
    print dataset.shape

    from scipy import histogram
    import numpy
    if 1==1:
        print "histogram of dataset"
        print histogram(dataset, bins=NUMCLASSES)
        print numpy.mean(dataset, axis=0, dtype=numpy.float64)
        print numpy.std(dataset, axis=0, dtype=numpy.float64, ddof=0)
        print numpy.std(dataset, axis=0, dtype=numpy.float64, ddof=1)

    from scipy import stats

def genMCHistogramsOpenCL(distribution, rng, iterations=100, numBins=1000):
    # get OpenCL CPU devices
    openCLDevices = [device for device in clsim.I3CLSimOpenCLDevice.GetAllDevices() if device.cpu]
    if len(openCLDevices)==0:
        raise RuntimeError("No CPU OpenCL devices available!")
    openCLDevice = openCLDevices[0]

    openCLDevice.useNativeMath=False
    workgroupSize = 1
    workItemsPerIteration = 10240
    print("           using platform:", openCLDevice.platform)
    print("             using device:", openCLDevice.device)
    print("            workgroupSize:", workgroupSize)
    print("   workItemsPerIteration:", workItemsPerIteration)

    tester = clsim.I3CLSimRandomDistributionTester(device=openCLDevice,
                                                   workgroupSize=workgroupSize,
                                                   workItemsPerIteration=workItemsPerIteration,
                                                   randomService=rng,
                                                   randomDistribution=distribution)

    print("maxWorkgroupSizeForKernel:", tester.maxWorkgroupSize)

    angles = tester.GenerateRandomNumbers(iterations)
    samples = len(angles)
    print("generated")

    angles = numpy.array(angles)  # convert to numpy array
    print("converted")

    numAng_orig, binsAng = scipy.histogram(numpy.arccos(angles)*(180./math.pi), range=(0.,180.), bins=numBins)
    print("hist1 complete")

    numCos_orig, binsCos = scipy.histogram(angles, range=(-1.,1.), bins=numBins)
    print("hist2 complete")

    del angles  # not needed anymore
    print("deleted")

    numAng = []
    for i, number in enumerate(numAng_orig):
        binWidth = math.cos(binsAng[i]*math.pi/180.) - math.cos(binsAng[i+1]*math.pi/180.)
        numAng.append(float(number)/float(samples)/binWidth)
    numAng = numpy.array(numAng)

    numCos = []
    for i, number in enumerate(numCos_orig):
        numCos.append(float(number)/float(samples)/float(2./float(numBins)))
    numCos = numpy.array(numCos)

    binsAng = numpy.array(binsAng[:-1])+(binsAng[1]-binsAng[0])/2.
    binsCos = numpy.array(binsCos[:-1])+(binsCos[1]-binsCos[0])/2.

    return dict(cos=dict(num=numCos, bins=binsCos), ang=dict(num=numAng, bins=binsAng))

def determine_dominant_color_in_image(self, image):
    NUM_CLUSTERS = 5

    ar = scipy.misc.fromimage(image)
    shape = ar.shape
    if len(shape) > 2:
        ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    # print "Before: %s" % codes
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([code for code in codes
                             if not ((code[0] < low and code[1] < low and code[2] < low) or
                                     (code[0] > hi and code[1] > hi and code[2] > hi))])
        if not len(codes):
            codes = original_codes
        else:
            break
    # print "After: %s" % codes

    vecs, _ = scipy.cluster.vq.vq(ar, codes)          # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    # colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
    # total = scipy.sum(counts)
    # print dict(zip(colors, [count/float(total) for count in counts]))

    index_max = scipy.argmax(counts)  # find most frequent
    peak = codes[index_max]
    color = ''.join(chr(c) for c in peak).encode('hex')
    # print 'most frequent is %s (#%s)' % (peak, color)
    return color[:6]

def include_image_level_features(orig_imga, fvector_l, featsel):
    # include grayscale values ?
    f_input_gray = featsel['input_gray']
    if f_input_gray is not None:
        shape = f_input_gray
        #print orig_imga.shape
        fvector_l += [sp.misc.imresize(colorconv.gray_convert(orig_imga), shape).ravel()]

    # include color histograms ?
    f_input_colorhists = featsel['input_colorhists']
    if f_input_colorhists is not None:
        nbins = f_input_colorhists
        colorhists = sp.empty((3, nbins), 'f')
        if orig_imga.ndim == 3:
            for d in xrange(3):
                h = sp.histogram(orig_imga[:, :, d].ravel(),
                                 bins=nbins,
                                 range=[0, 255])
                binvals = h[0].astype('f')
                colorhists[d] = binvals
        else:
            raise ValueError, "orig_imga.ndim == 3"
            #h = sp.histogram(orig_imga[:,:].ravel(),
            #                 bins=nbins,
            #                 range=[0,255])
            #binvals = h[0].astype('f')
            #colorhists[:] = binvals

        #feat_l += [colorhists.ravel()]
        fvector_l += [colorhists.ravel()]

    return fvector_l

def blobs(shape, porosity, blobiness=8):
    """
    Generates an image containing amorphous blobs

    Parameters
    ----------
    shape : list
        The size of the image to generate in [Nx, Ny, Nz] where N is the
        number of voxels

    blobiness : scalar
        Controls the morphology of the image.  A higher number results in
        a larger number of smaller blobs.

    porosity : scalar
        The porosity of the final image.  This number is approximated by
        the method so the returned result may not have exactly the
        specified value.
    """
    if sp.size(shape) == 1:
        shape = sp.full((3, ), int(shape))
    [Nx, Ny, Nz] = shape
    sigma = sp.mean(shape)/(4*blobiness)
    mask = sp.rand(Nx, Ny, Nz)
    mask = spim.gaussian_filter(mask, sigma=sigma)
    hist = sp.histogram(mask, bins=1000)
    cdf = sp.cumsum(hist[0])/sp.size(mask)
    xN = sp.where(cdf >= porosity)[0][0]
    im = mask <= hist[1][xN]
    return im

def _computeEntropy(self):
    '''
    Compute the entropy of the histogram of distances from the pivot element.
    Low entropy scores mean the distribution is concentrated, and thus may be
    a good candidate for splitting. High entropy (at the limit, a uniform
    distribution) may indicate that there is no good separation of the
    elements in this node.
    '''
    assert(self.Pivot != None)
    assert(self.Ds != None)
    assert(len(self.Ds) >= self.Tau)
    assert(len(self.Ds) == len(self.items))

    # create a list of distances not including the sample which was selected as the pivot
    # ...which will have a distance of zero, within numerical errors.
    Dx = [D for D in self.Ds if D > 0.01]

    # compute histogram using 10 bins of the Dx list
    HistInfo = scipy.histogram(Dx, bins=10)
    pk = scipy.array(HistInfo[0])
    epsilon = 0.000001
    H = entropy(pk + epsilon)  # avoids log0 warnings
    #print "Histogram: ", HistInfo[0]
    #print "Entropy: %f"%H
    #print "Range: Min(>0)=%f, Max=%f, Mean=%f, Median=%f"%(min(Dx),max(self.Ds),scipy.mean(self.Ds),scipy.median(self.Ds))
    return H

def tag_images_with_color_value(NUM_CLUSTERS=4, INPUT_FOLDER='./data/covers/'):
    isbn = list()
    cover_color = list()
    files = os.listdir(INPUT_FOLDER)
    for eachFile in files:
        print eachFile
        im = Image.open(INPUT_FOLDER + eachFile)
        im = im.resize((50, 50))  # optional, to reduce time
        ar = scipy.misc.fromimage(im)
        shape = ar.shape
        print len(shape)
        if len(shape) == 2:
            ar = ar.reshape(scipy.product(shape[:1]), shape[1])
        else:
            ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        # finding clusters
        codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
        # print 'cluster centres:\n', codes
        vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
        counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences
        index_max = scipy.argmax(counts)                  # find most frequent
        peak = codes[index_max]
        colour = ''.join(chr(c) for c in peak).encode('hex')
        isbn.append(eachFile[:-4])
        cover_color.append(colour)

    result = zip(isbn, cover_color)
    return result

def getPredominantColor(filename):
    im = Image.open(filename).convert('RGB')

    # Convert to numpy array
    ar = scipy.misc.fromimage(im)

    # Get dimensions
    shape = ar.shape

    # Convert to bidimensional array of width x height rows and 3 columns (RGB)
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    # Find cluster centers and their distortions
    # codes contains the RGB value of the centers
    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)

    # Maps all the pixels in the image to their respective centers
    vecs, dist = scipy.cluster.vq.vq(ar, codes)

    # Counts the occurrences of each color (NUM_CLUSTERS different colors after the mapping)
    counts, bins = scipy.histogram(vecs, len(codes))

    # Find most frequent color
    index_max = scipy.argmax(counts)
    peak = codes[index_max]

    return peak.astype(int)

def makehistmult(testpathlist, npulseslist):
    sns.set_style("whitegrid")
    sns.set_context("notebook")
    params = ['Ne', 'Te', 'Ti', 'Vi']
    paramsLT = ['N_e', 'T_e', 'T_i', 'V_i']
    errdictlist = [makehistdata(params, itest)[0] for itest in testpathlist]
    (figmplf, axmat) = plt.subplots(2, 2, figsize=(12, 8), facecolor='w')
    axvec = axmat.flatten()
    histlims = [[4e10, 2e11], [1200., 3000.], [300., 1900.], [-250., 250.]]
    histvecs = [sp.linspace(ipm[0], ipm[1], 100) for ipm in histlims]
    linehand = []
    lablist = ['J = {:d}'.format(i) for i in npulseslist]

    for iax, iparam in enumerate(params):
        for idict, inpulse in zip(errdictlist, npulseslist):
            curvals = idict[iparam]
            curhist, binout = sp.histogram(curvals, bins=histvecs[iax])
            dx = binout[1]-binout[0]
            curhist_norm = curhist.astype(float)/(curvals.size*dx)
            plthand = axvec[iax].plot(binout[:-1], curhist_norm,
                                      label='J = {:d}'.format(inpulse))[0]
            linehand.append(plthand)
        axvec[iax].set_xlabel(r'$'+paramsLT[iax]+'$')
        axvec[iax].set_title(r'Histogram for $'+paramsLT[iax]+'$')

    leg = figmplf.legend(linehand[:len(npulseslist)], lablist)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    spti = figmplf.suptitle('Parameter Distributions', fontsize=18)
    return (figmplf, axvec, linehand)

def find_a_dominant_color(image):
    # K-means clustering to find the k most dominant colors, from:
    # http://stackoverflow.com/questions/3241929/python-find-dominant-most-common-color-in-an-image
    n_clusters = 5

    # Get image into a workable form
    im = image.copy()
    im = im.resize((150, 150))  # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    im_shape = ar.shape
    ar = ar.reshape(scipy.product(im_shape[:2]), im_shape[2])
    ar = np.float_(ar)

    # Compute clusters
    codes, dist = scipy.cluster.vq.kmeans(ar, n_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    # Get the indexes of the most frequent, 2nd most frequent, 3rd, ...
    sorted_idxs = np.argsort(counts)[::-1]

    # Get the color
    peak = codes[sorted_idxs[1]]  # get second most frequent color

    return [int(i) for i in peak.tolist()]  # list comprehension to quickly cast everything to int

def reduce_colors(image, k):
    '''Apply kmeans algorithm.
    Input: image, number of clusters to use
    Returns: colors, counts per color, new image
    '''
    if k > 32:
        print "Setting colors to maximum allowed of 32"
        k = 32
    rows, cols, rgb = image.shape
    # reshape the image in a single row array of RGB pixels
    image_row = np.reshape(image, (rows * cols, 3))

    # HERE ADD CODE TO GET A GOOD GUESS OF COLORS AND PASS THAT AS
    # SECOND ARGUMENT TO kmeans
    #image_array_sample = shuffle(image_row, random_state=0)[:1000]
    #kguess = kmeans(image_array_sample, k)
    #colors,_ = kmeans(image_row, kguess)

    # perform the clustering
    colors, _ = kmeans(image_row, k)
    # vector quantization, assign to each pixel the index of the nearest centroid (i=1..k)
    qnt, _ = vq(image_row, colors)
    # reshape the qnt vector to the original image shape
    image_centers_id = np.reshape(qnt, (rows, cols))
    # assign the color value to each pixel
    newimage = colors[image_centers_id]
    # count number of pixels of each cluster color
    counts, bins = sp.histogram(qnt, len(colors))
    return colors, counts, newimage

def degree_distrib(net, deg_type="total", node_list=None, use_weights=True,
                   log=False, num_bins=30):
    '''
    Computing the degree distribution of a network.

    Parameters
    ----------
    net : :class:`~nngt.Graph` or subclass
        the network to analyze.
    deg_type : string, optional (default: "total")
        type of degree to consider ("in", "out", or "total").
    node_list : list or numpy.array of ints, optional (default: None)
        Restrict the distribution to a set of nodes (default: all nodes).
    use_weights : bool, optional (default: True)
        use weighted degrees (do not take the sign into account: all
        weights are positive).
    log : bool, optional (default: False)
        use log-spaced bins.

    Returns
    -------
    counts : :class:`numpy.array`
        number of nodes in each bin
    deg : :class:`numpy.array`
        bins
    '''
    ia_node_deg = net.get_degrees(node_list, deg_type, use_weights)
    ra_bins = sp.linspace(ia_node_deg.min(), ia_node_deg.max(), num_bins)
    if log:
        ra_bins = sp.logspace(sp.log10(sp.maximum(ia_node_deg.min(), 1)),
                              sp.log10(ia_node_deg.max()), num_bins)
    counts, deg = sp.histogram(ia_node_deg, ra_bins)
    ia_indices = sp.argwhere(counts)
    return counts[ia_indices], deg[ia_indices]

def getImageDescriptor(model, im, conf):
    im = standardizeImage(im)
    height, width = im.shape[:2]
    numWords = model.vocab.shape[1]
    frames, descrs = getPhowFeatures(im, conf.phowOpts)

    # quantize appearance
    if model.quantizer == 'vq':
        binsa, _ = vq(descrs.T, model.vocab.T)
    elif model.quantizer == 'kdtree':
        raise ValueError('quantizer kdtree not implemented')
    else:
        raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))

    hist = []
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
        binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
        binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy list to what spatial bin each feature point belongs to
        if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))):
            print ("something went wrong")
            import pdb; pdb.set_trace()
        if (numpy.any(distsy < 0)) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
            print ("something went wrong")
            import pdb; pdb.set_trace()

        # combined quantization
        number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
        temp = arange(number_of_bins)
        # update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
        temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
        bin_comb = temp[binsx, binsy, binsa]
        hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True)
        hist.append(hist_temp)

    hist = hstack(hist)
    hist = array(hist, 'float32') / sum(hist)
    return hist

def getDominantColor(img_url):
    if r.exists(img_url):
        cache_result = r.hmget(img_url, ['r', 'g', 'b'])
        return cache_result

    NUM_CLUSTERS = 5
    im = Image.open(StringIO.StringIO(urllib2.urlopen(img_url).read()))
    img_arr = scipy.misc.fromimage(im)
    img_shape = img_arr.shape

    if len(img_shape) > 2:
        img_arr = img_arr.reshape(scipy.product(img_shape[:2]), img_shape[2])

    codes, _ = scipy.cluster.vq.kmeans(img_arr, NUM_CLUSTERS)

    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([code for code in codes
                             if not (all([c < low for c in code]) or
                                     all([c > hi for c in code]))])
        if not len(codes):
            codes = original_codes
        else:
            break

    vecs, _ = scipy.cluster.vq.vq(img_arr, codes)
    counts, bins = scipy.histogram(vecs, len(codes))
    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    color = [int(c) for c in peak[:3]]

    r.hmset(img_url, {'r': color[0], 'g': color[1], 'b': color[2]})
    #r.expire(img_url, 86400)
    return color

def cluster_colors(image_url, num_clusters=5):
    """
    Return the most clustered colors of an image.
    Use scipy's k-means clustering algorithm.
    """
    print 'Reading image...'
    response = requests.get(image_url)
    im = Image.open(StringIO(response.content))
    im = im.resize((150, 150))  # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    ar = ar.astype(float)

    print 'Finding clusters...'
    # k-means clustering
    codes, dist = scipy.cluster.vq.kmeans(ar, num_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    sorted_index = sorted(range(len(counts)),
                          key=lambda index: counts[index], reverse=True)
    most_common_colors = []
    for index in sorted_index:
        peak = codes[index]
        peak = peak.astype(int)
        colour = ''.join(format(c, '02x') for c in peak)
        most_common_colors.append('#' + colour)
    return most_common_colors

def genMCHistogramsHost(distribution, hist_range, iterations=10000000, numBins=1000):
    print("generating (host)")
    values = []
    for i in range(iterations):
        values.append(distribution.SampleFromDistribution(rng, []))
    samples = len(values)
    print("generated (host)")

    values = numpy.array(values)/I3Units.nanometer  # convert to numpy array and convert units
    print("converted (host)")

    range_width = hist_range[1]-hist_range[0]

    num_orig, bins = scipy.histogram(values, range=hist_range, bins=numBins)
    print("hist1 complete (host)")

    del values  # not needed anymore
    print("deleted (host)")

    num = []
    for number in num_orig:
        num.append(float(number)/float(samples)/float(range_width/float(numBins)))
    num = numpy.array(num)

    bins = numpy.array(bins[:-1])+(bins[1]-bins[0])/2.

    return dict(num=num, bins=bins)

def genMCHistogramsOpenCL(distribution, range, iterations=1000, numBins=1000):
    tester = clsim.I3CLSimRandomDistributionTester(device=openCLDevice,
                                                   workgroupSize=workgroupSize,
                                                   workItemsPerIteration=workItemsPerIteration,
                                                   randomService=rng,
                                                   randomDistribution=distribution)

    values = tester.GenerateRandomNumbers(iterations)
    samples = len(values)
    print("generated")

    values = numpy.array(values)/I3Units.nanometer  # convert to numpy array and convert units
    print("converted")

    range_width = range[1]-range[0]

    num_orig, bins = scipy.histogram(values, range=range, bins=numBins)
    print("hist1 complete")

    del values  # not needed anymore
    print("deleted")

    num = []
    for number in num_orig:
        num.append(float(number)/float(samples)/float(range_width/float(numBins)))
    num = numpy.array(num)

    bins = numpy.array(bins[:-1])+(bins[1]-bins[0])/2.

    return dict(num=num, bins=bins)

def get_dominant_color(image_path):
    '''
    Parse image and return dominant color in image.

    @param image_path: Image path to parse.
    @return: Return dominant color, format as hexadecimal number.
    '''
    # print 'reading image'
    im = Image.open(image_path)
    im = im.resize((150, 150))  # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    # print 'finding clusters'
    NUM_CLUSTERS = 5
    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    # print 'cluster centres:\n', codes

    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    index_max = scipy.argmax(counts)  # find most frequent
    peak = codes[index_max]
    colour = ''.join(chr(c) for c in peak).encode('hex')
    # print 'most frequent is %s (#%s)' % (peak, colour)

    return "#%s" % (colour[0:6])

def hist(fcms, index, savefile=None, display=True, **kwargs):
    """Plot overlay histogram.

    fcms is a list of FCMData objects/arrays
    index is channel to plot
    """
    figure = pylab.figure()
    for fcm in fcms:
        if isinstance(index, str):
            index = fcm.name_to_index(index)
        y = fcm[:, index]
        h, b = histogram(y, bins=200, **kwargs)
        b = (b[:-1] + b[1:]) / 2.0
        unused_x = pylab.linspace(min(y), max(y), 100)
        pylab.plot(b, h, label=fcm.name)
    pylab.legend()
    pylab.xlabel(fcms[0].channels[index])
    if display:
        pylab.show()
    if savefile:
        pylab.savefig(savefile)
    return figure

def test_Intensity_1(self):
    """Test a case of distributed intensity values."""
    # Create label image with only one region
    label_image = scipy.zeros(2*2*2, dtype=scipy.int8).reshape(2,2,2)

    # Create original image with two equally distributed intensity values
    original_image = scipy.zeros(2*2*2, dtype=scipy.int8)
    original_image[:4] = -1
    original_image[4:] = 1
    original_image = original_image.reshape(2,2,2)

    # Initialize object
    statistics = LabelImageStatistics(label_image, original_image)

    # Compute expected result
    i = scipy.array([-1,-1,-1,-1,1,1,1,1])
    h = scipy.histogram(i, statistics._intensity_distribution_local_histogram_width)
    hr = scipy.array(h[0]) / float(h[0].sum())
    g = stats.norm(*stats.norm.fit(i))
    r = abs(hr - g.pdf(h[1][:-1]))
    r *= h[1][-2] - h[1][0]
    r = r.sum()

    # Check created intensity distribution
    intensity_distributions = statistics.get_intensity_distributions()
    self.assertEqual(len(intensity_distributions), 1)
    self.assertEqual(intensity_distributions[0], i.std())

    intensity_distribution_histogram = statistics.get_intensity_distribution_histogram()
    self.assertEqual(intensity_distribution_histogram[0][statistics.get_intensity_distribution_histogram_width()/2], 1)
    self.assertEqual(intensity_distribution_histogram[0].max(), 1)
    self.assertEqual(intensity_distribution_histogram[0].min(), 0)
    self.assertEqual(intensity_distribution_histogram[1].mean(), i.std())

def getDomIMAGEColor(imName):
    # Reference:
    # http://stackoverflow.com/questions/3241929/
    #   python-find-dominant-most-common-color-in-an-image

    # number of k-means clusters
    NUM_CLUSTERS = 4

    # Open target image
    im = imName
    im = im.resize((150, 150))  # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    ar = ar.astype(float)

    # Find clusters
    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    # Find most frequent
    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    color = ''.join(chr(int(c)) for c in peak).encode('hex')

    return (peak, color)

def create_histogram(parameter_name, nbins=100, writeFile=True, skipfirst=0,
                     truncate=False, smooth=False):
    """
    Returns a histogram and some statistics about this parameter.

    @param writeFile: if true, write the histogram to paramname.histogram
    """
    f = "%s-chain-0.prob.dump" % parameter_name
    values = numpy.recfromtxt(f)[skipfirst::nevery]

    statistics = {
        'min':    float(values.min()),
        'max':    float(values.max()),
        'stdev':  float(values.std()),
        'mean':   float(values.mean()),
        'median': float(numpy.median(values)),
        'q1':     float(scipy.stats.scoreatpercentile(values, 25)),
        'q3':     float(scipy.stats.scoreatpercentile(values, 75)),
        'p5':     float(scipy.stats.scoreatpercentile(values, 5)),
        'p95':    float(scipy.stats.scoreatpercentile(values, 95)),
    }

    hist = scipy.histogram(values, bins=nbins if not smooth else nbins*10, normed=True)
    histwithborders = numpy.dstack([hist[1][0:nbins], hist[1][1:nbins+1], hist[0]])
    if writeFile:
        scipy.savetxt('%s.histogram' % parameter_name, histwithborders[0], delimiter="\t")
    return histwithborders[0], statistics

def generate_scipy_comparison(csvPathname):
    # this is some hack code for reading the csv and doing some percentile stuff in scipy
    # from numpy import loadtxt, genfromtxt, savetxt
    import numpy as np
    import scipy as sp

    dataset = np.genfromtxt(
        open(csvPathname, 'r'),
        delimiter=',',
        skip_header=1,
        dtype=None)  # guess!

    print "csv read for training, done"

    # we're going to strip just the last column for percentile work
    # used below
    NUMCLASSES = 10
    print "csv read for training, done"

    # data is last column
    # drop the output
    print dataset.shape
    target = [x[1] for x in dataset]

    # we may have read it in as a string. coerce to number
    targetFP = np.array(target, np.float)

    if 1==0:
        n_features = len(dataset[0]) - 1;
        print "n_features:", n_features

        # get the end
        # target = [x[-1] for x in dataset]
        # get the 2nd col
        print "histogram of target"
        print target
        print sp.histogram(target, bins=NUMCLASSES)

        print target[0]
        print target[1]

    # per = [100 * t for t in thresholds]
    per = [1 * t for t in thresholds]
    print "sp per:", per

    from scipy import stats
    # a = stats.scoreatpercentile(target, per=per)
    a = stats.mstats.mquantiles(targetFP, prob=per)
    print "sp percentiles:", a

def abs_n_ent_diff(v1, v2, nbins=10):
    """
    Absolute normalized entropy difference between v1 and v2 (not symmetric!)
    Quantizes the values into nbins bins, measures the entropy of each of
    those bins, and looks at the difference of entropy between the two
    distributions for each of those bins.
    nbins - quantization level between 0 and 1
    """
    assert v1.size == v2.size, 'v1 and v2 different sizes'
    edges = np.array(range(0, nbins+1), 'float') / nbins
    h1 = histogram(v1.flatten(), edges)[0] + 2
    h2 = histogram(v2.flatten(), edges)[0] + 2
    ents1 = -np.log2(h1*1./h1.sum())
    ents2 = -np.log2(h2*1./h2.sum())
    return np.abs(((ents1 - ents2)/ents1)).mean()

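# Usage sketch for abs_n_ent_diff above (an illustration, not part of the
# original snippet): it assumes `np` is numpy and `histogram` is scipy's
# histogram, as in the function itself, and that both inputs are already
# scaled to the range [0, 1].
def _demo_abs_n_ent_diff():
    import numpy as np
    v1 = np.random.rand(1000)        # roughly uniform values in [0, 1]
    v2 = np.random.rand(1000) ** 2   # values skewed toward 0
    # a larger result indicates the per-bin entropies differ more
    print(abs_n_ent_diff(v1, v2, nbins=10))
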
def histeq(im, nbr_bins=256):
    """ Histogram equalization of a grayscale image. """

    # get image histogram
    imhist, bins = sp.histogram(im.flatten(), nbr_bins, normed=True)
    cdf = imhist.cumsum()      # cumulative distribution function
    cdf = 255 * cdf / cdf[-1]  # normalize

    # use linear interpolation of cdf to find new pixel values
    im2 = interp(im.flatten(), bins[:-1], cdf)

    return im2.reshape(im.shape), cdf

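# Usage sketch for histeq above (an illustration, not part of the original
# snippet): it assumes `sp` is scipy, `interp` is numpy.interp, and a scipy
# version old enough to still provide scipy.histogram with the `normed` keyword.
def _demo_histeq():
    import numpy as np
    im = (np.random.rand(64, 64) ** 2 * 255).astype('uint8')  # synthetic dark image
    im2, cdf = histeq(im)
    print(im2.shape, im2.min(), im2.max())  # equalized values spread over roughly 0..255
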
def calculateThreshold(image, coveragePercent):
    import scipy
    data = image.data
    histogram = scipy.histogram(data, len(scipy.unique(data)))
    cumsum = scipy.cumsum(histogram[0])
    targetValue = cumsum[-1] * coveragePercent
    index = scipy.argmin(scipy.absolute(cumsum - targetValue))
    threshold = histogram[1][index]
    return threshold * image.unit

def get_size_histogram(self):
    """
    Gives the region size distribution.

    @return: A histogram created from the normalized region sizes with
             scipy.histogram.
    @note: The width and therefore the number of distinct values of the
           histogram can be set with
           @see: LabelImageStatistics.set_size_histogram_width.
    """
    return scipy.histogram(self._sizes.values(), self._size_histogram_width)

def nonna_select_data(data, outlier_threshold, level='high'):
    """
    This function returns a list of indexes after identifying the main outliers.

    It applies a cut on the data to remove exactly a fraction (1-outlier_threshold)
    of all data points. By default the cut is applied only at the higher end of
    the data values, but the parameter level can be used to change this.

    Input arguments:
    data              = vector containing all data points
    outlier_threshold = remove outliers until we are left with exactly this
                        fraction of the original data
    level             = 'high|low|both' determines if the outliers are removed
                        only from the high values end, the low values end, or
                        both ends.

    Output:
    idx = index of selected (good) data
    """
    # histogram all the data values
    n, x = scipy.histogram(data, len(data)/10)
    # compute the cumulative distribution and normalize
    nn = scipy.cumsum(n)
    nn = nn / float(max(nn))

    if level == 'high':
        # select the value such that a fraction outlier_threshold of the data lies below it
        if outlier_threshold < 1:
            val = x[pylab.find(nn/float(max(nn)) >= outlier_threshold)[0]]
        else:
            val = max(data)
        # use that fraction of data only
        idx = data <= val
    elif level == 'low':
        # select the value such that a fraction outlier_threshold of the data lies above it
        if outlier_threshold < 1:
            val = x[pylab.find(nn/float(max(nn)) <= (1-outlier_threshold))[-1]]
        else:
            val = min(data)
        # use that fraction of data only
        idx = data >= val
    elif level == 'both':
        # select the value such that a fraction outlier_threshold/2 of the data lies below it
        if outlier_threshold < 1:
            Hval = x[pylab.find(nn/float(max(nn)) >= 1-(1-outlier_threshold)/2)[0]]
        else:
            Hval = max(data)
        # select the value such that a fraction outlier_threshold/2 of the data lies above it
        if outlier_threshold < 1:
            Lval = x[pylab.find(nn/float(max(nn)) <= (1-outlier_threshold)/2)[-1]]
        else:
            Lval = min(data)
        # use that fraction of data only
        idx = scipy.logical_and(data >= Lval, data <= Hval)

    return idx

def _bin_single_spike_train(train, bins):
    """ Return a binned representation of a SpikeTrain object.

    :param train: A spike train to bin.
    :type train: :class:`neo.core.SpikeTrain`
    :param bins: The bin edges, including the rightmost edge, with time units.
    :type bins: Quantity 1D
    :returns: The binned spike train.
    :rtype: 1-D array
    """
    return sp.histogram(train.rescale(bins.units), bins)[0]

def chord_length_distribution(im, bins=25, log=False):
    r"""
    Determines the distribution of chord lengths in an image containing chords.

    Parameters
    ----------
    im : ND-image
        An image with chords drawn in the pore space, as produced by
        ``apply_chords`` or ``apply_chords_3d``.

    bins : scalar or array_like
        If a scalar is given it is interpreted as the number of bins to use,
        and if an array is given they are used as the bins directly.

    log : Boolean
        If true, the logarithm of the chord lengths will be used, which can
        make the data more clear.

    Returns
    -------
    A tuple containing the ``chord_length_bins``, and four separate pieces of
    information: ``cumulative_chord_count`` and ``cumulative_chord_length``,
    as well as the ``differential_chord_count`` and
    ``differential_chord_length``.
    """
    h = chord_length_counts(im)
    if log:
        h = sp.log10(h)
    y_num, x = sp.histogram(h, bins=bins, density=True)
    y_len, x = sp.histogram(h, bins=bins, weights=h, density=True)
    y_num_cum = sp.cumsum((y_num*(x[1:]-x[:-1]))[::-1])[::-1]
    y_len_cum = sp.cumsum((y_len*(x[1:]-x[:-1]))[::-1])[::-1]
    data = namedtuple('chord_distribution', ('chord_length_bins',
                                             'cumulative_chord_count',
                                             'cumulative_chord_length',
                                             'differential_chord_count',
                                             'differential_chord_length'))
    return data(x[:-1], y_num_cum, y_len_cum, y_num, y_len)

def Mclast():
    fig = plt.figure(1, figsize=(6,9))
    gs = gridspec.GridSpec(2, 1, height_ratios=[4,1])
    ax = plt.subplot(gs[0])
    mvir, mclast, mratio = ioformat.rcol(fwind, [1,5,14], linestart=1)
    print("Mvir Range: ", min(mvir), max(mvir))
    x, y = [], []
    for i in range(len(mclast)):
        if(mclast[i] > mclast_cut and mratio[i] < 20.0):
            x.append(mvir[i])
            y.append(mclast[i])
    xbins = linspace(11.0, 13.5, 30)
    ybins = linspace(0.05, 1.0, 40)
    xgrid, ygrid = meshgrid(xbins, ybins)
    z, edx, edy = histogram2d(x, y, bins=[xbins,ybins])
    z = z.T + 0.01
    zf = ndimage.gaussian_filter(z, sigma=1.0, order=0)
    cont = ax.contour(xbins[1:], ybins[1:], zf, colors="red")
    #plt.pcolor(xgrid, ygrid, z, cmap="Purples", norm=LogNorm(vmin=z.min(), vmax=z.max()))
    ax.pcolor(xgrid, ygrid, z, cmap="Purples")
    setp(ax.get_xticklabels(), visible=False)
    ax.set_ylabel("Mc (Rejoin)")
    plt.title(modelname+", Z~1.0")

    ax = plt.subplot(gs[1])
    plt.subplots_adjust(hspace=0.0, top=0.9, bottom=0.15)
    hist1, bins = histogram(mvir, bins=linspace(11.0,13.5,30))
    hist2, bins = histogram(x, bins=linspace(11.0,13.5,30))
    hist = []
    for i in range(len(hist1)):
        if(hist1[i] > 0):
            hist.append(float(hist2[i])/float(hist1[i]))
        else:
            hist.append(0.0)
    #width = 0.8*(bins[1]-bins[0])
    center = (bins[:-1] + bins[1:]) / 2.0
    ax.plot(center, hist, "b.-")
    ax.set_ylim(0.0, 1.0)
    ax.set_xlabel("Mvir")
    ax.set_ylabel("f_rej")
    plt.show()

def ejercicio_4():
    my_list = []
    with open('numeros.txt') as f:
        for line in f:
            for i in line:
                if i.isdigit() == True:
                    my_list.append(int(i))
    my_list.sort()
    hist, bin_edges = scipy.histogram([my_list], bins=range(int(my_list[-1]) + 2))
    plt.bar(bin_edges[:-1], hist, width=1)
    plt.xlim(min(bin_edges), max(bin_edges))
    plt.show()

def plotCurrentErrors(currentErrors, labelSize=22, barScale=1.0):
    # make histogram of current errors
    fig = pyplot.figure()
    numBins = max(20, round(scipy.sqrt(len(currentErrors))))
    hist, bins = scipy.histogram(currentErrors, bins=numBins)
    barWidth = barScale * (bins[1] - bins[0])
    binCenters = 0.5 * (bins[:-1] + bins[1:])
    pyplot.bar(binCenters, hist, align='center', width=barWidth)
    pyplot.ylabel('Number of parameter sets', fontsize=labelSize)
    pyplot.xlabel('Parameter set error')
    pyplot.title('Distribution of errors', fontsize=labelSize)

def determine_dominant_color_in_image(self, image):
    NUM_CLUSTERS = 5

    # Convert image into array of values for each point.
    if image.mode == '1':
        image.convert('L')
    ar = numpy.array(image)
    # ar = scipy.misc.fromimage(image)
    shape = ar.shape

    # Reshape array of values to merge color bands. [[R], [G], [B], [A]] => [R, G, B, A]
    if len(shape) > 2:
        ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    # Get NUM_CLUSTERS worth of centroids.
    ar = ar.astype(numpy.float)
    codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

    # Pare centroids, removing blacks and whites and shades of really dark and really light.
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([
            code for code in codes
            if not ((code[0] < low and code[1] < low and code[2] < low) or
                    (code[0] > hi and code[1] > hi and code[2] > hi))
        ])
        if not len(codes):
            codes = original_codes
        else:
            break

    # Assign codes (vector quantization). Each vector is compared to the centroids
    # and assigned the nearest one.
    vecs, _ = scipy.cluster.vq.vq(ar, codes)

    # Count occurrences of each clustered vector.
    counts, bins = scipy.histogram(vecs, len(codes))

    # Show colors for each code in its hex value.
    # colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
    # total = scipy.sum(counts)
    # print dict(zip(colors, [count/float(total) for count in counts]))

    # Find the most frequent color, based on the counts.
    index_max = scipy.argmax(counts)
    peak = codes.astype(int)[index_max]
    color = "{:02x}{:02x}{:02x}".format(peak[0], peak[1], peak[2])
    color = self.feed.adjust_color(color[:6], 21)

    return color

def prominent_colors(image, num_colors):
    thumbnail = create_thumbnail(image)
    vertices = vertices_from_image(thumbnail)

    # Because the vertices are colors they should all form finite
    # numbers, so we can disable check_finite
    num_clusters = num_colors
    (centroid_codebook, _) = kmeans(vertices, num_clusters, check_finite=False)
    (codes, _) = vq(vertices, centroid_codebook, check_finite=False)
    (counts, bins) = histogram(codes, len(centroid_codebook))

    most_frequent = argsort(counts)[::-1]
    centroid_codebook = centroid_codebook.astype(int)
    return [tuple(centroid_codebook[most_frequent[i]]) for i in range(num_colors)]

def get_dominant_color(img, RESIZE=15):
    NUM_CLUSTERS = 3
    orig_img = cv2.resize(img, (CODE_IMG_SIZE, CODE_IMG_SIZE))
    img = cv2.resize(img, (RESIZE, RESIZE))
    shape = img.shape
    ar = img.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    vecs, _ = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, _ = scipy.histogram(vecs, len(codes))  # count occurrences
    index_max = scipy.argmax(counts)               # find most frequent
    dominant_color = codes[index_max]
    return dominant_color, orig_img

def _test():
    seed = 274
    numpy.random.seed(274)
    N = 10
    M = 1000
    u = numpy.random.rand()
    w = numpy.random.rand(N)
    sample = fast_sample(w, M, u)
    num, bins = scipy.histogram(sample, numpy.arange(N))

    u = 0.2
    print 'u = ', u
    print 'w = ', w / sum(w)
    print 'frequency = ', num / float(M)
    print 'w - sample frequency = ', w / sum(w) - num / float(M)

    u = 0.98
    sample = fast_sample(w, M, u)
    num, bins = scipy.histogram(sample, numpy.arange(N))
    print 'u = ', u
    print 'w = ', w / sum(w)
    print 'frequency = ', num / float(M)
    print 'w - sample frequency = ', w / sum(w) - num / float(M)

def _histogram(x, bins):
    h = sp.histogram(x, bins=bins, density=True)
    delta_x = h[1]
    P = h[0]
    temp = P * (delta_x[1:] - delta_x[:-1])
    C = sp.cumsum(temp[-1::-1])[-1::-1]
    S = P * (delta_x[1:] - delta_x[:-1])
    bin_edges = delta_x
    bin_widths = delta_x[1:] - delta_x[:-1]
    bin_centers = (delta_x[1:] + delta_x[:-1]) / 2
    psd = namedtuple('histogram', ('pdf', 'cdf', 'relfreq', 'bin_centers',
                                   'bin_edges', 'bin_widths'))
    return psd(P, C, S, bin_centers, bin_edges, bin_widths)

def findDominantMostCommonColorInAnImageFile(image):
    NUM_CLUSTERS = 5
    ar = np.asarray(image)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    # print('finding clusters')
    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    # print('cluster centres:\n', codes)

    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    index_max = scipy.argmax(counts)  # find most frequent
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour

def main(img):
    image = Image.open(img)
    # image = ImageGrab.grab()
    image = image.resize((200, 200))
    NUM_CLUSTERS = 5

    # Convert image into array of values for each point.
    ar = scipy.misc.fromimage(image)
    # Reshape array of values to merge color bands.
    ar = ar.reshape(scipy.product(ar.shape[:2]), ar.shape[2])

    # Get NUM_CLUSTERS worth of centroids.
    codes, _ = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)

    # Pare centroids, removing blacks and whites and shades of really dark and really light.
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([
            code for code in codes
            if not ((code[0] < low and code[1] < low and code[2] < low) or
                    (code[0] > hi and code[1] > hi and code[2] > hi))
        ])
        if not len(codes):
            codes = original_codes
        else:
            break

    # Assign codes (vector quantization). Each vector is compared to the centroids
    # and assigned the nearest one.
    vecs, _ = scipy.cluster.vq.vq(ar, codes)

    # Count occurrences of each clustered vector.
    counts, bins = scipy.histogram(vecs, len(codes))

    normalized_codes = codes / codes.max()

    # Show colors for each code in its hex value.
    colors = [rgb2hex(c) for c in normalized_codes]
    total = float(scipy.sum(counts))
    # top N colors as a proportion of the image
    color_dist = dict(zip(colors, (count / total for count in counts)))
    pprint(color_dist)

    # Find the most frequent color, based on the counts.
    # TODO: no need to use scipy for this.
    index_max = scipy.argmax(counts)
    peak = normalized_codes[index_max]
    color = rgb2hex(peak)
    print(color)

def dominant_color(img):
    if(img != None):
        NUM_CLUSTERS = 1
        img = img.resize((150, 150))
        arr = np.asarray(img)
        shape = arr.shape
        arr = arr.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

        codes, dist = scipy.cluster.vq.kmeans(arr, NUM_CLUSTERS)
        vecs, dist = scipy.cluster.vq.vq(arr, codes)
        counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

        index_max = scipy.argmax(counts)  # find most frequent
        peak = codes[index_max]
        return list(peak)
    else:
        return None

def draw_from_inputted_distribution(data, dt, n_samples):
    # function for using empirical release data to
    # draw from the time-varying departure rate they exhibit
    # Input: the empirical release data (a list of times)
    # dt determines the bin size
    # n_samples is the number of samples to return
    t_max = max(data)
    t_min = min(data)
    bins = scipy.linspace(t_min, t_max, (t_max - t_min) / dt)
    hist, bin_edges = scipy.histogram(data, bins=bins, density=True)
    cum_values = np.zeros(bin_edges.shape)
    cum_values[1:] = np.cumsum(hist * np.diff(bin_edges))
    inv_cdf = interpolate.interp1d(cum_values, bin_edges)
    r = np.random.rand(n_samples)
    return inv_cdf(r)

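# Usage sketch for draw_from_inputted_distribution above (an illustration, not
# part of the original snippet): it assumes `np` is numpy, `scipy` is imported,
# and `interpolate` is scipy.interpolate, as used in the function itself. The
# release times here are synthetic.
def _demo_draw_from_inputted_distribution():
    import numpy as np
    release_times = np.random.exponential(scale=5.0, size=500)  # synthetic empirical data
    samples = draw_from_inputted_distribution(release_times, dt=0.5, n_samples=100)
    # the samples follow the empirical time-varying departure rate
    print(samples.min(), samples.max())
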
def FindRegionColour(self, region, resizeRatio=1, numClusters=5):
    reWidth, reHieght = region.size
    region = region.resize(
        (int(reWidth * resizeRatio), int(reHieght * resizeRatio)))
    ar = np.asarray(region)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = cluster.vq.kmeans(ar, numClusters)
    vecs, dist = cluster.vq.vq(ar, codes)
    counts, bins = scipy.histogram(vecs, len(codes))

    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    return peak

def est4(data, _):
    h = histogram(data, data.max(), range=(0, data.max()), normed=1)[0]
    ns = N.arange(0, data.max())
    #k, theta, x0 = est2(data)
    #k, theta, x0 = _k, _theta, _x0
    print "shape =", len(data), type(data), data.shape
    x0 = data.min()
    x0 = 10
    sd = data.std()
    print "SD", sd, sd**2, data.mean()
    x0 = data.mean() - 4 * sd
    x0 = fmin(hist_dist_P, (x0, ), args=(data.mean(), h, ns), xtol=1.0)
    return 0, 0, x0

def getDominantColorFromImage(im):
    NUM_CLUSTERS = 5
    im = im.resize((50, 50))  # optional, to reduce time
    ar = np.asarray(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences
    index_max = scipy.argmax(counts)                  # find most frequent
    peak = codes[index_max]
    return '#%02x%02x%02x' % tuple(int(i) for i in peak)

def get_color(screenshot):
    NUM_CLUSTERS = 5
    im = screenshot
    ar = np.asarray(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences
    index_max = scipy.argmax(counts)                  # find most frequent
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour

def plotcounts(self, binsize, pngfile):
    """ bin the data based on a bin size of binsize days """
    #vector_datetime2epoch = scipy.vectorize(converttime.datetime2epoch)
    #etime = vector_d2e(self['etime'])
    etime = self['etime']
    etimemin = scipy.floor(min(etime) / 86400) * 86400
    etimemax = scipy.ceil(max(etime) / 86400) * 86400
    r = [etimemin, etimemax]
    nbins = (etimemax - etimemin) / (binsize * 86400)
    h = scipy.histogram(etime, nbins, r)
    ecount = h.__getitem__(0)
    ebin = h.__getitem__(1)
    #vector_e2d = scipy.vectorize(converttime.epoch2datetime)
    edt = vector_epoch2datetime(ebin)
    pylab.plot_date(edt, ecount, linestyle='steps-mid')
    pylab.savefig(pngfile)

def makeHist(cosAnglesAndWeights, numBins=2000):
    numCos_orig, binsCos = scipy.histogram(cosAnglesAndWeights[0],
                                           range=(-1., 1.),
                                           weights=cosAnglesAndWeights[1],
                                           bins=numBins)
    sumWeights = sum(cosAnglesAndWeights[1])
    numCos = []
    for i, number in enumerate(numCos_orig):
        numCos.append(
            float(number) / float(sumWeights) / float(2. / float(numBins)))
    numCos = numpy.array(numCos)
    binsCos = numpy.array(binsCos[:-1]) + (binsCos[1] - binsCos[0]) / 2.
    return numpy.array([binsCos, numCos])

def getImageDescriptor(model, im, idx, imageName):
    # gets histograms
    extension = -(len(imageName.rpartition('.')[2])+1)     # find how long the extension is, ie .jpg
    imageName = imageName.rpartition('/')[2][:extension]   # get just the image name minus the extension and path
    sift = str('-'.join(map(str, conf.phowOpts.Sizes)))
    if not isdir(conf.imageCropPath+"histos/"):
        mkdir(conf.imageCropPath+"histos/")
    if isfile(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo'):
        with open(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo', 'rb') as fp:
            histo = load(fp)
        return [idx, histo]

    im = standardizeImage(im)  # scale image to 640x480
    height, width = im.shape[:2]
    numWords = model.vocab.shape[1]
    frames, descrs = getPhowFeatures(im, conf.phowOpts)  # extract features

    # quantize appearance
    if model.quantizer == 'vq':
        binsa, _ = vq(descrs.T, model.vocab.T)  # slowest function - assigns words from vocab to features in descrs
    elif model.quantizer == 'kdtree':
        raise ValueError('quantizer kdtree not implemented')
    else:
        raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))

    hist = []
    # generate the histogram bins
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
        binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
        binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy list to what spatial bin each feature point belongs to
        if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
            print ("something went wrong")
            import pdb; pdb.set_trace()

        # combined quantization
        number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
        temp = arange(number_of_bins)
        # update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
        temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
        bin_comb = temp[binsx, binsy, binsa]
        hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True)  # generate histogram
        hist.append(hist_temp)

    hist = hstack(hist)
    hist = array(hist, 'float32') / sum(hist)
    numTot = float(conf.numClasses*(conf.numTrain+conf.numTest)*(len(conf.rotation)+1))
    sys.stdout.write("\r"+str(datetime.now())+" Histograms Calculated: "+str(((idx+1)/numTot)*100.0)[:5]+"%")  # make progress percentage
    sys.stdout.flush()
    with open(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo', 'wb') as fp:
        dump(hist, fp)
    return [idx, hist]

def compute_MI_origemcee(seq_matQ, seq_matR, batches, ematQ, ematR, gamma, R_0):
    # preliminaries
    n_seqs = len(batches)
    n_batches = int(batches.max()) + 1  # assumes zero indexed batches
    n_bins = 1000
    #energies = sp.zeros(n_seqs)
    f = sp.zeros((n_batches, n_seqs))

    # compute energies
    # for i in range(n_seqs):
    #     energies[i] = sp.sum(seqs[:,:,i]*emat)
    # alternate way
    energies = np.zeros(n_seqs)
    for i in range(n_seqs):
        RNAP = (seq_matQ[:,:,i]*ematQ).sum()
        TF = (seq_matR[:,:,i]*ematR).sum() + R_0
        energies[i] = -RNAP + mp.log(1 + mp.exp(-TF - gamma)) - mp.log(1 + mp.exp(-TF))

    # sort energies
    inds = sp.argsort(energies)
    for i, ind in enumerate(inds):
        f[batches[ind], i] = 1.0/n_seqs  # batches aren't zero indexed

    # bin and convolve with Gaussian
    f_binned = sp.zeros((n_batches, n_bins))
    for i in range(n_batches):
        f_binned[i,:] = sp.histogram(f[i,:].nonzero()[0], bins=n_bins, range=(0, n_seqs))[0]
    #f_binned = f_binned/f_binned.sum()
    f_reg = sp.ndimage.gaussian_filter1d(f_binned, 0.04*n_bins, axis=1)
    f_reg = f_reg/f_reg.sum()

    # compute marginal probabilities
    p_b = sp.sum(f_reg, axis=1)
    p_s = sp.sum(f_reg, axis=0)

    # finally sum to compute the MI
    MI = 0
    for i in range(n_batches):
        for j in range(n_bins):
            if f_reg[i,j] != 0:
                MI = MI + f_reg[i,j]*sp.log2(f_reg[i,j]/(p_b[i]*p_s[j]))
    print MI
    return MI, f_reg

def append_data_peaks(self, data, force=False):
    """append bin(s) calculated from a strip of data

    with this method the data is first queried for peaks. this should
    reduce the noise/smoothness of the histogram as observed from the
    amplitude distribution of the pure signal.

    :type data: ndarray
    :param data: the data to generate the bin(s) to append from
    :type force: bool
    :param force: if True, immediately start a new bin before calculation
    """
    # check data
    data_ = sp.asanyarray(data)
    if data.ndim < 2:
        data_ = sp.atleast_2d(data_)
        if data_.shape[0] < data_.shape[1]:
            data_ = data_.T
    nsmpl, nchan = data_.shape
    if nchan != self._nchan:
        raise ValueError('data has channel count %s, expected %s' %
                         (nchan, self._nchan))

    # generate bin set
    bin_set = [0]
    if self._cur_bin_smpl != 0:
        bin_set.append(self._bin_size - self._cur_bin_smpl)
    while bin_set[-1] < nsmpl:
        bin_set.append(bin_set[-1] + self._bin_size)
    if bin_set[-1] > nsmpl:
        bin_set[-1] = nsmpl

    # process bins
    idx = 1
    while idx < len(bin_set):
        data_bin = data_[bin_set[idx - 1]:bin_set[idx], :]
        for c in xrange(self._nchan):
            self._cur_bin[c] += sp.histogram(data_bin[:, c],
                                             bins=self._ampl_range)[0]
        self._cur_bin_smpl += data_bin.shape[0]
        if self._cur_bin_smpl == self._bin_size:
            self.append_bin(self._cur_bin)
            self._cur_bin[:] = 0
            self._cur_bin_smpl = 0
        idx += 1

def ejercicio_3():
    print("Ingrese numeros, termine con quit: ")
    my_list = []
    while True:
        inp = input()
        if inp == "quit":
            break
        my_list.append(int(inp))
    print(my_list)
    my_list.sort()
    hist, bin_edges = scipy.histogram([my_list], bins=range(int(my_list[-1]) + 2))
    plt.bar(bin_edges[:-1], hist, width=1)
    plt.xlim(min(bin_edges), max(bin_edges))
    plt.show()

def getImageDescriptor(model, im):
    im = standardizeImage(im)
    height, width = im.shape[:2]
    numWords = model.vocab.shape[1]
    frames, descrs = getPhowFeatures(im, conf.phowOpts)

    # quantize appearance
    if model.quantizer == 'vq':
        binsa, _ = vq(descrs.T, model.vocab.T)
    elif model.quantizer == 'kdtree':
        raise ValueError('quantizer kdtree not implemented')
    else:
        raise ValueError('quantizer {0} not known or understood'.format(
            model.quantizer))

    hist = []
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
        binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
        binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy list to what spatial bin each feature point belongs to
        if (numpy.any(distsx < 0)) | (numpy.any(
                distsx > (width / n_spatial_bins_x + 0.5))):
            print 'something went wrong'
            import pdb
            pdb.set_trace()
        if (numpy.any(distsy < 0)) | (numpy.any(
                distsy > (height / n_spatial_bins_y + 0.5))):
            print 'something went wrong'
            import pdb
            pdb.set_trace()

        # combined quantization
        number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
        temp = arange(number_of_bins)
        # update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
        temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
        bin_comb = temp[binsx, binsy, binsa]
        hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins + 1), density=True)
        hist.append(hist_temp)

    hist = hstack(hist)
    hist = array(hist, 'float32') / sum(hist)
    return hist

def createPattern(image):
    colorArray = []
    ar = np.asarray(image)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(ar, 5)  # cluster the 5 dominant colors
    vecs, dist = scipy.cluster.vq.vq(ar, codes)
    counts, bins = scipy.histogram(vecs, len(codes))
    index_max = scipy.argmax(counts)  # find the index of the single most frequent color
    peak = codes[index_max]

    for i in range(0, 5):
        colorArray.append(codes[i].astype("uint8").tolist())
    colorArray.append(peak.astype("uint8").tolist())  # the most dominant color
    return colorArray

def kde(data, N=None, MIN=None, MAX=None):
    # Parameters to set up the mesh on which to calculate
    N = 2**12 if N is None else int(2**scipy.ceil(scipy.log2(N)))
    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        Range = maximum - minimum
        MIN = minimum - Range / 10 if MIN is None else MIN
        MAX = maximum + Range / 10 if MAX is None else MAX

    # Range of the data
    R = MAX - MIN

    # Histogram the data to get a crude first approximation of the density
    M = len(data)
    DataHist, bins = scipy.histogram(data, bins=N, range=(MIN, MAX))
    DataHist = DataHist / M
    DCTData = scipy.fftpack.dct(DataHist, norm=None)

    I = [iN * iN for iN in range(1, N)]
    SqDCTData = (DCTData[1:] / 2)**2

    # The fixed point calculation finds the bandwidth = t_star
    guess = 0.1
    try:
        t_star = scipy.optimize.brentq(fixed_point, 0, guess,
                                       args=(M, I, SqDCTData))
    except ValueError:
        print('Oops!')
        return None

    # Smooth the DCTransformed data using t_star
    SmDCTData = DCTData * scipy.exp(
        -scipy.arange(N)**2 * scipy.pi**2 * t_star / 2)
    # Inverse DCT to get density
    density = scipy.fftpack.idct(SmDCTData, norm=None) * N / R
    mesh = [(bins[i] + bins[i + 1]) / 2 for i in range(N)]
    bandwidth = scipy.sqrt(t_star) * R

    density = density / scipy.trapz(density, mesh)
    cdf = np.cumsum(density) * (mesh[1] - mesh[0])

    return bandwidth, mesh, density, cdf

def compute_scc_histogram(self):
    '''
    Computes a histogram of abs correlation coefficients from the
    pickled R(a,b) matrix.
    '''
    if not os.path.exists(self.rabDirectory):
        raise RAICARRabException
    if not os.path.exists(os.path.join(self.rabDirectory, 'rabmatrix.db')):
        raise RAICARRabException
    with open(os.path.join(self.rabDirectory, 'rabmatrix.db'), 'rb') as rabPtr:
        RabDict = pickle.load(rabPtr)

    rPDF = dict.fromkeys(['bin edges', 'counts', 'bar width'])
    rPDF['bin edges'] = np.linspace(0, 1.0, 101)
    rPDF['counts'], _ = histogram(
        a=np.hstack(list(RabDict.values())[i].flatten() for i in range(0, len(RabDict))),
        bins=rPDF['bin edges'])
    rPDF['bin edges'] = rPDF['bin edges'][0:-1]
    rPDF['bar width'] = 0.01
    return rPDF

def dominant_color(img, nb_clusters=5, need_resize=False):
    if need_resize:
        img = img.resize((150, 150))

    img_arr = np.asarray(img)
    shape = img_arr.shape
    img_arr = img_arr.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(img_arr, nb_clusters)
    vecs, dist = scipy.cluster.vq.vq(img_arr, codes)
    counts, bins = scipy.histogram(vecs, len(codes))

    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour

def GetnewColors():
    NUM_CLUSTERS = 7
    im = Image.open('static/images/image.jpg')
    im = im.resize((150, 150))
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)       # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    s = set()
    s = counts
    x = set()
    x = getcol(codes, NUM_CLUSTERS)
    return x
