Example #1
def anaPsf(xaxis,yaxis,data):

    # centroids
    xCentroid = (xaxis*data).sum()/data.sum()
    yCentroid = (yaxis*data).sum()/data.sum()

    # radial sum -- around Centroid
    raxis = numpy.sqrt((xaxis-xCentroid)*(xaxis-xCentroid)+(yaxis-yCentroid)*(yaxis-yCentroid))

    # histogram 
    nsumbin = 1000
    npix,bin_edges = scipy.histogram(raxis,nsumbin,(0.,100.))
    rsumpix,bin_edges = scipy.histogram(raxis,nsumbin,(0.,100.),weights=data)

    # calculate ee80
    rsumpixNorm = rsumpix/rsumpix.sum()
    rcumsum = numpy.cumsum(rsumpixNorm)
    # at this point rcumsum[0] equals rsumpixNorm[0], rcumsum[1] equals the sum of rsumpixNorm[0] and rsumpixNorm[1], etc.
    # so rcumsum[0] is the integral for r<bin_edges[1], and in general rcumsum[i] is the integral for r<bin_edges[i+1]
    # thus icumsum gives the appropriate upper limit for each rcumsum bin

    icumsum = bin_edges[1:nsumbin+1]
    ee80 = numpy.interp(0.8,rcumsum,icumsum)

    # calculate polarization (w/o seeing, CCD diffusion)
    norm = data.sum()
    qxx = ( (xaxis-xCentroid)*(xaxis-xCentroid)*data ).sum() / norm
    qyy = ( (yaxis-yCentroid)*(yaxis-yCentroid)*data ).sum() / norm
    qyx = ( (yaxis-yCentroid)*(xaxis-xCentroid)*data ).sum() / norm
    
    e1 = (qxx-qyy)/(qxx+qyy)
    e2 = 2.0*qyx/(qxx+qyy)

    return ee80,qxx,qyy,qyx,e1,e2
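
A minimal usage sketch for anaPsf (the grid size and Gaussian width are hypothetical; scipy.histogram was an alias for numpy.histogram and is gone from modern SciPy releases, so on a current stack the calls above map directly to numpy.histogram):

import numpy

# Hypothetical 256x256 pixel grid holding a circular Gaussian spot (sigma = 3 px).
yaxis, xaxis = numpy.mgrid[0:256, 0:256].astype(float)
data = numpy.exp(-((xaxis - 128.0)**2 + (yaxis - 128.0)**2) / (2.0 * 3.0**2))

ee80, qxx, qyy, qyx, e1, e2 = anaPsf(xaxis, yaxis, data)
print("80% encircled-energy radius:", ee80, "ellipticity:", e1, e2)
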
def generate_scipy_comparison(csvPathname):
    # this is some hack code for reading the csv and doing some percentile stuff in scipy
    from numpy import loadtxt

    dataset = loadtxt(
        open(csvPathname, 'r'),
        delimiter=',',
        dtype='float64')

    print("csv read for training, done")

    # we're going to strip just the last column for percentile work
    # used below
    NUMCLASSES = 10

    # data is last column
    # drop the output
    print(dataset.shape)
    from scipy import histogram
    import numpy
    print("histogram of dataset")
    print(histogram(dataset, bins=NUMCLASSES))
    print(numpy.mean(dataset, axis=0, dtype=numpy.float64))
    print(numpy.std(dataset, axis=0, dtype=numpy.float64, ddof=0))
    print(numpy.std(dataset, axis=0, dtype=numpy.float64, ddof=1))
def genMCHistogramsOpenCL(distribution, rng, iterations=100, numBins=1000):
    # get OpenCL CPU devices
    openCLDevices = [device for device in clsim.I3CLSimOpenCLDevice.GetAllDevices() if device.cpu]
    if len(openCLDevices)==0:
        raise RuntimeError("No CPU OpenCL devices available!")
    openCLDevice = openCLDevices[0]

    openCLDevice.useNativeMath=False
    workgroupSize = 1
    workItemsPerIteration = 10240
    print("           using platform:", openCLDevice.platform)
    print("             using device:", openCLDevice.device)
    print("            workgroupSize:", workgroupSize)
    print("    workItemsPerIteration:", workItemsPerIteration)
    
    tester = clsim.I3CLSimRandomDistributionTester(device=openCLDevice,
                                                   workgroupSize=workgroupSize,
                                                   workItemsPerIteration=workItemsPerIteration,
                                                   randomService=rng,
                                                   randomDistribution=distribution)
    
    print("maxWorkgroupSizeForKernel:", tester.maxWorkgroupSize)
    
    angles = tester.GenerateRandomNumbers(iterations)
    samples = len(angles)
    
    print("generated")
    
    angles = numpy.array(angles) # convert to numpy array
    print("converted")
    
    numAng_orig, binsAng = scipy.histogram(numpy.arccos(angles)*(180./math.pi), range=(0.,180.), bins=numBins)
    print("hist1 complete")

    numCos_orig, binsCos = scipy.histogram(angles, range=(-1.,1.), bins=numBins)
    print("hist2 complete")
    
    del angles # not needed anymore
    print("deleted")
    
    numAng=[]
    for i, number in enumerate(numAng_orig):
        binWidth = math.cos(binsAng[i]*math.pi/180.) - math.cos(binsAng[i+1]*math.pi/180.)
        numAng.append(float(number)/float(samples)/binWidth)
    numAng=numpy.array(numAng)
    
    numCos=[]
    for i, number in enumerate(numCos_orig):
        numCos.append(float(number)/float(samples)/float(2./float(numBins)))
    numCos=numpy.array(numCos)
    
    binsAng = numpy.array(binsAng[:-1])+(binsAng[1]-binsAng[0])/2.
    binsCos = numpy.array(binsCos[:-1])+(binsCos[1]-binsCos[0])/2.
    
    return dict(cos=dict(num=numCos, bins=binsCos), ang=dict(num=numAng, bins=binsAng))
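
The per-bin loops above can be vectorized; a minimal NumPy-only sketch of the cosine-histogram normalization (assuming, as above, that angles holds the sampled direction cosines):

import numpy

def normalized_cos_hist(angles, numBins=1000):
    # Density-normalized histogram of cos(theta) over [-1, 1].
    num, bins = numpy.histogram(angles, range=(-1., 1.), bins=numBins)
    density = num / float(len(angles)) / (2. / numBins)  # counts -> probability density
    centers = bins[:-1] + (bins[1] - bins[0]) / 2.       # edges -> bin centers
    return centers, density
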
Example #4
    def determine_dominant_color_in_image(self, image):
        NUM_CLUSTERS = 5
            
        ar = scipy.misc.fromimage(image)
        shape = ar.shape
        if len(shape) > 2:
            ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
        # print "Before: %s" % codes
        original_codes = codes
        for low, hi in [(60, 200), (35, 230), (10, 250)]:
            codes = scipy.array([code for code in codes 
                                 if not ((code[0] < low and code[1] < low and code[2] < low) or
                                         (code[0] > hi and code[1] > hi and code[2] > hi))])
            if not len(codes): codes = original_codes
            else: break
        # print "After: %s" % codes
    
        vecs, _ = scipy.cluster.vq.vq(ar, codes)         # assign codes
        counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences
        # colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
        # total = scipy.sum(counts)
        # print dict(zip(colors, [count/float(total) for count in counts]))
        index_max = scipy.argmax(counts)                    # find most frequent
        peak = codes[index_max]
        color = ''.join('%02x' % int(c) for c in peak)
        # print 'most frequent is %s (#%s)' % (peak, color)
        
        return color[:6]
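
A hedged usage sketch for the method above ('cover.jpg' and obj are hypothetical; scipy.misc.fromimage has been removed from modern SciPy, and numpy.asarray(image) is the usual drop-in substitute):

from PIL import Image
import numpy

image = Image.open('cover.jpg').convert('RGB')  # hypothetical input file
image = image.resize((150, 150))                # shrink to speed up k-means
ar = numpy.asarray(image)                       # modern replacement for scipy.misc.fromimage
# hex_color = obj.determine_dominant_color_in_image(image)  # obj: instance of the enclosing class
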
Example #5
def include_image_level_features(orig_imga,fvector_l,featsel):
    # include grayscale values ?
    f_input_gray = featsel['input_gray']
    if f_input_gray is not None:
        shape = f_input_gray
        #print orig_imga.shape
        fvector_l += [sp.misc.imresize(colorconv.gray_convert(orig_imga), shape).ravel()]

    # include color histograms ?
    f_input_colorhists = featsel['input_colorhists']
    if f_input_colorhists is not None:
        nbins = f_input_colorhists
        colorhists = sp.empty((3,nbins), 'f')
        if orig_imga.ndim == 3:
            for d in range(3):
                h = sp.histogram(orig_imga[:,:,d].ravel(),
                                 bins=nbins,
                                 range=[0,255])
                binvals = h[0].astype('f')
                colorhists[d] = binvals
        else:
            raise ValueError("expected orig_imga.ndim == 3")
            #h = sp.histogram(orig_imga[:,:].ravel(),
            #                 bins=nbins,
            #                 range=[0,255])
            #binvals = h[0].astype('f')
            #colorhists[:] = binvals

        #feat_l += [colorhists.ravel()]
        fvector_l += [colorhists.ravel()]

    return fvector_l
Example #6
    def blobs(shape, porosity, blobiness=8):
        """
        Generates an image containing amorphous blobs

        Parameters
        ----------
        shape : list
            The size of the image to generate in [Nx, Ny, Nz] where N is the
            number of voxels

        porosity : scalar
            The porosity of the final image.  This number is approximated by
            the method so the returned result may not have exactly the
            specified value.

        blobiness : scalar
            Controls the morphology of the image.  A higher number results in
            a larger number of smaller blobs.

        """
        if sp.size(shape) == 1:
            shape = sp.full((3, ), int(shape))
        [Nx, Ny, Nz] = shape
        sigma = sp.mean(shape)/(4*blobiness)
        mask = sp.rand(Nx, Ny, Nz)
        mask = spim.gaussian_filter(mask, sigma=sigma)
        hist = sp.histogram(mask, bins=1000)
        cdf = sp.cumsum(hist[0])/sp.size(mask)
        xN = sp.where(cdf >= porosity)[0][0]
        im = mask <= hist[1][xN]
        return im
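
A quick usage sketch, treating blobs as a plain function (the parameters are hypothetical; sp follows this snippet's import scipy as sp convention, whose rand/mean aliases only exist in older SciPy releases):

import scipy as sp
import scipy.ndimage as spim

im = blobs(shape=[64, 64, 64], porosity=0.5, blobiness=8)
print(im.sum() / im.size)  # realized porosity, approximately the requested 0.5
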
 def _computeEntropy(self):
     ''' Compute the entropy of the histogram of distances from the pivot element. Low entropy
     scores means the distribution is concentrated, and thus may be a good candidate for splitting.
     High entropy (at limit, a uniform distribution), may indicate that there is no good separation
     of the elements in this node.
     '''
     assert(self.Pivot is not None)
     assert(self.Ds is not None)
     assert(len(self.Ds) >= self.Tau)
     assert(len(self.Ds) == len(self.items))
         
     #create a list of distances not including the sample which was selected as the pivot
     #...which will have a distance of zero, within numerical errors.
     Dx = [D for D in self.Ds if D>0.01]
     
     #compute histogram using 10 bins of the Dx list
     HistInfo = scipy.histogram(Dx, bins=10)
     pk = scipy.array( HistInfo[0] )
     epsilon = 0.000001
     H = entropy(pk+epsilon)  #avoids log0 warnings
     #print "Histogram: ", HistInfo[0]
     #print "Entropy: %f"%H
     #print "Range: Min(>0)=%f, Max=%f, Mean=%f, Median=%f"%(min(Dx),max(self.Ds),scipy.mean(self.Ds),scipy.median(self.Ds))
     
     return H
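
A self-contained sketch of the same entropy computation (assuming entropy here is scipy.stats.entropy, which normalizes the counts to a distribution; the distances are hypothetical):

import scipy
from scipy.stats import entropy

Dx = [0.5, 1.2, 1.3, 2.8, 3.1, 3.3]          # hypothetical pivot distances
counts, edges = scipy.histogram(Dx, bins=10)  # 10-bin histogram, as in _computeEntropy
H = entropy(scipy.array(counts) + 0.000001)   # epsilon avoids log(0) warnings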
     
def tag_images_with_color_value(NUM_CLUSTERS = 4, INPUT_FOLDER = './data/covers/'):

    isbn = list()
    cover_color = list()

    files = os.listdir(INPUT_FOLDER)
    for eachFile in files:
        print(eachFile)
        im = Image.open(INPUT_FOLDER + eachFile)
        im = im.resize((50, 50))                          # optional, to reduce time
        ar = scipy.misc.fromimage(im)
        shape = ar.shape
        print(len(shape))

        if len(shape) == 2:
            ar = ar.reshape(scipy.product(shape[:1]), shape[1])
        else:
            ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        # finding clusters
        codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
        # print 'cluster centres:\n', codes

        vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
        counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

        index_max = scipy.argmax(counts)                    # find most frequent
        peak = codes[index_max]
        colour = ''.join('%02x' % int(c) for c in peak)

        isbn.append(eachFile[:-4])
        cover_color.append(colour)

    result = list(zip(isbn, cover_color))
    return result
def getPredominantColor(filename):
    im = Image.open(filename).convert('RGB')

    # Convert to numpy array
    ar = scipy.misc.fromimage(im)

    # Get dimensions
    shape = ar.shape

    # Convert to bidimensional array of width x height rows and 3 columns (RGB)
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    # Find cluster centers and their distortions
    # codes contains the RGB value of the centers
    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)

    # Maps all the pixels in the image to their respective centers
    vecs, dist = scipy.cluster.vq.vq(ar, codes)

    # Counts the occurrences of each color (NUM_CLUSTER different colors after the mapping)
    counts, bins = scipy.histogram(vecs, len(codes))

    # Find most frequent color
    index_max = scipy.argmax(counts)
    peak = codes[index_max]

    return peak.astype(int)
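
The snippet assumes a module-level NUM_CLUSTERS; a hedged usage sketch ('photo.jpg' is a hypothetical file):

NUM_CLUSTERS = 5  # module-level constant assumed by getPredominantColor

rgb = getPredominantColor('photo.jpg')
print(rgb)  # e.g. an integer triple like [123, 64, 200]
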
Example #10
def makehistmult(testpathlist,npulseslist):
    
    sns.set_style("whitegrid")
    sns.set_context("notebook")
    params = ['Ne','Te','Ti','Vi']
    paramsLT = ['N_e','T_e','T_i','V_i']
    errdictlist =[ makehistdata(params,itest)[0] for itest in testpathlist] 
    (figmplf, axmat) = plt.subplots(2, 2,figsize=(12,8), facecolor='w')
    axvec = axmat.flatten()
    histlims = [[4e10,2e11],[1200.,3000.],[300.,1900.],[-250.,250.]]
    histvecs = [sp.linspace(ipm[0],ipm[1],100) for ipm in histlims]
    linehand = []
    lablist= ['J = {:d}'.format(i) for i in npulseslist]
    
    for iax,iparam in enumerate(params):
        for idict,inpulse in zip(errdictlist,npulseslist):
            curvals = idict[iparam]
            curhist,binout = sp.histogram(curvals,bins=histvecs[iax])
            dx=binout[1]-binout[0]
            curhist_norm = curhist.astype(float)/(curvals.size*dx)
            plthand = axvec[iax].plot(binout[:-1],curhist_norm,label='J = {:d}'.format(inpulse))[0]
            linehand.append(plthand)
            
        axvec[iax].set_xlabel(r'$'+paramsLT[iax]+'$')
        axvec[iax].set_title(r'Histogram for $'+paramsLT[iax]+'$')
    leg = figmplf.legend(linehand[:len(npulseslist)],lablist)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    spti = figmplf.suptitle('Parameter Distributions',fontsize=18)
    return (figmplf,axvec,linehand)
Example #11
def find_a_dominant_color(image):
    # K-mean clustering to find the k most dominant color, from:
    # http://stackoverflow.com/questions/3241929/python-find-dominant-most-common-color-in-an-image
    n_clusters = 5

    # Get image into a workable form
    im = image.copy()
    im = im.resize((150, 150))      # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    im_shape = ar.shape
    ar = ar.reshape(scipy.product(im_shape[:2]), im_shape[2])
    ar = ar.astype(float)

    # Compute clusters
    codes, dist = scipy.cluster.vq.kmeans(ar, n_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

    # Get the indexes sorted from most to least frequent
    sorted_idxs = np.argsort(counts)[::-1]

    # Get the color
    peak = codes[sorted_idxs[1]] # get second most frequent color

    return [int(i) for i in peak.tolist()] # list comprehension to quickly cast everything to int
Example #12
def reduce_colors(image, k):
    '''Apply kmeans algorithm.
        Input:   image, number of clusters to use
        Returns: colors, 
                 counts per color, 
                 new image
    '''
    if k > 32:
        print "Setting colors to maximum allowed of 32"
        k = 32
    rows, cols, rgb = image.shape
    # reshape the image in a single row array of RGB pixels
    image_row = np.reshape(image,(rows * cols, 3))
    #HERE ADD CODE TO GET A GOOD GUESS OF COLORS AND PASS THAT AS
    #SECOND ARGUMENT TO kmeans
    #image_array_sample = shuffle(image_row, random_state=0)[:1000]
    #kguess = kmeans(image_array_sample, k)
    #colors,_ = kmeans(image_row, kguess)
    # perform the clustering
    colors,_ = kmeans(image_row, k)
    # vector quantization, assign to each pixel the index of the nearest centroid (i=1..k)
    qnt,_ = vq(image_row,colors)
    # reshape the qnt vector to the original image shape
    image_centers_id = np.reshape(qnt,(rows, cols))
    # assign the color value to each pixel
    newimage = colors[image_centers_id]
    #count number of pixels of each cluster color
    counts,bins = sp.histogram(qnt, len(colors))
    return colors, counts, newimage
Example #13
def degree_distrib(net, deg_type="total", node_list=None, use_weights=True,
                   log=False, num_bins=30):
    '''
    Computing the degree distribution of a network.
    
    Parameters
    ----------
    net : :class:`~nngt.Graph` or subclass
        the network to analyze.
    deg_type : string, optional (default: "total")
        type of degree to consider ("in", "out", or "total").
    node_list : list or numpy.array of ints, optional (default: None)
        Restrict the distribution to a set of nodes (default: all nodes).
    use_weights : bool, optional (default: True)
        use weighted degrees (do not take the sign into account: all weights
        are positive).
    log : bool, optional (default: False)
        use log-spaced bins.
    num_bins : int, optional (default: 30)
        number of bins used for the distribution.
    
    Returns
    -------
    counts : :class:`numpy.array`
        number of nodes in each bin
    deg : :class:`numpy.array`
        bins
    '''
    ia_node_deg = net.get_degrees(node_list, deg_type, use_weights)
    ra_bins = sp.linspace(ia_node_deg.min(), ia_node_deg.max(), num_bins)
    if log:
        ra_bins = sp.logspace(sp.log10(sp.maximum(ia_node_deg.min(),1)),
                               sp.log10(ia_node_deg.max()), num_bins)
    counts,deg = sp.histogram(ia_node_deg, ra_bins)
    ia_indices = sp.argwhere(counts)
    return counts[ia_indices], deg[ia_indices]
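
The log branch is the usual trick for heavy-tailed degree distributions; a self-contained sketch of the same binning with plain numpy (the degree data are hypothetical):

import numpy as np

deg = np.random.default_rng(0).pareto(2.0, 1000) + 1  # hypothetical heavy-tailed degrees
bins = np.logspace(np.log10(max(deg.min(), 1)), np.log10(deg.max()), 30)
counts, edges = np.histogram(deg, bins)
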
def getImageDescriptor(model, im, conf):
	im = standardizeImage(im)
	height, width = im.shape[:2]
	numWords = model.vocab.shape[1]
	frames, descrs = getPhowFeatures(im, conf.phowOpts)
	# quantize appearance
	if model.quantizer == 'vq':
		binsa, _ = vq(descrs.T, model.vocab.T)
	elif model.quantizer == 'kdtree':
		raise ValueError('quantizer kdtree not implemented')
	else:
		raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))
	hist = []
	for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
		binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
		binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
		# binsx and binsy list to what spatial bin each feature point belongs to
		if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))):
			print ("something went wrong")
			import pdb; pdb.set_trace()
		if (numpy.any(distsy < 0)) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
			print ("something went wrong")
			import pdb; pdb.set_trace()
		# combined quantization
		number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
		temp = arange(number_of_bins)
		# update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
		temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
		bin_comb = temp[binsx, binsy, binsa]
		hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True)
		hist.append(hist_temp)
	
	hist = hstack(hist)
	hist = array(hist, 'float32') / sum(hist)
	return hist
Example #15
def getDominantColor(img_url):
    if r.exists(img_url):
        cache_result = r.hmget(img_url, ['r', 'g', 'b'])
        return cache_result
        
    NUM_CLUSTERS = 5
    im = Image.open(io.BytesIO(urllib.request.urlopen(img_url).read()))  # Python 3 equivalents of the old StringIO/urllib2 pair
    img_arr = scipy.misc.fromimage(im)
    img_shape = img_arr.shape
    
    if len(img_shape) > 2:
        img_arr = img_arr.reshape(scipy.product(img_shape[:2]), img_shape[2])
    
    codes, _ = scipy.cluster.vq.kmeans(img_arr, NUM_CLUSTERS)
    
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([code for code in codes if not (all([c < low for c in code]) or all([c > hi for c in code]))])
        if not len(codes):
            codes = original_codes
        else:
            break

    vecs, _ = scipy.cluster.vq.vq(img_arr, codes)
    counts, bins = scipy.histogram(vecs, len(codes))

    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    color = [int(c) for c in peak[:3]]
    r.hmset(img_url, {'r':color[0], 'g':color[1], 'b':color[2]})
    #r.expire(img_url, 86400)
    return color
Example #16
def cluster_colors(image_url, num_clusters=5):
    """
    Return the most clustered colors of an image.
    Use scipy's k-means clustering algorithm.
    """

    print('Reading image...')
    response = requests.get(image_url)
    im = Image.open(BytesIO(response.content))  # io.BytesIO: response.content is bytes
    im = im.resize((150, 150))      # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    ar = ar.astype(float)

    print('Finding clusters...')
    # k-means clustering
    codes, dist = scipy.cluster.vq.kmeans(ar, num_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences
    sorted_index = sorted(range(len(counts)), key=lambda index: counts[index], reverse=True)

    most_common_colors = []
    for index in sorted_index:
        peak = codes[index]
        peak = peak.astype(int)
        colour = ''.join(format(c, '02x') for c in peak)
        most_common_colors.append('#' + colour)
    return most_common_colors
Example #17
def genMCHistogramsHost(distribution, hist_range, iterations=10000000, numBins=1000):
    print("generating (host)")

    values = []
    for i in range(iterations):
        values.append(distribution.SampleFromDistribution(rng, []))
    samples = len(values)
    print("generated (host)")
    
    values = numpy.array(values)/I3Units.nanometer # convert to numpy array and convert units
    print("converted (host)")
    
    range_width=hist_range[1]-hist_range[0]
    
    num_orig, bins = scipy.histogram(values, range=hist_range, bins=numBins)
    print("hist1 complete (host)")
    
    del values # not needed anymore
    print("deleted (host)")
    
    num=[]
    for number in num_orig:
        num.append(float(number)/float(samples)/float(range_width/float(numBins)))
    num=numpy.array(num)
    
    bins = numpy.array(bins[:-1])+(bins[1]-bins[0])/2.
    
    return dict(num=num, bins=bins)
Example #18
def genMCHistogramsOpenCL(distribution, hist_range, iterations=1000, numBins=1000):
    tester = clsim.I3CLSimRandomDistributionTester(device=openCLDevice,
                                                   workgroupSize=workgroupSize,
                                                   workItemsPerIteration=workItemsPerIteration,
                                                   randomService=rng,
                                                   randomDistribution=distribution)
    
    values = tester.GenerateRandomNumbers(iterations)
    samples = len(values)
    print("generated")
    
    values = numpy.array(values)/I3Units.nanometer # convert to numpy array and convert units
    print("converted")
    
    range_width = hist_range[1] - hist_range[0]
    
    num_orig, bins = scipy.histogram(values, range=hist_range, bins=numBins)
    print("hist1 complete")
    
    del values # not needed anymore
    print("deleted")
    
    num=[]
    for number in num_orig:
        num.append(float(number)/float(samples)/float(range_width/float(numBins)))
    num=numpy.array(num)
    
    bins = numpy.array(bins[:-1])+(bins[1]-bins[0])/2.
    
    return dict(num=num, bins=bins)
Example #19
def get_dominant_color(image_path):
    '''
    Parse image and return dominant color in image.

    @param image_path: Image path to parse.
    @return: Return dominant color, format as hexadecimal number. 
    '''
    # print 'reading image'
    im = Image.open(image_path)
    im = im.resize((150, 150))      # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    
    # print 'finding clusters'
    NUM_CLUSTERS = 5
    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    # print 'cluster centres:\n', codes
    
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences
    
    index_max = scipy.argmax(counts)                    # find most frequent
    peak = codes[index_max]
    colour = ''.join('%02x' % int(c) for c in peak)
    # print 'most frequent is %s (#%s)' % (peak, colour)
    
    return "#%s" % (colour[0:6])
Example #20
def hist(fcms, index, savefile=None, display=True, **kwargs):
    """Plot overlay histogram.

    fcms is a list of FCMData objects/arrays
    index is channel to plot
    """
    figure = pylab.figure()
    for fcm in fcms:
        if isinstance(index, str):
            index = fcm.name_to_index(index)
        y = fcm[:, index]
        h, b = histogram(y, bins=200, **kwargs)
        b = (b[:-1] + b[1:]) / 2.0
        unused_x = pylab.linspace(min(y), max(y), 100)
        pylab.plot(b, h, label=fcm.name)
    pylab.legend()
    pylab.xlabel(fcms[0].channels[index])

    if display:
        pylab.show()

    if savefile:
        pylab.savefig(savefile)

    return figure
 def test_Intensity_1(self):
     """Test a case of distributed intensity values."""
     # Create label image with only one region
     label_image = scipy.zeros(2*2*2, dtype=scipy.int8).reshape(2,2,2)
     
     # Create original image with two equally distributed intensity values
     original_image = scipy.zeros(2*2*2, dtype=scipy.int8)
     original_image[:4] = -1
     original_image[4:] = 1
     original_image = original_image.reshape(2,2,2)
     
     # Initialize object
     statistics = LabelImageStatistics(label_image, original_image)
     
     # Compute expected result
     i = scipy.array([-1,-1,-1,-1,1,1,1,1])
     h = scipy.histogram(i, statistics._intensity_distribution_local_histogram_width)
     hr = scipy.array(h[0]) / float(h[0].sum())
     g = stats.norm(*stats.norm.fit(i))
     r = abs(hr - g.pdf(h[1][:-1]))
     r *= h[1][-2] - h[1][0]
     r = r.sum()
     
     # Check created intensity distribution
     intensity_distributions = statistics.get_intensity_distributions()
     self.assertEqual(len(intensity_distributions), 1)
     self.assertEqual(intensity_distributions[0], i.std())
     
     intensity_distribution_histogram = statistics.get_intensity_distribution_histogram()
     self.assertEqual(intensity_distribution_histogram[0][statistics.get_intensity_distribution_histogram_width()//2], 1)
     self.assertEqual(intensity_distribution_histogram[0].max(), 1)
     self.assertEqual(intensity_distribution_histogram[0].min(), 0)
     self.assertEqual(intensity_distribution_histogram[1].mean(), i.std())
Example #22
def getDomIMAGEColor( imName ):
	# Reference:
	# 	http://stackoverflow.com/questions/3241929/
	# 	python-find-dominant-most-common-color-in-an-image

	# number of k-means clusters
	NUM_CLUSTERS = 4

	# Open target image
	im = imName
	im = im.resize((150, 150))      # optional, to reduce time
	ar = scipy.misc.fromimage(im)
	shape = ar.shape
	ar = ar.reshape(scipy.product(shape[:2]), shape[2])
	ar = ar.astype(float)

	# Find clusters
	codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
	vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
	counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

	# Find most frequent
	index_max = scipy.argmax(counts)                    
	peak = codes[index_max]
	color = ''.join(chr(int(c)) for c in peak).encode('hex')

	return (peak, color)
Example #23
def create_histogram(parameter_name, nbins=100, writeFile=True, skipfirst=0, truncate=False, smooth=False):
	"""
	Returns a histogram and some statistics about this parameter.
		
	@param writeFile: if true, write the histogram to paramname.histogram
	"""
	f = "%s-chain-0.prob.dump" % parameter_name
	values = numpy.recfromtxt(f)[skipfirst::nevery]

	statistics = {
		'min':   float(values.min()),
		'max':   float(values.max()),
		'stdev': float(values.std()),
		'mean':  float(values.mean()),
		'median':float(numpy.median(values)),
		'q1':    float(scipy.stats.scoreatpercentile(values, 25)),
		'q3':    float(scipy.stats.scoreatpercentile(values, 75)),
		'p5':    float(scipy.stats.scoreatpercentile(values, 5)),
		'p95':    float(scipy.stats.scoreatpercentile(values, 95)),
	}
	
	hist = scipy.histogram(values, bins = nbins if not smooth else nbins*10, normed=True)
	histwithborders = numpy.dstack([hist[1][0:nbins], hist[1][1:nbins+1], hist[0]])
	if writeFile:
		scipy.savetxt('%s.histogram' % parameter_name, histwithborders[0], delimiter="\t")
	return histwithborders[0], statistics
Example #24
def generate_scipy_comparison(csvPathname):
    # this is some hack code for reading the csv and doing some percentile stuff in scipy
    # from numpy import loadtxt, genfromtxt, savetxt
    import numpy as np
    import scipy as sp

    dataset = np.genfromtxt(
        open(csvPathname, 'r'),
        delimiter=',',
        skip_header=1,
        dtype=None)  # guess!

    print("csv read for training, done")
    # we're going to strip just the last column for percentile work
    # used below
    NUMCLASSES = 10

    # data is last column
    # drop the output
    print(dataset.shape)
    target = [x[1] for x in dataset]
    # we may have read it in as a string. coerce to number
    targetFP = np.array(target, np.float64)

    if 1==0:
        n_features = len(dataset[0]) - 1
        print("n_features:", n_features)

        # get the end
        # target = [x[-1] for x in dataset]
        # get the 2nd col

        print("histogram of target")
        print(target)
        print(sp.histogram(target, bins=NUMCLASSES))

        print(target[0])
        print(target[1])

    # per = [100 * t for t in thresholds]
    per = [1 * t for t in thresholds]
    print("sp per:", per)
    from scipy import stats
    # a = stats.scoreatpercentile(target, per=per)
    a = stats.mstats.mquantiles(targetFP, prob=per)
    print("sp percentiles:", a)
Example #25
def abs_n_ent_diff(v1,v2,nbins=10):
    """
    Absolute normalized entropy difference, between v1 and v2
    (not symmetric!)
    Quantized the values in 100 bins, measure entropy
    of each of those bins, look at the difference of
    entropy between the two distributions for each of
    those bins.
    nbins  - quantization level between 0 and 1
    """
    assert v1.size == v2.size,'v1 and v2 different sizes'
    edges = np.array(range(0,nbins+1),'float') / nbins
    h1 = histogram(v1.flatten(),edges)[0] + 2
    h2 = histogram(v2.flatten(),edges)[0] + 2
    ents1 = -np.log2(h1*1./h1.sum())
    ents2 = -np.log2(h2*1./h2.sum())
    return np.abs(((ents1 - ents2)/ents1)).mean()
Example #26
def histeq(im,nbr_bins=256):
    """ Histogram equalization of a grayscale image. """
    # get image histogram
    imhist,bins = sp.histogram(im.flatten(),nbr_bins,normed=True)
    cdf = imhist.cumsum() # cumulative distribution function
    cdf = 255 * cdf / cdf[-1] # normalize
    # use linear interpolation of cdf to find new pixel values
    im2 = interp(im.flatten(),bins[:-1],cdf)
    return im2.reshape(im.shape), cdf
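
Usage sketch (the image array is hypothetical; interp here is numpy.interp pulled in by the module's star import, and with modern numpy the normed=True flag is spelled density=True):

import numpy as np

im = np.random.default_rng(1).integers(0, 256, (64, 64)).astype(float)  # hypothetical grayscale image
im_eq, cdf = histeq(im)
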
Example #27
def calculateThreshold(image, coveragePercent):
    import scipy
    data = image.data
    histogram = scipy.histogram(data, len(scipy.unique(data)))
    cumsum = scipy.cumsum(histogram[0])
    targetValue = cumsum[-1] * coveragePercent
    index = scipy.argmin(scipy.absolute(cumsum - targetValue))
    threshold = histogram[1][index]
    return threshold * image.unit
 def get_size_histogram(self):
     """
     Gives the region size distribution.
     @return: A histogram created from the normalized region sizes with
              scipy.histogram.
     
     @note: The width and therefore the number of distinct values of the histogram can
            be set with @see: LabelImageStatistics.set_size_histogram_width.
     """
     return scipy.histogram(list(self._sizes.values()), self._size_histogram_width)
def nonna_select_data(data, outlier_threshold, level='high'):
	"""
	This function returns a list of indexed after identifying the main outliers. It applies
	a cut on the data to remove exactly a fraction (1-outlier_threshold) of all data points.
	By default the cut is applied only at the higher end of the data values, but the 
	parameter level can be used to change this
	
	Input arguments:
	data              = vector containing all data points
	outlier_threshold = remove outliers until we are left with exactly this fraction of the
	                    original data
	level             = 'high|low|both' determines if the outliers are removed only from the
					    high values end, the low values end of both ends.
					    
	Output:
	idx               = index of selected (good) data
	"""
	
	# histogram all the data values
	n,x = scipy.histogram(data, len(data)//10)
	# compute the cumulative distribution and normalize
	nn = scipy.cumsum(n)
	nn = nn / float(max(nn))
	
	if level=='high':
		# select the value such that a fraction outlier_threshold of the data lies below it
		if outlier_threshold < 1:
			val = x[pylab.find(nn/float(max(nn)) >= outlier_threshold)[0]]
		else:
			val = max(data)
		# use that fraction of data only
		idx = data <= val 
	elif level=='low':
		# select the value such that a fraction outlier_threshold of the data lies above it
		if outlier_threshold < 1:
			val = x[pylab.find(nn/float(max(nn)) <= (1-outlier_threshold))[-1]]
		else:
			val = min(data)
		# use that fraction of data only
		idx = data >= val 
	elif level=='both':		
		# select the value such that a fraction outlier_threshold/2 of the data lies below it
		if outlier_threshold < 1:
			Hval = x[pylab.find(nn/float(max(nn)) >= 1-(1-outlier_threshold)/2)[0]]
		else:
			Hval = max(data)	
		# select the value such that a fraction outlier_threshold/2 of the data lies above it
		if outlier_threshold < 1:
			Lval = x[pylab.find(nn/float(max(nn)) <= (1-outlier_threshold)/2)[-1]]
		else:
			Lval = min(data)
		# use that fraction of data only
		idx = scipy.logical_and(data >= Lval, data <= Hval)
	
	return idx
Example #30
def _bin_single_spike_train(train, bins):
    """ Return a binned representation of SpikeTrain object.

    :param train: A spike train to bin.
    :type train: :class:`neo.core.SpikeTrain`
    :param bins: The bin edges, including the rightmost edge, with time units.
    :type bins: Quantity 1D
    :returns: The binned spike train.
    :rtype: 1-D array
    """
    return sp.histogram(train.rescale(bins.units), bins)[0]
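
Usage sketch (assumes the neo and quantities packages this snippet is written against; the spike times are hypothetical):

import quantities as pq
import neo
import scipy as sp

train = neo.SpikeTrain([0.1, 0.5, 1.2, 3.3] * pq.s, t_stop=4.0 * pq.s)
bins = sp.arange(0.0, 4.5, 0.5) * pq.s       # edges with time units, including the rightmost
print(_bin_single_spike_train(train, bins))  # spike counts per bin
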
Example #31
def chord_length_distribution(im, bins=25, log=False):
    r"""
    Determines the distribution of chord lengths in a image containing chords.

    Parameters
    ----------
    im : ND-image
        An image with chords drawn in the pore space, as produced by
        ``apply_chords`` or ``apply_chords_3d``.

    bins : scalar or array_like
        If a scalar is given it is interpreted as the number of bins to use,
        and if an array is given they are used as the bins directly.

    log : Boolean
        If true, the logarithm of the chord lengths will be used, which can
        make the data more clear.

    Returns
    -------
    A tuple containing the ``chord_length_bins``, and four separate pieces of
    information: ``cumulative_chord_count`` and ``cumulative_chord_length``,
    as well as the ``differential_chord_count`` and
    ``differential_chord_length``.
    """
    h = chord_length_counts(im)
    if log:
        h = sp.log10(h)
    y_num, x = sp.histogram(h, bins=bins, density=True)
    y_len, x = sp.histogram(h, bins=bins, weights=h, density=True)
    y_num_cum = sp.cumsum((y_num*(x[1:]-x[:-1]))[::-1])[::-1]
    y_len_cum = sp.cumsum((y_len*(x[1:]-x[:-1]))[::-1])[::-1]
    data = namedtuple('chord_distribution', ('chord_length_bins',
                                             'cumulative_chord_count',
                                             'cumulative_chord_length',
                                             'differential_chord_count',
                                             'differential_chord_length'))
    return data(x[:-1], y_num_cum, y_len_cum, y_num, y_len)
Example #32
def Mclast():
    fig = plt.figure(1, figsize=(6,9))
    gs = gridspec.GridSpec(2,1,height_ratios=[4,1])
    ax = plt.subplot(gs[0])
    mvir, mclast, mratio = ioformat.rcol(fwind, [1,5,14], linestart=1)
    print("Mvir Range: ", min(mvir), max(mvir))
    x, y = [], []
    for i in range(len(mclast)):
        if(mclast[i] > mclast_cut and mratio[i] < 20.0):
            x.append(mvir[i])
            y.append(mclast[i])
    xbins = linspace(11.0, 13.5, 30)
    ybins = linspace(0.05, 1.0, 40)
    xgrid, ygrid = meshgrid(xbins, ybins)
    z, edx, edy = histogram2d(x, y, bins=[xbins,ybins])
    z = z.T + 0.01
    zf = ndimage.gaussian_filter(z, sigma=1.0, order=0)
    cont = ax.contour(xbins[1:], ybins[1:], zf, colors="red")
    #plt.pcolor(xgrid, ygrid, z, cmap="Purples", norm=LogNorm(vmin=z.min(), vmax=z.max()))
    ax.pcolor(xgrid, ygrid, z, cmap="Purples")
    setp(ax.get_xticklabels(), visible=False)
    ax.set_ylabel("Mc (Rejoin)")
    plt.title(modelname+", Z~1.0")
    ax = plt.subplot(gs[1])
    plt.subplots_adjust(hspace=0.0, top=0.9, bottom=0.15)
    hist1, bins = histogram(mvir, bins=linspace(11.0,13.5,30))
    hist2, bins = histogram(x, bins=linspace(11.0,13.5,30))
    hist = []
    for i in range(len(hist1)):
        if(hist1[i] > 0): hist.append(float(hist2[i])/float(hist1[i]))
        else: hist.append(0.0)
    #width=0.8*(bins[1]-bins[0])
    center = (bins[:-1] + bins[1:]) / 2.0
    ax.plot(center, hist, "b.-")
    ax.set_ylim(0.0, 1.0)
    ax.set_xlabel("Mvir")
    ax.set_ylabel("f_rej")
    plt.show()
Example #33
def anaPsf(xaxis, yaxis, data):

    # centroids
    xCentroid = (xaxis * data).sum() / data.sum()
    yCentroid = (yaxis * data).sum() / data.sum()

    # radial sum -- around Centroid
    raxis = numpy.sqrt((xaxis - xCentroid) * (xaxis - xCentroid) +
                       (yaxis - yCentroid) * (yaxis - yCentroid))

    # histogram
    nsumbin = 1000
    npix, bin_edges = scipy.histogram(raxis, nsumbin, (0., 100.))
    rsumpix, bin_edges = scipy.histogram(raxis,
                                         nsumbin, (0., 100.),
                                         weights=data)

    # calculate ee80
    rsumpixNorm = rsumpix / rsumpix.sum()
    rcumsum = numpy.cumsum(rsumpixNorm)
    # at this point rcumsum[0] equals rsumpixNorm[0], rcumsum[1] equals the sum of rsumpixNorm[0] and rsumpixNorm[1], etc.
    # so rcumsum[0] is the integral for r<bin_edges[1], and in general rcumsum[i] is the integral for r<bin_edges[i+1]
    # thus icumsum gives the appropriate upper limit for each rcumsum bin

    icumsum = bin_edges[1:nsumbin + 1]
    ee80 = numpy.interp(0.8, rcumsum, icumsum)

    # calculate polarization (w/o seeing, CCD diffusion)
    norm = data.sum()
    qxx = ((xaxis - xCentroid) * (xaxis - xCentroid) * data).sum() / norm
    qyy = ((yaxis - yCentroid) * (yaxis - yCentroid) * data).sum() / norm
    qyx = ((yaxis - yCentroid) * (xaxis - xCentroid) * data).sum() / norm

    e1 = (qxx - qyy) / (qxx + qyy)
    e2 = 2.0 * qyx / (qxx + qyy)

    return ee80, qxx, qyy, qyx, e1, e2
Example #34
def ejercicio_4():
    my_list = []
    with open('numeros.txt') as f:
        for line in f:
            for i in line:
                if i.isdigit():
                    my_list.append(int(i))
    my_list.sort()

    hist, bin_edges = scipy.histogram([my_list],
                                      bins=range(int(my_list[-1]) + 2))
    plt.bar(bin_edges[:-1], hist, width=1)
    plt.xlim(min(bin_edges), max(bin_edges))
    plt.show()
Example #35
def plotCurrentErrors(currentErrors, labelSize=22, barScale=1.0):
    # make histogram of current errors

    fig = pyplot.figure()

    numBins = max(20, round(scipy.sqrt(len(currentErrors))))
    hist, bins = scipy.histogram(currentErrors, bins=numBins)
    barWidth = barScale * (bins[1] - bins[0])
    binCenters = 0.5 * (bins[:-1] + bins[1:])
    pyplot.bar(binCenters, hist, align='center', width=barWidth)

    pyplot.ylabel('Number of parameter sets', fontsize=labelSize)
    pyplot.xlabel('Parameter set error')
    pyplot.title('Distribution of errors', fontsize=labelSize)
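
Usage sketch (the error values are hypothetical; scipy.randn is one of the old top-level numpy aliases, spelled numpy.random.randn on a modern stack):

import scipy
from matplotlib import pyplot

currentErrors = scipy.randn(500) ** 2  # hypothetical parameter-set errors
plotCurrentErrors(currentErrors)
pyplot.show()
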
Example #36
    def determine_dominant_color_in_image(self, image):
        NUM_CLUSTERS = 5

        # Convert image into array of values for each point.
        if image.mode == '1':
            image = image.convert('L')
        ar = numpy.array(image)
        # ar = scipy.misc.fromimage(image)
        shape = ar.shape

        # Reshape array of values to merge color bands. [[R], [G], [B], [A]] => [R, G, B, A]
        if len(shape) > 2:
            ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        # Get NUM_CLUSTERS worth of centroids.
        ar = ar.astype(numpy.float64)
        codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

        # Pare centroids, removing blacks and whites and shades of really dark and really light.
        original_codes = codes
        for low, hi in [(60, 200), (35, 230), (10, 250)]:
            codes = scipy.array([
                code for code in codes
                if not ((code[0] < low and code[1] < low and code[2] < low) or
                        (code[0] > hi and code[1] > hi and code[2] > hi))
            ])
            if not len(codes):
                codes = original_codes
            else:
                break

        # Assign codes (vector quantization). Each vector is compared to the centroids
        # and assigned the nearest one.
        vecs, _ = scipy.cluster.vq.vq(ar, codes)

        # Count occurrences of each clustered vector.
        counts, bins = scipy.histogram(vecs, len(codes))

        # Show colors for each code in its hex value.
        # colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
        # total = scipy.sum(counts)
        # print dict(zip(colors, [count/float(total) for count in counts]))

        # Find the most frequent color, based on the counts.
        index_max = scipy.argmax(counts)
        peak = codes.astype(int)[index_max]
        color = "{:02x}{:02x}{:02x}".format(peak[0], peak[1], peak[2])
        color = self.feed.adjust_color(color[:6], 21)

        return color
Example #37
def prominent_colors(image, num_colors):
    thumbnail = create_thumbnail(image)
    vertices = vertices_from_image(thumbnail)

    # Because the vertices are colors they should all form finite
    # numbers, so we can disable check_finite
    num_clusters = num_colors
    (centroid_codebook, _) = kmeans(vertices, num_clusters, check_finite = False)
    (codes, _) = vq(vertices, centroid_codebook, check_finite = False)
    (counts, bins) = histogram(codes, len(centroid_codebook))
    most_frequent = argsort(counts)[::-1]

    centroid_codebook = centroid_codebook.astype(int)
    return [tuple(centroid_codebook[most_frequent[i]]) for i in range(num_colors)]
Example #38
def get_dominant_color(img, RESIZE=15):
   
    NUM_CLUSTERS = 3
    orig_img  = cv2.resize(img, (CODE_IMG_SIZE, CODE_IMG_SIZE))
    
    img       = cv2.resize(img, (RESIZE, RESIZE))
    shape     = img.shape
    ar        = img.reshape(scipy.product(shape[:2]), shape[2]).astype(float)
    codes, _  = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    vecs, _   = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, _ = scipy.histogram(vecs, len(codes))      # count occurrences
    index_max = scipy.argmax(counts)                   # find most frequent
    dominant_color      = codes[index_max]
    return dominant_color, orig_img
Example #39
def _test():
    seed = 274
    numpy.random.seed(seed)

    N = 10
    M = 1000
    u = numpy.random.rand()
    w = numpy.random.rand(N)
    sample = fast_sample(w, M, u)
    num, bins = scipy.histogram(sample, numpy.arange(N))
    u = 0.2
    print('u = ', u)
    print('w = ', w / sum(w))
    print('frequency = ', num / float(M))
    print('w - sample frequency = ', w / sum(w) - num / float(M))

    u = 0.98
    sample = fast_sample(w, M, u)
    num, bins = scipy.histogram(sample, numpy.arange(N))
    print('u = ', u)
    print('w = ', w / sum(w))
    print('frequency = ', num / float(M))
    print('w - sample frequency = ', w / sum(w) - num / float(M))
Example #40
def _histogram(x, bins):
    h = sp.histogram(x, bins=bins, density=True)
    delta_x = h[1]
    P = h[0]
    temp = P * (delta_x[1:] - delta_x[:-1])
    C = sp.cumsum(temp[-1::-1])[-1::-1]
    S = P * (delta_x[1:] - delta_x[:-1])
    bin_edges = delta_x
    bin_widths = delta_x[1:] - delta_x[:-1]
    bin_centers = (delta_x[1:] + delta_x[:-1]) / 2
    psd = namedtuple(
        'histogram',
        ('pdf', 'cdf', 'relfreq', 'bin_centers', 'bin_edges', 'bin_widths'))
    return psd(P, C, S, bin_centers, bin_edges, bin_widths)
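
Usage sketch (hypothetical samples; sp.rand is the old scipy alias for numpy.random.rand):

import scipy as sp

x = sp.rand(1000)             # hypothetical data
h = _histogram(x, bins=50)
print(h.pdf.shape, h.cdf[0])  # h.cdf[0] is ~1.0: all probability mass lies above the first edge
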
Example #41
def findDominantMostCommonColorInAnImageFile(image):
    NUM_CLUSTERS = 5
    ar = np.asarray(image)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)
    # print('finding clusters')
    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
    # print('cluster centres:\n', codes)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)  # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences
    index_max = scipy.argmax(counts)  # find most frequent
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour
Example #42
def main(img):
    image = Image.open(img)
    # image = ImageGrab.grab()
    image = image.resize((200, 200))
    NUM_CLUSTERS = 5

    # Convert image into array of values for each point.
    ar = scipy.misc.fromimage(image)

    # Reshape array of values to merge color bands.
    ar = ar.reshape(scipy.product(ar.shape[:2]), ar.shape[2])

    # Get NUM_CLUSTERS worth of centroids.
    codes, _ = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)

    # Pare centroids, removing blacks and whites and shades of really dark and really light.
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        codes = scipy.array([
            code for code in codes
            if not ((code[0] < low and code[1] < low and code[2] < low) or
                    (code[0] > hi and code[1] > hi and code[2] > hi))
        ])
        if not len(codes):
            codes = original_codes
        else:
            break

    # Assign codes (vector quantization). Each vector is compared to the centroids
    # and assigned the nearest one.
    vecs, _ = scipy.cluster.vq.vq(ar, codes)

    # Count occurrences of each clustered vector.
    counts, bins = scipy.histogram(vecs, len(codes))
    normalized_codes = codes / codes.max()

    # Show colors for each code in its hex value.
    colors = [rgb2hex(c) for c in normalized_codes]
    total = float(scipy.sum(counts))
    # top N colors as a proportion of the image
    color_dist = dict(zip(colors, (count / total for count in counts)))
    pprint(color_dist)

    # Find the most frequent color, based on the counts.
    # TODO: no need to use scipy for this.
    index_max = scipy.argmax(counts)
    peak = normalized_codes[index_max]
    color = rgb2hex(peak)
    print(color)
def dominant_color(img):
    if img is not None:
        NUM_CLUSTERS = 1
        img = img.resize((150,150))
        arr = np.asarray(img)
        shape = arr.shape
        arr = arr.reshape(scipy.product(shape[:2]), shape[2]).astype(float)
        codes, dist = scipy.cluster.vq.kmeans(arr, NUM_CLUSTERS)
        vecs, dist = scipy.cluster.vq.vq(arr, codes)
        counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences
        index_max = scipy.argmax(counts)                    # find most frequent
        peak = codes[index_max]
        return list(peak)
    else:
        return None
Example #44
def draw_from_inputted_distribution(data, dt, n_samples):
    #function for using empirical release data to
    #draw from the time-varying departure rate they exhibit
    #Input: the empirical release data (a list of times)
    #dt determines the bin size
    #n_samples is the number of samples to return
    t_max = max(data)
    t_min = min(data)
    bins = scipy.linspace(t_min, t_max, int((t_max - t_min) / dt))
    hist, bin_edges = scipy.histogram(data, bins=bins, density=True)
    cum_values = np.zeros(bin_edges.shape)
    cum_values[1:] = np.cumsum(hist * np.diff(bin_edges))
    inv_cdf = interpolate.interp1d(cum_values, bin_edges)
    r = np.random.rand(n_samples)
    return inv_cdf(r)
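
This implements inverse-CDF sampling from a histogram estimate of the empirical density; a quick usage sketch (the release times are hypothetical):

import numpy as np

data = list(np.random.default_rng(2).exponential(5.0, 500))  # hypothetical release times
samples = draw_from_inputted_distribution(data, dt=0.5, n_samples=100)
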
Example #45
    def FindRegionColour(self, region, resizeRatio=1, numClusters=5):
        reWidth, reHeight = region.size
        region = region.resize(
            (int(reWidth * resizeRatio), int(reHeight * resizeRatio)))
        ar = np.asarray(region)
        shape = ar.shape
        ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

        codes, dist = cluster.vq.kmeans(ar, numClusters)
        vecs, dist = cluster.vq.vq(ar, codes)
        counts, bins = scipy.histogram(vecs, len(codes))

        index_max = scipy.argmax(counts)
        peak = codes[index_max]
        return peak
Example #46
def est4(data, _):
    h = histogram(data, data.max(), range=(0, data.max()), normed=1)[0]
    ns = N.arange(0, data.max())

    #k, theta, x0 = est2(data)
    #k, theta, x0 = _k, _theta, _x0
    print "shape =", len(data), type(data), data.shape
    x0 = data.min()
    x0 = 10
    sd = data.std()
    print "SD", sd, sd**2, data.mean()
    x0 = data.mean() - 4 * sd
    x0 = fmin(hist_dist_P, (x0, ), args=(data.mean(), h, ns), xtol=1.0)

    return 0, 0, x0
Example #47
    def getDominantColorFromImage(im):
        NUM_CLUSTERS = 5

        im = im.resize((50, 50))  # optional, to reduce time
        ar = np.asarray(im)
        shape = ar.shape
        ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

        codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

        vecs, dist = scipy.cluster.vq.vq(ar, codes)  # assign codes
        counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

        index_max = scipy.argmax(counts)  # find most frequent
        peak = codes[index_max]
        return '#%02x%02x%02x' % tuple(int(i) for i in peak)
Example #48
def get_color(screenshot):
    NUM_CLUSTERS = 5
    im = screenshot
    ar = np.asarray(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

    vecs, dist = scipy.cluster.vq.vq(ar, codes)  # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences

    index_max = scipy.argmax(counts)  # find most frequent
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour
Example #49
 def plotcounts(self, binsize, pngfile):
     """ bin the data based on a bin size of binsize days """
     #vector_datetime2epoch = scipy.vectorize(converttime.datetime2epoch)
     #etime = vector_d2e(self['etime'])
     etime = self['etime']
     etimemin = scipy.floor(min(etime) / 86400) * 86400
     etimemax = scipy.ceil(max(etime) / 86400) * 86400
     r = [etimemin, etimemax]
     nbins = int((etimemax - etimemin) / (binsize * 86400))
     h = scipy.histogram(etime, nbins, r)
     ecount = h[0]
     ebin = h[1]
     #vector_e2d = scipy.vectorize(converttime.epoch2datetime)
     edt = vector_epoch2datetime(ebin)
     pylab.plot_date(edt, ecount, linestyle='steps-mid')
     pylab.savefig(pngfile)
Example #50
    def makeHist(cosAnglesAndWeights, numBins=2000):
        numCos_orig, binsCos = scipy.histogram(cosAnglesAndWeights[0],
                                               range=(-1., 1.),
                                               weights=cosAnglesAndWeights[1],
                                               bins=numBins)
        sumWeights = sum(cosAnglesAndWeights[1])

        numCos = []
        for i, number in enumerate(numCos_orig):
            numCos.append(
                float(number) / float(sumWeights) / float(2. / float(numBins)))
        numCos = numpy.array(numCos)

        binsCos = numpy.array(binsCos[:-1]) + (binsCos[1] - binsCos[0]) / 2.

        return numpy.array([binsCos, numCos])
Example #51
def getImageDescriptor(model, im, idx, imageName): #gets histograms
	extension = -(len(imageName.rpartition('.')[2])+1) #find how long the extension is, ie .jpg
	imageName = imageName.rpartition('/')[2][:extension] #get just the image name minus the extension and path
	sift = str('-'.join(map(str, conf.phowOpts.Sizes)))
	if not isdir(conf.imageCropPath+"histos/"):
		mkdir(conf.imageCropPath+"histos/")
	if isfile(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo'):
		with open(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo', 'rb') as fp:
			histo = load(fp)
			return [idx, histo]
	im = standardizeImage(im) #scale image to 640x480
	height, width = im.shape[:2]
	numWords = model.vocab.shape[1]
	frames, descrs = getPhowFeatures(im, conf.phowOpts) #extract features
	# quantize appearance
	if model.quantizer == 'vq':
		binsa, _ = vq(descrs.T, model.vocab.T) #slowest function - assigns words from vocab to features in descrs
	elif model.quantizer == 'kdtree':
		raise ValueError('quantizer kdtree not implemented')
	else:
		raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))
	hist = []
	#generate the histogram bins
	for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialX):
		binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
		binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
		# binsx and binsy list to what spatial bin each feature point belongs to
		if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
			print ("something went wrong")
			import pdb; pdb.set_trace()
		# combined quantization
		number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
		temp = arange(number_of_bins)
		# update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
		temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
		bin_comb = temp[binsx, binsy, binsa]
		hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True) #generate histogram
		hist.append(hist_temp)

	hist = hstack(hist)
	hist = array(hist, 'float32') / sum(hist)
	numTot = float(conf.numClasses*(conf.numTrain+conf.numTest)*(len(conf.rotation)+1))
	sys.stdout.write ("\r"+str(datetime.now())+" Histograms Calculated: "+str(((idx+1)/numTot)*100.0)[:5]+"%") #make progress percentage
	sys.stdout.flush()
	with open(conf.imageCropPath+"histos/"+imageName+'_'+sift+'.histo', 'wb') as fp:
		dump(hist, fp)
	return [idx, hist]
Example #52
def compute_MI_origemcee(seq_matQ,seq_matR,batches,ematQ,ematR,gamma,R_0):
    # preliminaries
    n_seqs = len(batches)
    n_batches = int(batches.max()) + 1 # assumes zero indexed batches
    n_bins = 1000
    
    #energies = sp.zeros(n_seqs)
    f = sp.zeros((n_batches,n_seqs))
    
    # compute energies
    # for i in range(n_seqs):
    #     energies[i] = sp.sum(seqs[:,:,i]*emat)
    # alternate way
    energies = np.zeros(n_seqs)
    for i in range(n_seqs):
        RNAP = (seq_matQ[:,:,i]*ematQ).sum()
        TF = (seq_matR[:,:,i]*ematR).sum() + R_0
        energies[i] = -RNAP + mp.log(1 + mp.exp(-TF - gamma)) - mp.log(1 + mp.exp(-TF))


    # sort energies
    inds = sp.argsort(energies)
    for i,ind in enumerate(inds):
        f[batches[ind],i] = 1.0/n_seqs # batches aren't zero indexed
        

    # bin and convolve with Gaussian
    f_binned = sp.zeros((n_batches,n_bins))
    
    for i in range(n_batches):
        f_binned[i,:] = sp.histogram(f[i,:].nonzero()[0],bins=n_bins,range=(0,n_seqs))[0]
    #f_binned = f_binned/f_binned.sum()
    f_reg = sp.ndimage.gaussian_filter1d(f_binned,0.04*n_bins,axis=1)
    f_reg = f_reg/f_reg.sum()

    # compute marginal probabilities
    p_b = sp.sum(f_reg,axis=1)
    p_s = sp.sum(f_reg,axis=0)

    # finally sum to compute the MI
    MI = 0
    for i in range(n_batches):
        for j in range(n_bins):
            if f_reg[i,j] != 0:
                MI = MI + f_reg[i,j]*sp.log2(f_reg[i,j]/(p_b[i]*p_s[j]))
    print(MI)
    return MI,f_reg
Example #53
    def append_data_peaks(self, data, force=False):
        """append bin(s) calculated from a strip of data

        with this method the data is first queried for peaks. this should
        reduce the noise/smoothness of the histogram as observed from the
        amplitude distribution of the pure signal.

        :type data: ndarray
        :param data: the data to generate the bin(s) to append from
        :type force: bool
        :param force: if True, immediately start a new bin before calculation
        """

        # check data
        data_ = sp.asanyarray(data)
        if data.ndim < 2:
            data_ = sp.atleast_2d(data_)
            if data_.shape[0] < data_.shape[1]:
                data_ = data_.T
        nsmpl, nchan = data_.shape
        if nchan != self._nchan:
            raise ValueError('data has channel count %s, expected %s' %
                             (nchan, self._nchan))

        # generate bin set
        bin_set = [0]
        if self._cur_bin_smpl != 0:
            bin_set.append(self._bin_size - self._cur_bin_smpl)
        while bin_set[-1] < nsmpl:
            bin_set.append(bin_set[-1] + self._bin_size)
        if bin_set[-1] > nsmpl:
            bin_set[-1] = nsmpl

        # process bins
        idx = 1
        while idx < len(bin_set):
            data_bin = data_[bin_set[idx - 1]:bin_set[idx], :]
            for c in xrange(self._nchan):
                self._cur_bin[c] += sp.histogram(data_bin[:, c],
                                                 bins=self._ampl_range)[0]
            self._cur_bin_smpl += data_bin.shape[0]
            if self._cur_bin_smpl == self._bin_size:
                self.append_bin(self._cur_bin)
                self._cur_bin[:] = 0
                self._cur_bin_smpl = 0
            idx += 1
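The bin_set bookkeeping above first closes the partially filled current bin, then cuts the remaining samples into whole bins and truncates the final chunk. A standalone sketch of that logic with hypothetical numbers (bin size 100, 30 samples already buffered, a strip of 250 new samples):

bin_size, cur_bin_smpl, nsmpl = 100, 30, 250
bin_set = [0]
if cur_bin_smpl != 0:
    bin_set.append(bin_size - cur_bin_smpl)  # chunk that completes the open bin
while bin_set[-1] < nsmpl:
    bin_set.append(bin_set[-1] + bin_size)   # whole bins
if bin_set[-1] > nsmpl:
    bin_set[-1] = nsmpl                      # truncate the final, partial chunk
print(bin_set)  # [0, 70, 170, 250]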
Example #54
def ejercicio_3():
    print("Ingrese numeros, termine con quit: ")
    my_list = []

    while True:
        inp = input()
        if inp == "quit":
            break
        my_list.append(int(inp))  # store as int so sorting and binning are numeric
    print(my_list)
    
    my_list.sort()

    hist, bin_edges = scipy.histogram(my_list, bins=range(my_list[-1] + 2))
    plt.bar(bin_edges[:-1], hist, width=1)
    plt.xlim(min(bin_edges), max(bin_edges))
    plt.show()
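A non-interactive sketch of what the histogram call produces, using a hypothetical input sequence in place of the typed numbers (and the same scipy.histogram alias the example relies on):

import scipy

my_list = [1, 2, 2, 5]  # already sorted, so my_list[-1] is the maximum
hist, bin_edges = scipy.histogram(my_list, bins=range(my_list[-1] + 2))
# bin_edges spans 0..6; hist == [0, 1, 2, 0, 0, 1],
# i.e. one integer-wide bin per value from 0 up to the maximum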
Example #55
def getImageDescriptor(model, im):
    im = standardizeImage(im)
    height, width = im.shape[:2]
    numWords = model.vocab.shape[1]

    frames, descrs = getPhowFeatures(im, conf.phowOpts)
    # quantize appearance
    if model.quantizer == 'vq':
        binsa, _ = vq(descrs.T, model.vocab.T)
    elif model.quantizer == 'kdtree':
        raise ValueError('quantizer kdtree not implemented')
    else:
        raise ValueError('quantizer {0} not known or understood'.format(
            model.quantizer))

    hist = []
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX,
                                                  model.numSpatialY):
        binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
        binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy record which spatial bin each feature point belongs to
        if (numpy.any(distsx < 0)) | (numpy.any(
                distsx > (width / n_spatial_bins_x + 0.5))):
            print('spatial binning in x went wrong')
            import pdb
            pdb.set_trace()
        if (numpy.any(distsy < 0)) | (numpy.any(
                distsy > (height / n_spatial_bins_y + 0.5))):
            print('spatial binning in y went wrong')
            import pdb
            pdb.set_trace()

        # combined quantization
        number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
        temp = arange(number_of_bins)
        # TODO: replace this lookup table with numpy.ravel_multi_index
        # (see http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind)
        temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
        bin_comb = temp[binsx, binsy, binsa]
        hist_temp, _ = histogram(bin_comb,
                                 bins=range(number_of_bins + 1),
                                 density=True)
        hist.append(hist_temp)

    hist = hstack(hist)
    hist = array(hist, 'float32') / sum(hist)
    return hist
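The temp array emulates MATLAB's sub2ind by fancy-indexing into a reshaped arange. The Stack Overflow link in the comment points at numpy.ravel_multi_index, which computes the same linear indices without materializing the lookup table; a drop-in sketch, assuming binsx, binsy and binsa are equal-length integer arrays as above:

# equivalent to temp[binsx, binsy, binsa] for the C-ordered temp array
bin_comb = numpy.ravel_multi_index(
    (binsx, binsy, binsa),
    (n_spatial_bins_x, n_spatial_bins_y, numWords))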
Example #56
def createPattern(image):
    colorArray = []
    ar = np.asarray(image)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(ar, 5)  # cluster into the 5 dominant colors
    vecs, dist = scipy.cluster.vq.vq(ar, codes)
    counts, bins = scipy.histogram(vecs, len(codes))
    index_max = scipy.argmax(
        counts)  # find the index of the most frequently used color
    peak = codes[index_max]

    for i in range(0, 5):
        colorArray.append(codes[i].astype("uint8").tolist())
        colorArray.append(peak.astype("uint8").tolist())  # the most dominant color
    return colorArray
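A hypothetical usage sketch ('photo.png' is a placeholder path):

from PIL import Image

pattern = createPattern(Image.open('photo.png').convert('RGB'))
# pattern holds ten [R, G, B] lists: the five cluster colors,
# each followed by the single most dominant color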
Example #57
def kde(data, N=None, MIN=None, MAX=None):

    # Parameters to set up the mesh on which to calculate
    N = 2**12 if N is None else int(2**scipy.ceil(scipy.log2(N)))
    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        Range = maximum - minimum
        MIN = minimum - Range / 10 if MIN is None else MIN
        MAX = maximum + Range / 10 if MAX is None else MAX

    # Range of the data
    R = MAX - MIN

    # Histogram the data to get a crude first approximation of the density
    M = len(data)
    DataHist, bins = scipy.histogram(data, bins=N, range=(MIN, MAX))
    DataHist = DataHist / M
    DCTData = scipy.fftpack.dct(DataHist, norm=None)

    I = [iN * iN for iN in range(1, N)]
    SqDCTData = (DCTData[1:] / 2)**2

    # The fixed point calculation finds the bandwidth = t_star
    guess = 0.1
    try:
        t_star = scipy.optimize.brentq(fixed_point,
                                       0,
                                       guess,
                                       args=(M, I, SqDCTData))
    except ValueError:
        print('Bandwidth estimation failed: brentq could not bracket a fixed point')
        return None

    # Smooth the DCTransformed data using t_star
    SmDCTData = DCTData * scipy.exp(
        -scipy.arange(N)**2 * scipy.pi**2 * t_star / 2)
    # Inverse DCT to get density
    density = scipy.fftpack.idct(SmDCTData, norm=None) * N / R
    mesh = [(bins[i] + bins[i + 1]) / 2 for i in range(N)]
    bandwidth = scipy.sqrt(t_star) * R

    density = density / scipy.trapz(density, mesh)
    cdf = np.cumsum(density) * (mesh[1] - mesh[0])

    return bandwidth, mesh, density, cdf
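This appears to be the DCT-based diffusion KDE of Botev, Grotowski and Kroese (2010); fixed_point is the fixed-point equation from that method and must be defined elsewhere in the module. A usage sketch under that assumption:

import numpy as np

data = np.random.normal(loc=0.0, scale=1.0, size=1000)
result = kde(data)
if result is not None:  # None means the bandwidth search failed
    bandwidth, mesh, density, cdf = result
    print('estimated bandwidth:', bandwidth)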
Example #58
    def compute_scc_histogram(self):
        '''
        Computes a histogram of abs correlation coefficients from the pickled R(a,b) matrix.
        '''
        if not os.path.exists(self.rabDirectory):
            raise RAICARRabException
        if not os.path.exists(os.path.join(self.rabDirectory, 'rabmatrix.db')):
            raise RAICARRabException
        with open(os.path.join(self.rabDirectory, 'rabmatrix.db'), 'rb') as rabPtr:
            RabDict = pickle.load(rabPtr)
        rPDF = dict.fromkeys(['bin edges', 'counts', 'bar width'])
        rPDF['bin edges'] = np.linspace(0, 1.0, 101)
        rPDF['counts'], _ = histogram(
            np.hstack([v.flatten() for v in RabDict.values()]),
            bins=rPDF['bin edges'])
        rPDF['bin edges'] = rPDF['bin edges'][0:-1]
        rPDF['bar width'] = 0.01
        return rPDF
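Outside the class, the same counting can be reproduced on a toy R(a,b) dictionary (a minimal sketch with hypothetical random matrices standing in for the pickled blocks):

import numpy as np

RabDict = {k: np.random.rand(10, 10) for k in range(3)}  # toy |R(a,b)| blocks in [0, 1)
edges = np.linspace(0, 1.0, 101)
counts, _ = np.histogram(np.hstack([v.flatten() for v in RabDict.values()]),
                         bins=edges)
# counts[i] is the number of values falling in [edges[i], edges[i+1])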
Example #59
def dominant_color(img, nb_clusters=5, need_resize=False):
    if need_resize:
        img = img.resize((150, 150))

    img_arr = np.asarray(img)
    shape = img_arr.shape
    img_arr = img_arr.reshape(scipy.product(shape[:2]), shape[2]).astype(float)

    codes, dist = scipy.cluster.vq.kmeans(img_arr, nb_clusters)

    vecs, dist = scipy.cluster.vq.vq(img_arr, codes)
    counts, bins = scipy.histogram(vecs, len(codes))

    index_max = scipy.argmax(counts)
    peak = codes[index_max]
    colour = binascii.hexlify(bytearray(int(c) for c in peak)).decode('ascii')
    return colour
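A hypothetical usage sketch ('image.jpg' is a placeholder path):

from PIL import Image

img = Image.open('image.jpg').convert('RGB')
print(dominant_color(img, nb_clusters=5, need_resize=True))  # e.g. '3a6f9c'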
Example #60
def GetnewColors():
    NUM_CLUSTERS = 7
    im = Image.open('static/images/image.jpg')
    im = im.resize((150, 150))

    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), NUM_CLUSTERS)

    vecs, dist = scipy.cluster.vq.vq(ar, codes)  # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))  # count occurrences
    x = getcol(codes, NUM_CLUSTERS)
    return x
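Note that scipy.misc.fromimage was deprecated and later removed from SciPy; on current versions the same pixel array can be obtained directly with NumPy. A drop-in sketch:

import numpy as np

ar = np.asarray(im)  # replacement for scipy.misc.fromimage(im) on modern SciPy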