Example #1
def getDistrib(data, nbins=0, stride=0, bins=[], norm=False):
    import numpy as np
    from scipy.stats import histogram2  # removed from modern SciPy; needs an old release
    if nbins>0:
        stride = (max(data)-min(data))/nbins
        bins = np.arange(min(data)-stride, max(data)+stride, stride)
        dist = histogram2(data, bins)
        if norm:
            total = float(sum(dist))
            dist = [d / total for d in dist]
        return dist, bins, stride
    elif stride>0:
        bins = np.arange(min(data)-stride, max(data)+stride, stride)
        dist = histogram2(data, bins)
        if norm:
            total = float(sum(dist))
            dist = [d / total for d in dist]
        return dist, bins
    elif len(bins)>0:
        dist = histogram2(data, bins)
        if norm:
            total = float(sum(dist))
            dist = [d / total for d in dist]
        return dist
    else:
        nbins = 10
        stride = (max(data)-min(data))/nbins
        bins = np.arange(min(data)-stride, max(data)+stride, stride)
        dist = histogram2(data, bins)
        if norm:
            total = float(sum(dist))
            dist = [d / total for d in dist]
        return dist, bins
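A minimal usage sketch for getDistrib (hypothetical data; assumes an old SciPy release that still ships stats.histogram2, which modern SciPy has removed):

import numpy as np

# Hypothetical call: 20 bins, normalized so the returned counts sum to ~1.0.
data = np.random.normal(0.0, 1.0, 1000)
dist, bins, stride = getDistrib(data, nbins=20, norm=True)
print sum(dist)  # ~1.0 when norm=True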
Example #2
    def analyzeList(self, mylist, myrange=(0, 1, 1), filename=None):
        """
		histogram2(a, bins) -- Compute histogram of a using divisions in bins

		Description:
		   Count the number of times values from array a fall into
		   numerical ranges defined by bins.  Range x is given by
		   bins[x] <= range_x < bins[x+1] where x = 0,N and N is the
		   length of the bins array.  The last range is given by
		   bins[N] <= range_N < infinity.  Values less than bins[0] are
		   not included in the histogram.
		Arguments:
		   a -- 1D array.  The array of values to be divided into bins
		   bins -- 1D array.  Defines the ranges of values to use during
		         histogramming.
		Returns:
		   1D array.  Each value represents the occurrences for a given
		   bin (range) of values.
		"""
        #hist,bmin,minw,err = stats.histogram(mynumpy, numbins=36)
        #print hist,bmin,minw,err,"\n"
        if len(mylist) < 2:
            apDisplay.printWarning("Did not write file, not enough rows (" +
                                   str(filename) + ")")
            return

        if myrange[0] is None:
            mymin = float(math.floor(ndimage.minimum(mylist)))
        else:
            mymin = float(myrange[0])
        if myrange[1] is None:
            mymax = float(math.ceil(ndimage.maximum(mylist)))
        else:
            mymax = float(myrange[1])
        mystep = float(myrange[2])

        mynumpy = numpy.asarray(mylist, dtype=numpy.float32)
        print "range=", round(ndimage.minimum(mynumpy),
                              2), " <> ", round(ndimage.maximum(mynumpy), 2)
        print " mean=", round(ndimage.mean(mynumpy), 2), " +- ", round(
            ndimage.standard_deviation(mynumpy), 2)

        #histogram
        bins = []
        mybin = mymin
        while mybin <= mymax:
            bins.append(mybin)
            mybin += mystep
        bins = numpy.asarray(bins, dtype=numpy.float32)
        apDisplay.printMsg("Creating histogram with " + str(len(bins)) +
                           " bins")
        hist = stats.histogram2(mynumpy, bins=bins)
        #print bins
        #print hist
        if filename is not None:
            f = open(filename, "w")
            for i in range(len(bins)):
                out = ("%3.4f %d\n" % (bins[i] + mystep / 2.0, hist[i]))
                f.write(out)
            f.write("&\n")
            f.close()
Example #3
def PdfFromTrace(trace, intBounds):
    """
    Returns the empirical density function of a trace.
    
    Parameters
    ----------
    trace : vector of doubles
        The trace data
    intBounds : vector of doubles
        The array of interval boundaries. The pdf is the
        number of samples falling into an interval divided
        by the interval length.
    
    Returns
    -------
    x : vector of doubles
        The center of the intervals (the points where the 
        empirical pdf is calculated)
    y : vector of doubles
        The values of the empirical pdf at the given points
    """
    hist = stats.histogram2 (trace, intBounds)
    intlens = intBounds[1:] - intBounds[0:-1]
    y = hist[0:-1] / intlens / len(trace)
    x = (intBounds[1:] + intBounds[0:-1]) / 2.0
    return (x,y)
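A quick sanity check for PdfFromTrace (hypothetical data; intBounds must be a NumPy array for the vector arithmetic above): the empirical pdf should integrate to roughly the fraction of samples inside the bounds.

import numpy as np

trace = np.random.exponential(1.0, 10000)
bounds = np.linspace(0.0, 5.0, 26)
x, y = PdfFromTrace(trace, bounds)
print np.sum(y * np.diff(bounds))  # ~ the probability mass below 5.0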
Example #4
def PdfFromTrace(trace, intBounds):
    """
    Returns the empirical density function of a trace.
    
    Parameters
    ----------
    trace : vector of doubles
        The trace data
    intBounds : vector of doubles
        The array of interval boundaries. The pdf is the
        number of samples falling into an interval divided
        by the interval length.
    
    Returns
    -------
    x : vector of doubles
        The center of the intervals (the points where the 
        empirical pdf is calculated)
    y : vector of doubles
        The values of the empirical pdf at the given points
    """
    hist = stats.histogram2(trace, intBounds)
    intlens = intBounds[1:] - intBounds[0:-1]
    y = hist[0:-1] / intlens / len(trace)
    x = (intBounds[1:] + intBounds[0:-1]) / 2.0
    return (x, y)
Example #5
def pdf_velocity(particle):
  import numpy as np
  import pylab as pl
  from scipy import stats  # old SciPy needed for stats.histogram2
  nbins=10

  uX = np.zeros( (np.size(particle[:,0]),np.size(particle[0,:]) ))
  uY = np.zeros( (np.size(particle[:,0]),np.size(particle[0,:]) ))
  uZ = np.zeros( (np.size(particle[:,0]),np.size(particle[0,:]) ))

  Tmax=np.size(particle[:,0])
  NParticles=np.size(particle[0,:])
  NParticles = 3  # hard-coded override: only the first 3 particles are used
  print "T,N",Tmax,NParticles

  jrange = range(Tmax)
  for j in jrange:
    for i in range(NParticles):  
      uX[j,i] = particle[j][i][3]     
      uY[j,i] = particle[j][i][4]
      uZ[j,i] = particle[j][i][5]
    
  #print uX[:,1].sort()
  # in case the hist must be stored
  h_tmp=np.zeros(nbins)
  h_uX =np.zeros(nbins)
  binX=pl.linspace(uX[:,NParticles-1].min(),uX[:,NParticles-1].max(),nbins)
  print binX
  pl.figure(1)
  pl.subplot(121)
  for npart in range(NParticles):
    h_tmp=stats.histogram2(uX[:,npart],binX)
    pl.plot(binX,h_tmp)
    h_uX+=h_tmp
  h_uX /= NParticles
  return (binX,h_uX)
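On current SciPy the same per-component velocity histogram can be sketched with numpy.histogram instead of the removed stats.histogram2 (stand-in data; density=True normalizes to an empirical pdf):

import numpy as np

ux = np.random.randn(5000)  # stand-in velocity samples
counts, edges = np.histogram(ux, bins=10, density=True)
centers = 0.5 * (edges[:-1] + edges[1:])
print zip(centers, counts)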
Example #6
	def analyzeList(self, mylist, myrange=(0,1,1), filename=None):
		"""
		histogram2(a, bins) -- Compute histogram of a using divisions in bins

		Description:
		   Count the number of times values from array a fall into
		   numerical ranges defined by bins.  Range x is given by
		   bins[x] <= range_x < bins[x+1] where x = 0,N and N is the
		   length of the bins array.  The last range is given by
		   bins[N] <= range_N < infinity.  Values less than bins[0] are
		   not included in the histogram.
		Arguments:
		   a -- 1D array.  The array of values to be divided into bins
		   bins -- 1D array.  Defines the ranges of values to use during
		         histogramming.
		Returns:
		   1D array.  Each value represents the occurrences for a given
		   bin (range) of values.
		"""
		#hist,bmin,minw,err = stats.histogram(mynumpy, numbins=36)
		#print hist,bmin,minw,err,"\n"
		if len(mylist) < 2:
			apDisplay.printWarning("Did not write file, not enough rows ("+str(filename)+")")
			return

		if myrange[0] is None:
			mymin = float(math.floor(ndimage.minimum(mylist)))
		else:
			mymin = float(myrange[0])
		if myrange[1] is None:
			mymax = float(math.ceil(ndimage.maximum(mylist)))
		else:
			mymax = float(myrange[1])
		mystep = float(myrange[2])

		mynumpy = numpy.asarray(mylist, dtype=numpy.float32)
		print "range=",round(ndimage.minimum(mynumpy),2)," <> ",round(ndimage.maximum(mynumpy),2)
		print " mean=",round(ndimage.mean(mynumpy),2)," +- ",round(ndimage.standard_deviation(mynumpy),2)

		#histogram
		bins = []
		mybin = mymin
		while mybin <= mymax:
			bins.append(mybin)
			mybin += mystep
		bins = numpy.asarray(bins, dtype=numpy.float32)
		apDisplay.printMsg("Creating histogram with "+str(len(bins))+" bins")
		hist = stats.histogram2(mynumpy, bins=bins)
		#print bins
		#print hist
		if filename is not None:
			f = open(filename, "w")
			for i in range(len(bins)):
				out = ("%3.4f %d\n" % (bins[i] + mystep/2.0, hist[i]) )
				f.write(out)
			f.write("&\n")
			f.close()
Example #7
    def getSpectrum(self, eVChannel=1.0, limits=None):
        if limits is not None:
            startEnergy_eV = limits[0]
            endEnergy_eV = limits[1]
        else:
            startEnergy_eV = self.header["startEv"]
            endEnergy_eV = self.header["endEv"]

        energies_eV = numpy.arange(startEnergy_eV, endEnergy_eV + eVChannel,
                                   eVChannel)
        data = [energy_eV for energy_eV, dummy_time in self.cspData]
        intensities = stats.histogram2(data, energies_eV)
        assert len(energies_eV) == len(intensities)
        #print len(self.cspData), sum(intensities)
        return energies_eV[:-1], intensities[:-1]
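The same spectrum binning can be written against numpy.histogram; a sketch with made-up (energy_eV, time) pairs standing in for self.cspData:

import numpy

cspData = [(1205.0, 0.01), (1210.5, 0.02), (1220.2, 0.03)]  # made-up events
eVChannel = 10.0
energies = numpy.array([e for e, _t in cspData])
channels = numpy.arange(1200.0, 1230.0 + eVChannel, eVChannel)
counts, _edges = numpy.histogram(energies, bins=channels)
# numpy.histogram returns len(channels) - 1 counts with a closed last bin,
# so the [:-1] trimming used above is not needed here.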
Example #8
    def generate_voi_histogram(self, poi, width):
        print 'poi',poi,'width',width
        
        # indices of points in volume of interest (poi)
        pts_indices = self.get_voi_pts_indices(poi, width)
        self.voi_pts_indices = pts_indices
        pts = np.asarray(self.processor.pts3d_bound)
        pts = pts[:,pts_indices] #truncate points to volume of interest
        self.voi_pts = pts
        #mlab.points3d(pts[0,:],pts[1,:],pts[2,:], mode='point')
        #mlab.show() 
        
        #go from 0 to 2m, create histogram with 80 bins = bin of 2.5cm (=height-slice)
        zmin = 0.  # renamed from min/max to avoid shadowing the builtins
        zmax = 2.
        self.voi_bincount = 80
        self.voi_interval_size = zmax - zmin
        bins = np.asarray(range(self.voi_bincount)) * self.voi_interval_size/float(self.voi_bincount)
        #print 'bins',bins
        hist = stats.histogram2(pts[2],bins) / float(len(pts[2]))
        #print 'zhist',hist
        #print zip(bins, hist)
        self.z_hist = hist
        self.z_hist_bins = bins
        
        slices = self.get_voi_slice_indices()
        self.z_hist_slices_indices = slices
        
        #precalculate spread values:
        self.z_hist_spread = []
        for indices in self.z_hist_slices_indices:
            a = self.processor.pts3d_bound[:,indices]
            # ev12 gives an indication about how far points are spread out in a specific height-slice
            u, ev12 = gaussian_curvature.spread(a)
            self.z_hist_spread += [(ev12[0], ev12[1])]
        
        #create h,s,i histograms for each slice:
        pts_h = []
        pts_s = []
        #print self.processor.pts3d_bound
        #TODO: does this use the volume of interest? should it???
        n,m = np.shape(np.asarray(self.processor.pts3d_bound))
        #print 'm',m,'len(self.processor.pts3d_bound[2,:].A1)',len(self.processor.pts3d_bound[2,:].A1)
        for index in range(m):
            pts_h.append(float(self.imNP_h[self.processor.map2d[1,index],self.processor.map2d[0,index]]))
        for index in range(m):
            pts_s.append(float(self.imNP_s[self.processor.map2d[1,index],self.processor.map2d[0,index]]))
        pts_i = np.asarray(self.processor.intensities_bound)
        #print 'ptsi',pts_i
        if np.max(pts_i) > 0:
            self.intensity_normalization_factor = 1.0 / float(np.max(pts_i)) * 255
        else:
            self.intensity_normalization_factor = 1.
        #print 'self.intensity_normalization_factor', self.intensity_normalization_factor
        #print pts_i
        pts_i *= self.intensity_normalization_factor
        pts_h = np.asarray(pts_h)
        pts_s = np.asarray(pts_s)
        self.z_hist_h_hists = []
        self.z_hist_s_hists = []
        self.z_hist_i_hists = []
        
        #normalize by maximum slice:
        max_count = 0
        max_count_index = 0
        for count_idx, indices in enumerate(slices):
            n = np.shape(indices)
            if n[0] > max_count:
                max_count = n[0]
                max_count_index = count_idx
        slice_height = (self.voi_interval_size / float(self.voi_bincount))
        self.z_hist_height_max = slice_height * (max_count_index + 0.5)
        #print 'max_count', max_count,'index',max_count_index, 'height in max bin', self.z_hist_height_max

        
        for indices in slices:
            pts_h_slice = pts_h[indices]
            pts_s_slice = pts_s[indices]
            pts_i_slice = pts_i[indices]
            self.hsi_hist_bincount = 5
            bins = np.asarray(range(0,self.hsi_hist_bincount))*float(255.0/float(self.hsi_hist_bincount))
            #print bins
            #todo: smooth with kernel fct
            count = float(len(pts_h_slice))
            if count == 0: 
                count = 1
            hist_h = stats.histogram2(pts_h_slice,bins) / count
            self.z_hist_h_hists.append(hist_h)
            hist_s = stats.histogram2(pts_s_slice,bins) / count
            self.z_hist_s_hists.append(hist_s)
            hist_i = stats.histogram2(pts_i_slice,bins) / count
            #print 'hist_i', hist_i, pts_i_slice, bins, pts_i
            self.z_hist_i_hists.append(hist_i)
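The z-histogram built above is just counts per 2.5 cm height slice divided by the total point count. A self-contained sketch of that normalization with numpy.histogram (made-up points):

import numpy as np

z = np.random.uniform(0.0, 2.0, 1000)  # made-up point heights
edges = np.arange(81) * (2.0 / 80.0)   # 80 slices of 2.5 cm each
counts, _ = np.histogram(z, bins=edges)
frac = counts / float(len(z))          # fraction of points per slice
print frac.sum()                       # ~1.0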
Example #9
    def get_featurevector(self, index, count, pts=None):
        if pts is None:  # "pts == None" would compare elementwise on arrays
            pts = self.processor.pts3d_bound

        #print 'i',index,'c', count
        fv = [] 
        
        indices = np.asarray(self.kdtree_queried_indices[count])
        invalid_value = np.shape(pts)[1]
        #print indices
        #print 'iv',invalid_value
        indices = indices[indices != invalid_value]
        
        #print ut.getTime(), indices
        #print ut.getTime(), 'number of pts', len(indices)
        a = pts[:,indices]
        view = processor.rotate_to_plane(self.processor.scan_dataset.ground_plane_normal, np.matrix([-1,0,0.]).T)
        normal, eigenvalues = gaussian_curvature.gaussian_curvature(a,view)
        #eigenvalues = eigenvalues / np.square(r)
        #fv += [normal[0,0],0,normal[2,0]]
        #fv += normal.T.A[0].tolist()
        #fv += eigenvalues.tolist()
        #print np.asarray(pts[:,index].T[0])[0]
       # print 'pt',np.asarray(pts[:,index].T[0])
        point = pts[:,index]
        
        ev1, ev2 = self.get_voi_histogram_spread(point)
        #z_max_height_diff = pts[2,index] - self.get_voi_maxcount_height()
        #fv += [self.get_voi_histogram_value(point),z_max_height_diff,normal[0,0],normal[1,0],normal[2,0], ev1, ev2]
        fv += [self.get_voi_histogram_value(point),normal[0,0],normal[1,0],normal[2,0], ev1, ev2]
        
        h = self.imNP_h[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        s = self.imNP_s[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        i = self.processor.intensities_bound[index]
        hsi = self.get_voi_hsi_histogram_values(point,h,s,i)
        fv += [hsi[0],hsi[1],hsi[2]]
        #print np.shape(self.imNP_tex1)
        #print np.shape(self.map2d)
        tex1 = self.imNP_tex1[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        tex2 = self.imNP_tex2[self.processor.map2d[1,index],self.processor.map2d[0,index]]
        fv += [tex1, tex2]
        #print tex1, tex2
        

        #color histograms:
        colors_h = []
        colors_s = []
        colors_v = []
        for idx in indices:
            colors_h.append(float(self.imNP_h[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
            colors_s.append(float(self.imNP_s[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
            colors_v.append(float(self.imNP_v[self.processor.map2d[1,idx],self.processor.map2d[0,idx]]))
        
        color_hist = stats.histogram2(np.array(colors_h), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_s), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_v), [0,51,102,153,204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        
        #intensities
        intensities = self.processor.intensities_bound[indices]
        intensities = np.asarray(intensities)
        #map to 0-255-range:   TODO: perhaps do some nonlinear transformation here? 
        intensities = intensities / 10000.0 * 255  # float divisor avoids Py2 integer division
        intensity_hist = stats.histogram2(intensities, [0,51,102,153,204])
        intensity_hist = intensity_hist / float(np.sum(intensity_hist))
        intensity_hist = list(intensity_hist)
        fv += intensity_hist    
    
        #current colors:
        fv += [float(self.imNP_h[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]
        fv += [float(self.imNP_s[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]
        fv += [float(self.imNP_v[self.processor.map2d[1,index],self.processor.map2d[0,index]]) / 255.0]  
        
        #current intensity value (scaled)
        intensity = self.processor.intensities_bound[index]
        #scale:
        intensity = intensity / 15000.0
        intensity = [intensity]
        fv += intensity  

        
        if self.debug_before_first_featurevector:
            self.debug_before_first_featurevector = False
            print ut.getTime(), 'get_featurevector: Choosing not to print Feature Vector Sample'
            #print ut.getTime(), 'feature vector sample(gaussian histograms):', fv
        return fv
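The color-histogram features above rely on histogram2's open-ended last bin: the edges [0, 51, 102, 153, 204] split the 0-255 range into five bins, with everything >= 204 falling into the last one. A sketch of the same normalized 5-bin feature using numpy.histogram (made-up values; the extra top edge closes the last bin at 256):

import numpy as np

vals = np.random.randint(0, 256, 200).astype(float)  # made-up channel values
edges = [0, 51, 102, 153, 204, 256]
hist, _ = np.histogram(vals, bins=edges)
feature = list(hist / float(hist.sum()))  # five values summing to 1.0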
Example #10
    def generate_voi_histogram(self, poi, width):
        print 'poi', poi, 'width', width

        # indices of points in volume of interest (poi)
        pts_indices = self.get_voi_pts_indices(poi, width)
        self.voi_pts_indices = pts_indices
        pts = np.asarray(self.processor.pts3d_bound)
        pts = pts[:, pts_indices]  #truncate points to volume of interest
        self.voi_pts = pts
        #mlab.points3d(pts[0,:],pts[1,:],pts[2,:], mode='point')
        #mlab.show()

        #go from 0 to 2m, create histogram with 80 bins = bin of 2.5cm (=height-slice)
        zmin = 0.  # renamed from min/max to avoid shadowing the builtins
        zmax = 2.
        self.voi_bincount = 80
        self.voi_interval_size = zmax - zmin
        bins = np.asarray(range(
            self.voi_bincount)) * self.voi_interval_size / float(
                self.voi_bincount)
        #print 'bins',bins
        hist = stats.histogram2(pts[2], bins) / float(len(pts[2]))
        #print 'zhist',hist
        #print zip(bins, hist)
        self.z_hist = hist
        self.z_hist_bins = bins

        slices = self.get_voi_slice_indices()
        self.z_hist_slices_indices = slices

        #precalculate spread values:
        self.z_hist_spread = []
        for indices in self.z_hist_slices_indices:
            a = self.processor.pts3d_bound[:, indices]
            # ev12 gives an indication about how far points are spread out in a specific height-slice
            u, ev12 = gaussian_curvature.spread(a)
            self.z_hist_spread += [(ev12[0], ev12[1])]

        #create h,s,i histograms for each slice:
        pts_h = []
        pts_s = []
        #print self.processor.pts3d_bound
        #TODO: does this use the volume of interest? should it???
        n, m = np.shape(np.asarray(self.processor.pts3d_bound))
        #print 'm',m,'len(self.processor.pts3d_bound[2,:].A1)',len(self.processor.pts3d_bound[2,:].A1)
        for index in range(m):
            pts_h.append(
                float(self.imNP_h[self.processor.map2d[1, index],
                                  self.processor.map2d[0, index]]))
        for index in range(m):
            pts_s.append(
                float(self.imNP_s[self.processor.map2d[1, index],
                                  self.processor.map2d[0, index]]))
        pts_i = np.asarray(self.processor.intensities_bound)
        #print 'ptsi',pts_i
        if np.max(pts_i) > 0:
            self.intensity_normalization_factor = 1.0 / float(
                np.max(pts_i)) * 255
        else:
            self.intensity_normalization_factor = 1.
        #print 'self.intensity_normalization_factor', self.intensity_normalization_factor
        #print pts_i
        pts_i *= self.intensity_normalization_factor
        pts_h = np.asarray(pts_h)
        pts_s = np.asarray(pts_s)
        self.z_hist_h_hists = []
        self.z_hist_s_hists = []
        self.z_hist_i_hists = []

        #normalize by maximum slice:
        max_count = 0
        max_count_index = 0
        for count_idx, indices in enumerate(slices):
            n = np.shape(indices)
            if n[0] > max_count:
                max_count = n[0]
                max_count_index = count_idx
        slice_height = (self.voi_interval_size / float(self.voi_bincount))
        self.z_hist_height_max = slice_height * (max_count_index + 0.5)
        #print 'max_count', max_count,'index',max_count_index, 'height in max bin', self.z_hist_height_max

        for indices in slices:
            pts_h_slice = pts_h[indices]
            pts_s_slice = pts_s[indices]
            pts_i_slice = pts_i[indices]
            self.hsi_hist_bincount = 5
            bins = np.asarray(range(0, self.hsi_hist_bincount)) * float(
                255.0 / float(self.hsi_hist_bincount))
            #print bins
            #todo: smooth with kernel fct
            count = float(len(pts_h_slice))
            if count == 0:
                count = 1
            hist_h = stats.histogram2(pts_h_slice, bins) / count
            self.z_hist_h_hists.append(hist_h)
            hist_s = stats.histogram2(pts_s_slice, bins) / count
            self.z_hist_s_hists.append(hist_s)
            hist_i = stats.histogram2(pts_i_slice, bins) / count
            #print 'hist_i', hist_i, pts_i_slice, bins, pts_i
            self.z_hist_i_hists.append(hist_i)
Example #11
    def get_featurevector(self, index, count, pts=None):
        if pts is None:  # "pts == None" would compare elementwise on arrays
            pts = self.processor.pts3d_bound

        #print 'i',index,'c', count
        fv = []

        indices = np.asarray(self.kdtree_queried_indices[count])
        invalid_value = np.shape(pts)[1]
        #print indices
        #print 'iv',invalid_value
        indices = indices[indices != invalid_value]

        #print ut.getTime(), indices
        #print ut.getTime(), 'number of pts', len(indices)
        a = pts[:, indices]
        view = processor.rotate_to_plane(
            self.processor.scan_dataset.ground_plane_normal,
            np.matrix([-1, 0, 0.]).T)
        normal, eigenvalues = gaussian_curvature.gaussian_curvature(a, view)
        #eigenvalues = eigenvalues / np.square(r)
        #fv += [normal[0,0],0,normal[2,0]]
        #fv += normal.T.A[0].tolist()
        #fv += eigenvalues.tolist()
        #print np.asarray(pts[:,index].T[0])[0]
        # print 'pt',np.asarray(pts[:,index].T[0])
        point = pts[:, index]

        ev1, ev2 = self.get_voi_histogram_spread(point)
        #z_max_height_diff = pts[2,index] - self.get_voi_maxcount_height()
        #fv += [self.get_voi_histogram_value(point),z_max_height_diff,normal[0,0],normal[1,0],normal[2,0], ev1, ev2]
        fv += [
            self.get_voi_histogram_value(point), normal[0, 0], normal[1, 0],
            normal[2, 0], ev1, ev2
        ]

        h = self.imNP_h[self.processor.map2d[1, index],
                        self.processor.map2d[0, index]]
        s = self.imNP_s[self.processor.map2d[1, index],
                        self.processor.map2d[0, index]]
        i = self.processor.intensities_bound[index]
        hsi = self.get_voi_hsi_histogram_values(point, h, s, i)
        fv += [hsi[0], hsi[1], hsi[2]]
        #print np.shape(self.imNP_tex1)
        #print np.shape(self.map2d)
        tex1 = self.imNP_tex1[self.processor.map2d[1, index],
                              self.processor.map2d[0, index]]
        tex2 = self.imNP_tex2[self.processor.map2d[1, index],
                              self.processor.map2d[0, index]]
        fv += [tex1, tex2]
        #print tex1, tex2

        #color histograms:
        colors_h = []
        colors_s = []
        colors_v = []
        for idx in indices:
            colors_h.append(
                float(self.imNP_h[self.processor.map2d[1, idx],
                                  self.processor.map2d[0, idx]]))
            colors_s.append(
                float(self.imNP_s[self.processor.map2d[1, idx],
                                  self.processor.map2d[0, idx]]))
            colors_v.append(
                float(self.imNP_v[self.processor.map2d[1, idx],
                                  self.processor.map2d[0, idx]]))

        color_hist = stats.histogram2(np.array(colors_h),
                                      [0, 51, 102, 153, 204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_s),
                                      [0, 51, 102, 153, 204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist
        color_hist = stats.histogram2(np.array(colors_v),
                                      [0, 51, 102, 153, 204])
        color_hist = color_hist / float(np.sum(color_hist))
        color_hist = list(color_hist)
        fv += color_hist

        #intensities
        intensities = self.processor.intensities_bound[indices]
        intensities = np.asarray(intensities)
        #map to 0-255-range:   TODO: perhaps do some nonlinear transformation here?
        intensities = intensities / 10000.0 * 255  # float divisor avoids Py2 integer division
        intensity_hist = stats.histogram2(intensities, [0, 51, 102, 153, 204])
        intensity_hist = intensity_hist / float(np.sum(intensity_hist))
        intensity_hist = list(intensity_hist)
        fv += intensity_hist

        #current colors:
        fv += [
            float(self.imNP_h[self.processor.map2d[1, index],
                              self.processor.map2d[0, index]]) / 255.0
        ]
        fv += [
            float(self.imNP_s[self.processor.map2d[1, index],
                              self.processor.map2d[0, index]]) / 255.0
        ]
        fv += [
            float(self.imNP_v[self.processor.map2d[1, index],
                              self.processor.map2d[0, index]]) / 255.0
        ]

        #current intensity value (scaled)
        intensity = self.processor.intensities_bound[index]
        #scale:
        intensity = intensity / 15000.0
        intensity = [intensity]
        fv += intensity

        if self.debug_before_first_featurevector:
            self.debug_before_first_featurevector = False
            print ut.getTime(), 'get_featurevector: Choosing not to print Feature Vector Sample'
            #print ut.getTime(), 'feature vector sample(gaussian histograms):', fv
        return fv
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        bartlett_samples = []
        for sample in args.sample_cols.split(";"):
            bartlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(bartlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for item in freq:
                elements = ",".join(map(str, item))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for item in critical:
                cols.append(item)
            cols.append(",")
            for item in sig:
                cols.append(item)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for item in z:
                cols.append(item)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for item in s:
                cols.append(item)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for item in rel:
                cols.append(item)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for item in o:
                cols.append(item)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for item in o:
                cols.append(item)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for item in t1:
                cols.append(item)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for item in h2:
                cols.append(item)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for item in t:
                cols.append(item)
            for item in prob:
                cols.append(item)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for item in z:
                cols.append(item)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for item in a:
                cols.append(item)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for item in ob:
                elements = ",".join(map(str, item))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for item in table:
                elements = ",".join(map(str, item))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
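Any branch of the dispatch above can be exercised standalone; a sketch of the "describe" branch with made-up values (scipy.stats.describe returns six fields, matching the unpacking used above):

from scipy import stats

sample_one = ["1.0", "2.0", "3.0", "4.0"]
size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
print size, min_max, mean, uv, bs, bk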
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o",
                        "--outfile",
                        required=True,
                        help="Path to the output file.")
    parser.add_argument("--sample_one_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols",
                        help="Input format, like smi, sdf, inchi")
    parser.add_argument(
        "--sample_cols",
        help="Input format, like smi, sdf, inchi,separate arrays using ;",
    )
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help=
        "Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help=
        "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta",
        action="store_true",
        default=False,
        help="Whether or not to return the internally computed a values.",
    )
    parser.add_argument(
        "--fisher",
        action="store_true",
        default=False,
        help="if true then Fisher definition is used",
    )
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help=
        "if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument(
        "--inclusive1",
        action="store_true",
        default=False,
        help="if false,lower_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive2",
        action="store_true",
        default=False,
        help="if false,higher_limit will be ignored",
    )
    parser.add_argument(
        "--inclusive",
        action="store_true",
        default=False,
        help="if false,limit will be ignored",
    )
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help=
        "If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help=
        "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument(
        "--correction",
        action="store_true",
        default=False,
        help="continuity correction ",
    )
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help=
        "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help=
        "the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b",
                        type=int,
                        default=0,
                        help="The number of bins to use for the histogram")
    parser.add_argument("--N",
                        type=int,
                        default=0,
                        help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof",
                        type=int,
                        default=0,
                        help="Degrees of freedom correction")
    parser.add_argument(
        "--score",
        type=int,
        default=0,
        help="Score that is compared to the elements in a.",
    )
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help=
        "The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument(
        "--new",
        type=float,
        default=0.0,
        help="Value to put in place of values in a outside of bounds",
    )
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help=
        "lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help=
        "If lmbda is not None, do the transformation for that value. If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument(
        "--base",
        type=float,
        default=1.6,
        help="The logarithmic base to use, defaults to e",
    )
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
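    # sample0/sample1/sample2 record which of the three column selections
    # were supplied on the command line.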
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
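    # Process the input one tab-separated row at a time; the requested test
    # is run on the selected columns and its results are appended to the row.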
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
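        # Column indices supplied on the command line are 1-based; subtract
        # one when indexing into the split row.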
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(
                map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one),
                                               dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one),
                                       n=args.n,
                                       p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(
                map(float, sample_one),
                axis=args.axis,
                fisher=args.fisher,
                bias=args.bias,
            )
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), moment=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one),
                                        score=args.score,
                                        kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one),
                                                   alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one),
                                             low=args.m,
                                             high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one),
                cdf=args.cdf,
                N=args.N,
                alternative=args.alternative,
                mode=args.mode,
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one),
                correction=args.correction,
                lambda_=args.lambda_)
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
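        # For the trimmed statistics below, mf and nf double as the lower and
        # upper limits; 0 means the corresponding limit was not supplied.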
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf),
                                   (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one),
                                 lowerlimit=mf,
                                 inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one),
                                 upperlimit=nf,
                                 inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf),
                                 (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf),
                               (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    interpolation_method=args.interpolation,
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one),
                    map(float, sample_two),
                    (mf, nf),
                    interpolation_method=args.interpolation,
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(
                    map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one),
                                    mf,
                                    nf,
                                    newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one),
                               proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(
                map(float, sample_one),
                proportiontocut=args.proportiontocut,
                tail=args.tail,
            )
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(
                    map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf),
                                          method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one),
                                           alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one),
                                   imbda,
                                   alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one),
                                  map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one),
                                                  map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one),
                                        map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one),
                                       map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two))
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one),
                                          map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one),
                                              map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one),
                                        map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one),
                map(float, sample_two),
                use_continuity=args.mwu_use_continuity,
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one),
                           map(float, sample_two),
                           ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one),
                                                  map(float, sample_two),
                                                  equal_var=args.equal_var)
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one),
                                      map(float, sample_two),
                                      axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one),
                                    map(float, sample_two),
                                    axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one),
                                          map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one),
                map(float, sample_two),
                initial_lexsort=args.initial_lexsort,
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one),
                              map(float, sample_two),
                              base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one),
                                               map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               map(float, sample_two),
                                               ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one),
                                               ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one),
                    map(float, sample_two),
                    ddof=args.ddof,
                    lambda_=args.lambda_,
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one),
                                                       ddof=args.ddof,
                                                       lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     map(float, sample_two),
                                                     alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one),
                                                     alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one),
                    method=args.med,
                    weights=map(float, sample_two),
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one),
                                                      method=args.med)
            cols.append(stat)
            cols.append(p_value)
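        # The remaining tests accept an arbitrary number of samples, gathered
        # from --sample_cols into b_samples.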
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center,
                                         proportiontocut=args.proportiontocut,
                                         *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center,
                                      proportiontocut=args.proportiontocut,
                                      *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties,
                correction=args.correction,
                lambda_=args.lambda_,
                *b_samples)
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
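For orientation, here is a minimal driver for the wrapper above. It is a sketch only: the script name stat_tool.py is hypothetical, and the flag names are inferred from the args attributes the code reads (infile, outfile, test_id, sample_one_cols).

# Hypothetical smoke test: write a one-row tab-separated file, run the
# "describe" test on columns 1-4, and print the augmented row.
import subprocess
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".tsv", delete=False) as f:
    f.write("1.0\t2.0\t3.0\t4.0\n")
    infile = f.name

subprocess.check_call([
    "python", "stat_tool.py",        # hypothetical file name
    "--infile", infile,
    "--outfile", "out.tsv",
    "--test_id", "describe",
    "--sample_one_cols", "1,2,3,4",  # 1-based column indices
])
print(open("out.tsv").read())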
    def histogram(self, totals=None):
        """histogram of days to guesses"""
        if totals is None:
            totals = self.totals
        bins = self.range_of_ints(totals)
        return stats.histogram2(totals, bins)
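scipy.stats.histogram2 has been removed from modern SciPy, so snippets like the one above no longer run on current installs. Its documented behaviour (half-open bins, an open-ended final bin, values below the first edge dropped) can be reproduced with plain numpy; a compatibility sketch, not a drop-in replacement for every edge case:

import numpy as np

def histogram2_compat(a, bins):
    # Counts of a in [bins[i], bins[i+1]) plus a final open-ended bin
    # [bins[-1], inf); values below bins[0] are not counted.
    edges = np.append(np.asarray(bins, dtype=float), np.inf)
    counts, _ = np.histogram(a, bins=edges)
    return counts

# histogram2_compat([0.5, 1.5, 2.5, 9.0], [0, 1, 2, 3]) -> [1, 1, 1, 1]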
Example #15
0
import sys
import os
import numpy
import scipy
import scipy.stats
from scipy.stats import histogram, histogram2

debug = os.getenv("DEBUG")

rsptime_fn = sys.argv[1]
f = open(rsptime_fn, "r")
records = f.readlines()

times = numpy.array( [ float(r.strip().split(',')[1]) for r in records ] )
maxtime = max(times)
(time_histo, time_low_range, time_binsize, time_extrapoints) = histogram( times, defaultlimits=(0.0, maxtime))
assert(time_low_range == 0.0)
assert(time_extrapoints == 0)
if debug:
  print(time_histo, ' shape ', time_histo.shape, ' low_range ', time_low_range, ' binsize ', time_binsize, ' extrapoints ', time_extrapoints)
print('time histogram: %s' % ','.join(str(v) for v in time_histo.tolist()))

rsptimes = numpy.array( [ float(r.strip().split(',')[2]) for r in records ] )
rsptime_histo = histogram2( rsptimes, [ 0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032, 0.1, 0.32, 1, 3.2, 10, 32, 100 ] )
if debug:
  print(rsptime_histo, ' shape ', rsptime_histo.shape)
print('response time histogram: %s' % ','.join(str(v) for v in rsptime_histo.tolist()))
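On SciPy versions without histogram2, the response-time counts above can be reproduced with numpy alone; a sketch reusing the script's own log-spaced edges, with an infinite upper edge standing in for histogram2's open-ended final bin:

import numpy
edges = numpy.append([0.0001, 0.00032, 0.001, 0.0032, 0.01, 0.032,
                      0.1, 0.32, 1, 3.2, 10, 32, 100], numpy.inf)
rsptime_histo, _ = numpy.histogram(rsptimes, bins=edges)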