Example #1
 def check_basic(self):
     a1 = [3,4,5,10,-3,-5,6]
     a2 = [3,-6,-2,8,7,4,2,1]
     a3 = [3.,4,5,10,-3,-5,-6,7.0]
     assert_equal(stats.median(a1),4)
     assert_equal(stats.median(a2),2.5)
     assert_equal(stats.median(a3),3.5)
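For reference, a minimal pure-Python median consistent with the asserts above. This is a sketch, assuming stats.median sorts the data, returns the middle element for odd-length input, and averages the two middle elements for even-length input:

def _median_sketch(values):
    # Sort a copy, then take the middle element, or the average of
    # the two middle elements when the length is even (assumed behavior).
    ordered = sorted(values)
    n = len(ordered)
    mid = n // 2
    if n % 2:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2.0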
Example #2
  def statsex(self, objects):

    """
	Do some statistics on a source list
	Return dictionary
    """

    import stats, pstat
    
    # Return if we have no objects
    if len(objects) == 0:
      return 0	 

    # Define dictionary to hold statistics	
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [ float(obj[7]) for obj in objects ]
 
    # Define list (float) of ELLIPTICITY values
    el = [ float(obj[6]) for obj in objects ]

    # Define list (float) of THETA_IMAGE values
    pa = [ float(obj[5]) for obj in objects ]

    # Define list (float) of 'Stella-like' values
    stella = [ float(obj[9]) for obj in objects ]	

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm,40,[0,40])[0]
    
    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM']   = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM']   = "%.2f" % float(hfwhm.index(max(hfwhm))+0.5)

    try:	
       stat['stdevFWHM']  = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
       stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL']   = "%.2f" % stats.mean(el)

    try:
      stat['stdevEL']  = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
      stat['stdevEL']  = '0.00' 

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]

    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella']  = stats.histogram(stella,20,[0,1.01])[0]   

    return stat
Example #4
def test_median1():
    obs = median([0,0,0,0])
    assert_equal(obs, 0)

    obs = median([0,1,2])
    assert_equal(obs, 1)

    obs = median([0,1])
    assert_equal(obs, 0.5)

    assert_raises(TypeError, median, ['a', 'b', 'v'])
Example #5
def collect_median(input_list):
    """ Collect time execution of median of each module """

    begin_py_median = clock()
    py_median(input_list)
    end_py_median = clock()

    begin_median = clock()
    median(input_list)
    end_median = clock()

    times = format_times(end_py_median - begin_py_median,
                         end_median - begin_median)
    save_times(times, logs['median'])
Example #6
def test_matplotlib_barchart(loop_times=LOOP_TIMES):
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_SIZE:
        print("Screen Size: ", (i, j))
        plt.rcParams["figure.figsize"] = (i, j)
        for x in range(loop_times):
            plt.ion()
            objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
            y_pos = np.arange(len(objects))
            performance = [10, 8, 6, 4, 2, 1]

            tstart = time.time()
            plt.bar(y_pos, performance, align='center', alpha=0.5)
            plt.xticks(y_pos, objects)
            plt.ylabel('Usage')
            plt.title('Programming language usage')

            plt.show()
            plt.close('all')
            tend = time.time()
            render_time.append(tend - tstart)
            print(6, "Bar Chart Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
Example #7
def test_matplotlib_sign(loop_times=LOOP_TIMES):
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_SIZE:
        print("Screen Size: ", (i, j))
        plt.rcParams["figure.figsize"] = (i, j)
        for k in range(loop_times):
            x = np.arange(0, 2 * np.pi, 0.01)
            y = np.sin(x)

            fig, axes = matplotlib.pyplot.subplots(nrows=6)

            styles = ['r-', 'g-', 'y-', 'm-', 'k-', 'c-']
            lines = [ax.plot(x, y, style)[0] for ax, style in zip(axes, styles)]

            tstart = time.time()
            for i in range(1, NUM_OF_SIN_CURVES):
                for j, line in enumerate(lines, start=1):
                    line.set_ydata(np.sin(j * x + i / 10.0))
                fig.canvas.draw()
            fig.show()
            plt.close(fig)
            tend = time.time()
            render_time.append(tend - tstart)
            print(NUM_OF_SIN_CURVES, "Sine Curve Draw Time: ", tend-tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
Example #8
def sync_check():
#    print 'Checking sync...'
    max_mcnt_difference=4
    mcnts=dict()
    mcnts_list=[]
    mcnt_tot=0

    for f,fpga in enumerate(fpgas):
        mcnts[f]=dict()
        try:
            hdr_index=bram_oob[f]['hdr'].index(1)
        except:
            print 'ERR: No headers found in BRAM. Are the F engines properly connected?'
            exit()

        pkt_64bit = struct.unpack('>Q',bram_dmp['bram_msb'][f]['data'][(4*hdr_index):(4*hdr_index)+4]+bram_dmp['bram_lsb'][f]['data'][(4*hdr_index):(4*hdr_index)+4])[0]
        mcnts[f]['mcnt'] =(pkt_64bit&((2**64)-(2**16)))>>16
        mcnts_list.append(mcnts[f]['mcnt'])
#        print '[%s] MCNT: %i'%(servers[f],mcnts[f]['mcnt'])

    mcnts['mean']=stats.mean(mcnts_list)
    mcnts['median']=stats.median(mcnts_list)
    mcnts['mode']=stats.mode(mcnts_list)
    mcnts['modalmean']=stats.mean(mcnts['mode'][1])

#    print 'mean: %i, median: %i, modal mean: %i mode:'%(mcnts['mean'],mcnts['median'],mcnts['modalmean']),mcnts['mode']
    
    for f,fpga in enumerate(fpgas):
        if mcnts[f]['mcnt']>(mcnts['modalmean']+max_mcnt_difference) or mcnts[f]['mcnt'] < (mcnts['modalmean']-max_mcnt_difference):
            print '%s OUT OF SYNC!!'%servers[f]
            mcnts[f]['sync_status']='FAIL with error of %i'%(mcnts[f]['mcnt']-mcnts['modalmean'])
        else:
            mcnts[f]['sync_status']='PASS'

    return mcnts
Example #9
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []

    global android_rating_average
    global ios_rating_average

    global android_rating_median
    global ios_rating_median

    global android_rating_q1
    global ios_rating_q1

    global android_rating_q3
    global ios_rating_q3

    for app in collection_ios.find().batch_size(30):
        # count=count+1
        # android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        # ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app["android_ratingsAllVersions"].replace(",", "")))
        ios_ratings.append(float(app["ios_ratingsAllVersions_new"].replace(",", "")))

    android_rating_average = stats.mean(android_ratings)
    ios_rating_average = stats.mean(ios_ratings)

    android_rating_median = stats.median(android_ratings)
    ios_rating_median = stats.median(ios_ratings)

    android_rating_q1 = stats.quartiles(android_ratings)[0]
    ios_rating_q1 = stats.quartiles(ios_ratings)[0]

    android_rating_q3 = stats.quartiles(android_ratings)[1]
    ios_rating_q3 = stats.quartiles(ios_ratings)[1]

    print "ios stats"
    print ios_rating_q1
    print ios_rating_median
    print ios_rating_q3
    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
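The indexing above (quartiles(...)[0] for Q1, quartiles(...)[1] for Q3) suggests stats.quartiles returns a (Q1, Q3) pair. A minimal sketch under that assumption, reusing the _median_sketch helper from the note after Example #1:

def quartiles_sketch(values):
    # Lower/upper quartile as the medians of the lower and upper
    # halves of the sorted data (one common convention, assumed here;
    # needs at least two data points).
    ordered = sorted(values)
    half = len(ordered) // 2
    return _median_sketch(ordered[:half]), _median_sketch(ordered[-half:])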
Example #10
def calculations(x):
    from stats import median, mean, mode, MAD, RANGE
    return {
        'median': median(x),
        'mean': mean(x),
        'mode': mode(x),
        'range': RANGE(x),
        'MAD': MAD(x)
    }
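MAD and RANGE are not standard library names. A minimal sketch under the usual readings, assuming RANGE is the max-minus-min spread and MAD is the median absolute deviation about the median:

def RANGE(x):
    # Spread of the data: largest minus smallest value (assumed reading).
    return max(x) - min(x)

def MAD(x):
    # Median absolute deviation: the median distance from the median
    # (assumed reading), built on the _median_sketch helper above.
    m = _median_sketch(x)
    return _median_sketch([abs(v - m) for v in x])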
Example #11
    def forward(self):
        self.xsampled = []
        self.ysampled = []
        if self.sampling_distribution == 'uniform':
            radians = np.random.uniform(0,1,self.Nsamples) * 2 * np.pi
            r = np.random.uniform(0,self.radius,self.Nsamples)

        elif self.sampling_distribution == 'normal':
            radians = np.random.normal(0,1,self.Nsamples) * 2 * np.pi
            r = np.random.normal(0,self.radius,self.Nsamples) 

        for x,y in zip(np.array(r * np.cos(radians)) +  np.array([self.center[0]]*len(radians)),np.array(r * np.sin(radians)) +  np.array([self.center[1]]*len(radians))):

            if self.pos_sample_count < self.pos_samples:
                self.xsampled.append(x)
                self.ysampled.append(y)
                features = FeatureExtract(rgb2gray(asarray(get_window(self.PIL_image, (x,y), self.crop_size, self.resolution))), self.feature_types, self.feature_coords)
                features = pd.DataFrame([features])
                class_prediction = self.trained_ModelObject.predict(features)
                
                if class_prediction == 'pos':
                #print(class_prediction)
                    self.data.append([x,y,features,1])
                    self.pos_sample_count += 1
                else:
                    self.data.append([x,y,features,0])
            else:
                break
        if self.pos_sample_count != 0:
            self.data = np.array(self.data, dtype=object)

            posLandmarks = self.data[self.data[:,3] == 1] 
            self.posLandmarks_Xcoords = posLandmarks[:,0]
            self.posLandmarks_Ycoords = posLandmarks[:,1]
            if self.finalpredstat == 'median':
                self.pred_coord = (stats.median(self.posLandmarks_Xcoords),stats.median(self.posLandmarks_Ycoords))
            if self.finalpredstat == 'mean':
                self.pred_coord = (stats.mean(self.posLandmarks_Xcoords),stats.mean(self.posLandmarks_Ycoords))
            if self.true_landmark != None:
                self.error = float(math.sqrt((abs(self.true_landmark[0]- self.pred_coord[0]))**2 + (abs(self.true_landmark[1]- self.pred_coord[1]))**2 ))
                self.no_pred = 0
        else:
            self.no_pred = 1
Example #12
def print_tiles(list1, name_of_list1, list2, name_of_list2):
    my_lo = min(list1)
    my_hi = max(list1)
    if min(list2) < my_lo:
        my_lo = min(list2)
    if max(list2) > my_hi:
        my_hi = max(list2)

    def show(lst):
        return stats.xtile(lst,
                           lo=my_lo,
                           hi=my_hi,
                           width=25,
                           show=lambda s: " %3.2f" % s)

    print(name_of_list1, show(list1), "| Median: ",
          round(stats.median(list1), 2))
    print(name_of_list2, show(list2), "| Median: ",
          round(stats.median(list2), 2))
Example #13
def calculate_stats(population):

    # find the growth in population in consecutive years
    growth = []
    for i in range(0, len(population)-1):
        growth.append(population[i+1] - population[i])
    print('Mean growth: {0:.5f}'.format(mean(growth)))
    print('Median growth: {0:.5f}'.format(median(growth)))
    print('Variance/Sd growth: {0:.5f}, {1:.5f}'.format(*variance_sd(growth)))
    return growth
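A hypothetical call, to make the growth computation concrete (the population figures below are made up):

population = [10000, 10500, 11000, 12000]
growth = calculate_stats(population)
# growth is [500, 500, 1000]; the function prints the mean (666.66667)
# and median (500.00000) of those differences before returning them.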
Example #14
def calculate_stats(population):
    #find the growth in population in consecutive years
    growth = []
    for i in range(0, len(population)-1):
        growth.append(population[i+1] - population[i])
    print("Mean growth: {0:.5f}".format(mean(growth)))
    print("Median growth: {0:.5f}".format(median(growth)))
    print("Variance/Sd growth:{0:.5f}, {1:.5f}".format(*variance_sd(growth)))

    return growth
Example #15
def calculations(x):
    from stats import median, mean, mode, MAD, RANGE, maximum, minimum
    return {
        'median': median(x),
        'mean': mean(x),
        'mode': mode(x),
        'range': RANGE(x),
        'MAD': MAD(x),
        'max': maximum(x),
        'min': minimum(x)
    }
Example #16
def PlotKineticEnergyOverHeight(df, block=False, xlabel='', ylabel='', MaxStep=100000, saveflag=False, savedir='', savename='', writeflag=False):
    print("Compute Kinetic Energy Profile")
    ###### Constants
    # redundant redefinition
    kB = 1.3806503e-23
    e0 = 1.60217662e-19
    au = 1.66053904e-27
    step = MaxStep
    mass = 40
    m = mass * au
    Bound = 5.0
    MaxHeight = df['z'].max()

    stepsize = 0.5
    HeightArr = np.arange(0,MaxHeight-2.,stepsize)
    xKE_In = []
    xKE_Out = []
    yKE_In = []
    yKE_Out = []
    zKE_In = []
    zKE_Out = []
    AvgWindow = 1000000
    lengthArray = len(HeightArr)

    for h in HeightArr:
        VelocityArrIn = df.loc[(df['z'] > h) & (df['z'] <= h+stepsize) & (df['traj'] < 20) &
                                        (df['step'] >= MaxStep-AvgWindow) & (df['vz'] <= 0),
                                        ['vx', 'vy', 'vz']]
        VelocityArrIn['xke'] = 0.5 * m * (VelocityArrIn['vx'] * 100.) ** 2 / kB
        VelocityArrIn['yke'] = 0.5 * m * (VelocityArrIn['vy'] * 100.) ** 2 / kB
        VelocityArrIn['zke'] = 0.5 * m * (VelocityArrIn['vz'] * 100.) ** 2 / kB

        VelocityArrOut = df.loc[(df['z'] > h) & (df['z'] <= h+stepsize) & (df['traj'] < 20) &
                                        (df['step'] >= MaxStep-AvgWindow) & (df['vz'] > 0),
                                        ['vx', 'vy', 'vz']]
        VelocityArrOut['xke'] = 0.5 * m * (VelocityArrOut['vx'] * 100.) ** 2 / kB
        VelocityArrOut['yke'] = 0.5 * m * (VelocityArrOut['vy'] * 100.) ** 2 / kB
        VelocityArrOut['zke'] = 0.5 * m * (VelocityArrOut['vz'] * 100.) ** 2 / kB

        xKE_In.append(VelocityArrIn['xke'].mean())
        xKE_Out.append(VelocityArrOut['xke'].mean())
        yKE_In.append(VelocityArrIn['yke'].mean())
        yKE_Out.append(VelocityArrOut['yke'].mean())
        zKE_In.append(VelocityArrIn['zke'].mean())
        zKE_Out.append(VelocityArrOut['zke'].mean())

    from stats import median
    xKEmean = 0.5 * (median(xKE_In[lengthArray//2:]) + median(xKE_Out[lengthArray//2:]))
    yKEmean = 0.5 * (median(yKE_In[lengthArray//2:]) + median(yKE_Out[lengthArray//2:]))
    zKEmean = 0.5 * (median(zKE_In[lengthArray//2:]) + median(zKE_Out[lengthArray//2:]))
    print("KEmean",(xKEmean + yKEmean + zKEmean) / 3.0)



    if writeflag == True:
        WritePlot(X=HeightArr, Y=[xKE_In, yKE_In, zKE_In], name=savedir+savename+'In', xlabel=xlabel, ylabel=ylabel+' x,y,z', header=True, action='w')
        WritePlot(X=HeightArr, Y=[xKE_Out, yKE_Out, zKE_Out], name=savedir+savename+'Out', xlabel=xlabel, ylabel=ylabel+' x,y,z', header=True, action='w')
    plt.plot(HeightArr, [xKE_In[i] + yKE_In[i] + zKE_In[i] for i in range(len(xKE_In))], label='Kin Energy In')
    plt.plot(HeightArr, [xKE_Out[i] + yKE_Out[i] + zKE_Out[i] for i in range(len(xKE_Out))], label='Kin Energy Out')
    MakePlot(saveflag=saveflag, block=block, xlabel=xlabel, ylabel=ylabel, savepath=savedir+savename)
Example #17
def calculations(x):
    rang = lambda y, z: y - z
    y = max(x)
    z = min(x)
    from stats import median, mode
    return {
        'median': median(x),
        'mean': avg(x),
        'mode': mode(x),
        'range': rang(y, z),
        'max': y,
        'min': z
    }
Example #18
def report(x):
    print 'n =', len(x)
    print 'minimum =', min(x)
    print 'maximum =', max(x)
    print 'mean =', stats.mean(x)
    print 'median =', stats.median(x)
    print 'population variance =', stats.popvar(x)
    print 'sample variance =', stats.samvar(x)
    print 'population standard deviation =', stats.popstd(x)
    print 'sample standard deviation =', stats.samstd(x)
    print 'median deviation =', stats.median_deviation(x)
    print 'mean deviation =', stats.mean_deviation(x)
    print 'population skewness =', stats.popskw(x)
    print 'sample skewness =', stats.samskw(x)
    print 'nonparametric skew =', stats.nonparametric_skew(x)
Example #19
def upload(reactor, url, project, revision, revision_date, benchmark, param, statistic, backend, environment, samples):
    d = _upload(
        reactor,
        url=url,
        project=project,
        revision=revision,
        revision_date=revision_date,
        benchmark='%s-%s-%s' % (benchmark, param, statistic),
        executable='%s-backend' % (backend,),
        environment=environment,
        result_value=median(samples),
        result_date=datetime.now(),
        std_dev=mad(samples),  # Not really!
        max_value=max(samples),
        min_value=min(samples))
    d.addErrback(err, "Upload failed")
    return d
Example #20
def calc(x, conf):
	
	size = len(x)
	sum = stats.sum(x)
	av = stats.average(sum, size)
	gm = stats.gmean(x)
	v = stats.var(sum, stats.sqsum(x), size)
	med = stats.median(x)

	if v != 'error':
		sd = stats.stdv1(v)
		c = stats.conf(float(conf), sd, size)
	else:
		sd = 'error'
		c = 'none'

	return av, gm, v, sd, c, med
Example #22
def sync_check():
    #    print 'Checking sync...'
    max_mcnt_difference = 4
    mcnts = dict()
    mcnts_list = []
    mcnt_tot = 0

    for f, fpga in enumerate(fpgas):
        mcnts[f] = dict()
        try:
            hdr_index = bram_oob[f]['hdr'].index(1)
        except:
            print 'ERR: No headers found in BRAM. Are the F engines properly connected?'
            exit()

        pkt_64bit = struct.unpack(
            '>Q',
            bram_dmp['bram_msb'][f]['data'][(4 * hdr_index):(4 * hdr_index) +
                                            4] +
            bram_dmp['bram_lsb'][f]['data'][(4 * hdr_index):(4 * hdr_index) +
                                            4])[0]
        mcnts[f]['mcnt'] = (pkt_64bit & ((2**64) - (2**16))) >> 16
        mcnts_list.append(mcnts[f]['mcnt'])


#        print '[%s] MCNT: %i'%(servers[f],mcnts[f]['mcnt'])

    mcnts['mean'] = stats.mean(mcnts_list)
    mcnts['median'] = stats.median(mcnts_list)
    mcnts['mode'] = stats.mode(mcnts_list)
    mcnts['modalmean'] = stats.mean(mcnts['mode'][1])

    #    print 'mean: %i, median: %i, modal mean: %i mode:'%(mcnts['mean'],mcnts['median'],mcnts['modalmean']),mcnts['mode']

    for f, fpga in enumerate(fpgas):
        if mcnts[f]['mcnt'] > (mcnts['modalmean'] +
                               max_mcnt_difference) or mcnts[f]['mcnt'] < (
                                   mcnts['modalmean'] - max_mcnt_difference):
            print '%s OUT OF SYNC!!' % servers[f]
            mcnts[f]['sync_status'] = 'FAIL with error of %i' % (
                mcnts[f]['mcnt'] - mcnts['modalmean'])
        else:
            mcnts[f]['sync_status'] = 'PASS'

    return mcnts
Example #23
def _getRatingAverages():
    count=0
    android_rating_total=0
    ios_rating_total=0
    android_ratings=[]
    ios_ratings=[]

    global android_rating_average
    global ios_rating_average

    global android_rating_median
    global ios_rating_median

    global android_rating_q1
    global ios_rating_q1

    global android_rating_q3
    global ios_rating_q3

    for app in  collection_ios.find(no_cursor_timeout=True):
        #count=count+1
        #android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        #ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(float(app['android_success']-app['ios_success']))
        #ios_ratings.append(float(app['ios_success']))
        #difference


    android_rating_average=stats.mean(android_ratings)
    #ios_rating_average=stats.mean(ios_ratings)

    android_rating_median=stats.median(android_ratings)
    #ios_rating_median=stats.median(ios_ratings)

    android_rating_q1=stats.quartiles(android_ratings)[0]
    #ios_rating_q1=stats.quartiles(ios_ratings)[0]

    android_rating_q3=stats.quartiles(android_ratings)[1]
    #ios_rating_q3=stats.quartiles(ios_ratings)[1]

    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
Example #24
def _getRatingAverages():
    count = 0
    android_rating_total = 0
    ios_rating_total = 0
    android_ratings = []
    ios_ratings = []

    global android_rating_average
    global ios_rating_average

    global android_rating_median
    global ios_rating_median

    global android_rating_q1
    global ios_rating_q1

    global android_rating_q3
    global ios_rating_q3

    for app in collection_ios.find(no_cursor_timeout=True):
        #count=count+1
        #android_rating_total=android_rating_total+float(app['android_ratingsAllVersions'].replace(',',''))
        #ios_rating_total=ios_rating_total+float(app['ios_ratingsAllVersions_new'].replace(',',''))
        android_ratings.append(
            float(app['android_success'] - app['ios_success']))
        #ios_ratings.append(float(app['ios_success']))
        #difference

    android_rating_average = stats.mean(android_ratings)
    #ios_rating_average=stats.mean(ios_ratings)

    android_rating_median = stats.median(android_ratings)
    #ios_rating_median=stats.median(ios_ratings)

    android_rating_q1 = stats.quartiles(android_ratings)[0]
    #ios_rating_q1=stats.quartiles(ios_ratings)[0]

    android_rating_q3 = stats.quartiles(android_ratings)[1]
    #ios_rating_q3=stats.quartiles(ios_ratings)[1]

    print "Android stats"
    print android_rating_q1
    print android_rating_median
    print android_rating_q3
Example #25
def length_filter(ref_gtf,
                  filter_gtf,
                  spread_lower,
                  spread_upper,
                  verbose=False):
    # hash lengths
    transcript_lengths = {}
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        transcript_lengths[tid] = transcript_lengths.get(tid, 0) + int(
            a[4]) - int(a[3]) + 1

    # determine length boundaries
    length_median = float(stats.median(transcript_lengths.values()))
    if spread_lower:
        length_spread_min = length_median / spread_lower
    else:
        length_spread_min = 0
    if spread_upper:
        length_spread_max = length_median * spread_upper
    else:
        length_spread_max = max(transcript_lengths.values())

    # remove too short and too long
    transcripts_kept = set()
    filter_out = open(filter_gtf, 'w')
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        tlen = transcript_lengths.get(tid, 0)
        if length_spread_min <= tlen <= length_spread_max:
            print >> filter_out, line,
            transcripts_kept.add(tid)
    filter_out.close()

    if verbose:
        print >> sys.stderr, 'Transcript length median:  %6d' % length_median
        print >> sys.stderr, 'Transcript length min:     %6d' % length_spread_min
        print >> sys.stderr, 'Transcript length max:     %6d' % length_spread_max
        print >> sys.stderr, '%6d of %6d (%.3f) transcripts used.' % (
            len(transcripts_kept), len(transcript_lengths),
            len(transcripts_kept) / float(len(transcript_lengths)))
Example #26
def test_plotly_barchat(loop_times=LOOP_TIMES):
    SCREEN_PIXEL = convert_inch_to_pixel(SCREEN_SIZE)
    render_time = []
    all_render_time_np = []
    for (i, j) in SCREEN_PIXEL:
        print("Screen Size: ", (i, j))
        for x in range(loop_times):
            objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
            y_pos = np.arange(len(objects))
            performance = [10, 8, 6, 4, 2, 1]

            layout = go.Layout(
                autosize=False,
                width=i,
                height=j,
                margin=go.layout.Margin(
                    l=50,
                    r=50,
                    b=100,
                    t=100,
                    pad=4
                ),
                paper_bgcolor='#7f7f7f',
                plot_bgcolor='#c7c7c7'
            )

            tstart = time.time()
            data = [go.Bar(x=objects, y=performance)]
            fig = go.Figure(data=data, layout=layout)

            plotly.offline.plot(fig, filename='basic-bar')

            tend = time.time()
            render_time.append(tend - tstart)
            print(6, "Bar Chart Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
Example #27
 def calculate_stats_num(self, name, per = [5,25,50,75,95]):
     # get column instance
     Col = self.get_column(name)
     # type validation
     assert Col.type == 'numerical', 'only numerical columns are supported.'
     # get data
     data = self.get_data(name)
     # initialize
     dstats = dict()
     # calculate statistics
     dstats['mean'] = stats.mean(data)
     dstats['median'] = stats.median(data)
     dstats['std'] = stats.std(data)
     dstats['min'] = stats.min(data)
     dstats['max'] = stats.max(data)
     dstats['skew'] = stats.skew(data)
     dstats['kurtosis'] = stats.kurtosis(data)
     for ip in per:
         dstats['per%s'%ip] = stats.percentile(data, ip)
     # return
     Col.stats = dstats
Example #28
def getQuartilesData(numbersList):
    mean = 0
    median = 0
    quartiles = 0.0, 0.0, 0.0
    inQuarts = countValuesInQuartiles(numbersList)
    q1, q2, q3 = inQuarts
    q1percent = calculatePercentage(q1, len(numbersList))
    q2percent = calculatePercentage(q2, len(numbersList))
    q3percent = calculatePercentage(q3, len(numbersList))

    try:
        quartiles = stats.quartiles(numbersList)
    except Exception as e:
        pass

    try:
        median = stats.median(numbersList)
    except Exception as e:
        pass

    try:
        mean = stats.mean(numbersList)
    except Exception as e:
        pass

    return {
        "mean": mean,
        "median": median,
        "Q1": round(q1, 2),
        "Q2": round(q2, 2),
        "Q3": round(q3, 2),
        "Q1Perc": round(q1percent, 2),
        "Q2Perc": round(q2percent, 2),
        "Q3Perc": round(q3percent, 2),
        "quartileCount": inQuarts,
        "quartiles": quartiles
    }
Example #29
def length_filter(ref_gtf, filter_gtf, spread_lower, spread_upper, verbose=False):
     # hash lengths
    transcript_lengths = {}
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        transcript_lengths[tid] = transcript_lengths.get(tid,0) + int(a[4]) - int(a[3]) + 1

    # determine length boundaries
    length_median = float(stats.median(transcript_lengths.values()))
    if spread_lower:
        length_spread_min = length_median / spread_lower
    else:
        length_spread_min = 0
    if spread_upper:
        length_spread_max = length_median * spread_upper
    else:
        length_spread_max = max(transcript_lengths.values())

    # remove too short and too long
    transcripts_kept = set()
    filter_out = open(filter_gtf, 'w')
    for line in open(ref_gtf):
        a = line.split('\t')
        tid = gtf_kv(a[8])['transcript_id']
        tlen = transcript_lengths.get(tid,0)
        if length_spread_min <= tlen <= length_spread_max:
            print >> filter_out, line,
            transcripts_kept.add(tid)
    filter_out.close()

    if verbose:
        print >> sys.stderr, 'Transcript length median:  %6d' % length_median
        print >> sys.stderr, 'Transcript length min:     %6d' % length_spread_min
        print >> sys.stderr, 'Transcript length max:     %6d' % length_spread_max
        print >> sys.stderr, '%6d of %6d (%.3f) transcripts used.' % (len(transcripts_kept), len(transcript_lengths), len(transcripts_kept)/float(len(transcript_lengths)))
Example #30
'''
statistics_calculator.py

Read numbers from a file, calculate and print statistical measures:
mean, median, mode, variance, standard deviation
'''

from stats import mean, median, mode, variance_sd


def read_data(filename):
    numbers = []
    with open(filename) as f:
        for line in f:
            numbers.append(float(line))

    return numbers


if __name__ == '__main__':
    data = read_data('mydata.txt')
    m = mean(data)
    median = median(data)
    mode = mode(data)
    variance, sd = variance_sd(data)
    print('Mean: {0:.5f}'.format(m))
    print('Median: {0:.5f}'.format(median))
    print('Mode: {0:.5f}'.format(mode))
    print('Variance: {0:.5f}'.format(variance))
    print('Standard deviation: {0:.5f}'.format(sd))
Example #31
 def check_median(self):
     assert_equal(stats.median(self.a1), 4)
     assert_equal(stats.median(self.a2), 2.5)
     assert_equal(stats.median(self.a3), 3.5)
"""
statistics_calculator.py

Read numbers from a file, calculate and print statistical measures:
mean, median, mode, variance, standard deviation
"""

from stats import mean, median, mode, variance_sd


def read_data(filename):
    numbers = []
    with open(filename) as f:
        for line in f:
            numbers.append(float(line))

    return numbers


if __name__ == "__main__":
    data = read_data("mydata.txt")
    m = mean(data)
    median = median(data)
    mode = mode(data)
    variance, sd = variance_sd(data)
    print("Mean: {0:.5f}".format(m))
    print("Median: {0:.5f}".format(median))
    print("Mode: {0:.5f}".format(mode))
    print("Variance: {0:.5f}".format(variance))
    print("Standard deviation: {0:.5f}".format(sd))
Example #33
except ImportError:
    pass

l = range(1,21)
lf = range(1,21)
lf[2] = 3.0
a = N.array(l)
af = N.array(lf)
ll = [l]*5
aa = N.array(ll)

print('\nCENTRAL TENDENCY')
print('geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af))
print('harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af))
print('mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af))
print('medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af))
print('mode:',stats.mode(l),stats.mode(a))
print('\nMOMENTS')
print('moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af))
print('variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af))
print('skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af))
print('kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af))
print('mean:',stats.mean(a),stats.mean(af))
print('var:',stats.var(a),stats.var(af))
print('stdev:',stats.stdev(a),stats.stdev(af))
print('sem:',stats.sem(a),stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
Example #34
    def stats():
        '''returns stats for given macs between timestamp'''
        after = request.args.get('after')
        if after is not None:
            try:
                after = time.mktime(time.strptime(after, '%Y-%m-%dT%H:%M:%S'))
            except ValueError as v:
                raise InvalidUsage('Invalid after parameter')
        before = request.args.get('before')
        if before is not None:
            try:
                before = time.mktime(time.strptime(before,
                                                   '%Y-%m-%dT%H:%M:%S'))
            except ValueError as v:
                raise InvalidUsage('Invalid before parameter')
        macs = request.args.getlist('macs')
        rssi, zero, day = None, False, False

        cur = get_db().cursor()
        # to store temp table and indices in memory
        sql = 'pragma temp_store = 2;'
        cur.execute(sql)

        sql, sql_args = build_sql_query(after, before, macs, rssi, zero, day)
        try:
            cur.execute(sql, sql_args)
        except sqlite3.OperationalError as e:
            return jsonify({
                'status': 'error',
                'message': 'sqlite3 db is not accessible'
            }), 500

        # gather stats about each mac, same code as in stats.py
        # TODO: just import that
        macs = {}
        for row in cur.fetchall():
            mac = row[1]
            if is_local_bit_set(mac):
                # create virtual mac for LAA mac address
                mac = 'LAA'
            if mac not in macs:
                macs[mac] = {
                    'vendor': row[2],
                    'ssid': [],
                    'rssi': [],
                    'last': row[0],
                    'first': row[0]
                }
            d = macs[mac]
            if row[3] != '' and row[3] not in d['ssid']:
                d['ssid'].append(row[3])
            if row[0] > d['last']:
                d['last'] = row[0]
            if row[0] < d['first']:
                d['first'] = row[0]
            if row[4] != 0:
                d['rssi'].append(row[4])

        # sort on frequency of appearance of a mac
        tmp = [(k, len(v['rssi'])) for k, v in macs.items()]
        tmp = [m for m, _ in reversed(sorted(tmp, key=lambda k: k[1]))]

        data = []
        # dump our stats
        for m in tmp:
            v = macs[m]
            first = time.strftime('%Y-%m-%dT%H:%M:%S',
                                  time.localtime(v['first']))
            last = time.strftime('%Y-%m-%dT%H:%M:%S',
                                 time.localtime(v['last']))
            t = {
                'mac': m,
                'vendor': v['vendor'],
                'ssids': sorted(v['ssid']),
                'first': first,
                'last': last
            }
            rssi = v['rssi']
            if rssi != []:
                t.update({
                    'rssi': {
                        'count': len(rssi),
                        'min': min(rssi),
                        'max': max(rssi),
                        'avg': sum(rssi) / len(rssi),
                        'median': int(median(rssi))
                    }
                })
            data.append(t)

        return jsonify(data)
Example #35
def main():
    usage = 'usage: %prog [options] <gff file>'
    parser = OptionParser(usage)
    parser.add_option('-c', dest='cons_dir', default='%s/research/common/data/phylop' % os.environ['HOME'], help='Conservation directory [Default: %default]')
    parser.add_option('-l', dest='lncrna', action='store_true', default=False, help='Use the lncRNA specific file to speed things up [Default: %default]')
    (options,args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide gff file to intersect')
    else:
        gff_file = args[0]

    t2g = gff.t2g(gff_file)

    # build interval trees
    lnc_lengths = {}
    chr_features = {}
    interval2lnc = {}
    lnc_cons = {}
    for line in open(gff_file):
        a = line.split('\t')

        chrom = a[0]
        start = int(a[3])
        end = int(a[4])
        tid = gff.gtf_kv(a[8])['transcript_id']
        align = (chrom,start,end)

        lnc_cons[tid] = []
        lnc_lengths[tid] = lnc_lengths.get(tid,0) + (end-start+1)
        if interval2lnc.has_key(align):
            interval2lnc[align].add(tid)
        else:
            interval2lnc[align] = set([tid])
            chr_features.setdefault(chrom, IntervalTree()).insert_interval(Interval(start,end))

    # process overlapping chromosome blocks
    if options.lncrna:
        lnc_wig = glob.glob('%s/lnc_catalog.*wigFix*' % options.cons_dir)[0]
        process_file(chr_features, interval2lnc, lnc_cons, lnc_wig)

    else:
        for cons_file in glob.glob('%s/chr*' % options.cons_dir):
            process_file(chr_features, interval2lnc, lnc_cons, cons_file)

    # print table
    for tid in lnc_lengths:
        cons_len = len(lnc_cons[tid])
        cons_cov = float(cons_len) / lnc_lengths[tid]
        if cons_len == 0:
            cons_mean = 0.0
            cons_median = 0.0
            cons_pos = 0.0
            cons_neg = 0.0
        else:
            cons_mean = stats.mean(lnc_cons[tid])
            cons_median = stats.median(lnc_cons[tid])
            cons_pos = len([c for c in lnc_cons[tid] if c > 1]) / float(cons_len)
            cons_neg = len([c for c in lnc_cons[tid] if c < 1]) / float(cons_len)

        cols = (tid, t2g[tid], lnc_lengths[tid], cons_cov, cons_mean, cons_median, cons_neg, cons_pos)
        print '%-15s %-15s %7d %9.4f %9.4f %9.4f %9.4f %9.4f' % cols
Example #36
        d = map(float, sys.stdin.xreadlines())
    else:
        d = map(long, sys.stdin.xreadlines())
except ValueError, err:
    sys.stderr.write("Bad datum: %s\n" % str(err))
    sys.exit(1)

if len(d) == 0:
    sys.stderr.write("No data given\n")
    sys.exit(1)
        
d.sort()

print "           N =", len(d)
print "         SUM =", stats.sum(d)
print "         MIN =", min(d)
print "1ST-QUARTILE =", stats.firstquartile(d)
print "      MEDIAN =", stats.median(d)
print "3RD-QUARTILE =", stats.thirdquartile(d)
print "         MAX =", max(d)
print "        MEAN =", stats.mean(d)

if d[0] < 0:
    print "         N50 = NA"
else:
    print "         N50 =", stats.n50(d)

if options.showMode:
    modeinfo = stats.mode(d)
    print "     MODE(S) =", ','.join(map(str, modeinfo[0])), "(%d)" % modeinfo[1]
Example #37
 def testSingleMedian(self):
     # passing only one data point to median() must return the same value
     self.assertEqual(stats.median([7]), 7)
Example #38
		offset.append(teloff) # pixels away from the center. (07.10.04)
	  elif distance < 800:
		offset.append(teloff)
		offset.append(teloff)
	  else:
		offset.append(teloff)
        else:
          offset.append(teloff) 

	#writeLog(logpath,file,"FocusPyr: teloffset= %d" % offset)  
	#print "FocusPyr: teloffset= %d distance=%f (%f,%f)" % (teloff,distance,x1,y1) 	

      if len(offset) > 0:
        # Determine mean, median and stdev of unclipped offsets
        mean  = stats.mean(offset) 
        median = stats.median(offset) 
        try:	
          stdev = stats.stdev(offset)
        except ZeroDivisionError:
          stdev = '0.00'

        # Do a 1-sigma clipping
        clipLowLimit  =  float(mean) - 1 * float(stdev)
        clipHighLimit =  float(mean) + 1 * float(stdev)
        offset = [off for off in offset
		if float(off) < clipHighLimit and float(off) > clipLowLimit ]	

        # Determine stats on sigma clipped data 
        mean_c  = stats.mean(offset) 
        median_c = stats.median(offset) 
        try:	
Example #39
 def findBase(self, mapNum, sugar, phos5, phos3, baseType, direction = 3):
     """Rotate the sugar center by 360 degrees in ROTATE_SUGAR_INTERVAL increments
     
     ARGUMENTS:
         mapNum   - the molecule number of the Coot map to use
         sugar    - the coordinates of the C1' atom
         phos5    - the coordinates of the 5' phosphate
         phos3    - the coordinates of the 3' phosphate
         baseType - the base type (A, C, G, or U)
     OPTIONAL ARGUMENTS:
         direction - which direction are we tracing the chain
                     if it is 5 (i.e. 3'->5'), then phos5 and phos3 will be flipped
                     all other values will be ignored
                     defaults to 3 (i.e. 5'->3')
     RETURNS:
         baseObj  - a list of [baseType, baseCoordinates]
     """
     
     if direction == 5:
         (phos5, phos3) = (phos3, phos5)
     
     #calculate the bisector of the phos-sugar-phos angle
     #first, calculate a normal to the phos-sugar-phos plane
     sugarPhos5Vec = minus(phos5, sugar)
     sugarPhos3Vec = minus(phos3, sugar)
     normal = crossProd(sugarPhos5Vec, sugarPhos3Vec)
     normal = scalarProd(normal, 1.0/magnitude(normal))
     
     phosSugarPhosAngle = angle(phos5, sugar, phos3)
     
     bisector = rotate(sugarPhos5Vec, normal, phosSugarPhosAngle/2.0)
     
     
     #flip the bisector around (so it points away from the phosphates) and scale its length to 5 A
     startingBasePos = scalarProd(bisector, -1/magnitude(bisector))
     
     #rotate the base baton by 10 degree increments about half of a sphere
     rotations = [startingBasePos] #a list of coordinates for all of the rotations
     for curTheta in range(-90, -1, 10) + range(10, 91, 10):
         curRotation = rotate(startingBasePos, normal, curTheta)
         rotations.append(curRotation) #here's where the phi=0 rotation is accounted for
         
         for curPhi in range(-90, -1, 10) + range(10, 91, 10):
             rotations.append(rotate(curRotation, startingBasePos, curPhi))
             
     #test electron density along all base batons
     for curBaton in rotations:
         curDensityTotal = 0
         densityList = []
         for i in range(1, 9):
             (x, y, z) = plus(sugar, scalarProd(i/2.0, curBaton))
             curPointDensity = density_at_point(mapNum, x, y, z)
             curDensityTotal += curPointDensity
             densityList.append(curPointDensity)
         curBaton.append(curDensityTotal)        #the sum of the density (equivalent to the mean for ordering purposes)
         curBaton.append(median(densityList))    #the median of the density
         curBaton.append(min(densityList))       #the minimum of the density
     
     #find the baton with the max density (as measured using the median)
     #Note that we ignore the sum and minimum of the density.  Those calculations could be commented out,
     #   but they may be useful at some point in the future.  When we look at higher resolutions maybe?
     #   Besides, they're fast calculations.
     baseDir = max(rotations, key = lambda x: x[4])
     
     #rotate the stock base+sugar structure to align with the base baton
     rotationAngle = angle(self.__baseStrucs["C"]["C4"], [0,0,0], baseDir)
     axis = crossProd(self.__baseStrucs["C"]["C4"], baseDir[0:3])
     
     orientedBase = rotateAtoms(self.__baseStrucs["C"], axis, rotationAngle)
     
     #rotate the base about chi to find the best fit to density
     bestFitBase = None
     maxDensity = -999999
     for curAngle in range(0,360,5):
         rotatedBase = rotateAtoms(orientedBase, orientedBase["C4"], curAngle, sugar)
         curDensity = 0
         for curAtom in ["N1", "C2", "N3", "C4", "C5", "C6"]:
             curDensity += density_at_point(mapNum, rotatedBase[curAtom][0], rotatedBase[curAtom][1], rotatedBase[curAtom][2])
         
         #this is "pseudoChi" because it uses the 5' phosphate in place of the O4' atom
         pseudoChi = torsion(phos5, sugar, rotatedBase["N1"], rotatedBase["N3"])
         curDensity *= self.__pseudoChiInterp.interp(pseudoChi)
         
         if curDensity > maxDensity:
             maxDensity = curDensity
             bestFitBase = rotatedBase
     
     baseObj = ["C", bestFitBase]
     
     #mutate the base to the appropriate type
     if baseType != "C":
         baseObj = self.mutateBase(baseObj, baseType)
     
     return baseObj
Example #40
timing_data, unreviewed_patchnums, owner_no_follow_ups, need_review_followup = review_timings.load_data(REVIEWS_FILENAME)
client_timing_data, client_unreviewed_patchnums, client_owner_no_follow_ups, client_need_review_followup = review_timings.load_data(CLIENT_REVIEWS_FILENAME)

# timing_data.update(client_timing_data)

# trim off the top and bottom few percent
outliers = int(len(timing_data) * .1) // 2
owner_data = sorted([x[0] for x in timing_data.itervalues()])[outliers:-outliers]
reviewer_data = sorted([x[1] for x in timing_data.itervalues()])[outliers:-outliers]

template_vars['open_patches'] = '%d' % len(timing_data.keys())
template_vars['unreviewed_patches'] = '%d' % (len(unreviewed_patchnums) )#+ len(client_unreviewed_patchnums))
template_vars['need_followup_count'] = '%d' % len(need_review_followup)
template_vars['no_follow_ups'] = '%d' % (len(owner_no_follow_ups) )#+ len(client_owner_no_follow_ups))
owner_time = timedelta(seconds=stats.median(owner_data))
reviewer_time = timedelta(seconds=stats.median(reviewer_data))
template_vars['owner_response'] = str(owner_time)
template_vars['reviewer_response'] = str(reviewer_time)

with open(PERCENT_ACTIVE_FILENAME, 'rb') as f:
    total_contributors = len(f.readlines())
template_vars['total_contributors'] = total_contributors

with open(AVERAGES_FILENAME, 'rb') as f:
    actives_windows, actives_avg = json.load(f)
for aw, rolling_avg_windows in actives_windows[-1:]:
    aw = str(aw)
    for r_a_w in rolling_avg_windows[:1]:
        r_a_w = str(r_a_w)
        active_contributors = int(
Example #41
    # timing_data.update(client_timing_data)
    # unreviewed.extend(client_unreviewed)

    outliers = int(len(timing_data) * .1) // 2

    owner_data = sorted([x[0] for x in timing_data.itervalues()])[outliers:-outliers]
    reviewer_data = sorted([x[1] for x in timing_data.itervalues()])[outliers:-outliers]

    histogram(owner_data, 'owner')
    histogram(reviewer_data, 'reviewer')

    print 'Stats for %d patches' % len(timing_data.keys())
    print 'Patch owner review stats:'
    print ' mean: %s' % str(datetime.timedelta(seconds=stats.mean(owner_data)))
    print ' median: %s' % str(datetime.timedelta(seconds=stats.median(owner_data)))
    print ' std_deviation: %s' % str(datetime.timedelta(seconds=stats.std_deviation(owner_data)))
    print ' max_difference: %s' % str(datetime.timedelta(seconds=stats.min_max_difference(owner_data)))
    print ' %d patches with no follow-up' % len(owner_no_follow_ups)
    print
    print 'Patch reviewer stats:'
    print ' mean: %s' % str(datetime.timedelta(seconds=stats.mean(reviewer_data)))
    print ' median: %s' % str(datetime.timedelta(seconds=stats.median(reviewer_data)))
    print ' std_deviation: %s' % str(datetime.timedelta(seconds=stats.std_deviation(reviewer_data)))
    print ' max_difference: %s' % str(datetime.timedelta(seconds=stats.min_max_difference(reviewer_data)))
    print ' %d unreviewed patches' % len(unreviewed)
    print ' %d patches need reviewer follow-up' % len(need_review_followup)
    if '--show-unreviewed' in sys.argv:
        for patch_number in unreviewed:
            print 'https://review.openstack.org/#/c/%d/' % patch_number[0]
    if '--show-need-review' in sys.argv:
Example #42
I = la.make_matrix(5, 5, la.is_diagonal)
print("identity matrix = ", I)

print("\n\n")
print("*** Test Module <stats> ***")

A = [1, 3, 5, 7, 9, 2, 3, 4, 4, 4, 6, 8, 10, 13, 15, 17]

print("vector A = ", A)
print("sorted A = ", sorted(A))

mean = st.mean(A)
print("A's mean = ", mean)

median = st.median(A)
print("A's median = ", median)

quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)

quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)

mode = st.mode(A)
print("A's mode = ", mode)

data_range = st.data_range(A)
print("A's range = ", data_range)

variance = st.variance(A)
Example #43
def test_median0():
    obs = median([3.,7,3,4.,0.])
    exp = 3
    assert_equal(obs,exp)
Example #44
def test_median():
    obs = median([5.])
    exp = 5.
    assert_equal(obs,exp)
Example #45
		offset.append(teloff)
	else:
		offset.append(teloff)

	writeLog(logpath,file,"FocusPyr: teloffset= %d" % teloff)  
	#print "FocusPyr: teloffset= %d distance=(%f,%f) (%f,%f) %s" % (teloff,xdist,ydist,x1,y1,o[11]) 	

	# Append to a list to be inserted into Objects table
	pyrobjects.append((teloff,xdist,ydist,x1,y1,o[11]))        


  if len(offset) > 0:

    # Determine mean, median and stdev of unclipped offsets
    mean  = stats.mean(offset) 
    median = stats.median(offset) 
    try:	
      stdev = stats.stdev(offset)
    except ZeroDivisionError:
      stdev = '0.00'

    # Do a 1-sigma clipping
    clipLowLimit  =  float(mean) - 1 * float(stdev)
    clipHighLimit =  float(mean) + 1 * float(stdev)
    offset = [off for off in offset
	if float(off) <= clipHighLimit and float(off) >= clipLowLimit ]	

    # Determine stats on sigma clipped data 
    h['meanFocusOffset']   = stats.mean(offset) 
    h['medianFocusOffset'] = stats.median(offset) 
    try:	
Example #46
 def testSingleMedian(self):
     # passing only one data point to median() must return the same value
     self.assertEqual(stats.median([7]), 7)
Example #47
def test_plotly_sign_curve(loop_times=LOOP_TIMES):
    #TODO
    x = np.arange(0, 2 * np.pi, 0.01)
    y = np.sin(x)
    trace1 = {
        "x": x,
        "y": y,
        "line": {
            "color": "rgb(0,113.985,188.955)",
            "dash": "solid",
            "width": 0.5
        },
        "marker": {
            "color": "rgb(0,113.985,188.955)",
            "line": {"width": 0.5},
            "size": 6
        },
        "mode": "lines",
        "name": "",
        "showlegend": True,
        "type": "scatter",
        "visible": True,
        "xaxis": "x1",
        "yaxis": "y1"
    }
    data = Data([trace1])
    SCREEN_PIXEL = convert_inch_to_pixel(SCREEN_SIZE)
    render_time = []
    all_render_time_np = []

    for (i, j) in SCREEN_PIXEL:
        print("Screen Size: ", (i, j))
        for k in range(loop_times):
            tstart = time.time()
            layout = {
                "annotations": [
                    {
                        "x": 0.5175,
                        "y": 0.935,
                        "align": "center",
                        "bordercolor": "rgba(0,0,0,0)",
                        "borderpad": 3,
                        "borderwidth": 0.5,
                        "font": {
                            "color": "rgb(0,0,0)",
                            "family": "Arial, sans-serif",
                            "size": 11
                        },
                        "showarrow": False,
                        "text": "<b>Sine curve</b>",
                        "textangle": 0,
                        "xanchor": "center",
                        "xref": "paper",
                        "yanchor": "bottom",
                        "yref": "paper"
                    },
                    {
                        "x": 0.5175,
                        "y": 0.461162790698,
                        "align": "center",
                        "bordercolor": "rgba(0,0,0,0)",
                        "borderpad": 3,
                        "borderwidth": 0.5,
                        "font": {
                            "color": "rgb(0,0,0)",
                            "family": "Arial, sans-serif",
                            "size": 11
                        },
                        "showarrow": False,
                        "text": "<b>Cosine curve</b>",
                        "textangle": 0,
                        "xanchor": "center",
                        "xref": "paper",
                        "yanchor": "bottom",
                        "yref": "paper"
                    }
                ],
                "autosize": False,
                "height": i,
                "hovermode": "closest",
                "margin": {
                    "r": 0,
                    "t": 0,
                    "b": 0,
                    "l": 0,
                    "pad": 0
                },
                "paper_bgcolor": "rgb(255,255,255)",
                "plot_bgcolor": "rgba(0,0,0,0)",
                "showlegend": False,
                "title": "<b>Sine curve</b>",
                "titlefont": {"color": "rgba(0,0,0,0)"},
                "width": j,
                "xaxis1": {
                    "anchor": "y1",
                    "autorange": False,
                    "domain": [0.13, 0.905],
                    "exponentformat": "none",
                    "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1,
                    "linecolor": "rgb(38.25,38.25,38.25)",
                    "linewidth": 1,
                    "mirror": "ticks",
                    "nticks": 9,
                    "range": [0, 7],
                    "showgrid": False,
                    "showline": True,
                    "side": "bottom",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 10
                    },
                    "ticklen": 6.51,
                    "ticks": "inside",
                    "tickwidth": 1,
                    "titlefont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 11
                    },
                    "type": "linear",
                    "zeroline": False
                },
                "xaxis2": {
                    "anchor": "y2",
                    "autorange": False,
                    "domain": [0.13, 0.905],
                    "exponentformat": "none",
                    "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1,
                    "linecolor": "rgb(38.25,38.25,38.25)",
                    "linewidth": 1,
                    "mirror": "ticks",
                    "nticks": 9,
                    "range": [0, 7],
                    "showgrid": False,
                    "showline": True,
                    "side": "bottom",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 10
                    },
                    "ticklen": 6.51,
                    "ticks": "inside",
                    "tickwidth": 1,
                    "titlefont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 11
                    },
                    "type": "linear",
                    "zeroline": False
                },
                "yaxis1": {
                    "anchor": "x1",
                    "autorange": False,
                    "domain": [0.583837209302, 0.925],
                    "exponentformat": "none",
                    "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1,
                    "linecolor": "rgb(38.25,38.25,38.25)",
                    "linewidth": 1,
                    "mirror": "ticks",
                    "nticks": 6,
                    "range": [-1, 1],
                    "showgrid": False,
                    "showline": True,
                    "showticklabels": True,
                    "side": "left",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 10
                    },
                    "ticklen": 6.51,
                    "ticks": "inside",
                    "tickwidth": 1,
                    "titlefont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 11
                    },
                    "type": "linear",
                    "zeroline": False
                },
                "yaxis2": {
                    "anchor": "x2",
                    "autorange": False,
                    "domain": [0.11, 0.451162790698],
                    "exponentformat": "none",
                    "gridcolor": "rgb(38.25,38.25,38.25)",
                    "gridwidth": 1,
                    "linecolor": "rgb(38.25,38.25,38.25)",
                    "linewidth": 1,
                    "mirror": "ticks",
                    "nticks": 6,
                    "range": [-1, 1],
                    "showgrid": False,
                    "showline": True,
                    "showticklabels": True,
                    "side": "left",
                    "tickcolor": "rgb(38.25,38.25,38.25)",
                    "tickfont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 10
                    },
                    "ticklen": 6.51,
                    "ticks": "inside",
                    "tickwidth": 1,
                    "titlefont": {
                        "color": "rgb(38.25,38.25,38.25)",
                        "family": "Arial, sans-serif",
                        "size": 11
                    },
                    "type": "linear",
                    "zeroline": False
                }
            }

            fig = go.Figure(data=data, layout=layout)
            plot_url = plotly.offline.plot(fig)
            tend = time.time()
            render_time.append(tend - tstart)
            print(NUM_OF_SIN_CURVES, "Sine Curve Draw Time: ", tend - tstart)
        render_time_np = np.array(render_time)
        all_render_time_np.append(render_time_np)
        print("Mean Render Time: ", render_time_np.mean())
        print("Median Render Time: ", median(render_time_np))
        print()
    print("Xtiles:")
    for np_arr in all_render_time_np:
        print(xtile(np_arr, lo=0, hi=2))
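The xtile helper printed in the final loop is not defined in this snippet. Judging from the lo/hi arguments, it renders each array of render times as an ASCII percentile strip; a minimal sketch under that assumption (the name, signature, and markers are guesses, not the original helper):

def xtile(arr, lo=0, hi=2, width=40, marks='123'):
    # Render a sample as a fixed-width ASCII strip with the quartiles marked.
    xs = sorted(float(x) for x in arr)

    def col(p):
        # value at percentile p, clamped into [lo, hi], mapped to a column
        v = max(lo, min(hi, xs[min(len(xs) - 1, int(p * len(xs)))]))
        return int((v - lo) / float(hi - lo) * (width - 1))

    strip = ['-'] * width
    for mark, p in zip(marks, (0.25, 0.50, 0.75)):
        strip[col(p)] = mark
    return ''.join(strip)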
Example #48
0
import stats

my_list = [4,1,5,7,6,8,9,10,8,3,3,8,12]


mean  = stats.mean(my_list)
print('The mean is: ' +  str(mean))

median = stats.median(my_list)
print('The median is: ' +  str(median))

value_range = stats.range(my_list)  # renamed so the builtin range is not shadowed
print('The range is: ' + str(value_range))

total = stats.sum(my_list)  # renamed so the builtin sum is not shadowed
print('The sum of all numbers is: ' + str(total))
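The stats module imported above is not the standard library statistics module, and its API is not shown. A minimal stand-in that would make the example run (assumed behavior only):

# stats.py -- hypothetical stand-in for the module the example imports
_builtin_sum = sum

def mean(xs):
    return _builtin_sum(xs) / float(len(xs))

def median(xs):
    xs = sorted(xs)
    mid = len(xs) // 2
    return xs[mid] if len(xs) % 2 else (xs[mid - 1] + xs[mid]) / 2.0

def range(xs):  # only ever accessed as stats.range, so the shadowing is contained
    return max(xs) - min(xs)

def sum(xs):    # only ever accessed as stats.sum
    return _builtin_sum(xs)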
Example #49
0
    def days():
        cur = get_db().cursor()
        # to store temp table and indices in memory
        sql = 'pragma temp_store = 2;'
        cur.execute(sql)
        macs = request.args.getlist('macs')

        if not macs:  # request.args.getlist returns an empty list, never None
            # return list of days with probes in db
            try:
                sql = 'select date from probemon'
                sql_args = ()
                cur.execute(sql, sql_args)
            except sqlite3.OperationalError as e:
                return jsonify({
                    'status': 'error',
                    'message': 'sqlite3 db is not accessible'
                }), 500

            days = set()
            for row in cur.fetchall():
                t = time.strftime('%Y-%m-%d', time.localtime(row[0]))
                days.add(t)
            days = sorted(list(days))
            missing = []
            last = datetime.strptime(days[-1], '%Y-%m-%d')
            day = datetime.strptime(days[0], '%Y-%m-%d')
            while day != last:
                d = day.strftime('%Y-%m-%d')
                if d not in days:
                    missing.append(d)
                day += timedelta(days=1)
            data = {'first': days[0], 'last': days[-1], 'missing': missing}
            return jsonify(data)
        else:
            # check if stats table is available
            try:
                cur.execute(
                    'select count(*) from sqlite_master where type=? and name=?',
                    ('table', 'stats'))
            except sqlite3.OperationalError as e:
                return jsonify({
                    'status': 'error',
                    'message': 'sqlite3 db is not accessible'
                }), 500
            if cur.fetchone()[0] == 1:
                # return day-by-day stats for macs
                params = ','.join(['?'] * len(macs))
                sql = f'''select mac.id, address from mac
                    inner join vendor on vendor.id=mac.vendor
                    where address in ({params});'''
                cur.execute(sql, macs)
                mac_ids = {}
                for row in cur.fetchall():
                    mac_ids[row[0]] = row[1]
                data = []
                for m in list(mac_ids.keys()):
                    md = []
                    ssids = set()
                    sql = 'select date, first_seen, last_seen, count, min, max, avg, med, ssids from stats where mac_id=? order by date;'
                    cur.execute(sql, (m, ))
                    for d, first, last, count, rmin, rmax, ravg, rmed, ssid in cur.fetchall():
                        first = time.mktime(
                            time.strptime(f'{d}T{first}',
                                          '%Y-%m-%dT%H:%M:%S')) * 1000
                        last = time.mktime(
                            time.strptime(f'{d}T{last}',
                                          '%Y-%m-%dT%H:%M:%S')) * 1000
                        md.append({
                            'day': d.replace('-', ''),
                            'count': count,
                            'last': last,
                            'first': first,
                            'min': rmin,
                            'max': rmax,
                            'avg': ravg,
                            'median': rmed
                        })
                        ssids = ssids.union(ssid.split(','))
                    ssids = sorted(list(ssids))
                    if '' in ssids:
                        ssids.remove('')
                    data.append({
                        'mac': mac_ids[m],
                        'days': md,
                        'ssids': ssids
                    })
                return jsonify(data)
            else:
                params = ','.join(['?'] * len(macs))
                sql = f'''select date,mac.address,rssi,ssid.name from probemon
                 inner join ssid on ssid.id=probemon.ssid
                 inner join mac on mac.id=probemon.mac
                 where mac.address in ({params})'''
                sql_args = macs
                cur.execute(sql, sql_args)
                # WARNING: this is copy-pasted from stats.py
                stats = {}
                for row in cur.fetchall():
                    if row[1] not in stats:
                        stats[row[1]] = {'ssids': set()}
                    stats[row[1]]['ssids'].add(row[3])
                    day = time.strftime('%Y%m%d', time.localtime(row[0]))
                    if day in stats[row[1]]:
                        smd = stats[row[1]][day]
                        smd['rssi'].append(row[2])
                        if row[0] > smd['last']:
                            smd['last'] = row[0]
                        if row[0] < smd['first']:
                            smd['first'] = row[0]
                    else:
                        stats[row[1]][day] = {
                            'rssi': [row[2]],
                            'first': row[0],
                            'last': row[0]
                        }

                data = []
                for mac in list(stats.keys()):
                    md = []
                    for d in sorted(stats[mac].keys()):
                        if d == 'ssids':
                            continue
                        rssi = stats[mac][d]['rssi']
                        md.append({
                            'day': d,
                            'count': len(rssi),
                            'last': int(stats[mac][d]['last'] * 1000),
                            'first': int(stats[mac][d]['first'] * 1000),
                            'min': min(rssi),
                            'max': max(rssi),
                            'avg': sum(rssi) // len(rssi),
                            'median': median(rssi)
                        })
                    ssids = list(stats[mac]['ssids'])
                    if '' in ssids:
                        ssids.remove('')
                    data.append({'mac': mac, 'days': md, 'ssids': ssids})
                return jsonify(data)
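A hypothetical client for the endpoint above; the /days route and host are assumptions, and only the query parameter and the response shape come from the code:

import requests

# repeated macs parameters match request.args.getlist('macs') on the server
resp = requests.get('http://localhost:5000/days',
                    params={'macs': ['aa:bb:cc:dd:ee:ff']})
for entry in resp.json():
    for day in entry['days']:
        print(entry['mac'], day['day'], day['count'], day['median'])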
Example #50
0
def test_median():
    obs = median([1, 2, 3, 4])
    exp = 2.5
    assert_equal(obs, exp)
Example #51
0
def main():
    usage = 'usage: %prog [options] <bam> <ref_gtf>'
    parser = OptionParser(usage)

    # IO options
    parser.add_option('-o', dest='out_dir', default='uniform', help='Output directory [Default: %default]')

    # window options
    parser.add_option('-w', dest='window_size', type='int', default=25, help='Window size for counting [Default: %default]')
    parser.add_option('-i', '--ignore', dest='ignore_gff', help='Ignore reads overlapping troublesome regions in the given GFF file')
    parser.add_option('-u', '--unstranded', dest='unstranded', action='store_true', default=False, help='Sequencing is unstranded [Default: %default]')

    # cufflinks options
    parser.add_option('--cuff_done', dest='cuff_done', action='store_true', default=False, help='The Cufflinks run to estimate the model parameters is already done [Default: %default]')
    parser.add_option('-t', dest='threads', type='int', default=2, help='Number of threads to use [Default: %default]')

    # debug options
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Verbose output [Default: %default]')
    parser.add_option('-g', '--gene', dest='gene_only', help='Call peaks on the specified gene only')
    #parser.add_option('--print_windows', dest='print_windows', default=False, action='store_true', help='Print statistics for all windows [Default: %default]')

    (options,args) = parser.parse_args()

    if len(args) != 2:
        parser.error(usage)
    else:
        bam = args[0]
        ref_gtf = args[1]

    clip_peaks.out_dir = options.out_dir

    if not os.path.isdir(clip_peaks.out_dir):
        os.mkdir(clip_peaks.out_dir)

    ############################################
    # parameterize
    ############################################
    if not options.cuff_done:
        # make a new gtf w/ unspliced RNAs
        update_ref_gtf = clip_peaks.prerna_gtf(ref_gtf)

        subprocess.call('cufflinks -o %s -p %d -G %s %s' % (clip_peaks.out_dir, options.threads, update_ref_gtf, bam), shell=True)

    # store transcripts
    transcripts = clip_peaks.read_genes('%s/transcripts.gtf'%clip_peaks.out_dir, key_id='transcript_id')

    # merge overlapping genes
    g2t_merge, antisense_clusters = clip_peaks.merged_g2t('%s/transcripts.gtf'%clip_peaks.out_dir, options.unstranded)

    if options.unstranded:
        # alter strands
        clip_peaks.ambiguate_strands(transcripts, g2t_merge, antisense_clusters)

    # set transcript FPKMs
    clip_peaks.set_transcript_fpkms(transcripts, clip_peaks.out_dir, missing_fpkm=0)

    # possibly limit genes to examine
    if options.gene_only:
        gene_ids = []
        for gids in g2t_merge.keys():
            if options.gene_only in gids.split(','):
                gene_ids.append(gids)
        if len(gene_ids) == 0:
            print >> sys.stderr, 'gene_id %s not found' % options.gene_only
            exit(1)
    else:
        gene_ids = g2t_merge.keys()


    ############################################
    # filter BAM
    ############################################
    if options.ignore_gff:
        bam_ignore_fd, bam_ignore_file = tempfile.mkstemp(dir='%s/research/scratch/temp' % os.environ['HOME'])
        subprocess.call('intersectBed -v -abam %s -b %s > %s' % (bam, options.ignore_gff, bam_ignore_file), shell=True)
        bam = bam_ignore_file

    ############################################
    # process genes
    ############################################
    # index
    subprocess.call('samtools index %s' % bam, shell=True)

    # initialize stats
    table_out = open('%s/uniformity_table.txt' % clip_peaks.out_dir, 'w')
    id_list = []
    fpkm_list = []

    # open bam
    bam_in = pysam.Samfile(bam, 'rb')
    
    # for each gene
    for gene_id in gene_ids:
        # make a more focused transcript hash for this gene
        gene_transcripts = {}
        for tid in g2t_merge[gene_id]:
            gene_transcripts[tid] = transcripts[tid]

        # obtain basic gene attributes
        (gchrom, gstrand, gstart, gend) = clip_peaks.gene_attrs(gene_transcripts)

        # initialize window counts
        transcript_isoform_counts = {}
        for tid in gene_transcripts:
            transcript_isoform_counts[tid] = []

        # choose a single event position and weight the reads
        read_pos_weights = clip_peaks.position_reads(bam_in, gchrom, gstart, gend, gstrand, mapq_zero=True)

        # process read alignments
        for (pos, weight, mm) in read_pos_weights:
            # map pos to isoforms
            iso_pos = {}
            for tid in gene_transcripts:
                iso_pos[tid] = isoform_position(gene_transcripts[tid], pos)

            # sum fpkms for hit isoforms
            fpkm_sum = sum([gene_transcripts[tid].fpkm for tid in gene_transcripts if iso_pos[tid] is not None])

            if fpkm_sum <= 0:
                pass
                #print >> sys.stderr, 'No FPKM for %s at %d' % (gene_id,pos)
            else:
                # distribute read to isoform counts
                for tid in gene_transcripts:
                    if iso_pos[tid] is not None:
                        win_i = int(iso_pos[tid] / options.window_size)
                        while win_i >= len(transcript_isoform_counts[tid]):
                            transcript_isoform_counts[tid].append(0)
                        transcript_isoform_counts[tid][win_i] += weight*gene_transcripts[tid].fpkm/fpkm_sum

        # compute window stats
        for tid in gene_transcripts:
            if gene_transcripts[tid].fpkm > 1 and len(transcript_isoform_counts[tid]) > 5:
                u, sd = stats.mean_sd(transcript_isoform_counts[tid][:-1])
                if u > 0:
                    id_list.append(sd*sd/u)
                    fpkm_list.append(gene_transcripts[tid].fpkm)

                    cols = (tid, gene_transcripts[tid].fpkm, len(transcript_isoform_counts[tid])-1, u, sd, id_list[-1])
                    print >> table_out, '%-20s  %8.2f  %6d  %7.2f  %7.2f  %5.3f' % cols        

    bam_in.close()
    table_out.close()

    ############################################
    # summary stats
    ############################################
    median = stats.median(id_list)
    mean = stats.mean(id_list)

    fpkm_cv_sum = sum([id_list[i]*fpkm_list[i] for i in range(len(id_list))])
    fpkm_sum = sum(fpkm_list)
    fpkm_mean = fpkm_cv_sum / fpkm_sum

    logfpkm_cv_sum = sum([id_list[i]*math.log(fpkm_list[i]+1,2) for i in range(len(id_list))])
    logfpkm_sum = sum([math.log(f+1,2) for f in fpkm_list])
    logfpkm_mean = logfpkm_cv_sum / logfpkm_sum

    # print
    print 'Median:                %7.4f' % median
    print 'Mean:                  %7.4f' % mean
    print 'FPKM-weighted mean:    %7.4f' % fpkm_mean
    print 'logFPKM-weighted mean: %7.4f' % logfpkm_mean

    # clean cufflinks output
    if not options.cuff_done:
        os.remove(update_ref_gtf)
        os.remove('%s/skipped.gtf' % clip_peaks.out_dir)
        os.remove('%s/genes.fpkm_tracking' % clip_peaks.out_dir)

    if options.ignore_gff:
        os.close(bam_ignore_fd)
        os.remove(bam_ignore_file)
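stats.mean_sd, called in the window-stats loop above, is not shown. As used (u, sd unpacked, then sd*sd/u taken as an index of dispersion) it evidently returns the mean and standard deviation of the window counts; a sketch under that assumption:

import math

def mean_sd(xs):
    # population mean and standard deviation of a list of counts (assumed semantics)
    u = sum(xs) / float(len(xs))
    sd = math.sqrt(sum((x - u) ** 2 for x in xs) / float(len(xs)))
    return u, sd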
Example #52
0
def main():
    options = UploadOptions()
    try:
        options.parseOptions(sys.argv[1:])
    except UsageError, e:
        print e
        return 1

    fname, benchmark, param, statistic = options['statistic'].split(',')
    stat, samples = select(
        pickle.load(file(fname)), benchmark, param, statistic)

    d = upload(
        reactor,
        url=options['url'],
        project=options['project'],
        revision=options['revision'],
        revision_date=options['revision-date'],
        benchmark='%s-%s-%s' % (benchmark, param, statistic),
        executable='%s-backend' % (options['backend'],),
        environment=options['environment'],
        result_value=median(samples),
        result_date=datetime.now(),
        std_dev=mad(samples),  # Not really!
        max_value=max(samples),
        min_value=min(samples))
    d.addErrback(err, "Upload failed")
    reactor.callWhenRunning(d.addCallback, lambda ign: reactor.stop())
    reactor.run()
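mad() is not defined in the snippet; given the author's own caveat ("Not really!") it presumably computes the median absolute deviation rather than a true standard deviation. A sketch using the standard library median:

from statistics import median

def mad(samples):
    # median absolute deviation -- a robust spread estimate, not a stdev
    m = median(samples)
    return median([abs(x - m) for x in samples])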
Example #53
0
def test_median():
    obs = median([1,2,3])
    exp = 2.0
    assert_equal(obs, exp)
Example #54
0
 def check_basic(self):
     data1 = [1,3,5,2,3,1,19,-10,2,4.0]
     data2 = [3,5,1,10,23,-10,3,-2,6,8,15]
     assert_almost_equal(stats.median(data1),2.5)
     assert_almost_equal(stats.median(data2),5)