コード例 #1
0
ファイル: testStats.py プロジェクト: larsyencken/code-library
 def testMean(self):
     """
     Verify that stats.mean reproduces the expected mean of both fixtures.

     Each comparison uses assertAlmostEqual with 5 decimal places.
     """
     observedA = stats.mean(self.dataA)
     self.assertAlmostEqual(observedA, self.meanA, places=5)

     observedB = stats.mean(self.dataB)
     self.assertAlmostEqual(observedB, self.meanB, places=5)
コード例 #2
0
ファイル: regression.py プロジェクト: mjamesruggiero/tripp
def least_squares_fit(x, y):
    """Fit the line y = alpha + beta * x to the training values.

    beta is the correlation of x and y scaled by the ratio of their
    standard deviations; alpha is chosen so the line passes through the
    point (mean(x), mean(y)).  Returns the pair (alpha, beta).
    """
    r_xy = stats.correlation(x, y)
    spread_y = stats.standard_deviation(y)
    spread_x = stats.standard_deviation(x)
    beta = r_xy * spread_y / spread_x

    mean_y = stats.mean(y)
    alpha = mean_y - beta * stats.mean(x)
    return alpha, beta
コード例 #3
0
ファイル: snap_xaui.py プロジェクト: asiaa/roach2Test20111216
def sync_check():
    """Compare the mcnt value captured for every FPGA against the modal mean.

    Reads the module-level ``fpgas``, ``bram_oob``, ``bram_dmp`` and
    ``servers``.  For each FPGA index the first header position (value 1 in
    the 'hdr' list) selects a 4-byte slice from both the MSB and LSB BRAM
    dumps; the two slices are unpacked together as one big-endian 64-bit
    word and its low 16 bits are discarded to obtain the mcnt.

    Returns a dict keyed by FPGA index (each holding 'mcnt' and
    'sync_status') plus the summary keys 'mean', 'median', 'mode' and
    'modalmean'.  Prints an error and calls exit() if no header is found.
    """
    tolerance = 4
    report = {}
    counts = []

    for idx, _fpga in enumerate(fpgas):
        report[idx] = {}
        try:
            header_at = bram_oob[idx]['hdr'].index(1)
        except:
            print('ERR: No headers found in BRAM. Are the F engines properly connected?')
            exit()

        start = 4 * header_at
        stop = start + 4
        msb_word = bram_dmp['bram_msb'][idx]['data'][start:stop]
        lsb_word = bram_dmp['bram_lsb'][idx]['data'][start:stop]
        packet = struct.unpack('>Q', msb_word + lsb_word)[0]

        # Mask off the low 16 bits, then shift them out.
        count = (packet & ((2**64) - (2**16))) >> 16
        report[idx]['mcnt'] = count
        counts.append(count)

    report['mean'] = stats.mean(counts)
    report['median'] = stats.median(counts)
    report['mode'] = stats.mode(counts)
    report['modalmean'] = stats.mean(report['mode'][1])

    upper = report['modalmean'] + tolerance
    lower = report['modalmean'] - tolerance
    for idx, _fpga in enumerate(fpgas):
        count = report[idx]['mcnt']
        out_of_range = count > upper or count < lower
        if out_of_range:
            print('%s OUT OF SYNC!!' % servers[idx])
            report[idx]['sync_status'] = 'FAIL with error of %i' % (count - report['modalmean'])
        else:
            report[idx]['sync_status'] = 'PASS'

    return report
コード例 #4
0
def least_squares_fit(x, y):
    """Return (alpha, beta) for the least-squares line y = alpha + beta * x.

    The slope beta is correlation(x, y) * standard_deviation(y) /
    standard_deviation(x); the intercept alpha is mean(y) - beta * mean(x).
    """
    correlation_xy = stats.correlation(x, y)
    sigma_y = stats.standard_deviation(y)
    sigma_x = stats.standard_deviation(x)
    beta = correlation_xy * sigma_y / sigma_x

    y_bar = stats.mean(y)
    alpha = y_bar - beta * stats.mean(x)
    return alpha, beta
コード例 #5
0
ファイル: te_diff.py プロジェクト: radaniba/utility
def compute_stats(te_diffs, gene_diffs, plot_dir):
    """Compare values with and without a TE for every (TE, sample pair).

    te_diffs maps (rep, fam, orient) keys to per-sample-pair collections;
    gene_diffs maps the same sample-pair keys to collections.  Both are used
    through ``len``, subtraction and ``.elements()`` (presumably
    collections.Counter objects -- confirm against the caller).

    Sample pairs with fewer than 10 entries in te_diffs are skipped.  For
    the rest, a formatted table line and the Mann-Whitney p-value are
    collected, and a CDF plot is written under plot_dir for a fixed list of
    families when rep is '*'.

    Returns (table_lines, pvals).
    """
    plotted_families = ('*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                        'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                        'LTR/ERVK', 'DNA/TcMar-Tigger')
    row_format = '%-17s %-17s  %1s  %-10s %-10s %6d %9.2f %9.2f %8.2f %10.2e'

    pvals = []
    table_lines = []

    for te_or in te_diffs:
        rep, fam, orient = te_or
        by_sample = te_diffs[te_or]

        for sample_key in by_sample:
            sample1, sample2 = sample_key
            te_counts = by_sample[sample_key]

            # Not enough data for a meaningful comparison.
            if len(te_counts) < 10:
                continue

            wo_te = list((gene_diffs[sample_key] - te_counts).elements())
            w_te = list(te_counts.elements())

            wo_mean = stats.mean(wo_te)
            w_mean = stats.mean(w_te)

            z, p = stats.mannwhitneyu(w_te, wo_te)

            table_lines.append(row_format % (
                rep, fam, orient, sample1, sample2, len(w_te), w_mean, wo_mean, z, p))
            pvals.append(p)

            if rep == '*' and fam in plotted_families:
                out_pdf = '%s/%s_%s_%s_%s-%s.pdf' % (
                    plot_dir, rep.replace('/', '-'), fam.replace('/', '-'),
                    orient, sample1, sample2)
                cdf_plot(te_or, w_te, wo_te, out_pdf)

    return table_lines, pvals
コード例 #6
0
    def fit(self, X, y):
        """Estimate the coefficients of a simple linear regression of y on X.

        The slope is sum((X - mean_x) * (y - mean_y)) / sum((X - mean_x)**2)
        and the intercept is mean_y - slope * mean_x.  Both are stored on the
        class (not the instance) as ``b1`` and ``b0``; nothing is returned.
        """
        n = len(X)  # not used afterwards; kept from the original
        # Means of X and y.
        mean_x = st.mean(X)
        mean_y = st.mean(y)

        dev_x = X - mean_x
        owner = self.__class__
        owner.b1 = np.sum(dev_x * (y - mean_y)) / np.sum(dev_x ** 2)
        owner.b0 = mean_y - (owner.b1 * mean_x)
コード例 #7
0
ファイル: sexmachine.py プロジェクト: eddienko/SamPy
  def statsex(self, objects):
    """
    Summarise a source list as a dictionary of formatted statistics.

    Each entry of objects is read positionally: index 7 is FWHM, 6 is
    ELLIPTICITY, 5 is THETA_IMAGE and 9 is the 'Stella-like' value; all
    four are converted to float.

    Returns 0 when objects is empty.  Otherwise returns a dict of strings
    with keys N, medianFWHM, meanFWHM, modeFWHM, stdevFWHM, medianEL,
    meanEL and stdevEL.  A standard deviation that raises
    ZeroDivisionError is reported as '0.00'.
    """
    import stats, pstat

    # Nothing to summarise
    if len(objects) == 0:
      return 0

    def column(position):
      # Float-convert one positional field of every object
      return [ float(entry[position]) for entry in objects ]

    def two_decimals(value):
      return "%.2f" % value

    def spread(values):
      # stats.stdev can divide by zero; report that case as zero spread
      try:
        return two_decimals(stats.stdev(values))
      except ZeroDivisionError:
        return '0.00'

    summary = {}
    summary['N'] = str(len(objects))

    fwhm = column(7)
    el = column(6)
    # pa and stella only feed histograms that are currently disabled
    pa = column(5)
    stella = column(9)

    # Histogram of FWHM: 40 bins over [0, 40], i.e. 1 pixel per bin
    fwhm_hist = stats.histogram(fwhm,40,[0,40])[0]

    summary['medianFWHM'] = two_decimals(stats.median(fwhm))
    summary['meanFWHM'] = two_decimals(stats.mean(fwhm))
    # Mode is the centre of the most populated bin
    summary['modeFWHM'] = two_decimals(float(fwhm_hist.index(max(fwhm_hist))+0.5))
    summary['stdevFWHM'] = spread(fwhm)

    summary['medianEL'] = two_decimals(stats.median(el))
    summary['meanEL'] = two_decimals(stats.mean(el))
    summary['stdevEL'] = spread(el)

    return summary
コード例 #8
0
  def statsex(self, objects):
    """
    Build a dictionary of formatted statistics for a source list.

    Fields are taken by position from each object and converted to float:
    7 (FWHM), 6 (ELLIPTICITY), 5 (THETA_IMAGE) and 9 ('Stella-like').

    Returns 0 for an empty list, otherwise a dict of strings keyed by
    N, medianFWHM, meanFWHM, modeFWHM, stdevFWHM, medianEL, meanEL and
    stdevEL.  When stats.stdev raises ZeroDivisionError the corresponding
    entry is '0.00'.
    """
    import stats, pstat

    # Empty input: no statistics to compute
    if len(objects) == 0:
      return 0

    def field(index):
      # One positional field of every object, as floats
      return [ float(item[index]) for item in objects ]

    def fmt(number):
      return "%.2f" % number

    def stdev_or_zero(sample):
      # Fall back to zero spread when stats.stdev divides by zero
      try:
        return fmt(stats.stdev(sample))
      except ZeroDivisionError:
        return '0.00'

    result = {}
    result['N'] = str(len(objects))

    fwhm = field(7)
    el = field(6)
    # Converted but unused: their histograms are disabled
    pa = field(5)
    stella = field(9)

    # FWHM histogram with 40 bins over [0, 40]
    counts = stats.histogram(fwhm,40,[0,40])[0]

    result['medianFWHM'] = fmt(stats.median(fwhm))
    result['meanFWHM'] = fmt(stats.mean(fwhm))
    # Centre of the fullest bin
    result['modeFWHM'] = fmt(float(counts.index(max(counts))+0.5))
    result['stdevFWHM'] = stdev_or_zero(fwhm)

    result['medianEL'] = fmt(stats.median(el))
    result['meanEL'] = fmt(stats.mean(el))
    result['stdevEL'] = stdev_or_zero(el)

    return result
コード例 #9
0
ファイル: testStats.py プロジェクト: larsyencken/code-library
 def testOnTuples(self):
     """
     Confirm stats.mean and stats.stddev give the expected results when the
     fixture data is passed as tuples.
     """
     tupleA = tuple(self.dataA)
     tupleB = tuple(self.dataB)

     self.assertAlmostEqual(stats.mean(tupleA), self.meanA, places=5)
     self.assertAlmostEqual(stats.mean(tupleB), self.meanB, places=5)
     self.assertAlmostEqual(stats.stddev(tupleA), self.stddevA, places=5)
     self.assertAlmostEqual(stats.stddev(tupleB), self.stddevB, places=5)
コード例 #10
0
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Compute the least-squares line y_i = alpha * x_i + beta for the paired data xs, ys.

    In this function alpha is the slope and beta is the intercept; the result is the tuple (alpha, beta).
    """
    r = correlation(xs, ys)
    sigma_y = standard_deviation(ys)
    sigma_x = standard_deviation(xs)
    alpha = r * sigma_y / sigma_x

    y_bar = mean(ys)
    beta = y_bar - alpha * mean(xs)
    return alpha, beta
コード例 #11
0
ファイル: test_stats.py プロジェクト: klimente/homework
 def test_validator_working_correct_negativa(self):
     """Invalid arguments must raise the expected exception type and message."""
     # Non-list arguments are rejected with TypeError.
     with self.assertRaises(TypeError) as type_failure:
         stats.missing_data(2, 3.5)
     type_message = type_failure.exception.args[0]
     self.assertEqual(type_message, "input type must be list")

     # A non-numeric element inside the first list is rejected with ValueError.
     with self.assertRaises(ValueError) as value_failure:
         stats.mean([1, 2, (2, 3.5)], [3.5, 2, 1])
     value_message = value_failure.exception.args[0]
     self.assertEqual(value_message, "value in x must be numeric")
コード例 #12
0
def findLSRline(dp):
    """Compute least-squares regression coefficients for a dotplot.

    dp is iterated as a sequence of points with x at index 0 and y at
    index 1.  If checkDPFormat(dp) is falsy an error is printed and the
    string "invalid dotplot format" is returned.

    Otherwise the slope ``b = r * sy / sx`` and intercept
    ``a = mean(y) - b * mean(x)`` are computed.

    NOTE(review): the visible code ends after computing ``a`` and ``b``
    without returning them, so the function falls through and returns
    None -- confirm whether a return statement is missing.
    """
    # Python has no '!' operator; negation is spelled 'not'.
    if not checkDPFormat(dp):
        print("ERROR: invalid dotplot format in findLSRline")
        return "invalid dotplot format"
    r = CorrCoef(dp)
    x = [i[0] for i in dp]
    sx = stats.stdDev(x)
    y = [i[1] for i in dp]
    sy = stats.stdDev(y)
    b = (r * sy) / sx
    a = stats.mean(y) - b * stats.mean(x)
コード例 #13
0
ファイル: multivar.py プロジェクト: bmcculley/pycalcstats
def corr(xdata, ydata):
    """corr(xdata, ydata) -> float

    Return the sample Pearson's Correlation Coefficient of (x,y) data.

    Both xdata and ydata must be sequences of values of the same length
    (``len`` is taken of both, and the lengths are asserted equal).

    >>> corr([0.1, 0.5, 1.2, 1.7], [2.3, 2.7, 3.1, 2.9])
    ... #doctest: +ELLIPSIS
    0.827429009335...

    The Pearson correlation is +1 in the case of a perfect positive
    correlation (i.e. an increasing linear relationship), -1 in the case of
    a perfect anti-correlation (i.e. a decreasing linear relationship), and
    some value between -1 and 1 in all other cases, indicating the degree
    of linear dependence between the variables.

    >>> xdata = [1, 2, 3, 4, 5, 6]
    >>> ydata = [2*x for x in xdata]  # Perfect correlation.
    >>> corr(xdata, ydata)
    1.0
    >>> corr(xdata, [5-y for y in ydata])  # Perfect anti-correlation.
    -1.0

    If there are not at least two data points, or if either all the x values
    or all the y values are equal, StatsError is raised.
    """
    n = len(xdata)
    assert n == len(ydata)
    if n < 2:
        raise StatsError(
            'correlation requires at least two data points, got %d' % n)
    # First pass is to determine the means.
    mx = stats.mean(xdata)
    my = stats.mean(ydata)
    # Second pass to determine the standard deviations.
    sx = stats.stdev(xdata, mx)
    sy = stats.stdev(ydata, my)
    if sx == 0:
        raise StatsError('all x values are equal')
    if sy == 0:
        raise StatsError('all y values are equal')
    # Third pass to calculate the correlation coefficient.
    ap = add_partial
    total = []
    for x, y in zip(xdata, ydata):
        term = ((x-mx)/sx) * ((y-my)/sy)
        ap(term, total)
    r = math.fsum(total)/(n-1)
    # Sanity check on the result.  The previous upper bound compared r with
    # itself and so never checked anything; a small slack is allowed because
    # floating-point rounding can push r marginally past +/-1.
    slack = 1e-9
    assert -1 - slack <= r <= 1 + slack, 'correlation out of range: %r' % r
    return r
コード例 #14
0
def muestroEspeYVar(valores, esperanzaTeorica, alfa):
    """Print sample and theoretical statistics for the values in valores.

    For each of mean, variance and standard deviation, the sample estimate
    is printed followed by the theoretical value (esperanzaTeorica for the
    mean, calcularVarianza(alfa) for the variance).  Finally the sample
    estimate of alfa, 1 / mean(valores), is printed next to alfa itself.
    """
    media_muestral = stats.mean(valores)
    print("El calculo de la Esperanza en la muestra es:", media_muestral)
    print("Teoricamente la Esperanza es:", esperanzaTeorica)

    varianza_muestral = numpy.var(valores)
    print("El calculo de la Varianza en la muestra es:", varianza_muestral)
    varianza_teorica = calcularVarianza(alfa)
    print("Teoricamente la Varianza es:", varianza_teorica)

    desvio_muestral = math.sqrt(numpy.var(valores))
    print("El calculo del Desvio Estandar en la muestra es:",
          desvio_muestral)
    desvio_teorico = math.sqrt(calcularVarianza(alfa))
    print("Teoricamente es:", desvio_teorico)

    alfa_muestral = 1 / (stats.mean(valores))
    print("El parametro alfa de la muestra es :", alfa_muestral)
    print("Teoricamente es :", alfa)
    print("\n")
コード例 #15
0
ファイル: tree.py プロジェクト: littleBroGitHub/treecut
 def get_modules(self, cutoff=.05):
     """Collect the elements whose val is below lo_min, hi_min and cutoff.

     Iterates over self.  An element whose ``val`` is less than the minimum
     of its ``lo_min``, its ``hi_min`` and ``cutoff`` is labelled through
     its ``desc`` attribute and added to the result; for any other element
     the search recurses into that element's own get_modules.

     For the "continuous" datatype the label is "lo" when mean(e.a) is
     below mean(e.b) and "hi" otherwise; for other datatypes it is
     "enriched".
     """
     selected = []
     for node in self:
         threshold = min(node.lo_min, node.hi_min, cutoff)
         if not node.val < threshold:
             # Not selected itself: look inside it instead.
             selected += node.get_modules(cutoff=cutoff)
             continue

         if self.datatype == "continuous":
             node.desc = "lo" if mean(node.a) < mean(node.b) else "hi"
         else:
             node.desc = "enriched"
         selected.append(node)
     return selected
コード例 #16
0
ファイル: collect.py プロジェクト: pantuza/cpython-modules
def collect_mean(input_list):
    """Time py_mean and mean on the same input and save both durations.

    Each call is bracketed by two clock() readings; the two elapsed times
    are passed to format_times and the result is stored with save_times
    under logs['mean'].
    """
    first_started = clock()
    py_mean(input_list)
    first_elapsed = clock() - first_started

    second_started = clock()
    mean(input_list)
    second_elapsed = clock() - second_started

    save_times(format_times(first_elapsed, second_elapsed), logs['mean'])
コード例 #17
0
ファイル: multivar.py プロジェクト: bmcculley/pycalcstats
def _SP(xdata, mx, ydata, my):
    """Return the sum of products of deviations of paired x and y values.

    Helper for calculating covariance directly.  When mx (or my) is None
    the corresponding data is first converted with as_sequence and its
    mean is computed with stats.mean, so the data is traversed twice.
    """
    if mx is None:
        # Need a re-iterable sequence: one pass for the mean, one for the sum.
        xdata = as_sequence(xdata)
        mx = stats.mean(xdata)
    if my is None:
        ydata = as_sequence(ydata)
        my = stats.mean(ydata)

    def deviation_product(pair):
        x, y = pair
        return (x-mx)*(y-my)

    return _generalised_sum(zip(xdata, ydata), deviation_product)
コード例 #18
0
ファイル: compare.py プロジェクト: MFahey0706/LocalMisc
def corr(x, y):
    """Return the correlation coefficient of two equal-length sequences.

    Computed as (sum(x_i * y_i) - N * mean(x) * mean(y)) divided by
    ((N - 1) * stdev(x) * stdev(y)).

    Raises Exception if the sequences differ in length.
    """
    N = len(x)
    if len(y) != N:
        raise Exception(
            "Sequences must be of the same length. X length: {0} ; Y length {1}"
            .format(N, len(y)))

    # Accumulated by hand, in order, to keep the floating-point result
    # independent of how the built-in sum() treats floats.
    cross_total = 0
    for position, xi in enumerate(x):
        cross_total += xi * y[position]

    numerator = cross_total - N * stats.mean(x) * stats.mean(y)
    denominator = (N - 1) * stats.stdev(x) * stats.stdev(y)
    return numerator / denominator
コード例 #19
0
ファイル: test_stats.py プロジェクト: mbentz80/jzigbeercp
 def check_basic(self):
     """Compare stats.mean with a hand-accumulated mean.

     Checked for one list of ints and one list starting with a float, to
     11 decimal places.
     """
     def accumulated_mean(values):
         # Sum of value / count, added one element at a time.
         count = float(len(values))
         running = 0.0
         for value in values:
             running += value / count
         return running

     a = [3,4,5,10,-3,-5,6]
     af = [3.,4,5,10,-3,-5,-6]

     assert_almost_equal(stats.mean(a), accumulated_mean(a), 11)
     assert_almost_equal(stats.mean(af), accumulated_mean(af), 11)
コード例 #20
0
def _getRatingStarsAverages():
    """Compute the mean ratings and stars for Android and iOS.

    Queries collection_ios for documents whose 'android_ratings_float' and
    'ios_ratings_float' are both >= 1, prints the count of such documents,
    and stores the four means in the module-level globals
    android_ratings_average, android_stars_average, ios_ratings_average and
    ios_stars_average.  The four means are printed at the end.
    """
    global android_ratings_average
    global android_stars_average
    global ios_ratings_average
    global ios_stars_average

    def rated_on_both():
        # A fresh filter document for each query.
        return {
            'android_ratings_float': {
                '$gte': 1
            },
            'ios_ratings_float': {
                '$gte': 1
            }
        }

    print(collection_ios.count(rated_on_both(), no_cursor_timeout=True))

    android_ratings, ios_ratings = [], []
    android_stars, ios_stars = [], []
    for app in collection_ios.find(rated_on_both(), no_cursor_timeout=True):
        android_ratings.append(app['android_ratings_float'])
        android_stars.append(app['android_stars_float'])
        ios_ratings.append(app['ios_ratings_float'])
        ios_stars.append(app['ios_stars_float'])

    android_ratings_average = stats.mean(android_ratings)
    android_stars_average = stats.mean(android_stars)

    ios_ratings_average = stats.mean(ios_ratings)
    ios_stars_average = stats.mean(ios_stars)

    print("android")
    print(android_ratings_average)
    print(android_stars_average)

    print("ios")
    print(ios_ratings_average)
    print(ios_stars_average)
コード例 #21
0
ファイル: test_stats.py プロジェクト: mbentz80/jzigbeercp
 def check_2d(self):
     """Compare stats.mean on a 3x3 nested list with hand-accumulated means.

     NOTE(review): the results of both allclose() calls are discarded, so
     this check cannot fail on a numerical mismatch as written.  The second
     call also passes axis=0 while its reference is accumulated over
     columns -- confirm the intended axis before adding an assertion.
     """
     rows = [[1.0, 2.0, 3.0],
             [2.0, 4.0, 6.0],
             [8.0, 12.0, 7.0]]
     grid = array(rows,'d')
     n_rows, n_cols = (3,3)

     # Accumulate each row divided by the number of rows.
     over_rows = zeros(n_cols,'d')
     for r in range(n_rows):
         over_rows += grid[r,:] / n_rows
     allclose(stats.mean(rows), over_rows, rtol=1e-13, atol=1e-13)

     # Accumulate each column divided by the number of columns.
     over_cols = zeros(n_rows,'d')
     for c in range(n_cols):
         over_cols += grid[:,c] / n_cols
     allclose(stats.mean(rows,axis=0), over_cols, rtol=1e-13, atol=1e-13)
コード例 #22
0
 def parameter_cedo_meanmean(self, TTL):
     """ Deprecated: always raises Exception before doing any work.

     The code after the raise is unreachable.  It would average the mean
     IMT of every non-empty list in self._all_im and pass that value, or
     sys.maxsize, to self.apply_ndp together with TTL.
     """
     raise Exception() #deprecated

     logger.debug("Estimating CEDO mean of means parameter.")

     per_pair_means = [mean(l) for timestamp, l in self._all_im.values() if l]

     # and/or form kept on purpose: a mean of 0 also yields sys.maxsize.
     m = per_pair_means and mean(per_pair_means) or sys.maxsize
     return self.apply_ndp(TTL, m)
コード例 #23
0
def _getCollection():
    """Store Android and iOS success percentages on every document.

    Prints the module-level platform averages, then combines them into one
    ratings average and one stars average with stats.mean.  For every
    document in collection_ios with both 'android_ratings_float' and
    'ios_ratings_float' >= 1, _sucess is evaluated for each platform,
    converted to a percentage of 5, and written back to the document with
    the same 'android_app_id' as 'android_success_average' and
    'ios_success_average'.
    """
    print("Sanity Check")
    print("android")
    print(android_ratings_average)
    print(android_stars_average)

    print("ios")
    print(ios_ratings_average)
    print(ios_stars_average)

    ratings_combined_average = stats.mean(
        [android_ratings_average, ios_ratings_average])
    stars_combined_average = stats.mean(
        [android_stars_average, ios_stars_average])

    print("average:")
    print(ratings_combined_average)
    print(stars_combined_average)

    def success_percent(rating, star):
        # _sucess result expressed as a percentage of 5.
        score = _sucess(rating, star, ratings_combined_average,
                        stars_combined_average)
        return (float(score) / 5) * 100

    rated_on_both = {
        'android_ratings_float': {
            '$gte': 1
        },
        'ios_ratings_float': {
            '$gte': 1
        }
    }
    for post in collection_ios.find(rated_on_both, no_cursor_timeout=True):
        android_app_id = post['android_app_id']
        android_app_rating = post['android_ratings_float']
        android_app_star = post['android_stars_float']

        ios_app_rating = post['ios_ratings_float']
        ios_app_star = post['ios_stars_float']

        successAndroid = success_percent(android_app_rating, android_app_star)
        successIos = success_percent(ios_app_rating, ios_app_star)

        collection_ios.find_one_and_update(
            {"android_app_id": android_app_id}, {
                '$set': {
                    'android_success_average': successAndroid,
                    'ios_success_average': successIos
                }
            })
def calculateDividingLine(gestures, maybeGestures, nonGestures):
	numFolds = min(TESTING_FOLDS, len(gestures))
	
	allGestureDistances = []
	allNonGestureDistances = []
	
	for foldNum in range(numFolds):	
		trainingGestures = [gesture for i, gesture in enumerate(gestures) if i % numFolds != foldNum]
		testingGestures = [localTimeGestures for i, localTimeGestures in enumerate(maybeGestures) if i % numFolds == foldNum]
		
		#print 'train, test #s: ', len(trainingGestures), len(testingGestures)
		
		#make a distance calculator based on the subset of hte training data
		distanceCalculator = gestureDistanceCalculator.GestureDistanceCalculator(trainingGestures)
		
		#each localTimeGestures is a list of the closest times to when a gesture was identified in training
		#since the output can be triggered at slightly different times, we should look for a minimum near where
		#the gesture is known to have happened, compared to the training gestures
		gestureDistances = []
		#print testingGestures
		for localTimeGestureSet in testingGestures:
			
			closestDistance = min(map(distanceCalculator.getDistance, localTimeGestureSet))
			gestureDistances.append(closestDistance)
		
		#gestureDistances = map(distanceCalculator.getDistance, testingGestures)
		#print gestureDistances
		nonGestureDistances = map(distanceCalculator.getDistance, nonGestures)
		#print gestureDistances
		
		allGestureDistances += gestureDistances
		allNonGestureDistances += nonGestureDistances
		#break
		
	#print len(allGestureDistances), len(allNonGestureDistances)
	print 'means: ', stats.mean(allGestureDistances), stats.mean(allNonGestureDistances)
	
	print 'std devs: ', stats.stdDev(allGestureDistances), stats.stdDev(allNonGestureDistances)
	
	meanGesture = stats.mean(allGestureDistances)
	meanNon = stats.mean(allNonGestureDistances)
	
	devGesture = stats.stdDev(allGestureDistances)
	devNon = stats.stdDev(allNonGestureDistances)
	
	line = (meanGesture * devNon + meanNon * devGesture) / ( devGesture + devNon)
	
	#print line
	return line
コード例 #25
0
ファイル: views.py プロジェクト: ChrisCWen/transportal
def liver(request):
    """Render 'liver.html' with the liver expression tables.

    Builds and passes to the template:
    - 'expressionNishi': the naturally sorted 'Nishimura' expression rows;
    - 'expressionPMT': one row per consecutive group of three 'PMT Sample'
      values, holding the transporter, the three values, their mean and
      their standard deviation;
    - 'expressionBiotrove': for each 'PMT Biotrove' transporter, its symbol
      followed by quartiles 1, 2 and 3 of its values;
    - 'important': symbols of transporters whose organ name is 'Liver';
    - 'syns': mapping of every transporter symbol to its synonyms.
    """
    expressLevelsNishi = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment='Nishimura'))
    expressLevelsPMT = naturallysortedexpressionlist(
        Expression.objects.filter(organ='Liver', experiment__startswith='PMT Sample'))
    expressBiotroveTransporters = naturallysortedtransporterlist(
        Transporter.objects.filter(
            expression__organ='Liver',
            expression__experiment__startswith='PMT Biotrove').distinct())

    importantNames = [str(t.symbol) for t in Transporter.objects.filter(organ__name='Liver')]
    syns = dict((t.symbol, t.synonyms) for t in Transporter.objects.all())

    # Mean expression across each group of three PMT samples.
    # The '/' is kept as in the original (integer division under Python 2).
    pmtTableValues = []
    for group in range(len(expressLevelsPMT)/3):
        first = group*3
        replicates = [expressLevelsPMT[first+offset].value for offset in range(3)]
        avg = stats.mean(replicates)
        stdev = stats.stdev(replicates)
        transporter = expressLevelsPMT[first].trans
        pmtTableValues.append([transporter] + replicates + [avg, stdev])

    # Quartiles across the biotrove samples of each transporter.
    biotroveTableValues = []
    for transporter in expressBiotroveTransporters:
        values = Expression.objects.filter(
            organ='Liver', experiment__startswith='PMT Biotrove',
            trans=transporter).values_list('value',flat='True').order_by('value')
        biotroveTableValues.append([
            transporter.symbol,
            quartiles(values,1),
            quartiles(values,2),
            quartiles(values,3),
        ])

    context = {'expressionNishi': expressLevelsNishi, 'expressionPMT': pmtTableValues, 'organ': 'Liver', 'syns': syns, 'important': importantNames, 'expressionBiotrove': biotroveTableValues}
    return render_to_response('liver.html', context)
コード例 #26
0
ファイル: regression.py プロジェクト: pminkov/wip
def linear_regression(x, y):
  """
  Return (slope, intercept) of the regression line of y on x.

  The slope is (mean(x) * mean(y) - mean(xy)) / (mean(x)**2 - mean(x**2))
  and the intercept is mean(y) - slope * mean(x).  The mean of the x*y
  products is printed as a side effect.

  See: https://www.khanacademy.org/math/probability/regression/regression-correlation/v/regression-line-example
  """
  mean_of_products = s.xy_mean(x, y)
  print(mean_of_products)
  mean_x = s.mean(x)
  mean_y = s.mean(y)
  mean_of_squares = s.mean([xi ** 2 for xi in x])

  slope = (mean_x * mean_y - mean_of_products) / (mean_x ** 2 - mean_of_squares)
  intercept = mean_y - slope * mean_x

  return (slope, intercept)
コード例 #27
0
 def ttest_1samp(a, popmean):
     """One-sample t statistic of a against popmean.

     Returns ([t, None], [p, None]) where t is
     (mean(a) - popmean) / (stddev(a) / sqrt(len(a))) and v = len(a) - 1.

     NOTE(review): p has the form of the Student's t probability density
     evaluated at t, not a cumulative tail probability -- confirm this is
     what callers expect.
     """
     n = len(a)
     offset = stats.mean(a) - popmean
     standard_error = stats.stddev(a) / n ** 0.5
     t = offset / standard_error

     v = n - 1.0
     normaliser = gamma((v + 1) / 2) / ((v * pi) ** 0.5 * gamma(v / 2))
     p = normaliser * (1 + t ** 2 / v) ** (-(v + 1) / 2)

     t_pair = [t, None]
     p_pair = [p, None]
     return (t_pair, p_pair)
コード例 #28
0
def main():
    [(stat, first), (stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = 1 - ttest_1samp(second, fmean)[1][0]
    if p >= 0.95:
        # rejected the null hypothesis
        print sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,)
    else:
        # failed to reject the null hypothesis
        print 'cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,)
コード例 #29
0
def circvar(samples, high=2*pi, low=0):
    """Return the circular variance of samples taken to lie in [low, high].

    The samples are rescaled to angles over a full turn, mapped to unit
    complex numbers and averaged; one minus the magnitude of that average
    is scaled back by ((high - low) / (2 * pi)) squared.
    """
    span = high - low
    ang = (samples - low)*2*pi / span
    resultant = stats.mean(exp(1j*ang))
    V = 1-abs(resultant)
    scale = span/2.0/pi
    return scale**2 * V
コード例 #30
0
def circstd(samples, high=2*pi, low=0):
    """Return the circular standard deviation of samples taken to lie in [low, high].

    The samples are rescaled to angles over a full turn, mapped to unit
    complex numbers and averaged; the square root of one minus the
    magnitude of that average is scaled back by (high - low) / (2 * pi).
    """
    span = high - low
    ang = (samples - low)*2*pi / span
    resultant = stats.mean(exp(1j*ang))
    V = 1-abs(resultant)
    scale = span/2.0/pi
    return scale * sqrt(V)
コード例 #31
0
ファイル: kde.py プロジェクト: AndreI11/SatStressGui
    def integrate_box_1d(self, low, high):
        """Integrate a one-dimensional pdf between two bounds.

        Both bounds are expressed relative to every point of self.dataset
        in units of the standard deviation taken from self.covariance; the
        result is the mean difference of special.ndtr at the two
        normalised bounds.

        Parameters
        ----------
        low : scalar
            lower bound of integration
        high : scalar
            upper bound of integration

        Returns
        -------
        value : scalar
            the result of the integral

        Raises
        ------
        ValueError if the KDE is over more than one dimension.
        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        stdev = ravel(sqrt(self.covariance))[0]

        def normalise(bound):
            # Distance from each data point to the bound, in standard deviations.
            return ravel((bound - self.dataset) / stdev)

        z_low = normalise(low)
        z_high = normalise(high)

        mass_below_high = special.ndtr(z_high)
        mass_below_low = special.ndtr(z_low)
        return stats.mean(mass_below_high - mass_below_low)
コード例 #32
0
ファイル: most_expr_isoform.py プロジェクト: BioXiao/utility
def diff_fpkm(diff_file, pseudocount):
    """Return the mean log2 FPKM per gene from a tab-separated diff file.

    The first line of diff_file is skipped as a header.  Each remaining
    line is split on tabs; column 0 is the gene id, column 6 the status and
    columns 7 and 8 two FPKM values.  Both FPKM values are parsed as floats
    on every line, but only lines whose status is 'OK' contribute.

    For every gene, log2(fpkm + pseudocount) is taken of all collected
    values and averaged with stats.mean.

    Returns a dict mapping gene id to that mean.  Raises IndexError or
    ValueError for rows with too few columns or non-numeric FPKM fields.
    """
    gene_fpkms = {}

    # The with-block closes the file even when a malformed row raises.
    with open(diff_file) as diff_in:
        diff_in.readline()  # skip the header line
        for line in diff_in:
            a = line.split('\t')

            gene_id = a[0]
            status = a[6]
            fpkm1 = float(a[7])
            fpkm2 = float(a[8])

            if status == 'OK':
                gene_fpkms.setdefault(gene_id, []).extend([fpkm1, fpkm2])

    gene_fpkm = {}
    for gene_id in gene_fpkms:
        log_fpkms = [math.log(fpkm+pseudocount,2) for fpkm in gene_fpkms[gene_id]]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
コード例 #33
0
def muestroEsperanzayVar(valores,alfa,ka):
    """Print sample and theoretical mean, variance and standard deviation.

    The sample values come from stats.mean and numpy.var of valores; the
    theoretical ones are ka / alfa for the mean and ka / alfa**2 for the
    variance, with the standard deviation as its square root.
    """
    media_muestral = stats.mean(valores)
    print("El calculo de la esperanza en la muestra es:",media_muestral)
    media_teorica = ka/alfa
    print("Teoricamente la Esperanza es = ",media_teorica)

    varianza_muestral = numpy.var(valores)
    print("El calculo de la Varianza es = ",varianza_muestral)
    varianza_teorica = ka/(alfa**2)
    print("Teoricamente la Varianza es = ",varianza_teorica)

    desvio_muestral = math.sqrt(numpy.var(valores))
    print("El Desvio estandar es =",desvio_muestral)
    desvio_teorico = math.sqrt(ka/(alfa**2))
    print("Teoricamente es =",desvio_teorico)
コード例 #34
0
def scale(data_matrix):
    """Return the mean and standard deviation of every column.

    The number of columns is taken from shape(data_matrix).  The result is
    a pair of lists (means, stdevs), one entry per column.
    """
    num_rows, num_cols = shape(data_matrix)
    column_ids = range(num_cols)

    means = [mean(get_column(data_matrix, col)) for col in column_ids]
    stdevs = [standard_deviation(get_column(data_matrix, col)) for col in column_ids]
    return means, stdevs
コード例 #35
0
def sample_stats(n):
    """
    Draw n values from random.random() and return the pair
    (mean, standard deviation) of that sample.
    """
    draws = tuple(random.random() for _ in range(n))
    centre = mean(draws)
    spread = sd(draws)
    return centre, spread
コード例 #36
0
def main():
    """Report whether the means of two loaded statistics differ.

    Loads exactly two (stat, values) pairs from the command-line arguments,
    trims both value lists with trim(..., 0.1), and tests the second list
    against the mean of the first with ttest_1samp.  A message is printed
    for either outcome, using 0.95 as the threshold on the second element
    of the test result.
    """
    loaded = load_stats(sys.argv[1:])
    [(_ignore_stat, first), (_ignore_stat, second)] = loaded

    # Trimming is meant to make the comparison less sensitive to outliers.
    first, second = trim(first, 0.1), trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    outcome = ttest_1samp(second, fmean)
    p = outcome[1]
    differs = p >= 0.95
    if differs:
        # rejected the null hypothesis
        print(sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,))
    else:
        # failed to reject the null hypothesis
        print('cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,))
コード例 #37
0
ファイル: test_perf.py プロジェクト: NVIDIA/DCGM
def _test_mem_bounded_golden_values_fields(activeGpuCount, memUsageTS,
                                           tailStart):
    """Assert that per-field mean memory usage stays near the golden value.

    For every (fieldId, series) in memUsageTS.fieldVals only the samples
    from index tailStart onwards are considered.  Fields whose tail sums to
    zero (treated as not implemented) and fields reported as variable-sized
    by helper_field_has_variable_size are skipped.

    The mean of the remaining tails must lie strictly between
    (1 - tolerance) * goldenVal and
    (1 + tolerance) * goldenVal * activeGpuCount.

    Raises AssertionError naming the field and the limits otherwise.
    """
    goldenVal = 1148  # 1 KB plus some swag per field instance (Global, GPU). This is based off of the keyed vector block size and default number of blocks
    tolerance = 0.10  # low tolerance, amount of records stored is bounded

    # The limits do not depend on the field, so compute them once.
    lowLimit = (1 - tolerance) * goldenVal
    highLimit = (1 + tolerance) * goldenVal * activeGpuCount

    for fieldId, series in memUsageTS.fieldVals.items():

        seriesTail = series[tailStart:]

        # skip fields that are not implemented
        if sum(seriesTail) == 0:
            continue

        #Don't check the size of binary fields since it's arbitrary per fieldId
        if helper_field_has_variable_size(fieldId):
            logger.info("Skipping variable-sized fieldId %d" % fieldId)
            continue

        meanUsage = stats.mean(seriesTail)

        # The first sentence ends with ". " so the two sentences of the
        # message no longer run together.
        assert lowLimit < meanUsage < highLimit, \
            'Expected field "%d" memory usage to be between %s and %s but got %s. ' % \
            (fieldId, lowLimit, highLimit, meanUsage) \
            + 'If this new value is expected, change the golden value used for comparison.'
コード例 #38
0
ファイル: compare.py プロジェクト: runt18/ccs-calendarserver
 def ttest_1samp(a, popmean):
     """One-sample t statistic of a against popmean.

     Returns (t, p) where t is
     (mean(a) - popmean) / (stddev(a) / sqrt(len(a))) and v = len(a) - 1.

     T statistic - http://mathworld.wolfram.com/Studentst-Distribution.html

     NOTE(review): p has the form of the Student's t probability density
     evaluated at t, not a cumulative tail probability -- confirm this is
     what callers expect.
     """
     n = len(a)
     offset = stats.mean(a) - popmean
     standard_error = stats.stddev(a) / n**0.5
     t = offset / standard_error

     v = n - 1.0
     normaliser = gamma((v + 1) / 2) / ((v * pi)**0.5 * gamma(v / 2))
     p = normaliser * (1 + t**2 / v)**(-(v + 1) / 2)
     return (t, p)
コード例 #39
0
ファイル: stroop_stats.py プロジェクト: unshur/OpenPsyc
def wordSummary(db, table):
    f = open("wordSummary_%s.txt" % table, 'w')
    d = {}
    header = "word, length, rtAVG, rtSTD, total, percCorrect\n"
    f.write(header)
    wordList = []
    sql = "SELECT DISTINCT(word) FROM %s" % table
    for w in db.query(sql):
        wordList.append(w[0])

    for word in wordList:
        sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table,
                                                                         word)
        wordLen = len(word)
        rtList = []
        zList = []

        for rt in db.query(sql):
            rtList.append(rt[0])

        rtAVG = stats.mean(rtList)
        rtSTD = stats.samplestdev(rtList)

        total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" %
                         (table, word))[0][0]
        percCorrect = float(len(rtList)) / float(total) * 100.0

        print len(rtList), total

        myString = "%s, %i, %f, %f, %i, %f\n" % (word, wordLen, rtAVG, rtSTD,
                                                 total, percCorrect)
        print myString
        f.write(myString)

    f.close()
コード例 #40
0
ファイル: stroop_stats.py プロジェクト: CrossGini/OpenPsyc
def wordSummary(db, table):
	f = open("wordSummary_%s.txt" % table, 'w')
	d = {}
	header = "word, length, rtAVG, rtSTD, total, percCorrect\n"
	f.write(header)
	wordList = []
	sql = "SELECT DISTINCT(word) FROM %s" % table
	for w in db.query(sql):
		wordList.append(w[0])
		
	for word in wordList:
		sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table, word)
		wordLen = len(word)
		rtList = []
		zList = []

		for rt in db.query(sql):
			rtList.append(rt[0])

		rtAVG = stats.mean(rtList)
		rtSTD = stats.samplestdev(rtList)


		total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" % (table, word))[0][0]
		percCorrect = float(len(rtList)) / float(total) * 100.0

		print len(rtList), total

		myString = "%s, %i, %f, %f, %i, %f\n" % (word, wordLen, rtAVG, rtSTD, total, percCorrect)
		print myString
		f.write(myString)


	f.close()
コード例 #41
0
def optimal_discard_6(hand):
    """Choose which two cards to discard from hand['hand6'].

    Every pair of positions (i, j) with i < j among the first six is tried
    as the discard; the remaining cards are scored with check_all_cuts and
    the hand with the highest mean score wins.  Only a mean strictly above
    0 can be selected, so the best hand stays empty if none qualifies.

    Returns (best_hand, discard), where discard holds the cards of
    hand['hand6'] that are not in the best hand (built through sets, so
    its order is not defined).
    """
    cards = hand['hand6']

    # Score the hand left after removing each pair of positions.
    scores_by_hand = {}
    for first in range(6):
        for second in range(first + 1, 6):
            kept = [card for position, card in enumerate(cards)
                    if position != first and position != second]
            point_list = check_all_cuts(kept)
            scores_by_hand[tuple(kept)] = point_list

    max_expected_points = 0
    best_hand = []
    for candidate, point_list in scores_by_hand.iteritems():
        expected_points = mean(point_list)
        if expected_points > max_expected_points:
            max_expected_points = expected_points
            best_hand = candidate

    discard = list(set(hand['hand6']) - set(best_hand))

    return list(best_hand), discard
コード例 #42
0
ファイル: evaluate.py プロジェクト: jurdan21/ReVir-UFPe
def print_latex_stats(diffs, label):
	"""Print one LaTeX table row summarising ``diffs``.

	The row contains ``label`` followed by the minimum, maximum, mean and
	standard deviation of ``diffs``, each divided by 1000 and formatted
	with three decimals, separated by ``&`` and terminated by ``\\\\``.

	:param diffs: non-empty sequence of numbers
	:param label: text for the first column of the row
	"""
	# NOTE(review): the division by 1000 looks like a unit conversion
	# (e.g. ms -> s) -- confirm against the caller.
	scale = 1000.0
	row = '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
		label,
		min(diffs) / scale,
		max(diffs) / scale,
		stats.mean(diffs) / scale,
		stats.stdev(diffs) / scale)
	# Single-argument print(...) behaves the same on Python 2 and 3.
	print(row)
コード例 #43
0
ファイル: t_test.py プロジェクト: Rhomboidal1/CS301-Lab
def print_stats(L):
    """Print the mean, standard deviation and outlier count of ``L``.

    The outlier count is the number of elements dropped by
    ``remove_outliers(L, 1)``.

    :param L: sequence of numbers
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Let's compute some statistics...")
    # NOTE(review): %d truncates the mean and standard deviation to whole
    # numbers; kept as-is in case the output format is relied upon.
    print("\tMean: %d" % mean(L))
    print("\tStandard deviation: %d" % std(L))
    outlier_count = len(L) - len(remove_outliers(L, 1))
    print("\t# of outliers: %d" % outlier_count)
コード例 #44
0
def test_mean1():
    """Check ``mean`` against a few hand-computed integer cases."""
    cases = (
        ([0, 0, 0, 0], 0),   # all zeros
        ([0, 200], 100),     # positive pair
        ([0, -200], -100),   # negative pair
        ([0], 0),            # single element
    )
    for values, expected in cases:
        observed = mean(values)
        assert_equal(observed, expected)
コード例 #45
0
ファイル: kde.py プロジェクト: BackupTheBerlios/pulsar-svn
    def integrate_box_1d(self, low, high):
        """Integrate a one-dimensional pdf between two bounds.

        Parameters
        ----------
        low : scalar
          lower bound of integration
        high : scalar
          upper bound of integration

        Returns
        -------
        value : scalar
          the result of the integral

        Raises
        ------
        ValueError
          if ``self.d`` is not 1
        """
        if self.d != 1:
            raise ValueError("integrate_box_1d() only handles 1D pdfs")

        # Single scale factor: first element of sqrt(covariance).
        sigma = ravel(sqrt(self.covariance))[0]

        # Express both bounds relative to every data point, in units of sigma.
        lower_z = ravel((low - self.dataset)/sigma)
        upper_z = ravel((high - self.dataset)/sigma)

        # Average the per-point ndtr differences over the dataset.
        # NOTE(review): `special` is presumably scipy.special, where ndtr is
        # the standard normal CDF -- confirm against the file's imports.
        per_point_mass = special.ndtr(upper_z) - special.ndtr(lower_z)
        return stats.mean(per_point_mass)
コード例 #46
0
ファイル: basic.py プロジェクト: bmcculley/pycalcstats
 def testVariance(self):
     """Check the variance and standard-deviation functions on [1, 2, 3].

     The mean of the data is 2, so the squared deviations sum to 2: the
     population variance is 2/3 and the sample variance is 1.
     """
     data = [1, 2, 3]
     # Precondition for the expected values below; assertEqual (unlike a
     # bare assert) is not stripped under -O and reports both values.
     self.assertEqual(stats.mean(data), 2)
     # Written as 2.0/3 so the expected value is not truncated to 0 when
     # `/` is integer division (Python 2 without true division).
     two_thirds = 2.0/3
     self.assertEqual(stats.pvariance(data), two_thirds)
     self.assertEqual(stats.variance(data), 1.0)
     self.assertEqual(stats.pstdev(data), math.sqrt(two_thirds))
     self.assertEqual(stats.stdev(data), 1.0)
コード例 #47
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def circvar(samples, high=2*pi, low=0):
    """Compute the circular variance for samples assumed to be in the range [low to high].

    The samples are mapped linearly onto angles (``low`` -> 0,
    ``high`` -> 2*pi), turned into unit complex numbers and averaged.  One
    minus the magnitude of that average is scaled by
    ``((high - low) / (2*pi))**2`` and returned.
    """
    span = high - low
    angles = (samples - low)*2*pi / span
    mean_phasor = stats.mean(exp(1j*angles))
    dispersion = 1 - abs(mean_phasor)
    unit = span/2.0/pi
    return unit**2 * dispersion
コード例 #48
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def circstd(samples, high=2*pi, low=0):
    """Compute the circular standard deviation for samples assumed to be in the range [low to high].

    The samples are mapped linearly onto angles (``low`` -> 0,
    ``high`` -> 2*pi), turned into unit complex numbers and averaged.  The
    square root of one minus the magnitude of that average is scaled by
    ``(high - low) / (2*pi)`` and returned.
    """
    span = high - low
    angles = (samples - low)*2*pi / span
    mean_phasor = stats.mean(exp(1j*angles))
    dispersion = 1 - abs(mean_phasor)
    unit = span/2.0/pi
    return unit * sqrt(dispersion)
コード例 #49
0
ファイル: test_stats.py プロジェクト: uwmstubbs/simplestats
def test_mean1():
    """Verify ``mean`` on small integer lists with known results."""
    expectations = [
        ([0, 0, 0, 0], 0),
        ([0, 200], 100),
        ([0, -200], -100),
        ([0], 0),
    ]
    for data, expected in expectations:
        result = mean(data)
        assert_equal(result, expected)
コード例 #50
0
def diff_fpkm(diff_file, pseudocount):
    """Return the mean log2 FPKM per gene from a tab-separated diff file.

    The first line of the file is skipped as a header.  On every other
    line, column 0 is the gene id, column 6 the test status and columns 7
    and 8 the two FPKM values.  Only rows whose status is ``'OK'``
    contribute; a gene that appears on several such rows accumulates all
    of their values.

    :param diff_file: path of the file to read
    :param pseudocount: number added to every FPKM before taking log2
    :returns: dict mapping gene id to the mean of ``log2(fpkm + pseudocount)``
    :raises IndexError: if a row has too few columns
    :raises ValueError: if an FPKM field of an ``'OK'`` row is not a number
    """
    gene_fpkms = {}

    # `with` guarantees the file is closed even if a row fails to parse.
    with open(diff_file) as diff_in:
        diff_in.readline()  # skip the header line
        for line in diff_in:
            a = line.split('\t')
            if a[6] != 'OK':
                continue
            gene_fpkms.setdefault(a[0], []).extend([float(a[7]), float(a[8])])

    gene_fpkm = {}
    for gene_id, fpkms in gene_fpkms.items():
        log_fpkms = [math.log(fpkm + pseudocount, 2) for fpkm in fpkms]
        gene_fpkm[gene_id] = stats.mean(log_fpkms)

    return gene_fpkm
コード例 #51
0
ファイル: score.py プロジェクト: poojasgupta/swga
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    """Evaluate an expression using the provided values and a set of metrics.

    The expression may refer to the variables ``set_size``,
    ``fg_dist_mean``, ``fg_dist_std``, ``fg_dist_gini``, ``bg_dist_mean``
    and ``fg_max_dist``.

    :param expression: Python expression (string or code object) to evaluate
    :param primer_set: collection of primers; only its length is used
    :param primer_locs: locations passed to ``stats.seq_diff`` to obtain
        the binding distances
    :param max_dist: value exposed to the expression as ``fg_max_dist``
    :param bg_dist_mean: value exposed to the expression as ``bg_dist_mean``
    :returns: the score and the metrics used to calculate it
    :raises NameError: if the expression uses a name that is not one of
        the permitted variables
    """
    # Calculate various metrics
    binding_distances = stats.seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    # Compare by value: identity (`is not`) on a string literal only works
    # by accident of interning.
    permitted_var_str = ", ".join(
        [key for key in namespace if key != "__builtins__"])
    # NOTE(review): eval() with builtins removed is NOT a security sandbox;
    # `expression` must come from a trusted source.
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        # str(e) instead of e.message, which only exists on Python 2.
        raise NameError(
            str(e) +
            '. Permitted variables are %s. Refer to README or docs for help.'
            % permitted_var_str)
    # Return only the metrics, without the '__builtins__' guard entry.
    del namespace['__builtins__']
    return score, namespace
コード例 #52
0
def scaleTestMinFinding(n = 1000000, noise = 3.5):
	"""Check the uncertainty reported by polynomialFindMinimum by simulation.

	For each of ``n`` trials a parabola ``a*x*x + b*x + c`` with random
	coefficients is sampled at x = 0..9, ``random() * noise`` is added to
	every sample and the minimum is fitted.  The difference between the
	noise-free minimum ``-b / (2*a)`` and the fitted one is divided by
	the reported uncertainty.  The mean and standard deviation of these
	ratios are printed, together with the percentage of trials whose
	ratio lies within 1, 2 and 3, and a histogram is shown.

	:param n: number of simulated fits (default matches the previously
		hard-coded value)
	:param noise: scale of the noise added to each sample (default
		matches the previously hard-coded value)
	"""
	# A real list, so the fit gets the same type on Python 2 and 3.
	xs = list(range(10))
	distances = []
	for _ in range(n):
		a = random()
		b = random()
		c = random()
		ys = [x*x*a + x*b + c + random() * noise for x in xs]

		minExp, unc = polynomialFindMinimum(xs, ys, returnErrors = True)
		minCalc = -b/(2.0*a)
		distances.append((minCalc - minExp) / unc)

	# Single-argument print(...) behaves the same on Python 2 and 3.
	print('mean: %f' % stats.mean(distances))
	print('stdDev: %f' % stats.stdDev(distances))
	for sigma in [1, 2, 3]:
		within = sum(1 for d in distances if abs(d) < sigma)
		print('With %d sigma: %f%%' % (sigma, 100.0 * within / n))

	pylab.hist(distances, bins = 50, range = (-5, 5))
	pylab.show()
コード例 #53
0
ファイル: most_expr_isoform.py プロジェクト: BioXiao/utility
def cuff_fpkm(fpkm_file, pseudocount):
    """Return the mean log2 expression of every gene in an FPKM tracking file.

    :param fpkm_file: path handed to ``cufflinks.fpkm_tracking``
    :param pseudocount: number added to every expression value before
        taking log2
    :returns: dict mapping each gene id in ``cuff.genes`` to the mean of
        ``log2(pseudocount + e)`` over the values returned by
        ``cuff.gene_expr(gene_id, not_found=0, fail=0)``
    """
    # Use the parameter; the previous code read the undefined name
    # `fpkm_tracking_file` here.
    cuff = cufflinks.fpkm_tracking(fpkm_file)

    gene_fpkm = {}
    for gene_id in cuff.genes:
        expr = cuff.gene_expr(gene_id, not_found=0, fail=0)
        gene_fpkm[gene_id] = stats.mean(
            [math.log(pseudocount + e, 2) for e in expr])

    return gene_fpkm
コード例 #54
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def circmean(samples, high=2*pi, low=0):
    """Compute the circular mean for samples assumed to be in the range [low to high].

    The samples are mapped linearly onto angles (``low`` -> 0,
    ``high`` -> 2*pi), turned into unit complex numbers and averaged.  The
    angle of that average is mapped back onto the original scale.
    """
    span = high - low
    phases = (samples - low)*2*pi / span
    mean_angle = angle(stats.mean(exp(1j*phases)))
    # A negative angle is moved up by one full turn before mapping back.
    if mean_angle < 0:
        mean_angle = mean_angle + 2*pi
    return mean_angle*span/2.0/pi + low
コード例 #55
0
ファイル: scaling_data.py プロジェクト: nfowler50/DataScience
def scale(data_matrix):
    """Return the per-column means and standard deviations of a matrix.

    :param data_matrix: matrix understood by ``la.shape`` and ``la.get_c``
    :returns: ``(means, stdevs)``, two lists with one entry per column
    """
    _, column_count = la.shape(data_matrix)

    def per_column(statistic):
        # Apply `statistic` to each column, in column order.
        return [
            statistic(la.get_c(data_matrix, j)) for j in range(column_count)
        ]

    return per_column(stats.mean), per_column(stats.std_dev)
コード例 #56
0
def circmean(samples, high=2*pi, low=0):
    """Compute the circular mean for samples assumed to be in the range [low to high].

    Samples are rescaled so that the interval from ``low`` to ``high``
    covers one full turn, converted to unit complex numbers and averaged;
    the angle of the average is then rescaled back and offset by ``low``.
    """
    width = high - low
    theta = (samples - low)*2*pi / width
    resultant = stats.mean(exp(1j*theta))
    direction = angle(resultant)
    # Shift a negative direction up by one full turn.
    direction = direction + 2*pi if direction < 0 else direction
    return direction*width/2.0/pi + low
コード例 #57
0
def _getRatingAverages():
    """Compute and print summary statistics of Android and iOS rating counts.

    Reads every document from ``collection_ios``, converts its
    ``android_ratingsAllVersions`` and ``ios_ratingsAllVersions_new``
    fields to floats (after removing thousands separators) and stores the
    mean, median and the first two values returned by ``stats.quartiles``
    in the module-level ``android_rating_*`` / ``ios_rating_*`` globals.
    The quartiles and medians are printed; the averages are only stored.
    """
    global android_rating_average
    global ios_rating_average

    global android_rating_median
    global ios_rating_median

    global android_rating_q1
    global ios_rating_q1

    global android_rating_q3
    global ios_rating_q3

    android_ratings = []
    ios_ratings = []

    for app in collection_ios.find().batch_size(30):
        # Values are strings such as "1,234"; strip the commas first.
        android_ratings.append(float(app["android_ratingsAllVersions"].replace(",", "")))
        ios_ratings.append(float(app["ios_ratingsAllVersions_new"].replace(",", "")))

    android_rating_average = stats.mean(android_ratings)
    ios_rating_average = stats.mean(ios_ratings)

    android_rating_median = stats.median(android_ratings)
    ios_rating_median = stats.median(ios_ratings)

    # Compute the quartiles once per platform: index 0 is used as Q1 and
    # index 1 as Q3.
    android_quartiles = stats.quartiles(android_ratings)
    ios_quartiles = stats.quartiles(ios_ratings)

    android_rating_q1 = android_quartiles[0]
    ios_rating_q1 = ios_quartiles[0]

    android_rating_q3 = android_quartiles[1]
    ios_rating_q3 = ios_quartiles[1]

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("ios stats")
    print(ios_rating_q1)
    print(ios_rating_median)
    print(ios_rating_q3)
    print("Android stats")
    print(android_rating_q1)
    print(android_rating_median)
    print(android_rating_q3)
コード例 #58
0
def scale(data_matrix):
    """Return the mean and the standard deviation of each column.

    :param data_matrix: matrix understood by ``shape`` and ``get_column``
    :returns: ``(means, stdevs)``, two lists with one entry per column
    """
    _, num_cols = shape(data_matrix)

    means = []
    for j in range(num_cols):
        means.append(mean(get_column(data_matrix, j)))

    stdevs = []
    for j in range(num_cols):
        stdevs.append(standard_deviation(get_column(data_matrix, j)))

    return means, stdevs