Example #1
  def statsex(self, objects):

    """
	Do some statistics on a source list
	Return dictionary
    """

    import stats, pstat
    
    # Return if we have no objects
    if len(objects) == 0:
      return 0	 

    # Define dictionary to hold statistics	
    stat = {}

    # Get number of objects
    stat['N'] = str(len(objects))

    # Define list (float) of FWHM values
    fwhm = [ float(obj[7]) for obj in objects ]
 
    # Define list (float) of ELLIPTICITY values
    el = [ float(obj[6]) for obj in objects ]

    # Define list (float) of THETA_IMAGE values
    pa = [ float(obj[5]) for obj in objects ]

    # Define list (float) of 'Stella-like' values
    stella = [ float(obj[9]) for obj in objects ]	

    # Create a histogram of FWHM values of binsize 1 pixel
    hfwhm = stats.histogram(fwhm,40,[0,40])[0]
    
    stat['medianFWHM'] = "%.2f" % stats.median(fwhm)
    stat['meanFWHM']   = "%.2f" % stats.mean(fwhm)
    stat['modeFWHM']   = "%.2f" % float(hfwhm.index(max(hfwhm))+0.5)

    try:
      stat['stdevFWHM'] = "%.2f" % stats.stdev(fwhm)
    except ZeroDivisionError:
      stat['stdevFWHM'] = '0.00'

    stat['medianEL'] = "%.2f" % stats.median(el)
    stat['meanEL']   = "%.2f" % stats.mean(el)

    try:
      stat['stdevEL']  = "%.2f" % stats.stdev(el)
    except ZeroDivisionError:
      stat['stdevEL']  = '0.00' 

    # Histogram of Ellipticity PA (-180 to 180, bins of 45 deg)
    #stat['histoTHETA'] = stats.histogram(pa,8,[-180,180])[0]

    # Histogram of Stellarity (0 to 1, bins of 0.05)
    #stat['histoStella']  = stats.histogram(stella,20,[0,1.01])[0]   

    return stat
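The stats/pstat pair imported above is the legacy stats.py module. For comparison, here is a minimal modern sketch of just the FWHM summary using the standard-library statistics module; fwhm_summary and the column layout are assumptions carried over from the example, not part of the original source:

import statistics

def fwhm_summary(objects):
    # Hypothetical helper mirroring the FWHM part of statsex() above;
    # assumes the same row layout with FWHM in column 7.
    fwhm = [float(obj[7]) for obj in objects]
    if not fwhm:
        return {}
    summary = {'N': str(len(fwhm)),
               'medianFWHM': "%.2f" % statistics.median(fwhm),
               'meanFWHM': "%.2f" % statistics.mean(fwhm)}
    try:
        summary['stdevFWHM'] = "%.2f" % statistics.stdev(fwhm)
    except statistics.StatisticsError:  # fewer than two values
        summary['stdevFWHM'] = '0.00'
    return summary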
Example #3
def corr(xdata, ydata):
    """corr(xydata) -> float
    corr(xdata, ydata) -> float

    Return the sample Pearson's Correlation Coefficient of (x,y) data.

    If ydata is None or not given, then xdata must be an iterable of (x, y)
    pairs. Otherwise, both xdata and ydata must be iterables of values, which
    will be truncated to the shorter of the two.

    >>> corr([(0.1, 2.3), (0.5, 2.7), (1.2, 3.1), (1.7, 2.9)])
    ... #doctest: +ELLIPSIS
    0.827429009335...

    The Pearson correlation is +1 in the case of a perfect positive
    correlation (i.e. an increasing linear relationship), -1 in the case of
    a perfect anti-correlation (i.e. a decreasing linear relationship), and
    some value between -1 and 1 in all other cases, indicating the degree
    of linear dependence between the variables.

    >>> xdata = [1, 2, 3, 4, 5, 6]
    >>> ydata = [2*x for x in xdata]  # Perfect correlation.
    >>> corr(xdata, ydata)
    1.0
    >>> corr(xdata, [5-y for y in ydata])  # Perfect anti-correlation.
    -1.0

    If there are not at least two data points, or if either all the x values
    or all the y values are equal, StatsError is raised.
    """
    n = len(xdata)
    assert n == len(ydata)
    if n < 2:
        raise StatsError(
            'correlation requires at least two data points, got %d' % n)
    # First pass is to determine the means.
    mx = stats.mean(xdata)
    my = stats.mean(ydata)
    # Second pass to determine the standard deviations.
    sx = stats.stdev(xdata, mx)
    sy = stats.stdev(ydata, my)
    if sx == 0:
        raise StatsError('all x values are equal')
    if sy == 0:
        raise StatsError('all y values are equal')
    # Third pass to calculate the correlation coefficient.
    ap = add_partial
    total = []
    for x, y in zip(xdata, ydata):
        term = ((x-mx)/sx) * ((y-my)/sy)
        ap(term, total)
    r = math.fsum(total)/(n-1)
    assert -1 <= r <= 1
    return r
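On Python 3.10 and later, the doctest value can be cross-checked against the standard library's statistics.correlation; the sample and population forms of r agree because the (n-1) factors cancel. A quick sketch with the same points:

import statistics

xs = [0.1, 0.5, 1.2, 1.7]
ys = [2.3, 2.7, 3.1, 2.9]
print(statistics.correlation(xs, ys))  # 0.827429009335...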
Example #4
def corr(x, y):
    N = len(x)
    if len(y) != N:
        raise ValueError(
            "Sequences must be of the same length. X length: {0}; Y length: {1}"
            .format(N, len(y)))
    total = 0
    for xi, yi in zip(x, y):
        total += xi * yi
    r = (total - N * stats.mean(x) * stats.mean(y)) / (
        (N - 1) * stats.stdev(x) * stats.stdev(y))
    return r
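This is the one-pass computational form of Pearson's r; it agrees with the three-pass version in Example #3 because sum(x*y) - N*mean(x)*mean(y) equals sum((x - mean(x)) * (y - mean(y))). A self-contained check of that identity against the stdlib (Python 3.10+), with hypothetical data:

import statistics as st

x = [1.0, 2.0, 4.0, 7.0]
y = [3.0, 1.0, 5.0, 8.0]
n = len(x)
one_pass = (sum(a * b for a, b in zip(x, y)) - n * st.mean(x) * st.mean(y)) / (
    (n - 1) * st.stdev(x) * st.stdev(y))
assert abs(one_pass - st.correlation(x, y)) < 1e-9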
Example #5
 def testVariance(self):
     data = [1, 2, 3]
     assert stats.mean(data) == 2
     self.assertEqual(stats.pvariance(data), 2/3)
     self.assertEqual(stats.variance(data), 1.0)
     self.assertEqual(stats.pstdev(data), math.sqrt(2/3))
     self.assertEqual(stats.stdev(data), 1.0)
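The expected values follow directly from the definitions: the deviations of [1, 2, 3] from the mean 2 are (-1, 0, 1), so the sum of squares is 2; dividing by n = 3 gives the population variance 2/3, and dividing by n - 1 = 2 gives the sample variance 1. The same expectations hold for the stdlib statistics module:

import math
import statistics

data = [1, 2, 3]
assert statistics.pvariance(data) == 2/3
assert statistics.variance(data) == 1.0
assert math.isclose(statistics.pstdev(data), math.sqrt(2/3))
assert statistics.stdev(data) == 1.0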
Example #6
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    """Evaluate an expression using the provided values and a set of metrics.

    :returns: the score and the metrics used to calculate it
    """
    # Calculate various metrics
    binding_distances = stats.seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join(
        [key for key in namespace.keys() if key is not "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(
            str(e) +
            '. Permitted variables are %s. Refer to README or docs for help.'
            % permitted_var_str)
    del namespace['__builtins__']
    return score, namespace
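A hypothetical usage sketch (the argument values are invented, and stats.seq_diff and stats.gini are this project's own helpers, not the stdlib): the expression string may reference only the names placed in the namespace, since builtins are disabled on both the globals and locals side of eval:

score, metrics = default_score_set(
    expression='set_size * fg_dist_mean / (1 + fg_dist_gini)',  # hypothetical formula
    primer_set=['p1', 'p2', 'p3'],   # hypothetical primers
    primer_locs=[120, 480, 910],     # hypothetical binding positions
    max_dist=430,
    bg_dist_mean=1500.0)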
Example #7
def print_latex_stats(diffs, label):
	print '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
		label,
		min(diffs) / 1000.0,
		max(diffs) / 1000.0,
		stats.mean(diffs) / 1000.0,
		stats.stdev(diffs) / 1000.0)
Example #8
def liver(request):
    expressLevelsNishi = naturallysortedexpressionlist(Expression.objects.filter(organ='Liver', experiment='Nishimura'))
    expressLevelsPMT = naturallysortedexpressionlist(Expression.objects.filter(organ='Liver', experiment__startswith='PMT Sample'))
    expressBiotroveTransporters = naturallysortedtransporterlist(Transporter.objects.filter(expression__organ='Liver', expression__experiment__startswith='PMT Biotrove').distinct())
    important = Transporter.objects.filter(organ__name='Liver')
    importantNames = []
    for x in important:
        importantNames.append(str(x.symbol))
    synquery = Transporter.objects.all()
    syns = {}
    for x in synquery:
        syns[x.symbol] = x.synonyms
    # Calculate mean expression across all PMT samples
    # (three replicate values per transporter)
    pmtTableValues = []
    for x in range(len(expressLevelsPMT) / 3):
        build = []
        for y in range(3):
            build.append(expressLevelsPMT[x * 3 + y].value)
        avg = stats.mean(build)
        stdev = stats.stdev(build)
        id = expressLevelsPMT[x * 3].trans
        pmtTableValues.append([id] + build + [avg, stdev])
    # Calculate median and quartiles across Biotrove samples
    biotroveTableValues = []
    for x in expressBiotroveTransporters:
        values = Expression.objects.filter(organ='Liver', experiment__startswith='PMT Biotrove', trans=x).values_list('value', flat=True).order_by('value')
        build = []
        build.append(x.symbol)
        build.append(quartiles(values, 1))
        build.append(quartiles(values, 2))
        build.append(quartiles(values, 3))
        biotroveTableValues.append(build)
    return render_to_response('liver.html', {'expressionNishi': expressLevelsNishi, 'expressionPMT': pmtTableValues, 'organ': 'Liver', 'syns': syns, 'important': importantNames, 'expressionBiotrove': biotroveTableValues})
Example #9
def print_latex_stats(diffs, label):
    print "%s & %.3f & %.3f & %.3f & %.3f \\\\" % (
        label,
        min(diffs) / 1000.0,
        max(diffs) / 1000.0,
        stats.mean(diffs) / 1000.0,
        stats.stdev(diffs) / 1000.0,
    )
Example #10
 def describe(self):
     df = OrderedDict([("names", ["mean", "stdev", "count", "min", "max"])])
     if not stats.is_numeric(self.x):
         return
     df['value'] = [stats.mean(self.x), stats.stdev(self.x),
             len([i for i in self if i is not None]),
             min(self.x), max(self.x)]
     return DataFrame(df)
Example #11
    def estimate(self, l):
        mu = stats.mean(l)
        self.sigma = stats.stdev(l) / 2
        self.mu0 = mu - self.sigma
        self.mu1 = mu + self.sigma
        self.p0 = self.p1 = 0.5

        return self.mu0, self.mu1, self.sigma, self.p0, self.p1
Example #12
 def estimate(self, l):
     mu = stats.mean(l)
     self.sigma = stats.stdev(l) / 2
     self.mu0 = mu - self.sigma
     self.mu1 = mu + self.sigma
     self.p0 = self.p1 = 0.5

     return self.mu0, self.mu1, self.sigma, self.p0, self.p1
Example #13
def scale(data_matrix):
    """returns the means and std dev of each col"""
    num_rows, num_cols = matrix.shape(data_matrix)
    means = [stats.mean(matrix.get_col(j, data_matrix))
             for j in range(num_cols)]
    stdevs = [stats.stdev(matrix.get_col(j, data_matrix))
              for j in range(num_cols)]

    return means, stdevs
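Hypothetical usage, assuming the companion matrix helpers (shape, get_col) from the same codebase; with sample (n - 1) standard deviations the two columns below give stdevs of exactly 1 and 20:

data = [[1, 20],
        [2, 40],
        [3, 60]]
means, stdevs = scale(data)   # means == [2, 40], stdevs == [1, 20]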
Example #14
def print_ascii_stats(diffs, label):
    print "%8d %8d %8d %8d %8d   %-20s" % (
        len(diffs),
        min(diffs),
        max(diffs),
        stats.mean(diffs),
        stats.stdev(diffs),
        label,
    )
Example #15
def log_normal_distribution(self):
    # take logs of sequence
    log_sequence = []
    for number in self.sequence:
        log_sequence.append(math.log(number, math.e))
    mean = stats.lmean(log_sequence)
    stdev = stats.stdev(log_sequence)
    number_of_points = len(self.sequence)
    distribution = log_sequence
    for each_value in range(number_of_points):
        distribution[each_value] = (distribution[each_value] - mean) / stdev
    return distribution
Example #16
 def describe(self):
     df = OrderedDict([("names", ["mean", "stdev", "count", "min", "max"])])
     for k, v in self:
         if not stats.is_numeric(v.x):
             continue
         df[k] = [
             stats.mean(v.x),
             stats.stdev(v.x),
             len([i for i in v if i is not None]),
             v.min(),
             v.max()
         ]
     return DataFrame(df)
Example #17
 def log_normal_distribution(self):
     # take logs of sequence
     log_sequence = []
     for number in self.sequence:
         log_sequence.append(math.log(number, math.e))
     mean = stats.lmean(log_sequence)
     stdev = stats.stdev(log_sequence)
     number_of_points = len(self.sequence)
     distribution = log_sequence
     for each_value in range(number_of_points):
         distribution[each_value] = (distribution[each_value] -
                                     mean) / stdev
     return distribution
Example #18
def zNormal(f0s):
    """
    Input: list of (time, F0) tuples
    Output: list of z-normalized (time, F0) tuples
    """
    from stats import stdev  # it's easiest this way
    nF0s = [f0 for (time, f0) in f0s]  # destructure
    mu = sum(nF0s) / len(nF0s)  # get mean
    sigma = stdev(nF0s)  # get s.d.
    return [(time, (f0 - mu) / sigma) for (time, f0) in f0s]  # apply normalization
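By construction the normalized F0 values have mean 0 and (sample) standard deviation 1; a quick check on hypothetical pitch-track data:

track = [(0.00, 110.0), (0.01, 115.0), (0.02, 125.0), (0.03, 130.0)]
normed = [f0 for (_, f0) in zNormal(track)]
# sum(normed) is ~0 and the sample stdev of normed is ~1,
# up to floating-point error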
Example #19
def __remove_outliers(self, lista):
    # NOTE: outlier removal is currently disabled; the first branch
    # always returns the list unchanged.
    #if len(lista) < 2:
    if True:
        return lista
    else:
        from stats import mean, stdev
        # Prepare the statistics and limits
        lmean = mean(lista)
        limstdev = 2 * stdev(lista)
        cleaned_list = []
        for item in lista:
            if abs(item - lmean) < limstdev:
                cleaned_list.append(item)
    #return cleaned_list
    return lista
Example #20
def default_score_set(expression, primer_set, primer_locs, max_dist, bg_dist_mean):
    # Calculate various metrics
    binding_distances = seq_diff(primer_locs)
    namespace = {
        'set_size': len(primer_set),
        'fg_dist_mean': stats.mean(binding_distances),
        'fg_dist_std': stats.stdev(binding_distances),
        'fg_dist_gini': stats.gini(binding_distances),
        'bg_dist_mean': bg_dist_mean,
        'fg_max_dist': max_dist,
        '__builtins__': None}
    permitted_var_str = ", ".join([key for key in namespace.keys() if key is not "__builtins__"])
    score = None
    try:
        score = eval(expression, namespace, {'__builtins__': {}})
    except NameError as e:
        raise NameError(str(e) + '. Permitted variables are %s. Refer to README or docs for help.' % permitted_var_str)
    del namespace['__builtins__']
#    print_primer_set(primer_set, [score, namespace], output_handle)
    return score, namespace
Example #21
def get_avg_box_width():
    box_widths = []
    
    filename = './image/test_bi3.jpg'
    image = cvLoadImage(filename, CV_8UC1)
    storage = cvCreateMemStorage(0)
    input_image = cvCloneImage(image)
#    output_image = cvCloneImage(image)
    output_image = cvCreateImage(cvGetSize(input_image), 8, 3)
    cvCvtColor(input_image, output_image, CV_GRAY2BGR)
    count, contours = cvFindContours (input_image, storage, sizeof_CvContour, CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE, cvPoint (0,0))
    for contour in contours.hrange():
        bbox = cvBoundingRect(contour, 0)
        box_width = bbox.width
        if 100 > box_width > 10:
            box_widths.append(box_width)
#    return box_widths
    width_mean = mean(box_widths)
    width_lmean = lmean(box_widths)
    width_stdev = stdev(box_widths)
    width_lstdev = lstdev(box_widths)    
    return (width_mean,width_lmean,width_stdev,width_lstdev)
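Note: in Gary Strangman's stats.py, where these names appear to come from, mean and stdev are dispatch wrappers that call the list implementations lmean and lstdev for plain Python lists, so the two pairs of values returned here should be pairwise identical.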
Example #22
def get_avg_box_width():
    box_widths = []

    filename = './image/test_bi3.jpg'
    image = cvLoadImage(filename, CV_8UC1)
    storage = cvCreateMemStorage(0)
    input_image = cvCloneImage(image)
    #    output_image = cvCloneImage(image)
    output_image = cvCreateImage(cvGetSize(input_image), 8, 3)
    cvCvtColor(input_image, output_image, CV_GRAY2BGR)
    count, contours = cvFindContours(input_image, storage, sizeof_CvContour,
                                     CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE,
                                     cvPoint(0, 0))
    for contour in contours.hrange():
        bbox = cvBoundingRect(contour, 0)
        box_width = bbox.width
        if 100 > box_width > 10:
            box_widths.append(box_width)
#    return box_widths
    width_mean = mean(box_widths)
    width_lmean = lmean(box_widths)
    width_stdev = stdev(box_widths)
    width_lstdev = lstdev(box_widths)
    return (width_mean, width_lmean, width_stdev, width_lstdev)
Example #23
for line in sys.stdin:
	if line.startswith('#'):
		continue

	(mp_id, thread_id, secs, ns) = [int(x) for x in line.split()]
	nanosecs = (secs*1000000000+ns)

	#print mp_id, thread_id, nanosecs

	if mp_id == mp_id_start:
		times[thread_id] = nanosecs
	elif mp_id == mp_id_stop and thread_id in times:
		differences.append(nanosecs - times[thread_id])
		del times[thread_id]


# print 'Values = %8d' % len(differences)
# print 'Min    = %8d' % min(differences)
# print 'Max    = %8d' % max(differences)
# print 'Mean   = %8d' % stats.mean(differences)
# print 'Stdev  = %8d' % stats.stdev(differences)

label = '#' + str(mp_id_start) + '-#' + str(mp_id_stop)

print ' Benchmark     Values      Min      Max     Mean    Stdev'
print '  %8s   %8d %8d %8d %8d %8d' % (
	label, len(differences), min(differences),
	max(differences), stats.mean(differences), stats.stdev(differences))

# EOF
Example #24
for line in sys.stdin:
    if line.startswith('#'):
        continue

    (mp_id, thread_id, secs, ns) = [int(x) for x in line.split()]
    nanosecs = (secs * 1000000000 + ns)

    #print mp_id, thread_id, nanosecs

    if mp_id == mp_id_start:
        times[thread_id] = nanosecs
    elif mp_id == mp_id_stop and thread_id in times:
        differences.append(nanosecs - times[thread_id])
        del times[thread_id]

# print 'Values = %8d' % len(differences)
# print 'Min    = %8d' % min(differences)
# print 'Max    = %8d' % max(differences)
# print 'Mean   = %8d' % stats.mean(differences)
# print 'Stdev  = %8d' % stats.stdev(differences)

label = '#' + str(mp_id_start) + '-#' + str(mp_id_stop)

print ' Benchmark     Values      Min      Max     Mean    Stdev'
print '  %8s   %8d %8d %8d %8d %8d' % (
    label, len(differences), min(differences), max(differences),
    stats.mean(differences), stats.stdev(differences))

# EOF
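The script consumes whitespace-separated integer columns from stdin; a hypothetical input fragment to illustrate the bookkeeping:

# mp_id thread_id secs ns
#   1      42      12  500000000   <- mp_id_start: remember this thread's timestamp
#   2      42      12  500123456   <- mp_id_stop: record 123456 ns for thread 42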
Example #25
 def test_stdev(self):
     """standard deviation is sqrt of variance"""
     self.assertEqual(0.0, stdev([2,2,2,2]))
     self.assertEqual(sqrt(2), stdev([1,2,3,4,5]))
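Note that these expected values imply the population definition of the standard deviation: for [1, 2, 3, 4, 5] the squared deviations from the mean 3 sum to 10, and sqrt(10/5) = sqrt(2), whereas the sample form would give sqrt(10/4), roughly 1.58.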
Example #26
        # remove the records with invalid responses
        df2 = df[(df['in_hospital']<=2) & (df['health_status']<=5)]

        health_h1 = df2['health_status'][df2['in_hospital']==1]
        health_h0 = df2['health_status'][df2['in_hospital']==2]

        # First, count number of records to make sure we match MHE. Should be
        # 07774 hospital
        # 90049 no hospital
        n = len(df2)
        n_h1 = len(health_h1)
        n_h0 = len(health_h0)
        mean_h1 = s.mean(health_h1)
        mean_h0 = s.mean(health_h0)
        stdev_h1 = s.stdev(health_h1)
        stdev_h0 = s.stdev(health_h0)
        sterr_h1 = s.sterrmean(stdev_h1, n_h1)
        sterr_h0 = s.sterrmean(stdev_h0, n_h0)

        # calculate two-sample t-test to test if means are significantly different
        tt = (mean_h1 - mean_h0)/np.sqrt((stdev_h1**2/float(n_h1)) + (stdev_h0**2/float(n_h0)))
        pval = stats.t.sf(np.abs(tt), n-1)*2 # two sided t-value, prob(abs(t))>tt
       
        # do the same using scipy.stats canned routine
        # unequal variance
        tt2 = float(stats.ttest_ind(health_h1, health_h0, equal_var=0)[0])
        pval2 = stats.t.sf(np.abs(tt2), n-1)*2 # two sided t-value, prob(abs(t))>tt

        print("Total Sample: {0}".format(n))
        print("Group\t\tSample Size\t\tMean\t\tStd. Err")
Example #27
 def get_lowerbound(self, confidence_interval, lst):
     return self.get_mean(lst) - (self.get_z(confidence_interval) *
                                  stats.stdev(lst) / math.sqrt(len(lst)))
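This is the lower edge of a confidence interval for the mean, mean - z * s / sqrt(n). A matching upper bound (hypothetical, mirroring the method above rather than taken from its source) would add the same z-scaled standard error:

 def get_upperbound(self, confidence_interval, lst):
     # Hypothetical mirror of get_lowerbound: mean plus z * stderr.
     return self.get_mean(lst) + (self.get_z(confidence_interval) *
                                  stats.stdev(lst) / math.sqrt(len(lst)))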
Example #28
def print_ascii_stats(diffs, label):
	print '%8d %8d %8d %8d %8d   %-20s' % (
		len(diffs), min(diffs), max(diffs),
		stats.mean(diffs), stats.stdev(diffs), label)
Example #29
print('\nCENTRAL TENDENCY')
print('geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af))
print('harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af))
print('mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af))
print('medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af))
print('mode:',stats.mode(l),stats.mode(a))
print('\nMOMENTS')
print('moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af))
print('variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af))
print('skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af))
print('kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af))
print('mean:',stats.mean(a),stats.mean(af))
print('var:',stats.var(a),stats.var(af))
print('stdev:',stats.stdev(a),stats.stdev(af))
print('sem:',stats.sem(a),stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))
print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:',stats.scoreatpercentile(l,40),stats.scoreatpercentile(lf,40),stats.scoreatpercentile(a,40),stats.scoreatpercentile(af,40))
print('percentileofscore:',stats.percentileofscore(l,12),stats.percentileofscore(lf,12),stats.percentileofscore(a,12),stats.percentileofscore(af,12))
print('histogram:',stats.histogram(l),stats.histogram(a))
print('cumfreq:')
Example #30
    offset.append(teloff)

    writeLog(logpath, file, "FocusPyr: teloffset= %d" % teloff)
    #print "FocusPyr: teloffset= %d distance=(%f,%f) (%f,%f) %s" % (teloff,xdist,ydist,x1,y1,o[11])

    # Append to a list to be inserted into Objects table
    pyrobjects.append((teloff, xdist, ydist, x1, y1, o[11]))


  if len(offset) > 0:

    # Determine mean, median and stdev of unclipped offsets
    mean = stats.mean(offset)
    median = stats.median(offset)
    try:
      stdev = stats.stdev(offset)
    except ZeroDivisionError:
      stdev = '0.00'

    # Do a 1-sigma clipping
    clipLowLimit = float(mean) - 1 * float(stdev)
    clipHighLimit = float(mean) + 1 * float(stdev)
    offset = [off for off in offset
              if float(off) <= clipHighLimit and float(off) >= clipLowLimit]

    # Determine stats on sigma clipped data
    h['meanFocusOffset'] = stats.mean(offset)
    h['medianFocusOffset'] = stats.median(offset)
    try:
      h['stdevFocusOffset'] = stats.stdev(offset)
    except ZeroDivisionError:
Example #31
def __parse_data_file__(self, datafile, table):
    from stats import mean, stdev
    rt_list = []
    # Parse the data file and fill the tables
    for line in datafile:
        campos = line.split()

        if len(campos) == 0:            # skip empty lines
            continue
        elif campos[0] == "\n":
            continue
        elif campos[0] == "#?":         # skip help text
            continue
        elif campos[0] == "#!":         # skip the datablock marker
            continue
        # Anything else is a datablock row
        trial = table.row
        trial['order'] = int(campos[0])
        trial['trial_type'] = trial_enum[campos[1]]
        trial['ctoa'] = float(campos[2])
        trial['ctd'] = float(campos[3])
        trial['rt'] = int(campos[4])
        trial['train'] = False

        # Encode the trial type: cued/uncued
        if campos[1] == 'LL' or campos[1] == 'RR':
            trial['cueing'] = cueing_enum['cued']
        elif campos[1] == 'LR' or campos[1] == 'RL':
            trial['cueing'] = cueing_enum['uncued']

        # Encode the key that was pressed
        if int(campos[5]) == 19:
            trial['key'] = key_enum['R']
        elif int(campos[5]) == 20:
            trial['key'] = key_enum['L']

        # Encode the validity of the trial:
        #
        #   a response on the right must satisfy
        if trial['key'] == key_enum['R'] \
                and 250 < trial['rt'] < 550 \
                and (trial['trial_type'] == trial_enum['LR']
                     or trial['trial_type'] == trial_enum['RR']):
            trial['valid'] = True
        #
        #   a response on the left must satisfy
        elif trial['key'] == key_enum['L'] \
                and 300 < trial['rt'] < 450 \
                and (trial['trial_type'] == trial_enum['RL']
                     or trial['trial_type'] == trial_enum['LL']):
            trial['valid'] = True
        #
        #   otherwise the trial is invalid
        else:
            trial['valid'] = False

        # Collect the valid reaction times in a list
        if trial['valid']:
            rt_list.append(trial['rt'])

        # Append the row and save
        trial.append()
        table.flush()
    # End of the loop; now compute the normalized rt values
    rt_mean = mean(rt_list)
    rt_stdev = stdev(rt_list)

    for trial in table.iterrows():
        trial['rt_norm'] = (trial['rt'] - rt_mean) / rt_stdev
        trial.update()
    # Return the list of valid rts for use in later averages
    table.flush()
    return rt_list
Example #32
def __simple_stats__(self, group):
    from stats import mean, stdev, sterr, ttest_ind

    # Table description for the simple statistical analysis
    class DiscreteData(tables.IsDescription):
        ctoa = tables.Float32Col()
        ctd = tables.Float32Col()
        mean = tables.Float32Col()
        stdev = tables.Float32Col()
        frequency = tables.UInt16Col()

    # Create the statistics tables and a pointer to the raw-data table
    table_cued = self.h5file.createTable(group, 'discrete_data_cued', DiscreteData, "Discrete organization of cued IOR continuous data")
    table_uncued = self.h5file.createTable(group, 'discrete_data_uncued', DiscreteData, "Discrete organization of uncued IOR continuous data")
    table_raw = group.rawdata

    # Arrays to hold the values so they can be plotted with matrixplot3D
    self.cued_array = zeros((self.ctoa_size, self.ctd_size))
    self.uncued_array = zeros((self.ctoa_size, self.ctd_size))
    self.rt_diff = zeros((self.ctoa_size, self.ctd_size))

    # Generate class intervals over the data and compute the
    # statistics on those classes
    ctoa_index = -1
    for ctoa in self.ctoa_range:
        ctoa_index += 1
        ctoa_min = ctoa - self.ctoa_bean_width / 2
        ctoa_max = ctoa + self.ctoa_bean_width / 2
        ctd_index = -1
        for ctd in self.ctd_range:
            ctd_index += 1
            ctd_min = ctd - self.ctd_bean_width / 2
            ctd_max = ctd + self.ctd_bean_width / 2
            cued_lst_tmp = [trial_raw['rt'] for trial_raw in table_raw.iterrows()
                            if ctoa_min < trial_raw['ctoa'] < ctoa_max
                            and ctd_min < trial_raw['ctd'] < ctd_max
                            and trial_raw['cueing'] == cueing_enum['cued']
                            and trial_raw['valid'] == True]

            uncued_lst_tmp = [trial_raw['rt'] for trial_raw in table_raw.iterrows()
                              if ctoa_min < trial_raw['ctoa'] < ctoa_max
                              and ctd_min < trial_raw['ctd'] < ctd_max
                              and trial_raw['cueing'] == cueing_enum['uncued']
                              and trial_raw['valid'] == True]
            cued_lst = self.__remove_outliers(cued_lst_tmp)
            uncued_lst = self.__remove_outliers(uncued_lst_tmp)

            cued = table_cued.row
            cued['ctoa'] = ctoa
            cued['ctd'] = ctd
            cued['frequency'] = len(cued_lst)
            if cued['frequency'] == 0:
                cued['mean'] = 0
                cued['stdev'] = 0
            elif cued['frequency'] == 1:
                cued['mean'] = mean(cued_lst)
                cued['stdev'] = 0
            else:
                cued['mean'] = mean(cued_lst)
                cued['stdev'] = stdev(cued_lst)

            uncued = table_uncued.row
            uncued['ctoa'] = ctoa
            uncued['ctd'] = ctd
            uncued['frequency'] = len(uncued_lst)
            if uncued['frequency'] == 0:
                uncued['mean'] = 0
                uncued['stdev'] = 0
            elif uncued['frequency'] == 1:
                uncued['mean'] = mean(uncued_lst)
                uncued['stdev'] = 0
            else:
                uncued['mean'] = mean(uncued_lst)
                uncued['stdev'] = stdev(uncued_lst)

            self.cued_array[ctoa_index][ctd_index] = cued['mean']
            self.uncued_array[ctoa_index][ctd_index] = uncued['mean']
            self.rt_diff[ctoa_index][ctd_index] = cued['mean'] - uncued['mean']
            cued.append()
            uncued.append()
            table_cued.flush()
            table_uncued.flush()
Example #33
directDelays = [centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance)
                for x, y, z, sampleDelay, azimuth, elevation in sampleDelays
                if elevation < math.radians(aboveCutoff) and elevation > math.radians(belowCutoff) and
                azimuth > math.radians(190) and azimuth < math.radians(260)]

for x, y, z, sampleDelay, azimuth, elevation in sampleDelays:
    if elevation < math.radians(aboveCutoff) and elevation > math.radians(belowCutoff) and \
       azimuth > math.radians(190) and azimuth < math.radians(260):
        if centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance) < 1.46:
            print math.degrees(azimuth), math.degrees(elevation)
        if centralizeDelay((sampleDelay + removedSamples) * c / samplingRate, x, y, z, earDistance) > 1.50:
            print math.degrees(azimuth), math.degrees(elevation)

import stats
print "Direct delays stdev:", stats.stdev(directDelays)
print "Direct delays mean:", stats.mean(directDelays)

import pylab
pylab.grid()
pylab.hist(directDelays,bins=7)
pylab.show()

nominalDistance = 1.4
meanDelay = sum((sampleDelay for x,y,z,sampleDelay,_,_ in sampleDelays))*c/samplingRate/len(sampleDelays)
for x, y, z, sampleDelay, azimuth, elevation in sampleDelays:
    distanceDelay = sampleDelay * c / samplingRate - meanDelay + nominalDistance
    centralizedDelay = centralizeDelay(distanceDelay, x, y, z, earDistance)
    deviations.append(sampleDelay * c / samplingRate)

    if (abs(sampleDelay - 41) > 13):
Example #34
def __load_data__(self):
    """
        Read the output data files produced by the C program
    """
    from stats import mean, stdev
    # Table description for the raw, unprocessed input data
    class Trial(tables.IsDescription):
        trial_type = tables.EnumCol(trial_enum, 'LL', base='uint8')
        key = tables.EnumCol(key_enum, 'L', base='uint8')
        cueing = tables.EnumCol(cueing_enum, 'cued', base='uint8')
        ctoa = tables.Float32Col()
        ctd = tables.Float32Col()
        valid = tables.UInt8Col()
        rt = tables.Float32Col()
        rt_norm = tables.Float32Col()
        rt_corr = tables.Float32Col()
        index = tables.UInt16Col()
        order = tables.UInt16Col()
        train = tables.UInt8Col()

    # Create the h5 file
    h5file = tables.openFile(self.h5filename, mode="w", title="IOR experiment results")

    # Build the list of raw data files to be processed
    rawdata_filelist = [f for f in os.listdir(self.rawdata_path)
                        if os.path.isfile(os.path.join(self.rawdata_path, f))
                        and f.find(self.rawdata_str) == 0]

    # Create a data group for each subject
    for rawdata_filename in rawdata_filelist:
        # Create the necessary groups and tables
        datafile = open(os.path.join(self.rawdata_path, rawdata_filename), "r")
        group = h5file.createGroup("/",
                                   'Data' + rawdata_filename[len(self.rawdata_str):],
                                   'Data from ' + rawdata_filename[len(self.rawdata_str):])
        table = h5file.createTable(group, 'rawdata', Trial, "Raw data from IOR continuous experiment")

        # Fill them through the private method __parse_data_file__()
        rt_tmp = self.__parse_data_file__(datafile, table)
        self.rt.extend(rt_tmp)
        datafile.close()

    # Create the raw-data table that combines all subjects
    groupeto = h5file.createGroup("/", 'DataCombined', "Combined data from all subjects of IOR experiment")
    table = h5file.createTable(groupeto, 'rawdata', Trial, "Raw data from IOR experiment")

    for group in h5file.walkGroups():
        if group._v_name == '/' or group._v_name == 'DataCombined':
            continue
        temp_table = group.rawdata[:]
        table.append(temp_table)
        table.flush()
    # Compute the mean and stdev of the whole data set, for rescaling
    rt_mean = mean(self.rt)
    rt_stdev = stdev(self.rt)
    for trial in table.iterrows():
        #if trial['valid'] == True:
        trial['rt'] = trial['rt_norm']
        trial.update()
    table.flush()
    #trial['rt'] = trial['rt_norm'] * rt_stdev + rt_mean
    #Quick hack so that it runs fast
    # Return the h5 file that was created
    return h5file
Example #35
print(stats.relfreq(lf))
print(stats.relfreq(l))
print(stats.relfreq(lf))

print('\nVARIATION')
print('obrientransform:')

l = [float(f) for f in list(range(1,21))]
ll = [l]*5

print(stats.obrientransform(l,l,l,l,l))

print('samplevar:',stats.samplevar(l),stats.samplevar(l))
print('samplestdev:',stats.samplestdev(l),stats.samplestdev(l))
print('var:',stats.var(l),stats.var(l))
print('stdev:',stats.stdev(l),stats.stdev(l))
print('sterr:',stats.sterr(l),stats.sterr(l))
print('sem:',stats.sem(l),stats.sem(l))
print('z:',stats.z(l,4),stats.z(l,4))
print('zs:')
print(stats.zs(l))
print(stats.zs(l))

print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l,.2))
print(stats.trimboth(lf,.2))
print(stats.trimboth(l,.2))
print(stats.trimboth(lf,.2))
print('trim1:')
print(stats.trim1(l,.2))
Example #36
# print 'Values = %8d' % len(differences)
# print 'Min    = %8d' % min(differences)
# print 'Max    = %8d' % max(differences)
# print 'Mean   = %8d' % stats.mean(differences)
# print 'Stdev  = %8d' % stats.stdev(differences)

# label = '#' + str(mp_id_start) + '-#' + str(mp_id_stop)

# print '\hline'
# print 'Benchmark & Min & Max & Mean & StdDev \\\\'
# print '\hline'
# print '\hline'

# no result for these meassuring points
if len(differences) == 0:
	sys.exit(0)

line = '%s & %.3f & %.3f & %.3f & %.3f \\\\' % (
	label,
	min(differences) / 1000.0,
	max(differences) / 1000.0,
	stats.mean(differences) / 1000.0,
	stats.stdev(differences) / 1000.0)

line = line.replace('.', ',')

print line

# EOF
Example #37
print 'median:', stats.median(l), stats.median(lf), stats.median(
    a), stats.median(af)
print 'medianscore:', stats.medianscore(l), stats.medianscore(
    lf), stats.medianscore(a), stats.medianscore(af)
print 'mode:', stats.mode(l), stats.mode(a)

print '\nMOMENTS'
print 'moment:', stats.moment(l), stats.moment(lf), stats.moment(
    a), stats.moment(af)
print 'variation:', stats.variation(l), stats.variation(a), stats.variation(
    lf), stats.variation(af)
print 'skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af)
print 'kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis(
    a), stats.kurtosis(af)
print 'mean:', stats.mean(a), stats.mean(af)
print 'var:', stats.var(a), stats.var(af)
print 'stdev:', stats.stdev(a), stats.stdev(af)
print 'sem:', stats.sem(a), stats.sem(af)
print 'describe:'
print stats.describe(l)
print stats.describe(lf)
print stats.describe(a)
print stats.describe(af)

print '\nFREQUENCY'
print 'freqtable:'
print 'itemfreq:'
print stats.itemfreq(l)
print stats.itemfreq(a)
print 'scoreatpercentile:', stats.scoreatpercentile(
    l, 40), stats.scoreatpercentile(lf, 40), stats.scoreatpercentile(
        a, 40), stats.scoreatpercentile(af, 40)
Example #38
def least_squares_fit(x, y):
    """Given training data x and y, compute the alpha and beta
    that minimize the sum of squared errors"""
    beta = corr(x, y) * stdev(y) / stdev(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
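These closed forms are the standard simple-regression estimates, beta = r * s_y / s_x and alpha = mean(y) - beta * mean(x). A self-contained check with hypothetical, perfectly linear data, using the stdlib equivalents of corr/stdev/mean (Python 3.10+):

from statistics import correlation, mean, stdev

x = [1.0, 2.0, 3.0, 4.0]
y = [3 * xi + 1 for xi in x]          # y = 3x + 1
beta = correlation(x, y) * stdev(y) / stdev(x)
alpha = mean(y) - beta * mean(x)
assert abs(beta - 3.0) < 1e-9 and abs(alpha - 1.0) < 1e-9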
Example #39
print '\nVARIATION'
print 'obrientransform:'

l = range(1,21)
a = N.array(l)
ll = [l]*5
aa = N.array(ll)

print stats.obrientransform(l,l,l,l,l)
print stats.obrientransform(a,a,a,a,a)

print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
print 'var:',stats.var(l),stats.var(a)
print 'stdev:',stats.stdev(l),stats.stdev(a)
print 'sterr:',stats.sterr(l),stats.sterr(a)
print 'sem:',stats.sem(l),stats.sem(a)
print 'z:',stats.z(l,4),stats.z(a,4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)

print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l,.2)
print stats.trimboth(lf,.2)
print stats.trimboth(a,.2)
print stats.trimboth(af,.2)
print 'trim1:'
print stats.trim1(l,.2)
Example #40
            offset.append(teloff)
            offset.append(teloff)
        else:
            offset.append(teloff)
    else:
        offset.append(teloff)

    #writeLog(logpath,file,"FocusPyr: teloffset= %d" % offset)
    #print "FocusPyr: teloffset= %d distance=%f (%f,%f)" % (teloff,distance,x1,y1)

if len(offset) > 0:
    # Determine mean, median and stdev of unclipped offsets
    mean = stats.mean(offset)
    median = stats.median(offset)
    try:
        stdev = stats.stdev(offset)
    except ZeroDivisionError:
        stdev = '0.00'

    # Do a 1-sigma clipping
    clipLowLimit = float(mean) - 1 * float(stdev)
    clipHighLimit = float(mean) + 1 * float(stdev)
    offset = [off for off in offset
              if float(off) < clipHighLimit and float(off) > clipLowLimit]

    # Determine stats on sigma clipped data
    mean_c = stats.mean(offset)
    median_c = stats.median(offset)
    try:
        stdev_c = stats.stdev(offset)
    except ZeroDivisionError:
Example #41
def calculateStats(scores, nullvalues):
    """Calculate statistics based on actual DEAP scores and null value scores
    Args:
        scores: a list of pairs of floats representing expression scores
        nullvalues: a list of lists of pairs of floats representing null expression scores
    Returns:
        A triplet of StatOutput objects (minimum, maximum, absolute)
    """
    minvalues = []
    maxvalues = []
    absvalues = []
    for pairs in nullvalues:
        minList = []
        maxList = []
        for pair in pairs:
            minList.append(pair[0])
            maxList.append(pair[1])
        minVal = stats.mean(minList)
        maxVal = stats.mean(maxList)
        minvalues.append(minVal)
        maxvalues.append(maxVal)
        absvalues.append(max(maxVal, abs(minVal)))

    maxmean = stats.mean(maxvalues)
    maxstdev = stats.stdev(maxvalues)
    minmean = stats.mean(minvalues)
    minstdev = stats.stdev(minvalues)
    absmean = stats.mean(absvalues)
    absstdev = stats.stdev(absvalues)

    realMaxVals = []
    realMinVals = []
    realMaxPathSubset = ''
    realMinPathSubset = ''
    for score in scores:
        realMaxVals.append(score[1])
        realMinVals.append(score[0])
        realMaxPathSubset = score[3]
        realMinPathSubset = score[2]
    maxVal = stats.mean(realMaxVals)
    minVal = stats.mean(realMinVals)
    maxCount = 0
    for val in maxvalues:
        if val >= maxVal:
            maxCount += 1
    maxPval = float(maxCount) / float(len(maxvalues))
    maxStats = StatOutput(mean=maxmean, stdev=maxstdev, curval=maxVal, qval=maxPval)
    minCount = 0
    for val in minvalues:
        if val <= minVal:
            minCount += 1
    minPval = float(minCount) / float(len(minvalues))
    minStats = StatOutput(mean=minmean, stdev=minstdev, curval=minVal, qval=minPval)
    absVal = max(maxVal, abs(minVal))
    absPathSubset = realMinPathSubset
    if maxVal == absVal:
        absPathSubset = realMaxPathSubset
    absCount = 0
    for i in range(0, len(maxvalues)):
        if max(maxvalues[i], abs(minvalues[i])) >= absVal:
            absCount += 1
    absPval = float(absCount) / float(len(maxvalues))
    absStats = StatOutput(mean=absmean, stdev=absstdev, curval=absVal, qval=absPval, pathSubset=absPathSubset)
    return [minStats, maxStats, absStats]
Example #42
 def get_lowerbound(self, confidence_interval, lst):
     return self.get_mean(lst) - (self.get_z(confidence_interval) *
                                  stats.stdev(lst) / math.sqrt(len(lst)))
Example #43
 def test_stdev(self):
     """Standard Deviation is sqrt of variance"""
     self.assertEqual(0.0, stdev([2, 2, 2, 2]))
     self.assertEqual(0.5, stdev([1, 1, 2, 2]))
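Again the expectations only hold for the population form: for [1, 1, 2, 2] the squared deviations from the mean 1.5 sum to 1, and sqrt(1/4) = 0.5, while the sample form would give sqrt(1/3), roughly 0.577.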
Example #44
                        offset.append(teloff)
                        offset.append(teloff)
                    else:
                        offset.append(teloff)
                else:
                    offset.append(teloff)

                #writeLog(logpath,file,"FocusPyr: teloffset= %d" % offset)
                #print "FocusPyr: teloffset= %d distance=%f (%f,%f)" % (teloff,distance,x1,y1)

            if len(offset) > 0:
                # Determine mean, median and stdev of unclipped offsets
                mean = stats.mean(offset)
                median = stats.median(offset)
                try:
                    stdev = stats.stdev(offset)
                except ZeroDivisionError:
                    stdev = '0.00'

                # Do a 1-sigma clipping
                clipLowLimit = float(mean) - 1 * float(stdev)
                clipHighLimit = float(mean) + 1 * float(stdev)
                offset = [
                    off for off in offset
                    if float(off) < clipHighLimit and float(off) > clipLowLimit
                ]

                # Determine stats on sigma clipped data
                mean_c = stats.mean(offset)
                median_c = stats.median(offset)
                try: