Example #1
def findPeakMax(data, max_ev, min_ev, peak_number, spectra):
    columns = [
        'Field', spectra + '_' + peak_number + '_amp_max',
        spectra + '_' + peak_number + '_amp_min'
    ]
    amplitudePair = []
    for col in data:
        if 'deltaA_diff' in col or 'absorption' in col or 'absorbance' in col:
            field = col.split("_")[0]
            if field != '0':  # 'is not' compares identity, not string equality
                xdata = data.loc[
                    data['energy'].between(min_ev, max_ev, inclusive='both'),
                    'energy'].values
                ydata = data.loc[
                    data['energy'].between(min_ev, max_ev, inclusive='both'),
                    col].values
                try:
                    # np.mode does not exist; the sign of the mean decides
                    # whether the peak is positive- or negative-going
                    if np.mean(ydata) > 0:
                        ydataAmplitude = np.max(ydata)
                    else:
                        ydataAmplitude = np.min(ydata)
                except ValueError:
                    print(
                        'Error! Zero-size array possible. Perhaps peak limits are set incorrectly?'
                    )
                    continue  # skip this column instead of reusing a stale value
                amplitudePair.append([int(field), ydataAmplitude])
    amplitudeData = pd.DataFrame(amplitudePair, columns=columns)
    return amplitudeData
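
A note on the recurring bug in these examples: np.mode has never existed in NumPy. A minimal sketch of the usual replacement, scipy.stats.mode (the keepdims argument needs SciPy >= 1.9; older versions are indexed as result[0][0]):

import numpy as np
from scipy import stats

values = np.array([1, 2, 2, 3, 3, 3])
result = stats.mode(values, keepdims=False)
print(result.mode, result.count)  # 3 3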
Example #2
    def fit( self, X, y = None ):
        X.host_response_rate = X.host_response_rate.str.replace('%', '').astype(float)
        X.host_acceptance_rate = X.host_acceptance_rate.str.replace('%', '').astype(float)
        
        # Convert any infinite values to NaN
        X = X.replace( [ np.inf, -np.inf ], np.nan )
        
        for col in X.columns:
            if col=='number_of_reviews_ltm':
                default_value=0
            elif col=='number_of_reviews':
                default_value=0
            elif col=='host_listings_count':
                default_value=1
            elif self._default_strategy=='median':
                default_value=np.median(X[col].dropna())
            elif self._default_strategy=='mode':
                # np.mode does not exist; pandas Series.mode() gives the modal value
                default_value=X[col].mode().iloc[0]
            elif self._default_strategy=='mean':
                default_value=np.mean(X[col].dropna())
            else:
                default_value=np.median(X[col].dropna())
            self._default_values[col]=default_value

        return self 
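
For imputation defaults like the transformer above, pandas' own Series.mode() avoids SciPy entirely; a small sketch with made-up values:

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, 2.0, np.nan])
# Series.mode() skips NaN by default and may return several modal values,
# so take the first as the fill value
default_value = s.mode().iloc[0]
print(s.fillna(default_value).tolist())  # [1.0, 2.0, 2.0, 2.0]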
Example #3
def extract_data(X,
                 Y=None,
                 Y_on=True,
                 featureScaling=None,
                 avgNormalisation=None):
    m, n = X.shape[0], X.shape[1]
    if Y_on:
        K = Y.shape[1]
    else:
        K = None

    if avgNormalisation == 'Mean':
        avgs = np.mean(X, axis=0)
    elif avgNormalisation == 'Median':
        avgs = np.median(X, axis=0)
    elif avgNormalisation == 'Mode':
        # np.mode does not exist; scipy.stats.mode (from scipy import stats) does
        avgs = stats.mode(X, axis=0, keepdims=False).mode  # keepdims needs SciPy >= 1.9
    else:
        avgs = np.zeros(X.shape[1])

    if featureScaling == 'Range':
        scales = X.ptp(axis=0)
    elif featureScaling == 'Standard Deviation':
        scales = X.std(axis=0)
    elif featureScaling == 'Variance':
        scales = X.var(axis=0)
    else:
        scales = np.ones(X.shape[1])

    scales = np.where(scales != 0, scales, 1)
    return X, Y, m, n, K, avgs, scales
Example #4
def _merge_values(values, strategy='list'):
    """
    Function used by merge_dataframes_by_smiles. Returns a summary of the values in 'values', unless
    'strategy' == 'list', in which case it returns values itself.
    """
    try:
        values.remove('')
    except (ValueError, AttributeError):
        pass  # nothing to remove, or 'values' is not a list

    if values is None:
        val = float('NaN')
    elif strategy == 'list':
        val = values
    elif strategy == 'uniquelist':
        val = list(set(values))
    elif strategy == 'mean':
        val = np.mean(values)
    elif strategy == 'geomean':
        # np.geomean does not exist; scipy.stats.gmean computes the geometric mean
        val = stats.gmean(values)
    elif strategy == 'median':
        val = np.median(values)
    elif strategy == 'mode':
        # np.mode does not exist; scipy.stats.mode returns (modal value, count)
        val = stats.mode(values, keepdims=False).mode
    elif strategy == 'max':
        val = max(values)
    elif strategy == 'min':
        val = min(values)
    else:
        raise ValueError('Unknown column merge strategy: %s' % strategy)  # 'columnmerge' was undefined

    if isinstance(val, list) and len(val) == 1:
        val = val[0]
    return val
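
np.geomean in the 'geomean' branch is not a NumPy function either; scipy.stats.gmean is the standard replacement, equivalent to the exp of the mean of the logs:

import numpy as np
from scipy import stats

values = [1.0, 10.0, 100.0]
print(stats.gmean(values))              # ~10.0
print(np.exp(np.mean(np.log(values))))  # same value, by hand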
Example #5
    def sgd_calc(self, x_datas, cost, consts, X, model_params, hyper_params,
                 rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        gparams = T.grad(cost=cost,
                         wrt=model_params,  # 'model_params_list' was undefined
                         consider_constant=consts)
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(model_params, gparams)]

        train = theano.function(inputs=[X], outputs=cost, updates=updates)

        validate = theano.function(inputs=[X], outputs=cost)

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])

            if np.mod(i, n_mod_history) == 0:  # np.mod (modulo), not np.mode
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history
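
Here np.mode(i, n_mod_history) is almost certainly a typo for np.mod, the element-wise modulo used for periodic logging; a quick illustration:

import numpy as np

n_mod_history = 3
for i in range(7):
    if np.mod(i, n_mod_history) == 0:  # same as i % n_mod_history == 0
        print('logging at iteration', i)  # fires at 0, 3, 6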
Example #6
def merge_values(values,strategy='list'):
    try:
        values.remove('')
    except (ValueError, AttributeError):
        pass  # nothing to remove, or 'values' is not a list

    if values is None:
        val = float('NaN')
    elif strategy == 'list':
        val = values
    elif strategy == 'uniquelist':
        val = list(set(values))
    elif strategy == 'mean':
        val = np.mean(values)
    elif strategy == 'geomean':
        # np.geomean does not exist; scipy.stats.gmean computes the geometric mean
        val = stats.gmean(values)
    elif strategy == 'median':
        val = np.median(values)
    elif strategy == 'mode':
        # np.mode does not exist; scipy.stats.mode returns (modal value, count)
        val = stats.mode(values, keepdims=False).mode
    elif strategy == 'max':
        val = max(values)
    elif strategy == 'min':
        val = min(values)
    else:
        raise ValueError('Unknown column merge strategy: %s' % strategy)  # 'columnmerge' was undefined

    if isinstance(val, list) and len(val) == 1:
        val = val[0]
    return val
Example #7
def plot_scores(workout_num, results, division):
    data1 = np.array([ath['scores'][workout_num - 1]
                      for ath in results]).astype(float)  # np.float is deprecated
    data1 = data1[~np.isnan(data1)]
    # reject extreme outliers beyond 4*sigma (mu and sigma must exist before filtering)
    mu = np.mean(data1)
    sigma = np.std(data1)
    data1 = data1[np.abs(data1 - mu) < 4 * sigma]
    mu = np.mean(data1)
    sigma = np.std(data1)
    median = np.median(data1)
    # np.mode does not exist; scipy.stats.mode returns (modal value, count)
    mode = stats.mode(data1, keepdims=False).mode
    print(mu)
    print(sigma)
    print(median)
    print(mode)
    print(len(data1))
    # get best fit for data
    lower = mu - 4 * sigma
    upper = mu + 4 * sigma
    x = np.linspace(lower, upper, 1000)
    pdf = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 /
                                                    (2 * sigma**2))
    hist, edges = np.histogram(data1, density=True, bins=100)
    p1 = figure(title='18.{} {}'.format(workout_num, division),
                background_fill_color='#E8DDCB')
    p1.quad(top=hist,
            bottom=0,
            left=edges[:-1],
            right=edges[1:],
            fill_color="#036564",
            line_color="#033649")
    p1.line(x, pdf, line_color="#D95B43", line_width=8, alpha=0.7)
    output_file('18.{}_{}.html'.format(workout_num, division))
    show(p1)
Example #8
 def basicAnalysis(results):
     mean = np.mean(results)
     median = np.median(results)
     # np.mode does not exist; scipy.stats.mode returns (modal value, count)
     mode = stats.mode(results, keepdims=False).mode
     maxv = max(results)
     minv = min(results)
     hist = plt.hist(results, bins=maxv - minv + 1)
     return mean, median, mode, hist
Example #9
def get_color_for(colors, mode):
    if mode == MEAN:
        return np.average(colors)
    elif mode == MEDIAN:
        return np.median(colors)
    elif mode == MODE:
        # np.mode does not exist; scipy.stats.mode returns (modal value, count)
        return stats.mode(colors, keepdims=False).mode
    return 255
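
For small non-negative integers such as 8-bit color values, a pure-NumPy mode via np.bincount is both correct and fast; a sketch under that assumption:

import numpy as np

colors = np.array([10, 10, 200, 200, 200, 50], dtype=np.uint8)
# bincount tallies occurrences of each value; argmax picks the most frequent
modal_color = np.bincount(colors).argmax()
print(modal_color)  # 200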
Example #10
def row_stats(lead_cols, pass_prefix, query_stats, source):
    for line in source:
        data = []
        stats = []

        if pass_prefix is not None and line.startswith(pass_prefix):
            print line.rstrip()
            continue
        else:
            v = line.rstrip().split('\t')
            for i in xrange(lead_cols, len(v)):
                # print v[i]
                try:
                    data.append(float(v[i]))
                except ValueError:
                    continue

            if len(data) == 0:
                for q in query_stats:
                    if q == 'count':
                        s = len(data)
                    else:
                        s = 'NA'
                    stats.append(s)
            else:
                for q in query_stats:
                    if q == 'mean':
                        s = np.mean(data)
                    elif q == 'median':
                        s = np.median(data)
                    elif q == 'mode':
                        # np.mode does not exist; scipy.stats.mode returns (modes, counts)
                        s = stats.mode(data)[0][0]
                    elif q == 'min':
                        s = min(data)
                    elif q == 'max':
                        s = max(data)
                    elif q == 'sum':
                        s = np.sum(data)
                    elif q == 'product':
                        s = np.prod(data)
                    elif q == 'count':
                        s = len(data)
                    stats.append(s)

            print '\t'.join(v[x]
                            for x in xrange(lead_cols)) + '\t' + '\t'.join(
                                map(str, stats))

    source.close()
    return
Example #11
def row_stats(lead_cols, pass_prefix, query_stats, source):
    for line in source:
        data = []
        stats = []

        if pass_prefix is not None and  line.startswith(pass_prefix):
            print line.rstrip()
            continue
        else:
            v = line.rstrip().split('\t')
            for i in xrange(lead_cols, len(v)):
                # print v[i]
                try:
                    data.append(float(v[i]))
                except ValueError:
                    continue

            if len(data) == 0:
                for q in query_stats:
                    if q == 'count':
                        s = len(data)
                    else:
                        s = 'NA'
                    stats.append(s)
            else:
                for q in query_stats:
                    if q == 'mean':
                        s = np.mean(data)
                    elif q == 'median':
                        s = np.median(data)
                    elif q == 'mode':
                        # np.mode does not exist; scipy.stats.mode returns (modes, counts)
                        s = stats.mode(data)[0][0]
                    elif q == 'min':
                        s = min(data)
                    elif q == 'max':
                        s = max(data)
                    elif q == 'sum':
                        s = np.sum(data)
                    elif q == 'product':
                        s = np.prod(data)
                    elif q == 'count':
                        s = len(data)
                    stats.append(s)
        
            print '\t'.join(v[x] for x in xrange(lead_cols)) + '\t' + '\t'.join(map(str, stats))

    source.close()
    return
Example #12
def summary_info(post_sample, lim=5):
    """
    Input: extracted STAN model sample (post_sample).
    lim - confidence interval width, i.e. lim = 5 => 95% central posterior interval
    """

    params = []
    means = []
    medians = []
    modes = []
    low_ci = []
    high_ci = []
    for i in post_sample:
        param_sim = post_sample[i]
        if (param_sim.shape[0] == param_sim.size):
            params.append(i)
            means.append(np.mean(param_sim))
            medians.append(np.median(param_sim))
            # np.mode does not exist; scipy.stats.mode returns (modal value, count)
            modes.append(stats.mode(param_sim, keepdims=False).mode)
            n_samples = len(param_sim)
            n_tail = int(n_samples * (lim / 100.0) / 2)  # integer index; avoid float indexing
            param_sim = np.sort(param_sim)
            low_ci.append(param_sim[n_tail])
            high_ci.append(param_sim[n_samples - n_tail - 1])
        else:
            for par_case in param_sim.T:
                params.append(i)
                means.append(np.mean(par_case))
                medians.append(np.median(par_case))
                modes.append(stats.mode(par_case, keepdims=False).mode)  # 'parcase' was a typo
                n_samples = len(par_case)
                n_tail = int(n_samples * (lim / 100.0) / 2)
                par_case = np.sort(par_case)
                low_ci.append(par_case[n_tail])
                high_ci.append(par_case[n_samples - n_tail - 1])
    output = {"params": params,
        "means": means,
        "medians": medians,
        "modes": modes,
        "low_ci": low_ci,
        "high_ci": high_ci}
    output = pd.DataFrame(output)
    return output
Example #13
    def sgd_calc(self, x_datas, cost, consts, X, model_params, hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        gparams = T.grad(
            cost=cost,
            wrt=model_params,  # 'model_params_list' was undefined
            consider_constant=consts
        )
        updates = [(param, param - learning_rate * gparam)
                    for param, gparam in zip(model_params, gparams)]

        train = theano.function(
            inputs=[X],
            outputs=cost,
            updates=updates
        )

        validate = theano.function(
            inputs=[X],
            outputs=cost
        )

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])

            if np.mod(i, n_mod_history) == 0:  # np.mod (modulo), not np.mode
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history
Example #14
    def check_missing(self):

        #print(self.df.isnull().sum())

        #drop_col here are OrderID & WorkID
        drop_col = self.df.loc[:,
                               self.df.isnull().sum() == len(self.df)].columns
        self.df.drop(drop_col, axis=1, inplace=True)

        flo, boo = self.get_flo_boo_list()

        if self.df.isnull().sum().max() > 0.05 * len(self.df):

            self.df = self.df.dropna()

        else:
            # fillna does not call a lambda; pass the computed fill values directly
            self.df[flo] = self.df[flo].fillna(self.df[flo].mean())
            # np.mode does not exist; pandas mode() gives the modal value per column
            self.df[boo] = self.df[boo].fillna(self.df[boo].mode().iloc[0])

        print('Shape after dealing with NA values is', self.df.shape)

        return self.df
Example #15
def Mode(x):
    """
    Compute the statistical mode

    :Parameters:
     - `x`: a (non-empty) numeric vector of data values

    :Types:
     - `x`: float list

    :returns: the mode 
    :returntype: float list

    :attention:  x cannot be empty
    """

    # numpy has no mode(); scipy.stats.mode returns (modal values, counts)
    res = stats.mode(x)
    mode = list(res[0])
    count = list(res[1])

    data = {'modal value': mode, 'counts': count}
    return data
Example #16
def Mode( x ):
    """
    Compute the statistical mode

    :Parameters:
     - `x`: a (non-empty) numeric vector of data values

    :Types:
     - `x`: float list

    :returns: the mode 
    :returntype: float list

    :attention:  x cannot be empty
    """

    # numpy has no mode(); scipy.stats.mode returns (modal values, counts)
    res = stats.mode(x)
    mode = list(res[0])
    count = list(res[1])

    data = {'modal value': mode, 'counts': count}
    return data
Example #17
import numpy as np
from scipy import stats

N = input()
nums = np.array(map(int, raw_input().split()))
print nums
print nums.mean()
print np.median(nums)
# np.mode does not exist; scipy.stats.mode returns (modal values, counts)
print stats.mode(nums)[0][0]

Example #18
def data_split(x, y=None, gap_length=3, data_length=10, av_diff=False, return_longest=False, verbose=True):
    """Split data at gaps where the difference between x data points is much greater than the average/modal difference.
    Return indices and values of data in each continuous section (and y values if supplied)."""
    i = np.arange(len(x))
    ## Find the average distance between the x data
    diff = np.diff(x)               # differences between adjacent data
    # np.mode does not exist; scipy.stats.mode (from scipy import stats) gives the modal separation
    av_gap = stats.mode(diff, keepdims=False).mode if not av_diff else np.average(diff)
    ## Get indices of beginning of gaps sufficiently greater than the average
    igap = np.nonzero(diff > gap_length * av_gap)[0]  # nonzero nested in tuple

    if verbose: print('data_split: {} gap(s) identified: {}'.format(len(igap), igap))

    xsplit = []
    if y is not None:
        ysplit = []
    isplit_all = []
    ## No gap => 1 linear section, 1 gap => 2 linear sections, 2 gaps => 3 linear sections etc.
    ## If no gaps, don't split the data
    if len(igap) == 0:
        xsplit.append(x)
        if y is not None:
            ysplit.append(y)
        isplit_all.append(i)
    else:
        ## First set of linear data before first gap
        if igap[0] - 0 >= data_length:  # Only add data if set is long enough
            isplit = np.arange(0, igap[0])  # beginning of data to beginning of gap
            xsplit.append(x[isplit])
            if y is not None:
                ysplit.append(y[isplit])
            isplit_all.append(isplit)
        else:
            if verbose: print('data_split: First set excluded as too short')

        ## Deal with linear data that isn't bordered by the ends of the set
        for j in np.arange(1, len(igap)):  # if start=stop, loop over empty array -> do nothing when len(igap)=1
            ## Note: arange doesn't include stop, so len 2 just loops over j=1
            if igap[j] - igap[j-1] + 1 >= data_length:  # Only add data if set is long enough
                isplit = np.arange(igap[j-1] + 1, igap[j])  # end of last gap to beginning of next gap
                xsplit.append(x[isplit])
                if y is not None:
                    ysplit.append(y[isplit])
                isplit_all.append(isplit)
            else:
                if verbose: print('data_split: Set {} excluded as too short'.format(j))

        ## Last set of linear data after last gap
        if (len(x) - 1) - igap[-1] + 1 >= data_length:  # Only add data if set is long enough
            isplit = np.arange(igap[-1] + 1, len(x))  # end of last gap to end of data (arange excludes stop)
            xsplit.append(x[isplit])
            if y is not None:
                ysplit.append(y[isplit])
            isplit_all.append(isplit)
        else:
            if verbose: print('data_split: Last set excluded as too short')

    # If return_longest is True, only return the longest gap-free section, else return all sections
    if return_longest:
        ind = int(np.array([len(xs) for xs in xsplit]).argmax())
        if y is not None:
            return isplit_all[ind], xsplit[ind], ysplit[ind]
        return isplit_all[ind], xsplit[ind]
    if y is not None:
        return isplit_all, xsplit, ysplit
    return isplit_all, xsplit
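
A quick sanity check of data_split with the mode line fixed, on hypothetical evenly spaced data containing one gap (assumes numpy and scipy.stats are imported as above):

import numpy as np

x = np.concatenate([np.arange(0, 20), np.arange(40, 60)])  # one jump of 21 between x[19] and x[20]
isections, xsections = data_split(x, gap_length=3, data_length=10, verbose=False)
print([len(s) for s in xsections])  # two continuous sections, e.g. [19, 20]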
print "Initial Average and std deviate:"
print imgave
print imgstd

xmax=img.shape[0]
ymax=img.shape[1]

test=1	
print "masking"
for y in range(0,ymax):
		for x in range(0, xmax):
			if img[x,y] > 3500 or img[x,y]< 3350:
				mask[x,y] = 0
print "apllying mask"
masked=img*mask	
imgmode = np.mode(masked)
imgave = np.average(masked)
imgstd = np.std(masked)
print "Initial Average and std deviate:"
print imgave
print imgstd
while (test !=0):



	for y in range(0,ymax):
		for x in range(0, xmax):
			if img[x,y]>5*imgstd+imgave:
				mask[x,y]= 0
				#print "Setting Zero"
Example #20
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 16:41:44 2019

@author: Shrutika
"""
import numpy as np
from scipy.stats import mode

d = np.random.randint(1, 10, 100)
# np.mode does not exist; scipy.stats.mode returns (modal value, count)
print(mode(d))
Example #21
n1.ppf(0.95, 60, 4)
n1.ppf(0.025) * 4 + 60
# ppf(probability): returns the coordinate on the standard normal distribution for that probability

n2 = st.binom
# Dice example: 3 occurrences of the number I care about, n = 5 trials, probability 1/6
# The probability of this occurring is 0.032
n2.pmf(3, 5, 1 / 6)

#======= Test ========

import numpy as np
test = [5, 6, 4, 7, 7, 12, 8]
np.median(test)
np.mean(test)
# np.mode(test)  # AttributeError: NumPy has no mode
# mode lives in pandas.
import pandas as pd

# The values must be converted to a DataFrame or Series explicitly.
# Series() forces them into a single column.
pd.Series(test).mean()
a2 = pd.Series(test)

a2.mean()
a2.median()
a2.mode()

n1.cdf(3.5, 2.8, 0.5) - n1.cdf(3.3, 2.8, 0.5)
n2.pmf(2, 5, 0.4)
Example #22
 def mode(self):
     # np.mode does not exist; scipy.stats.mode returns (modal value, count).
     # Assuming the data lives on the instance as self.vectorArray.
     return stats.mode(self.vectorArray, keepdims=False).mode
Example #23
def clean_column(column):  # def line restored; cleaner_funcs below calls clean_column
    cleaned_column = []
    for cell in column:  # the enumerate index was unused
        try:
            float(cell)
        except ValueError:
            pass
        else:
            cleaned_column.append(float(cell))
    return cleaned_column

cleaner_funcs = {u'zero': lambda c,col: 0.0,
                 u'mean': lambda c,col: numpy.mean(clean_column(col)),
                 u'median': lambda c,col: numpy.median(clean_column(col)),
                 u'min': lambda c,col: numpy.min(clean_column(col)),
                 u'max': lambda c,col: numpy.max(clean_column(col)),
                 # numpy has no mode(); scipy.stats.mode returns (modes, counts)
                 u'mode': lambda c,col: stats.mode(clean_column(col))[0][0],
                }
#TODO: jperla: put in protections for exceptions


def save_filters(csv_location, short_code, filter_names):
    properties = load_properties(csv_location, short_code)
    properties[u'filters'] = filter_names
    save_properties(csv_location, short_code, properties)

def load_filter_names(csv_location, short_code):
    properties = load_properties(csv_location, short_code)
    filter_names = [c for c in properties.get(u'filters', [])]
    return filter_names

def save_cleaners(csv_location, short_code, cleaners):
Example #24
 def rdd_mode_no_index(self):
     '''
     Check rdd_stats.mode() without index_field
     '''
     # np.mode does not exist; scipy.stats.mode returns (modal value, count)
     self.assertEqual(rdd_stats_no_index.mode(0)['ALL'], stats.mode(np_data)[0][0])
Example #25
#!/usr/bin/env python

# Take in a set of numbers as command line arguments. Store them as an array and
# print out the min, max, mean, median, mode and range of the set.

# By rliu

import sys
import numpy as np
from scipy import stats

# the original re-prompted with input() and ignored argv; use the arguments as stated above
arr = np.array(sys.argv[1].split(' '), dtype=float)

print (np.min(arr))
print (np.max(arr))
print (np.average(arr))
print (np.median(arr))
# np.mode does not exist; scipy.stats.mode returns (modal value, count)
print (stats.mode(arr)[0][0])
# range of the set = max - min; np.arange builds a sequence, which is not what was wanted
print (np.ptp(arr))
Example #26
def data_split(x, y=None, gap_length=3, data_length=10, av_diff=False, return_longest=False, verbose=True):
    """ Split data at gaps where the difference between x data points is much greater than the average/modal difference.
    Return indices and values of data in each continuous section (and y values if supplied)."""
    i = np.arange(len(x))
    ## Find the average distance between the x data
    diff = np.diff(x)               # differences between adjacent data
    # np.mode does not exist; scipy.stats.mode (from scipy import stats) gives the modal separation
    av_gap = stats.mode(diff, keepdims=False).mode if not av_diff else np.average(diff)
    ## Get indices of beginning of gaps sufficiently greater than the average
    igap = np.nonzero(diff > gap_length * av_gap)[0]  # nonzero nested in tuple

    if verbose: print('data_split: {} gap(s) identified: {}'.format(len(igap), igap))

    xsplit = []
    if y is not None:
        ysplit = []
    isplit_all = []
    ## No gap => 1 linear section, 1 gap => 2 linear sections, 2 gaps => 3 linear sections etc.
    ## If no gaps, don't split the data
    if len(igap) == 0:
        xsplit.append(x)
        if y is not None:
            ysplit.append(y)
        isplit_all.append(i)
    else:
        ## First set of linear data before first gap
        if igap[0] - 0 >= data_length:  # Only add data if set is long enough
            isplit = np.arange(0, igap[0])  # beginning of data to beginning of gap
            xsplit.append(x[isplit])
            if y is not None:
                ysplit.append(y[isplit])
            isplit_all.append(isplit)
        else:
            if verbose: print('data_split: First set excluded as too short')

        ## Deal with linear data that isn't bordered by the ends of the set
        for j in np.arange(1, len(igap)):  # if start=stop, loop over empty array -> do nothing when len(igap)=1
            ## Note: arange doesn't include stop, so len 2 just loops over j=1
            if igap[j] - igap[j-1] + 1 >= data_length:  # Only add data if set is long enough
                isplit = np.arange(igap[j-1] + 1, igap[j])  # end of last gap to beginning of next gap
                xsplit.append(x[isplit])
                if y is not None:
                    ysplit.append(y[isplit])
                isplit_all.append(isplit)
            else:
                if verbose: print('data_split: Set {} excluded as too short'.format(j))

        ## Last set of linear data after last gap
        if (len(x) - 1) - igap[-1] + 1 >= data_length:  # Only add data if set is long enough
            isplit = np.arange(igap[-1] + 1, len(x))  # end of last gap to end of data (arange excludes stop)
            xsplit.append(x[isplit])
            if y is not None:
                ysplit.append(y[isplit])
            isplit_all.append(isplit)
        else:
            if verbose: print('data_split: Last set excluded as too short')

    # If return_longest is True, only return the longest gap-free section, else return all sections
    if return_longest:
        ind = int(np.array([len(xs) for xs in xsplit]).argmax())
        if y is not None:
            return isplit_all[ind], xsplit[ind], ysplit[ind]
        return isplit_all[ind], xsplit[ind]
    if y is not None:
        return isplit_all, xsplit, ysplit
    return isplit_all, xsplit
Example #27
 def mode(self):
     # np.mode does not exist; scipy.stats.mode returns (modal value, count)
     return stats.mode(self.vec, keepdims=False).mode
Example #28
 def fillNullwithMode(dataframe, fieldList):
     for field in fieldList:
         # np.mode does not exist; pandas Series.mode() gives the modal value(s)
         mode = dataframe[field].mode().iloc[0]
         dataframe[field].fillna(value=mode, inplace=True)
Example #29
def evaluate_ic_quality(num_samples=Y_test.shape[0],
                        y_test=Y_test,
                        x_test=X_test,
                        inputs=inp_list,   #uniform_input
                        repeller=False,
                        types=False,
                        plot_targets=True,
                        plot_predictions=True,
                        print_correct=True,
                        print_predictions=True,
                        print_targets=True):
    errors = []
    correct = []
    for i in range(num_samples):
        target = y_test[i]
        target = np.reshape(target, (1, 205, 2))
        if plot_targets:
            plot_sequence(target, swapaxis=True, deltas=False, title='test_output_{}'.format(i))

        list_of_results = []
        print("length of inputs: ", len(inputs))
        for j in range(len(inputs)):
            print("current input: ", inputs[j][0][0])
            vector = iterate_gradients(inputs[j], target, iterations = 1, print_iterations=False, plot_iterations=False ,plot_y_true=False)
            vector = vector[0][0]
            if print_predictions:
                print("vector_{} after gradient iteration: ".format(i), vector)
                if types:
                    print("vector_{} after sum reduction over types dimension: ".format(i), np.sum(vector, axis=1))
            if print_targets:
                print("class_{}: ".format(i), x_test[i][0])
            #calculate error:
            if types:
                error = np.sqrt(np.mean(np.square(np.sum(vector, axis=1) - x_test[i][0])))
            else:
                error = np.sqrt(np.mean(np.square(vector - x_test[i][0])))
            print("error: ", error)
            errors.append(error)
            #check if classification is correct
            list_of_results.append(vector)

        print("list of results: ", list_of_results)
        vector = np.mode(list_of_results)
        #repeller:
        if repeller:
            n = len(vector)
            rounded_vec = np.around(vector * n)/n
            if not np.array_equal(rounded_vec, np.array(np.array([1/n] * n ))):
                rounded_vec = np.zeros_like(vector)
                rounded_vec[np.argmax(vector)] = 1
        elif types:
            #take the sum over the axis and reduce to dimension of 2:
            reduced_vector = np.sum(vector, axis=1)
            rounded_vec = np.zeros_like(reduced_vector)
            rounded_vec[np.argmax(reduced_vector)] = 1
            print("rounded reduced vec: ", rounded_vec)
        else:
            rounded_vec = np.zeros_like(vector)
            rounded_vec[np.argmax(vector)] = 1
        #compare result and target:
        if np.array_equal(rounded_vec, x_test[i][0]):
            correct.append(1)
        else:
            correct.append(0)
        if print_correct:
            print("correct: ", np.array_equal(rounded_vec, x_test[i][0]))
        #predict and plot
        if plot_predictions:
            prediction = rnn.predict(vector)
            plot_sequence(prediction, swapaxis=True, deltas=False, title='vector_{}_prediction'.format(i))
        #whether the prediction was correct

    accuracy = sum(correct)/num_samples
    mean_error = np.mean(errors)
    return(accuracy, mean_error)
Example #30
    # Inputs: training data, labels, maximum depth, objective (classification or regression)
    # Uses treeSplit function to determine best feature and value to branch on
    # Stops at ID3 base cases and returns tree
    
    assert obj in ("classification", "regression")  # make sure goal is defined ('is ... or ...' was always true)
    n,d = X.shape

    # Initialize new node
    node = Node(None,None,None,None,None,None)

    # Prediction is mean label for regression
    if obj == "regression":
        node.prediction = np.mean(y)
    # Prediction is label mode for classification
    if obj == "classification":
        # np.mode does not exist; scipy.stats.mode gives the most common label
        node.prediction = stats.mode(y, keepdims=False).mode
    
    # Base case: if leaves pure or insufficient features to partition or labels pure or max depth reached
    # Stop splitting and return tree
    if depth < 1 or n < 2 or len(np.unique(y)) == 1 or len(np.unique(X)) ==1:
        return node
    
    # Find best split to branch tree on
    split_idx,split_value,loss = treeSplit(X,y,w)
    
    # Isolate feature for split
    node.split_idx = split_idx
    
    # Use best split to assign threshold cut value
    node.split_value = split_value
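
If the goal is just the most common class label without SciPy, np.unique with return_counts=True does it in pure NumPy; a minimal sketch:

import numpy as np

y = np.array([0, 1, 1, 2, 1, 0])
labels, counts = np.unique(y, return_counts=True)
prediction = labels[counts.argmax()]  # most frequent label
print(prediction)  # 1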
    
Example #31
def returnOurMode():
    data = returnDataBaseData()
    # numpy has no mode(); scipy.stats.mode returns (modal value, count)
    return (round(stats.mode(data[0])[0][0], 3),
            round(stats.mode(data[1])[0][0], 3),
            round(stats.mode(data[2])[0][0], 3))
Example #32
def row_stats(lead_cols, pass_prefix, query_stats, source):
    in_header = True
    header_v = []
    for line in source:
        raw_data = []
        data = []
        stats = []

        v = line.rstrip().split('\t')

        if in_header:
            header_v = v[lead_cols:]
            in_header = False
            continue
        else:
            for i in xrange(lead_cols, len(v)):
                # print v[i]
                try:
                    data.append(float(v[i]))
                    raw_data.append(v[i])
                except ValueError:
                    continue

            if len(data) == 0:
                for q in query_stats:
                    if q == 'count':
                        s = len(data)
                    else:
                        s = 'NA'
                    stats.append(s)
            else:
                for q in query_stats:
                    if q == 'mean':
                        s = np.mean(data)
                    elif q == 'median':
                        s = np.median(data)
                    elif q == 'mode':
                        # np.mode does not exist; scipy.stats.mode returns (modes, counts)
                        s = stats.mode(data)[0][0]
                    elif q == 'min':
                        s = min(data)
                    elif q == 'max':
                        s = max(data)
                    elif q == 'sum':
                        s = np.sum(data)
                    elif q == 'product':
                        s = np.prod(data)
                    elif q == 'count':
                        s = len(data)
                    elif q == 'min_col':
                        val = min(data)
                        raw_val = raw_data[data.index(val)]
                        s = header_v[v[lead_cols:].index(raw_val)]
                    elif q == 'max_col':
                        val = max(data)
                        raw_val = raw_data[data.index(val)]
                        s = header_v[v[lead_cols:].index(raw_val)]
                    elif q == 'median_excl_min':
                        if len(data) < 2:
                            s = 'NA'
                        else:
                            val_index = data.index(min(data))
                            s = np.median( data[:val_index] + data[(val_index + 1):] )
                    elif q == 'median_excl_max':
                        if len(data) < 2:
                            s = 'NA'
                        else:
                            val_index = data.index(max(data))
                            s = np.median( data[:val_index] + data[(val_index + 1):] )

                    stats.append(s)
        
            print '\t'.join(v[x] for x in xrange(lead_cols)) + '\t' + '\t'.join(map(str, stats))

    source.close()
    return
Example #33
def main():

    # Enable / Disable the plotting
    use_plot = False
    render = False

    if use_plot:
        plt.ion()
        window = 500
        real_plot = PlotSignal(window=window)

        collect_fr = 1  # Frequency collecting data
        plot_fr = 1  # Frequency refresh plot

    if render:
        render_fr = 10  # Render frequency: only for simulation

    # Initialize Environment:
    env = gym.make(
        'DoublePendulumRR-v0')  # Use "DoublePendulumRR-v0" for the simulation
    ctrl = BalanceCtrl(dt=env.env.timing.dt)

    print("\n\n###############################")
    print("Episode {0}".format(0))
    obs = env.reset()

    print("\nStart Controller:\t\t\t", end="")
    t_i = 0
    while not ctrl.done:

        t_i += 1
        act = ctrl(obs)
        obs, _, done, _ = env.step(act[0] * np.ones(1))

        if done and t_i > 100:
            break

        if render:
            if np.mod(t_i, render_fr) == 0:
                env.render()

        if use_plot:
            if np.mod(t_i, collect_fr) == 0:
                x, theta1, theta2, x_dot, theta1_dot, theta2_dot = obs
                real_plot.update(theta1=theta1,
                                 theta1_dot=theta1_dot,
                                 theta2=theta2,
                                 theta2_dot=theta2_dot,
                                 volt=act[0],
                                 u=act[1],
                                 x=x)

            if np.mod(t_i, plot_fr) == 0:  # np.mod (modulo), as used above; not np.mode
                real_plot.plot_signal()

    # Stop the cart:
    env.step(np.array([0.]))
    print("Finished!")

    # Close the Connection to the system:
    time.sleep(0.5)
    env.close()
Example #34
plt.show()

# scipy.stats.itemfreq was removed from SciPy (1.3); np.unique gives the same value/count table
values, counts = np.unique(sample_array, return_counts=True)

plt.bar(values, counts)
plt.show()

np.min(sample_array)
np.max(sample_array)
np.mean(sample_array)
np.median(sample_array)

sorted_data = np.sort(sample_array)

# np.mode(sample_array)  # AttributeError: mode is not available in numpy
from statistics import mode
mode(sample_array)
# display only one mode, for multiple modes- Error
#StatisticsError: no unique mode; found 2 equally common values

from collections import Counter
data = Counter(sample_array)
data.most_common()  # Returns all unique items and their counts
data.most_common(2)

from statistics import stdev, variance
stdev(sample_array)
variance(sample_array)
Example #35
def mean(numbers):
    return float(sum(numbers)) / max(len(numbers), 1)


mean([1, 2, 3, 4])

import numpy as np
print(np.median([1, 3, 5, 7]))

import statistics
items = [1, 2, 3, 6, 8]
statistics.median(items)

# np.mode does not exist; scipy.stats.mode is the array-based alternative
from scipy import stats
print(stats.mode([1, 3, 5, 7], keepdims=False).mode)

import statistics
items = [1, 2, 3, 6, 6, 8, 8, 8]
statistics.mode(items)

import statistics
items = [1, 2, 3, 6, 6, 8, 8, 8]
statistics.mean(items)

import statistics
items = [1, 2, 3, 6, 6, 8, 8, 8]
statistics.variance(items)

import statistics
items = [1, 2, 3, 6, 6, 8, 8, 8]