Example #1
	def __init__(self, filename, sep=",", skip=0, index=True, header=True): 
		if type(filename) == str:
			allm = gendata(filename, sep=sep, skip=skip, index=index, header=header)
			if index==True:
				self.date = allm[0] 
				self.data = allm[1]
				self.header = allm[2]
			else:
				self.data = allm[0]
				self.header = allm[1]
				self.date = "none"
			self.data = self.data.transpose()
		else:
			self.data = filename
			self.date = index
			self.header = header
		self.N,self.T = self.data.shape
		self.mean = [sps.tmean(i) for i in self.data]
		if self.N > 1:
			self.variance = [sps.tvar(i) for i in self.data]
		else:
			self.variance = [sps.tvar(self.data)]
		self.mean = np.array(self.mean)
		self.variance = np.array(self.variance)
		self.covariance = np.zeros((self.N, self.N))
		self.correlation = np.zeros((self.N, self.N))
		self.skewness = 0
		self.kurtosis = 0
		self.dmean = (self.data.transpose()-self.mean).transpose()
		self.did_covar = False
		self.JB = 0
		self.JBpvalue = 0 
Example #2
	def query4(self, length=8):	
		global data1
		data1=pandas.read_sql_query(query['4a'], cnx)
		pysql = lambda q: pandasql.sqldf(q, globals())
		data1_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data1 ")
		global data2
		data2=pandas.read_sql_query(query['4b'], cnx)
		data2_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data2 ")			
		a=data1['exp'].values
		b=data2['exp'].values
		print(stats.tmean(a))
		print(stats.tmean(b))
		print(stats.tvar(a))
		print(stats.tvar(b))
		return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
           			 <form method="post" action="processQuery4"> 
            			Custom Query on Result: 
            			<input type="text" name="qu"><br> 
    					<input type="submit">
    				</form>
    				<h2>T-statistics for Exp Values::</h2>"""+(str)(stats.ttest_ind(a,b,equal_var=True)[0])+"""
					<h1>Exp values for patients with ALL<h3>(Rows-"""+str(len(data1.index))+""")</h3></h1>"""+data1_rep.to_html(index=False)+"""
					<h1>Exp values for patients without ALL<h3>(Rows-"""+str(len(data2.index))+""")</h3></h1>"""+data2_rep.to_html(index=False)+"""
Example #3
 def test_tvar(self):
     for n in self.get_n():
         x, y, xm, ym = self.generate_xy_sample(n)
         assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
                             decimal=12)
         assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
                             decimal=12)
Example #4
def get_12ECG_features(data, header_data):

    tmp_hea = header_data[0].split(' ')
    ptID = tmp_hea[0]
    num_leads = int(tmp_hea[1])
    sample_Fs = int(tmp_hea[2])
    gain_lead = np.zeros(num_leads)

    for ii in range(num_leads):
        tmp_hea = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(tmp_hea[2].split('/')[0])

    # for testing, we included the mean age of 57 if the age is a NaN
    # This value will change as more data is being released
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Sex'):
            tmp_sex = iline.split(': ')[1]
            if tmp_sex.strip() == 'Female':
                sex = 1
            else:
                sex = 0
#        elif iline.startswith('#Dx'):
#            label = iline.split(': ')[1].split(',')[0]

#   We are only using data from lead1
    peaks, idx = detect_peaks(data[0], sample_Fs, gain_lead[0])

    #   mean
    mean_RR = np.mean(idx / sample_Fs * 1000)
    mean_Peaks = np.mean(peaks * gain_lead[0])

    #   median
    median_RR = np.median(idx / sample_Fs * 1000)
    median_Peaks = np.median(peaks * gain_lead[0])

    #   standard deviation
    std_RR = np.std(idx / sample_Fs * 1000)
    std_Peaks = np.std(peaks * gain_lead[0])

    #   variance
    var_RR = stats.tvar(idx / sample_Fs * 1000)
    var_Peaks = stats.tvar(peaks * gain_lead[0])

    #   Skewness
    skew_RR = stats.skew(idx / sample_Fs * 1000)
    skew_Peaks = stats.skew(peaks * gain_lead[0])

    #   Kurtosis
    kurt_RR = stats.kurtosis(idx / sample_Fs * 1000)
    kurt_Peaks = stats.kurtosis(peaks * gain_lead[0])

    features = np.hstack([
        age, sex, mean_RR, mean_Peaks, median_RR, median_Peaks, std_RR,
        std_Peaks, var_RR, var_Peaks, skew_RR, skew_Peaks, kurt_RR, kurt_Peaks
    ])

    return features
Example #5
    def query4(self):
        global data1
        data1 = pandas.read_sql_query(query['4a'], cnx)
        global data2
        data2 = pandas.read_sql_query(query['4b'], cnx)
        a = data1['Expression Val'].values
        b = data2['Expression Val'].values
        print(stats.tmean(a))
        print(stats.tmean(b))
        print(stats.tvar(a))
        print(stats.tvar(b))
        tt = stats.ttest_ind(a, b, equal_var=True)
        return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
           			 <form method="post" action="processQuery4"> 
            			Custom Query on Result: 
            			<input type="text" name="qu"><br> 
    					<input type="submit">
    				</form>
    				<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    				<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
					<h1>Exp values for patients with ALL<h3>(Rows-""" + str(len(
            data1.index)) + """)</h3></h1>""" + data1.to_html(
                index=False) + """
					<h1>Exp values for patients without ALL<h3>(Rows-""" + str(
                    len(data2.index)) + """)</h3></h1>""" + data2.to_html(
                        index=False) + """
Example #6
 def test_tvar(self):
     for n in self.get_n():
         x, y, xm, ym = self.generate_xy_sample(n)
         assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
                             decimal=12)
         assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
                             decimal=12)
Example #7
def tableauMAJ(u, v, w, x, tableau_final):
    sigma1 = np.sqrt(stats.tvar(data["GasCum360"]))
    sigma2 = np.sqrt(stats.tvar(data["OilCum360"]))
    tableau_final["Gas360_SUP"] = tableau_final["GasCum360"] + u * sigma1
    tableau_final["Gas360_INF"] = tableau_final["GasCum360"] - v * sigma1
    tableau_final["Oil360_SUP"] = tableau_final["OilCum360"] + w * sigma2
    tableau_final["Oil360_INF"] = tableau_final["OilCum360"] - x * sigma2
    return tableau_final
Example #8
def chi_2(list, alf=.95):
    upper = ((len(list) - 1) * stats.tvar(list)) / (stats.chi2.ppf(
        alf / 2,
        len(list) - 1))
    lower = ((len(list) - 1) * stats.tvar(list)) / (stats.chi2.ppf(
        1 - (alf / 2),
        len(list) - 1))

    return (lower, upper)
Example #9
def get_file_features(data, header_data):

    age = header_data[0]
    sex = header_data[1]
    sample_Fs = header_data[2]
    num_leads = len(header_data) - 3
    gain_lead = np.zeros(num_leads)

    for ii in range(num_leads):
        gain_lead[ii] = header_data[3 + ii]

    for i in range(0, (len(data))):

        peaks, idx = detect_peaks(data[i], sample_Fs, gain_lead[i])

        #   mean
        mean_RR = np.mean(idx / sample_Fs * 1000)
        mean_Peaks = np.mean(peaks * gain_lead[i])

        #   median
        median_RR = 0
        median_Peaks = np.median(peaks * gain_lead[i])

        #   standard deviation
        std_RR = np.std(idx / sample_Fs * 1000)
        std_Peaks = np.std(peaks * gain_lead[i])

        #   variance
        var_RR = stats.tvar(idx / sample_Fs * 1000)
        var_Peaks = stats.tvar(peaks * gain_lead[i])

        #   Skewness
        skew_RR = stats.skew(idx / sample_Fs * 1000)
        skew_Peaks = stats.skew(peaks * gain_lead[i])

        #   Kurtosis
        kurt_RR = stats.kurtosis(idx / sample_Fs * 1000)
        kurt_Peaks = stats.kurtosis(peaks * gain_lead[i])

        curfeatures = np.vstack([
            mean_RR, mean_Peaks, median_RR, median_Peaks, std_RR, std_Peaks,
            var_RR, var_Peaks, skew_RR, skew_Peaks, kurt_RR, kurt_Peaks
        ])
        j = 0
        for j in range(0, (len(curfeatures))):
            if np.isnan(curfeatures[j]):
                curfeatures[j] = 0
        if i == 0:
            lead_features_tmp = curfeatures
        else:
            tmp = np.row_stack((lead_features_tmp, curfeatures))
            lead_features_tmp = tmp

    return lead_features_tmp
Example #10
File: sim.py Project: thibugio/813
def main():
    f27_scan = open('sim_scan27.txt', 'r')
    f27_table = open('sim_table27.txt', 'r')
    f35932_scan = open('sim_scan35932.txt', 'r')
    f35932_table = open('sim_table35932.txt', 'r')
    
    ntests = 10
    
    scan27 = [0 for i in range(ntests)]
    table27 = [0 for i in range(ntests)]
    scan35932 = [0 for i in range(ntests)]
    table35932 = [0 for i in range(ntests)]

    files = [f27_scan, f27_table, f35932_scan, f35932_table]
    arrs = [scan27, table27, scan35932, table35932]

    for i in range(ntests):
        for j in range(4):
            line = files[j].readline()
            if line[len(line)-1] == '\n':
                line = line[:len(line)-1]
            arrs[j][i] = float(line)
    for j in range(4):
        files[j].close()

    _, p27 = stats.ttest_ind(scan27, table27, equal_var=False)
    mean27scan = stats.tmean(scan27)
    mean27table = stats.tmean(table27)
    var27scan = stats.tvar(scan27)
    var27table = stats.tvar(table27)

    _, p35932 = stats.ttest_ind(scan35932, table35932, equal_var=False)
    mean35932scan = stats.tmean(scan35932)
    mean35932table = stats.tmean(table35932)
    var35932scan = stats.tvar(scan35932)
    var35932table = stats.tvar(table35932)

    f = open('sim_results_compare_scan_table.txt', 'w')
    f.write('27\n')
    f.write('scan mean: ' + str(mean27scan) + '\n')
    f.write('scan var: ' + str(var27scan) + '\n')
    f.write('table mean: ' + str(mean27table) + '\n')
    f.write('table var: ' + str(var27table) + '\n')
    f.write('p-value: ' + str(p27) + '\n\n')

    f.write('35932\n')
    f.write('scan mean: ' + str(mean35932scan) + '\n')
    f.write('scan var: ' + str(var35932scan) + '\n')
    f.write('table mean: ' + str(mean35932table) + '\n')
    f.write('table var: ' + str(var35932table) + '\n')
    f.write('p-value: ' + str(p35932) + '\n')

    f.close()
Example #11
    def process_CustomTstat(self, disease1, disease2, go):
        print(disease2)
        q1 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease1 + "\""
        q1_not = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` !=\"" + disease1 + "\""
        data1 = pandas.read_sql_query(q1, cnx)
        data1_not = pandas.read_sql_query(q1_not, cnx)
        a = data1['exp'].values
        b = data1_not['exp'].values
        print(stats.tmean(a))
        print(stats.tmean(b))
        print(stats.tvar(a))
        print(stats.tvar(b))
        if disease1 == disease2:
            tt = stats.ttest_ind(a, b, equal_var=True)
            return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
    				<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    				<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
					<h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str(
                len(data1.index)
            ) + """)</h3></h1>""" + data1.to_html(index=False) + """
					<h1>Exp values for patients without """ + disease1 + """<h3>(Rows-""" + str(
                len(data1_not.index)) + """)</h3></h1>""" + data1_not.to_html(
                    index=False) + """
					</html>"""
        else:
            q2 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease2 + "\""
            data2 = pandas.read_sql_query(q2, cnx)
            b = data2['exp'].values
            print(stats.tmean(a))
            print(stats.tmean(b))
            print(stats.tvar(a))
            print(stats.tvar(b))
            tt = stats.ttest_ind(a, b, equal_var=True)
            return """<html>
				<form method="get" action="index">
            	<button type="submit">Return</button>
           		</form>
           		</form>
           		 
    			<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    			<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
				<h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str(
                len(data1.index)) + """)</h3></h1>""" + data1.to_html(
                    index=False) + """
				<h1>Exp values for patients with """ + disease2 + """<h3>(Rows-""" + str(
                        len(data2.index)) + """)</h3></h1>""" + data2.to_html(
                            index=False) + """
Example #12
def transform_input_shapes(matrix_as_array):
    bit = BasicImageTransformations(matrix_as_array)
    matrices_inverted = np.invert(bit.matrix_skeletonized)
    components = measure.label(matrices_inverted,
                               connectivity=1,
                               return_num=True)[1]
    image_pixels = convert_to_pixels_list(bit.matrix_skeletonized)
    image_pixels_as_pair_of_lists = transform_pixels_to_pair_of_lists(
        image_pixels)
    pixels_x = extract_dimension(image_pixels, 1)
    pixels_y = extract_dimension(image_pixels, 0)
    min_x = min(pixels_x)
    max_x = max(pixels_x)
    min_y = min(pixels_y)
    max_y = max(pixels_y)
    mean_x = stats.tmean(pixels_x)
    mean_y = stats.tmean(pixels_y)
    variance_x = stats.tvar(pixels_x)
    variance_y = stats.tvar(pixels_y)
    correlation = stats.pearsonr(image_pixels_as_pair_of_lists[0],
                                 image_pixels_as_pair_of_lists[1])[0]
    xxy = transform_pixels_with_function(
        image_pixels, lambda pixel: pixel[0] * pixel[1] * pixel[1])
    xyy = transform_pixels_with_function(
        image_pixels, lambda pixel: pixel[0] * pixel[0] * pixel[1])
    mean_xxy = stats.tmean(xxy)
    mean_xyy = stats.tmean(xyy)
    xy_tr2 = transform_pixels_with_function(
        image_pixels, lambda pixel: pixel[0] * pixel[1] * np.sin(pixel[
            0] / 2.0) * np.sin(pixel[1] / 2.0))
    xy_tr4 = transform_pixels_with_function(
        image_pixels, lambda pixel: pixel[0] * pixel[1] * np.sin(pixel[
            0] / 4.0) * np.sin(pixel[1] / 4.0))
    mean_xy_tr2 = stats.tmean(xy_tr2)
    mean_xy_tr4 = stats.tmean(xy_tr4)
    print(mean_xy_tr2)
    print(mean_xy_tr4)
    edges = feature.canny(bit.matrix_float)
    edges_for_y = edges.sum(axis=1)
    edges_for_x = edges.sum(axis=0)
    avg_egdes_for_y = average_of_non_zero_elements(edges_for_y)
    avg_egdes_for_x = average_of_non_zero_elements(edges_for_x)
    skew_x = stats.skew(pixels_x)
    skew_y = stats.skew(pixels_y)
    features = np.array([
        components, min_x, max_x, min_y, max_y, mean_x, mean_y, variance_x,
        variance_y, correlation, mean_xxy, mean_xyy, mean_xy_tr2, mean_xy_tr4,
        avg_egdes_for_x, avg_egdes_for_y, skew_x, skew_y
    ])
    return features
Example #13
def calc(data_X,data_Y):
    "This function is calculate the mean ,variance, correlation,liner regression"
    print "x mean: ", np.mean(data_X) # Mean for X
    print "x variance: ", tvar(data_X) # Sample Variance for X
    print "y mean: ", np.mean(data_Y) # Mean for Y
    print "y variance: ", tvar(data_Y)  # Sample Variance for Y
    print "x and y correlation coefficient: ", pearsonr(np.array(data_X),np.array(data_Y))[0]

    # Build the liner model liner regression
    regr = linear_model.LinearRegression()
    # fit
    regr.fit(np.array(data_X).reshape(-1, 1), np.array(data_Y).reshape(-1, 1))
    a, b = regr.coef_, regr.intercept_
    print("liner regresstion: y=%.2fx+%.2f" %(a,b))
Example #14
def Quest4():
	global query
	global cnx
	data=pandas.read_sql_query(query['4a'], cnx)
	data2=pandas.read_sql_query(query['4b'], cnx)
	a=data['exp'].values
	b=data2['exp'].values
	
	print(stats.tmean(a))
	print(stats.tmean(b))
	print(stats.tvar(a))
	print(stats.tvar(b))

	print(stats.ttest_ind(a,b,equal_var=True))
	return
Example #15
def find_stats(df, dist_type, probNum=None):
    means = []
    sample_num = len(df.columns)  # AKA cases
    sample_size = len(df)  # AKA samples per case
    dist_type = dist_type + ' Distribution - ' + str(
        sample_num) + ' Cases that sample ' + str(sample_size) + ' numbers'

    print('number of cases:\t', sample_num, '\nsamples per case:\t',
          sample_size)
    print('\n')
    if probNum:
        temp = str(probNum * 100) + '%'
        print('Probability of Binomial distribution:\t', temp)

    # loop through each sample and find its respective mean
    for i in range(0, sample_num):
        mean = round(df.iloc[0:, i].mean(),
                     3)  # mean of sample size from generated random values
        means.append(mean)

    if sample_num == 1:
        # there is one sample & we need more than 1 value to calculate std_dev
        # std_dev = round( df.iloc[0:, i].std(), 3) # old way to calculate std_dev for just 1 sample

        mean_of_means = means[0]
        std_dev = round(df.iloc[0:, i].mean().std(), 3)
        variance = round(df.iloc[0:, i].mean().var(), 3)
        skewness = round(df.iloc[0:, i].skew(), 3)
        kurtosis = round(df.iloc[0:, i].kurtosis(), 3)

        # Turn array of means into a numpy array
        numpy_means = np.array(means)

        fig = plt.subplot()
        fig.hist(numpy_means, bins=50, range=[0, 1], histtype='bar')
        fig.set_xlabel('Mean (Value)')
        fig.set_ylabel('Value Frequency')
        fig.set_title(dist_type)
        plt.show()

        return means, mean_of_means, variance, std_dev, skewness, kurtosis

    else:
        # Turn array of means into a numpy array
        numpy_means = np.array(means)

        fig = plt.subplot()
        fig.hist(numpy_means, bins=50, range=[0, 1], histtype='bar')
        fig.set_xlabel('Mean (Value)')
        fig.set_ylabel('Value Frequency')
        fig.set_title(dist_type)
        plt.show()

        mean_of_means = round(numpy_means.mean(), 3)
        variance = round(stats.tvar(means), 3)
        std_dev = round(stdev(means), 3)
        skewness = round(stats.skew(means), 3)
        kurtosis = round(stats.kurtosis(means), 3)

        return means, mean_of_means, variance, std_dev, skewness, kurtosis
Example #16
def zero_var(var_list,df,threshold):
    # iterate over a copy so that removing items does not skip elements
    for col in list(var_list):
        if (stats.tvar(df[col]) == threshold) or (np.percentile(df[col],90) == 0.00):
            var_list.remove(col)

    return var_list
Example #17
def GFPFeatureCreation(tempG):
    print("Starting Feature Creation")
    # Create vertex * feature matrix
    # Loop through all the vertices and extract the vertices and attributes then all to a list
    featuresCollection = [[], [], [], [], [], []]
    f = []

    for v in tempG.vertices():
        featuresCollection[0].append(tempG.vp.dp[v])
        featuresCollection[1].append(tempG.vp.lc[v])
        featuresCollection[2].append(tempG.vp.tHN[v])
        featuresCollection[3].append(tempG.vp.nCCP[v])
        featuresCollection[4].append(tempG.vp.pR[v])
        featuresCollection[5].append(tempG.vp.eV[v])

    for i in range(6):
        median = numpy.median(featuresCollection[i])
        mean = numpy.mean(featuresCollection[i])
        stdev = numpy.std(featuresCollection[i])
        skewness = stats.skew(featuresCollection[i])
        kurtosis = stats.kurtosis(featuresCollection[i])
        variance = stats.tvar(featuresCollection[i])
        maxVal = stats.tmax(featuresCollection[i])
        minVal = stats.tmin(featuresCollection[i])
        f += [
            median, mean, stdev, skewness, kurtosis, variance, maxVal, minVal
        ]

    return f
Example #18
def GFPFeatureCreation(tempG):
    print("Starting Feature Creation")
    # Create vertex * feature matrix
    # Loop through all the vertices and extract the vertices and attributes then all to a list
    featuresCollection = []
    f = []

    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.dp.a)))
    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.lc.a)))
    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.tHN.a)))
    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.nCCP.a)))
    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.pR.a)))
    featuresCollection.append(np.array(np.nan_to_num(tempG.vp.eV.a)))

    for i in range(6):
        median = np.median(featuresCollection[i])
        mean = np.mean(featuresCollection[i])
        stdev = np.std(featuresCollection[i])
        skewness = stats.skew(featuresCollection[i])
        kurtosis = stats.kurtosis(featuresCollection[i])
        variance = stats.tvar(featuresCollection[i])
        maxVal = stats.tmax(featuresCollection[i])
        minVal = stats.tmin(featuresCollection[i])
        f += [median, mean, stdev, skewness, kurtosis, variance, maxVal, minVal]

    return f
Example #19
 def ss_within(cls, *args):
     """
     Get the sum of square deviations of each value compared to its group
     mean value
     """
     try: return sum((len(a)-1)*stats.tvar(a) for a in args)
     except: raise TypeError('Expected only lists or tuples')
Example #20
def GFPFeatureCreation(tempG):
    print("Starting Feature Creation")
    # Create vertex * feature matrix
    # Loop through all the vertices and extract the vertices and attributes then all to a list
    featuresCollection = [ [], [], [], [], [], [] ]
    f = []

    for v in tempG.vertices():
        featuresCollection[0].append(tempG.vp.dp[v])
        featuresCollection[1].append(tempG.vp.lc[v])
        featuresCollection[2].append(tempG.vp.tHN[v])
        featuresCollection[3].append(tempG.vp.nCCP[v])
        featuresCollection[4].append(tempG.vp.pR[v])
        featuresCollection[5].append(tempG.vp.eV[v])

    for i in range(6):
        median = numpy.median(featuresCollection[i])
        mean = numpy.mean(featuresCollection[i])
        stdev = numpy.std(featuresCollection[i])
        skewness = stats.skew(featuresCollection[i])
        kurtosis = stats.kurtosis(featuresCollection[i])
        variance = stats.tvar(featuresCollection[i])
        maxVal = stats.tmax(featuresCollection[i])
        minVal = stats.tmin(featuresCollection[i])
        f += [median, mean, stdev, skewness, kurtosis, variance, maxVal, minVal]

    return f
Example #21
 def _badPixMap(self, clip=30, filename='badpix.dmp'):
     median = np.median(self.image)
     var = tvar(self.image, (-100, 100))
     self.badpix = ma.masked_greater(self.image - median,
                                     clip * np.sqrt(var))
     if filename is not None:
         self.badpix.dump(filename)
Example #22
def overlap_variance(resList, anchors, rad, world_size, excludeDesert=True):
    niches = niche_analysis(resList, anchors, rad, world_size, excludeDesert)
    vals = []
    for key in niches.keys():
        vals.append(len(key)*(niches[key]/float(world_size*world_size)))

    return stats.tvar(vals)
Example #23
    def ownCorrelationMeasure(self, X, Y):
        # Group X-values into categories with their respective set of Y-values
        groups = {}
        for i in range(len(X)):
            key = X[i]
            value = Y[i]
            if key in groups:
                groups[key] += [value]
            else:
                groups[key] = [value]

        # Calculate normal distribution for every X-value
        normal_distributions = {}
        #normal_distributions_old = {}
        for x in groups.keys():
            #normal_distributions_old[x] = stats.norm.fit(groups[x])
            if len(groups[x]) > 1:
                normal_distributions[x] = (stats.tmean(groups[x]), stats.tvar(groups[x]))
            else:
                normal_distributions[x] = (groups[x][0], 0)

        # Calculate correlation measure
        max_dist = max(normal_distributions.values())
        min_dist = min(normal_distributions.values())
        correlation = max_dist[0]/min_dist[0] # Ratio between mean for max and min

        return [correlation, normal_distributions]
Example #24
 def test_calculate_variance(self):
     sample = []
     for i in range(0, 100):
         sample.append(random())
     var = self.stat.calculate_variance(sample,
                                        self.stat.calculate_mean(sample))
     control = tvar(sample)
     self.assertAlmostEqual(var, control)
Example #25
def print_and_plot_results(count, results, verbose, plot_file_name):
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'
              .format(int(rps_mean),
                      int(low),
                      int(high),
                      times_mean,
                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print('    from', times)
        print()


    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(
            arange(L), rps_mean_ar,
            color=color, yerr=(low_ar, high_ar), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(
            reversed(bars), reversed(test_name_ar),
            loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
Example #26
def sasha_chisquared(expec, observ):
    tempnum = (expec - observ)**2
    thevar = stats.tvar(observ)
    MeanSquaredError = np.sum(tempnum) / (len(expec) - 2)
    RootMeanSquaredError = np.sqrt(MeanSquaredError)
    tempnum /= thevar
    tempdenom = (len(expec) - 2)
    tempreturn = np.sum(tempnum) / tempdenom
    return [tempreturn, RootMeanSquaredError]
Example #27
def sasha_chisquared(expec, observ, degree_of_freedom=2):
    tempnum = (expec - observ)**2
    thevar = stats.tvar(observ)
    MeanSquaredError = np.sum(tempnum) / (len(expec)-2)
    RootMeanSquaredError = np.sqrt(MeanSquaredError)
    tempnum /= thevar
    tempdenom = (len(expec)-degree_of_freedom)
    tempreturn = np.sum(tempnum)/tempdenom
    return [tempreturn,RootMeanSquaredError]
Example #28
def sasha_slope_error(expec, observ, x_observ):
    tempnum = (expec - observ)**2
    tempnum = np.sum(tempnum)
    tempnum /= (len(expec) - 2)
    tempnum = np.sqrt(tempnum)
    thevar = stats.tvar(x_observ)
    thestdev = np.sqrt(thevar)
    tempnum /= thestdev
    return tempnum
Example #29
def sasha_slope_error(expec, observ, x_observ):
    tempnum = (expec - observ)**2
    tempnum = np.sum(tempnum)
    tempnum /=  (len(expec) -2 )
    tempnum = np.sqrt(tempnum)
    thevar = stats.tvar(x_observ)
    thestdev = np.sqrt(thevar)
    tempnum /= thestdev
    return tempnum
Example #30
  def plot(self,jobid,job_data=None):    
    if not self.setup(jobid,job_data=job_data):
      return
    
    ts=self.ts

    host_cpi = {}
    host_names = sorted(ts.data[0].keys())
    for v in host_names:
        ncores = len(ts.data[0][v])
        num = 0
        den = 0
        for k in range(ncores):
          ratio = nan_to_num(diff(ts.data[0][v][k]) / diff(ts.data[1][v][k]))

          try: cpi = vstack((cpi,ratio))
          except: cpi = array([ratio]) 
        
          num += diff(ts.data[0][v][k])
          den += diff(ts.data[1][v][k])

        host_cpi[v] = tmean(nan_to_num(num/den))

    mean_cpi = tmean(host_cpi.values())
    if len(host_cpi.values()) > 1:
      var_cpi  = tvar(host_cpi.values())
    else: var_cpi= 0.0

    self.fig = Figure(figsize=(10,12),dpi=110)
    self.ax=self.fig.add_subplot(1,1,1)

    ycore = arange(cpi.shape[0]+1)
    time = ts.t/3600.
    yhost=arange(len(host_cpi.keys())+1)*ncores + ncores

    fontsize = 8
    set_printoptions(precision=4)
    if len(yhost) > 80:
        fontsize /= 0.5*log(len(yhost))
    self.ax.set_ylim(bottom=ycore.min(),top=ycore.max())
    self.ax.set_yticks(yhost[0:-1]-ncores/2.)

    self.ax.set_yticklabels([key +'(' + "{0:.2f}".format(host_cpi[key]) +')' for key in host_names],fontsize=fontsize)

    self.ax.set_xlim(left=time.min(),right=time.max())
    
    pcm = self.ax.pcolor(time, ycore, cpi,vmin=0.0,vmax=5.0)
    pcm.cmap = cm.get_cmap('jet_r')

    try: self.ax.set_title(self.k2[ts.pmc_type][0] +'/'+self.k2[ts.pmc_type][1] + '\n' + 
                           r'Mean(Std)='+'{0:.2f}'.format(mean_cpi)+r'({0:.2f})'.format(sqrt(var_cpi)))
    except: self.ax.set_title(self.k2[0] +'/'+self.k2[1] + '\n'+ 
                              r'$\bar{Mean}$='+'{0:.2f}'.format(mean_cpi)+r'$\pm$'+'{0:.2f}'.format(sqrt(var_cpi)))
    self.fig.colorbar(pcm)
    self.ax.set_xlabel('Time (hrs)')
    self.output('heatmap')
Example #31
def more_constraint_stats(constraint_dist):
    nan_count = sum(math.isnan(x) for x in constraint_dist)
    one_count = constraint_dist.count(1)
    half_count = constraint_dist.count(0.5)
    filtered = [
        x for x in constraint_dist
        if (not math.isnan(x)) and x != 0.5 and x != 1
    ]
    return nan_count, one_count, half_count, stats.tmean(filtered), stats.tvar(
        filtered), stats.skew(filtered), stats.kurtosis(filtered)
Example #32
def print_and_plot_results(count, results, verbose, plot_file_name):
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'.format(int(rps_mean), int(low),
                                                      int(high), times_mean,
                                                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print('    from', times)
        print()

    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(arange(L),
                      rps_mean_ar,
                      color=color,
                      yerr=(low_ar, high_ar),
                      ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(reversed(bars), reversed(test_name_ar), loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
Example #33
def confidence_interval(errors):
    # tvar is the sample variance
    from scipy.stats import norm, tvar
    import math

    mu = sum(errors) / float(len(errors))
    var = tvar(errors)
    std_dev = math.sqrt(var)
    std_error = std_dev / math.sqrt(len(errors))
    span_95 = norm.interval(0.95, loc=mu, scale=std_error)

    return span_95
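
The comment above notes that tvar is the sample variance. A minimal sketch (with made-up numbers) of what that means in practice: with no trimming limits, scipy.stats.tvar matches NumPy's variance with ddof=1 rather than NumPy's default population variance.

import numpy as np
from scipy import stats

x = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])  # arbitrary sample

print(stats.tvar(x))      # unbiased sample variance, denominator n - 1 (about 4.571)
print(np.var(x, ddof=1))  # same value
print(np.var(x))          # population variance, denominator n (4.0)
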
Example #34
def var_truncNormal(a, b, mu, sigma, data, mod=3000.0):

    x1 = (a - mu)/sigma * stats.norm.pdf(a, mu, sigma)
    x2 = (b - mu)/sigma * stats.norm.pdf(b, mu, sigma)

    cx = stats.norm.cdf(b, mu, sigma) - stats.norm.cdf(a, mu, sigma)

    yhat = stats.tvar(data, limits=[mu-mod, mu+mod], inclusive=(False, False))
    sigma2 = yhat/((1+(x1-x2)/cx - ((x1-x2)/cx)**2))
    sigma = scipy.sqrt(sigma2)

    return sigma
Example #35
 def test_calculate_incremental_variance(self):
     control_sample = []
     sample = []
     var = 0
     for i in range(0, 20):
         for _ in range(0, 100):
             elem = random()
             sample.append(elem)
             control_sample.append(elem)
         var = self.stat.calculate_incremental_variance(sample)
         sample.clear()
     control = tvar(control_sample)
     self.assertAlmostEqual(var, control)
Example #36
def learn(tableau_final, metric_seuil, time_max, learning_rate):
    sigma1 = np.sqrt(stats.tvar(data["GasCum360"]))
    sigma2 = np.sqrt(stats.tvar(data["OilCum360"]))
    t0 = time.time()
    u, v, x, y = 0.5, 0.5, 0.5, 0.5
    tableau_final["Gas360_SUP"] = tableau_final["GasCum360"] + u * sigma1
    tableau_final["Gas360_INF"] = tableau_final["GasCum360"] - v * sigma1
    tableau_final["Oil360_SUP"] = tableau_final["OilCum360"] + x * sigma2
    tableau_final["Oil360_INF"] = tableau_final["OilCum360"] - y * sigma2
    X, Y = [], []
    while metric(tableau_final) > metric_seuil:
        if time.time() - t0 > time_max:
            break
        u += learning_rate
        v += learning_rate
        x += learning_rate
        y += learning_rate
        tableau_final["Gas360_SUP"] = tableau_final["GasCum360"] + u * sigma1
        tableau_final["Gas360_INF"] = tableau_final["GasCum360"] - v * sigma1
        tableau_final["Oil360_SUP"] = tableau_final["OilCum360"] + x * sigma2
        tableau_final["Oil360_INF"] = tableau_final["OilCum360"] - y * sigma2
        m = metric(tableau_final)
        X.append(u)
        Y.append(m)
        print(u)
        print(m)
        if u > 1: break
    plt.plot(X, Y)
    plt.show()
    minY = min(Y)
    minX = 0
    for j, value in enumerate(Y):
        if value == minY: minX = X[j]
    print([minY, minX])
    tableau_final["Gas360_SUP"] = tableau_final["GasCum360"] + minX * sigma1
    tableau_final["Gas360_INF"] = tableau_final["GasCum360"] - minX * sigma1
    tableau_final["Oil360_SUP"] = tableau_final["OilCum360"] + minX * sigma2
    tableau_final["Oil360_INF"] = tableau_final["OilCum360"] - minX * sigma2
    return (minY, minX)
Example #37
def random_sample(frame: pd, samples_total=300, sample_size=30, var=False):
    sample_stats = []
    for a_sample in range(samples_total):
        # pick random elts of sample_size and add to samples list
        samples = []
        for elt in range(sample_size):
            samples.append(frame.iloc[floor(frame.shape[0] *
                                            (random.random()))])

        if var:
            sample_stats.append(stats.tvar(samples))
        else:
            sample_stats.append(stats.tmean(samples))
    return sample_stats
Example #38
def getLineScoreStats(df,lineScoreCol,histScoreCol,binNumber=50):
	'''Return a Dataframe of line score stats for each bin. Relevant
	one is probably the mean.'''
	D = {}
	binnedScores = binLineScore(df,lineScoreCol,histScoreCol,binNumber)
	for bin in binnedScores:
		L = binnedScores[bin]
		if len(L) <=1:
			mean,var,dev = L[0],0,0
			continue
		mean = stats.tmean(L)
		var = stats.tvar(L)
		stanD = stats.tstd(L)
		D[bin] = {"mean":mean,"var":var,"stanDev.": stanD}
	return pd.DataFrame(D).T
Example #39
def calculateStats(data):
  """
    Calculate statistics on a numeric array data
    and return them in a dictionary
    @ In, data, list or numpy.array, the data
    @ Out, ret, dict, the dictionary containing the stats
  """
  ret = {}
  ret["mean"] = np.mean(data)
  ret["variance"] = np.var(data)
  ret["sampleVariance"] = stats.tvar(data)
  ret["stdev"] = stats.tstd(data)
  ret["skewness"] = stats.skew(data)
  ret["kurtosis"] = stats.kurtosis(data)
  return ret
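
A brief usage sketch of the calculateStats helper above; the generated sample and the seed are illustrative assumptions, not part of the original snippet.

import numpy as np

sample = np.random.default_rng(0).normal(loc=3.0, scale=2.0, size=1000)  # synthetic data
summary = calculateStats(sample)  # helper defined above
print(summary["mean"], summary["sampleVariance"], summary["stdev"])
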
Example #40
def calc_channel_hist(img):
    chans = cv2.split(img)
    #print(chans)
    mean=[]
    kurtosis=[]
    variance=[]
    skew=[]
    for chan in chans:
        # Calculate the histogram
        histo = cv2.calcHist([chan],[0],None,[100],[0,256])
        # Normalize the histogram
        hist_length = sum(histo)
        hist = [float(h) / hist_length for h in histo]
        #print(ss.describe(hist))
        skew.append(sp.skew(hist)[0])
        kurtosis.append(sp.kurtosis(hist)[0])
        mean.append(sp.tmean(hist))
        variance.append(sp.tvar(hist))
    return mean,variance,kurtosis,skew
Example #41
    def plotPDF(self):
        print "plot the PDF stuffs"
        figure(1)
        print shape(self.gather)
        for each in self.gather:
            print len(each)

        smoothness = 75
        kde = []
        distSpace = []
        p = []
        txt = []
        for i in range(3):
            kde.append(list())
            distSpace.append(list())
            p.append(list())
            txt.append(list())

        lbl = [5, 10, 15]
        distxx = [0.25, 0.50, .75]
        for kd, dS, gat, pl, lb in \
         zip(kde, distSpace,self.gather, p, lbl):
            kd = gaussian_kde(gat)
            dS = linspace(min(gat), max(gat), smoothness)
            pl = plt.plot(dS, kd(dS), label="%s units from source" % lb)

        title("Probability density function of plume down stream from source")

        mean = 12
        variance = 1
        sigma = np.sqrt(variance)
        x = linspace(9, 15, 100)
        plt.plot(x, mlab.normpdf(x, mean, sigma), label="normal distribution")

        for lb, t, gat, dis in zip(lbl, txt, self.gather, distxx):
            t = ("%s units from source:\nskew: %4.4f\nvariance: %4.4f" \
             %(lb, ss.skew(gat) , ss.tvar(gat)))

            xloc = xlim()[0] + 0.15 * diff(xlim())
            yloc = ylim()[0] + dis * diff(ylim())
            text(xloc, yloc, t)
        plt.legend()
        plt.show()
Example #42
def calc_channel_hist(img):
    chans = cv2.split(img)
    #print(chans)
    mean = []
    kurtosis = []
    variance = []
    skew = []
    for chan in chans:
        # Calculate the histogram
        histo = cv2.calcHist([chan], [0], None, [100], [0, 256])
        # Normalize the histogram
        hist_length = sum(histo)
        hist = [float(h) / hist_length for h in histo]
        #print(ss.describe(hist))
        skew.append(sp.skew(hist)[0])
        kurtosis.append(sp.kurtosis(hist)[0])
        mean.append(sp.tmean(hist))
        variance.append(sp.tvar(hist))
    return mean, variance, kurtosis, skew
Example #43
	def daubtran(self,event):
		h0=(1+m.sqrt(3))/(4*m.sqrt(2))
		h1=(3+m.sqrt(3))/(4*m.sqrt(2))
		h2=(3-m.sqrt(3))/(4*m.sqrt(2))
		h3=(1-m.sqrt(3))/(4*m.sqrt(2))
		
		g0 = h3
		g1 = -h2
		g2 = h1
		g3 = -h0

		a=self.current_signal_val
		n=len(self.current_signal_val)
		print self.current_plot1_txt
		if (n>=4):
			half = n >> 1
			tmp=[0]*n
			i=0
			j=0
			while (j<n-3):
				tmp[i]      = a[j]*h0 + a[j+1]*h1 + a[j+2]*h2 + a[j+3]*h3
				tmp[i+half] = a[j]*g0 + a[j+1]*g1 + a[j+2]*g2 + a[j+3]*g3
				j += 2
				i +=1
				tmp[i]      = a[n-2]*h0 + a[n-1]*h1 + a[0]*h2 + a[1]*h3
				tmp[i+half] = a[n-2]*g0 + a[n-1]*g1 + a[0]*g2 + a[1]*g3
		self.current_daub_value=tmp
		self.draw_plot2(self.current_daub_value,"daubechies plot")
		
		self.current_mean_value = np.mean(self.current_daub_value)
		self.current_median_value = np.median(self.current_daub_value)
		self.current_mode_value = int(st.mode(self.current_daub_value)[0])
		self.current_kurtosis_value = st.kurtosis(self.current_daub_value)
		self.current_skew_value = st.skew(self.current_daub_value)
		self.current_variance_value = st.tvar(self.current_daub_value)
		
		self.st3.SetLabel("mean: "+str(self.current_mean_value))
		self.st4.SetLabel("median: "+str(self.current_median_value))
		self.st5.SetLabel("mode: "+str(self.current_mode_value))
		self.st6.SetLabel("kurtosis: "+str(self.current_kurtosis_value))
		self.st7.SetLabel("skew: "+str(self.current_skew_value))
		self.st8.SetLabel("variance: "+str(self.current_variance_value))
Example #44
def info(var_list,df):
    info_var = dict()
    mean = []
    Var = []
    mode = []
    range_ = []
    for v in var_list:
        mean.append((df[v].mean()))
        Var.append(stats.tvar(df[v]))
        mode.append(stats.mode(df[v]))
        range_.append([df[v].min(), df[v].max()])
        

    info_var['Mean'] = mean
    info_var['Var'] = Var
    info_var['Mode'] = mode
    info_var['Range'] = range_
    
    return pd.DataFrame(info_var,
            index = var_list)
Example #45
def autocorr(x, k, SE=False):
    c0 = stats.tvar(x) #sample variance
    mu = stats.tmean(x) #sample mean
    r_arr = [1]
    
    T = float(len(x))
    
    for j in range(1, k+1):
        T1 = int(T-j)
        cj = 0.0
        for i in xrange(T1):
            cj += (x[i] - mu)*(x[i+j]-mu)
        cj = cj/T
        rj = cj/c0
        r_arr.append(rj)
    SEk = autocorrSE(r_arr, k, T)
    tval = r_arr[-1]/SEk
    pval = 1 - stats.norm.cdf(tval)
    if SE:
        return r_arr, SEk
    else:
        return r_arr, pval
Example #46
    def plotPDFandData(self):

        smoothness = 75
        kde7 = gaussian_kde(self.seventeens)
        dist_space7 = np.linspace(min(self.seventeens), max(self.seventeens),
                                  smoothness)
        p7 = plt.plot(dist_space7,
                      kde7(dist_space7),
                      label="10 units from source")

        pReal = plt.hist(self.seventeens, 200)

        txt7 = ("seventeens:\nskew: %4.4f\nkurtosis: %4.4f\nvariance: %4.4f" %
                (ss.skew(self.seventeens), ss.kurtosis(
                    self.seventeens), ss.tvar(self.seventeens)))
        xloc = xlim()[0] + 0.15 * np.diff(xlim())

        yloc = ylim()[0] + 0.50 * diff(ylim())
        text(xloc, yloc, txt7)

        plt.legend()
        plt.show()
Example #47
K  = 1
Asian_Price = 0.0
dt = 0.0001
asian_prices = []

for i in range(n):
	S = S0
	S_a = S0
	aver = S0
	Ka = S0

	for j in range(0,T*10000):
		mean = (r-0.5*(o**2))
		rand = random.gauss(mean*dt, o*math.sqrt(dt))
		S =    S*math.exp(rand)
		S_a =  S_a*math.exp((2*mean*dt)-rand)
		aver = (S+S_a)/2.0
		Ka = Ka+aver
	Ka = Ka/(T/dt)
	if Ka < aver:
		Asian_Price = Asian_Price + (aver-Ka)*math.exp(-r*T)
		asian_prices.append((aver-Ka)*math.exp(-r*T))
	else:
		asian_prices.append(0.0)
	if i%100 == 0:
		print i

print "dt = 0.0001 uSa = "+str(Asian_Price/n)
print "mean = "+str(stats.tmean(asian_prices))
print "error = "+str(math.sqrt(stats.tvar(asian_prices))/math.sqrt(float(n)))
Example #48
 def _badPixMap(self,clip=30,filename='badpix.dmp'):
     median = np.median(self.image)
     var  = tvar(self.image,(-100,100))
     self.badpix = ma.masked_greater(self.image-median,clip*np.sqrt(var))
     if filename is not None:
         self.badpix.dump(filename)
Example #49
def PDM(times, fluxes, frequencies, numberOfBins = 10, binWidth = 0.1):
    """
    Perform phase dispersion minimization.  Need to add option for flux errors.
    """

    # Offset time array to make t0 = 0
    zeroPoint = times[0]
    times -= zeroPoint

    # Total number of data points, frequencies
    numberOfData = len(times)
    numberOfFrequencies = len(frequencies)

    # Calculate width used to center the bins
    widthForCenter = 1/float(numberOfBins)

    dispersions = np.zeros(len(frequencies))

    # Loop through total number of frequencies
    for iFrequency in range(numberOfFrequencies):

        # Initialize array for number of points in bin, may need to place in loop
        numPoints = np.zeros(numberOfBins)
        binVariance = np.zeros(numberOfBins)

        # Convert times to phase folded on frequencies[iFrequency], sort times, fluxes, fluxErrors
        sortedPhases, sortedFluxes = FoldTimes(times, fluxes, frequencies[iFrequency])

        overallVariance = stats.tvar(sortedFluxes)

        # Loop through total number of bins
        for iBin in range(numberOfBins):         

            # Use 'binWidth' to determine the min/max values of the bin
            binCenter = (iBin+1)*widthForCenter - 0.5*widthForCenter
            binMin = binCenter - 0.5*binWidth
            binMax = binCenter + 0.5*binWidth

            # Pick out fluxes that have associated phase between binMin and binMax
            # Account for bins with phases < 0 and > 1
            sample = sortedFluxes[np.where(np.logical_or(np.logical_or(np.logical_and(sortedPhases < binMax, sortedPhases >= binMin),np.logical_and(sortedPhases - 1 < binMax, sortedPhases - 1 >= binMin)),np.logical_and(sortedPhases + 1 < binMax, sortedPhases + 1 >= binMin)))]
            numPoints[iBin] = len(sample)

            # Calculate the variances of individual bins
            if numPoints[iBin] > 1:
                binVariance[iBin] = stats.tvar(sample)
            else:
                binVariance[iBin] = 0.
 
        # Calculate overall variance for samples
        numerator = 0.
        denominator = 0.    
        for iBin in range(numberOfBins):
            numerator += (float(numPoints[iBin])-1)*binVariance[iBin]
            denominator += float(numPoints[iBin])
        denominator -= numberOfBins        
        sampleVariance = numerator/denominator

        # Calculate dispersion measure
        dispersions[iFrequency] = sampleVariance/overallVariance

    return dispersions
Example #50
		if rules == 1:
			profit_list,profit,status_list,entry_list,exit_list,entry_price_list,exit_price_list= trade_stock_1(mv_cp[choice], m_op.data[choice], m_cp.data[choice], varloss, vargain, N=N, alpha=alpha)
		elif rules == 2:
			profit_list,profit,status_list,entry_list,exit_list,entry_price_list,exit_price_list= trade_stock_2(mv_cp[choice], m_op.data[choice], m_cp.data[choice], vargain, N=N, alpha=alpha)
		elif rules == 3:
			profit_list,profit,status_list,entry_list,exit_list,entry_price_list,exit_price_list= trade_stock_3(mv_cp[choice], m_op.data[choice], m_cp.data[choice], varloss, N=N, alpha=alpha)
		elif rules == 4:
			profit_list,profit,status_list,entry_list,exit_list,entry_price_list,exit_price_list= trade_stock_4(mv_cp[choice], m_op.data[choice], m_cp.data[choice], N=N, alpha=alpha)
		total_profit[choice] = profit
		temp = [i for i in status_list if i != "none"]
		if len(temp) != len(profit_list):
			temp = temp[0:len(temp)-1]
		no_profit_trade[choice] = len([i for i in profit_list if i>0]) 
		no_loss_trade[choice] = len(profit_list)-no_profit_trade[choice]
		no_long_trade[choice] = len([i for i in temp if i == "long"])
		no_short_trade[choice] = len([i for i in temp if i == "short"])
		no_long_profit[choice] = len([i for i in range(0,len(temp)) if (temp[i] == "long")&(profit_list[i]>0)])
		no_short_profit[choice] = len([i for i in range(0,len(temp)) if (temp[i] == "short")&(profit_list[i]>0)])
		no_long_loss[choice] = no_long_trade[choice] - no_long_profit[choice] 
		no_short_loss[choice] = no_short_trade[choice] - no_short_profit[choice] 
		test_stats[choice] = sps.tmean(profit_list)*len(profit_list)/np.power(sps.tvar(profit_list), 0.5)
	end = time.time()
	duration = end-start
	print("The total time is {0} minutes".format(duration/60))
	allresult = np.c_[total_profit, no_profit_trade, no_loss_trade, no_long_trade, no_short_trade, no_long_profit, no_short_profit, no_long_loss, no_short_loss, test_stats]
	s = "\n".join([m_cp.header[j]+","+",".join(["{0}".format(i) for i in allresult[j]]) for j in range(0,K)])
	s = ",total_profit,no_profit_trade,no_loss_trade,no_long_trade, no_short_trade,no_long_profit, no_short_profit, no_long_loss, no_short_loss, test_stats\n"+s
	f = open(filename, "w")
	f.write(s)
	f.close()
Example #51
def cross_validation(transactions, sample_pct=0.50, support=-3, all_frequent_items=None):
    from fim import fpgrowth
    """
    Cross validation, 'old' version not using compact
    triangle representation from Forward.
    """
    # init
    _id = str(time()).replace('.','')
    # if all_frequent_items is None:
    #     all_frequent_items = fpgrowth(transactions, supp=support, min=1, max=3)

    cv_start = time()
    print "\n### Running cross validation {}###".format(_id)
    print "Total transactions:{}".format(len(transactions))
    # print "Total frequest items:{}".format(len(all_frequent_items))

    # run results
    avg_errors = []
    var_errors = []

    # all_triangles, all_triples = filter_items(all_frequent_items)

    for chunk, index, rest in chunks(transactions, int(len(transactions) * sample_pct)):# TODO insert proper sampling

        all_frequent_items = fpgrowth(rest, supp=support, min=1, max=3)
        all_triangles, all_triples = Forward.forward(all_frequent_items)

        # Get triples for estimates
        frequent_items = fpgrowth(chunk, supp=support, min=1, max=3)
        if len(frequent_items) > 0:
            print 'frequent items: {}'.format(len(frequent_items))
        else:
            print 'No frequent items in chunk: {}'.format(index)
            continue
        triangles, triples = Forward.forward(frequent_items)
        print 'triangles: {}'.format(len(triangles))

        estimates = []
        observations = []
        abs_errors = []
        max_est = 0
        max_obs = 0

        for (s1, s2, s3, s12, s23, s13, s123) in triangles:

            # if s123[1] != 0:
            #     continue
            # maxent estimate from the sample.
            # Index [1] of the tuples holds the # occurrences in the sample
            est = ent.maxent_est_rosa(s1[1], s2[1], s3[1], s12[1], s23[1], s13[1], float(len(transactions)-len(chunk)), num=int(math.log(len(transactions), 2))+1)

            # maximum estimate seen (for plotting)
            max_est = max(max_est, est)

            # record the estimate
            estimates.append(est)

            # from all observed triples get the actual observed number of triples
            observed = 0
            if all_triples.has_key(s123[0]):
                observed = all_triples[s123[0]]

            # maximum observation of the triple (for plotting)
            max_obs = max(max_obs, observed)

            # record the observed
            observations.append(observed)

            # record abs error
            error = abs(observed-est) / float(observed) * 100
            abs_errors.append(error)



        if len(abs_errors) > 0: #TODO handle this, probably when nothing has been found
            # evaluation
            min_error = min(abs_errors)
            max_error = max(abs_errors)
            avg_error = sum(abs_errors) / float(len(abs_errors))
            avg_errors.append(avg_error)
            var_error = 0
            if len(abs_errors) > 1:
                var_error = tvar(abs_errors) #tvar is the sample variance
            var_errors.append(var_error)

            # TODO histogram of the average errors. max-ent, extrapolation, heurestic
            # TODO print average error og the average errors to the log.

            res_string = "\nResult:\nSample size:{} min_error:{} max_error:{} avg_error:{} var_error:{}".format(len(chunk), min_error, max_error, avg_errors[-1], var_error)
            print res_string
        else:
            print 'No abs errors!'

    print "Cross validation done!"
    print "time: ", (time() - cv_start)
    total_avg_error = sum(avg_errors)/float(len(avg_errors))
    total_res_string = "Avg error:{}".format(total_avg_error)
    print total_res_string
    return total_avg_error
Example #52
#!/usr/bin/env python
#-*- coding:utf8 -*-

''' TODO: understand this passage
NumPy is a language extension that defines numeric array and matrix types and their basic operations.
SciPy is another language extension that uses NumPy for advanced mathematics, signal processing, optimization, statistics, and many other scientific tasks.
Matplotlib is a language extension that helps with plotting.
'''

# Let's take care of the scientific computing

import numpy
from scipy import stats

XXX_ar = stats.pearsonr([XXX])
print stats.tvar(XXX_ar), stats.tstd(XXX_ar), stats.tmean(XXX_ar)

# Pearson product-moment correlation coefficient
print stats.pearsonr(XXX_LISTA, XXX_LISTB)


print numpy.log2(1024)
print numpy.log10(0)
print numpy.log(XXX)  #it's ln

print numpy.exp(1)

print numpy.e, numpy.pi
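
Because the snippet above relies on XXX placeholders, here is a small concrete sketch (made-up values) of the same scipy.stats calls; note that pearsonr expects two sequences of equal length.

from scipy import stats

a = [1.0, 2.0, 3.0, 4.0, 5.0]   # illustrative data
b = [2.1, 3.9, 6.2, 8.1, 9.8]

print(stats.tvar(a), stats.tstd(a), stats.tmean(a))
print(stats.pearsonr(a, b))     # (correlation coefficient, p-value)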


Example #53
 def test_tvarX(self):
     y = stats.tvar(X, (2, 8), (True, True))
     assert_almost_equal(y, 4.6666666666666661)
Example #54
Description:              Source code 1 from a course on bootstrapping. It demonstrates how to estimate a mean and its standard error by bootstrapping.

"""
###########################################################################################################################################################################
import numpy as np
import numpy.random as npr
import scipy.stats as sps
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd

N = 1000 #Initial sample size
B = 500 #number of bootstrap replications.
m = 3 #True mean of the data
s = 2 #True standard deviation of the data. 
data = sps.norm.rvs(size=N, loc=m, scale=s) #generating the random sample from normality. 
mhat = sps.tmean(data) #calculate sample mean estimate. 
shat2 = sps.tvar(data) #calculate sample variance estimate. 
bootsample = [npr.choice(data,size=N,replace=True) for i in range(0,B)] #generate B bootstrap samples. 
bootmean = [sps.tmean(j) for j in bootsample]
plt.hist(bootmean,bins=int(np.floor(B/10)))
plt.show()
columns = ['True', 'Estimated', 'Bootstrap']
index = ['mean', 'variance']
result = [ [m,mhat,sps.tmean(bootmean)], [np.power(s,2)/N, shat2/N, sps.tvar(bootmean)]]
result = np.array(result)
resultpd = pd.DataFrame(result, columns=columns, index=index)
print(resultpd)
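
The description mentions the standard error of the mean; in the snippet above it corresponds to the square root of the sample variance of the bootstrap means. A one-line sketch, assuming the variables from the snippet are in scope:

import numpy as np
import scipy.stats as sps

se_boot = np.sqrt(sps.tvar(bootmean))  # bootstrap standard error of the mean
print(se_boot)  # should be close to the theoretical s / np.sqrt(N) = 2 / sqrt(1000)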


Example #55
N = 1000 #sample size
B = 500 #number of bootstrap replications
a = 1  #true intercept
b = 0.5 #true slope
s = 0.4 #true standard deviation of the residuals
e = sps.norm.rvs(size=N,loc=0, scale=s) #simulating residual vector
x = sps.norm.rvs(size=N,loc=2,scale=1) #simulating explanatory variable
y = a+b*x+e #constructing dependent variable
m = np.c_[y,x] #constructing dataset
ols_main = lm.lm('y~c+x', data=m, header=['y','x']) #estimating linear regression based on simulated dataset
ols_main.estimate()
coef = np.zeros((B,2)) #initiate vector to store bootstrapped coefficient estimates. 
for j in range(0,B):
    index = npr.choice(range(0,N), size=N, replace=True) #construct index set for bootstrap sample. 
    bootsample = m[list(index)] #extract bootstrap sample. 
    ols_temp = lm.lm('y~c+x', data=bootsample, header=['y','x']) #estimate regression based on bootstrap sample
    ols_temp.estimate()
    coef[j] = ols_temp.coef.reshape((1,2)) #store bootstrap estimate
####################################Calculate the true variance-covariance matrix of the OLS estimate###########################
tempx = np.c_[np.ones(N), x]
cov = np.power(s,2)*np.linalg.inv(np.dot(tempx.transpose(), tempx))
truecov = np.diag(cov)
###############################################################################################################################
summary = np.c_[truecov.reshape((2,1)), np.diag(ols_main.cov).reshape((2,1)), np.array([sps.tvar(coef[:,i]) for i in range(0,2)]).reshape((2,1))]
summary = np.r_[np.c_[np.r_[a,b], ols_main.coef.reshape((2,1)), np.array([sps.tmean(coef[:,i]) for i in range(0,2)]).reshape((2,1))], summary]
header = ['Theoretical', 'Sample Estimate', 'Bootstrap Estimate']
labelx = ['a','b','var a', 'var b']
result = pd.DataFrame(summary, columns=header, index = labelx)
print(result)

Example #56
def cross_validation_compact(transactions, sample_pct=0.50, support=-3, all_frequent_items=None):
    """
    Cross validation. Using compact representation from
    Forward.
    """
    from fim import fpgrowth
    # init
    _id = str(time()).replace('.','')
    # if all_frequent_items is None:
    #     all_frequent_items = fpgrowth(transactions, supp=support, min=1, max=3)

    cv_start = time()
    print "\n### Running cross validation {}###".format(_id)
    print "Total transactions:{}".format(len(transactions))
    # print "Total frequest items:{}".format(len(all_frequent_items))

    # run results
    avg_errors = []
    var_errors = []

    # all_triangles, all_triples = filter_items(all_frequent_items)

    for chunk, index, rest in chunks(transactions, int(len(transactions) * sample_pct)):# TODO insert proper sampling

        all_frequent_items = fpgrowth(rest, supp=support, min=1, max=3)
        all_triangles, all_triples = Forward.forward_compact(all_frequent_items)

        # Get triples for estimates
        frequent_items = fpgrowth(chunk, supp=support, min=1, max=3)
        if len(frequent_items) > 0:
            print 'frequent items: {}'.format(len(frequent_items))
        else:
            print 'No frequent items in chunk: {}'.format(index)
            continue
        triangle_tree, triples = Forward.forward_compact(frequent_items)
        print 'triangle roots: {}'.format(len(triangle_tree))

        estimates = []
        observations = []
        abs_errors = []
        max_est = 0
        max_obs = 0

        # DFS of the tree holding all triangles
        for n1 in triangle_tree.keys():
            s1, s2_dict = triangle_tree[n1]
            for n2 in s2_dict.keys():
                s2, s12, s3_dict = s2_dict[n2]
                for n3 in s3_dict.keys():
                    s3, s13, s23, s123 = s3_dict[n3]

                    est = ent.maxent_est_rosa(s1, s2, s3, s12, s23, s13, float(len(transactions)-len(chunk)), num=int(math.log(len(transactions), 2))+1)

                    # maximum estimate seen (for plotting)
                    max_est = max(max_est, est)

                    # record the estimate
                    estimates.append(est)

                    # from all observed triples get the actual observed number of triples
                    observed = 0
                    if all_triples.has_key((n1, n2, n3)):
                        observed = all_triples[(n1, n2, n3)]

                    # maximum observation of the triple (for plotting)
                    max_obs = max(max_obs, observed)

                    # record the observed
                    observations.append(observed)

                    # record abs error
                    error = abs(observed - est) / float(observed) * 100
                    abs_errors.append(error)


        if len(abs_errors) > 0: #TODO handle this, probably when nothing has been found
            # evaluation
            min_error = min(abs_errors)
            max_error = max(abs_errors)
            avg_error = sum(abs_errors) / float(len(abs_errors))
            avg_errors.append(avg_error)
            var_error = 0
            if len(abs_errors) > 1:
                var_error = tvar(abs_errors) #tvar is the sample variance
            var_errors.append(var_error)

            res_string = "\nResult:\nSample size:{} min_error:{} max_error:{} avg_error:{} var_error:{}".format(len(chunk), min_error, max_error, avg_errors[-1], var_error)
            print res_string
        else:
            print 'No abs errors!'

    print "Cross validation done!"
    print "time: ", (time() - cv_start)
    total_avg_error = sum(avg_errors)/float(len(avg_errors))
    total_res_string = "Avg error:{}".format(total_avg_error)
    print total_res_string
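The chunks helper used above is not included in this snippet; presumably it walks the transaction list in consecutive blocks of the given size and yields each block together with its index and the remaining transactions. A hypothetical sketch of such a generator:

def chunks(transactions, size):
    # hypothetical helper: yield (chunk, chunk_index, rest_of_the_transactions)
    for index, start in enumerate(range(0, len(transactions), size)):
        chunk = transactions[start:start + size]
        rest = transactions[:start] + transactions[start + size:]
        yield chunk, index, rest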
Ejemplo n.º 57
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols is not None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols is not None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
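    # Note: the *_cols arguments are 1-based column indexes into the tab-separated input file;
    # they are converted to 0-based positions via cols[int(index) - 1] in the loop below.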
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
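        # For the trimmed statistics below (tmean/tmin/tmax/tvar/tstd/tsem), --mf and --nf act as the
        # lower and upper trim limits; when they are 0 the statistic is computed over all values,
        # otherwise they are passed to scipy as trim limits.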
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
Ejemplo n.º 58
0
import math
from scipy.stats import tvar
def getStdEstimation(samples):
    return math.sqrt(tvar(samples))
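For reference, scipy exposes the same quantity directly as stats.tstd, the square root of the ddof=1 trimmed variance, so the helper above is equivalent to calling:

from scipy import stats
print(stats.tstd([1.0, 2.0, 3.0, 4.0]))   # same as math.sqrt(stats.tvar([1.0, 2.0, 3.0, 4.0]))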
Ejemplo n.º 59
0
 def get_pooled_standard_error(cls, *args):
     """
     Get the pooled standard error of the groups
     """
     try: return sum(len(a)*stats.tvar(a) for a in args)/float(sum(len(a)-1 for a in args))
     except: raise TypeError('Expected only lists or tuples')
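For comparison, the textbook pooled variance weights each group's variance by its degrees of freedom (n_i - 1) rather than by its size; a standalone sketch of that variant (shown only for contrast, not as the method above):

from scipy import stats

def pooled_variance(*groups):
    # classical pooled variance: sum((n_i - 1) * s_i^2) / sum(n_i - 1)
    num = sum((len(g) - 1) * stats.tvar(g) for g in groups)
    den = sum(len(g) - 1 for g in groups)
    return num / float(den)

print(pooled_variance([1.0, 2.0, 3.0], [2.0, 4.0, 6.0, 8.0]))  # 4.4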