def test_variation(self):
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
                            decimal=12)
        assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
                            decimal=12)
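# --- Added sketch (not from the original sources) ---
# Minimal illustration of what scipy.stats.variation computes: the coefficient
# of variation, i.e. standard deviation divided by mean (population std,
# ddof=0, by default). This is the quantity every snippet in this section uses.
import numpy as np
from scipy import stats

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
cv = stats.variation(x)                      # equivalent to np.std(x) / np.mean(x)
assert np.isclose(cv, np.std(x) / np.mean(x))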
def collect_cv_Alternative(spike_data):
    '''
    Input  => Spike data for each cell: [[spike_amp, spike_peak, spike_right_edge], ...]
    Output => List of CV values, one per cell
    '''
    a = spike_data
    amp_collect = []
    cv_collect = []
    for i in range(len(a)):
        if i > 0:
            # Collect the previous list of amplitudes and calculate CV
            if amp_collect == []:
                # The cell has an empty array; no amplitudes were collected
                cv_collect.append(0)
            else:
                cv_collect.append(stats.variation(amp_collect))  # Calculate CV
            amp_collect = []  # Collect the next cell's amplitudes into an empty list
        if len(a[i]) != 0:
            for j in range(len(a[i])):  # Specified cell
                if a[i][j] != []:
                    for k in range(len(a[i][j])):
                        if a[i][j][k] != []:
                            for l in range(len(a[i][j][k])):
                                if l == 0:
                                    amp_collect.append(a[i][j][k][l])  # Append the amplitude
        if i == 24 and j == 4:
            cv_collect.append(stats.variation(amp_collect))
    return cv_collect
def fit_CpoR(self, T, CpoR): """ Fits parameters a1 - a6 using dimensionless heat capacity and temperature. Parameters ---------- T - (N,) ndarray Temperatures (K) to fit the polynomial CpoR - (N,) ndarray Dimensionless heat capacities that correspond to T array """ #If the Cp/R does not vary with temperature (occurs when no vibrational frequencies are listed) if (np.mean(CpoR) < 1e-6 and np.isnan( variation(CpoR))) or variation(CpoR) < 1e-3 or all( np.isnan(CpoR)): self.T_mid = T[int(len(T) / 2)] self.a_low = np.array(7 * [0.]) self.a_high = np.array(7 * [0.]) else: max_R2 = -1 R2 = np.zeros(len(T)) for i, T_mid in enumerate(T): #Need at least 5 points to fit the polynomial if i > 5 and i < (len(T) - 6): #Separate the temperature and heat capacities into low and high range (R2[i], a_low, a_high) = self._get_CpoR_R2(T, CpoR, i) max_R2 = max(R2) max_i = np.where(max_R2 == R2)[0][0] (max_R2, a_low_rev, a_high_rev) = self._get_CpoR_R2(T, CpoR, max_i) empty_arr = np.array([0.] * 2) self.T_mid = T[max_i] self.a_low = np.concatenate((a_low_rev[::-1], empty_arr)) self.a_high = np.concatenate((a_high_rev[::-1], empty_arr))
def _fit_CpoR(T, CpoR, units): """Fit a[0]-a[4] coefficients given the dimensionless heat capacity data Parameters ---------- T : (N,) `numpy.ndarray`_ Temperatures in K CpoR : (N,) `numpy.ndarray`_ Dimensionless heat capacity units : str Units corresponding to Shomate polynomial. Units should be supported by :class:`~pmutt.constants.R`. Returns ------- a : (8,) `numpy.ndarray`_ Lower coefficients of Shomate polynomial .. _`numpy.ndarray`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html """ # If the Cp/R does not vary with temperature (occurs when no # vibrational frequencies are listed), return default values if (np.isclose(np.mean(CpoR), 0.) and np.isnan(variation(CpoR))) \ or np.isclose(variation(CpoR), 0.) \ or any([np.isnan(x) for x in CpoR]): return np.zeros(7) else: # Pass the unit set adj_shomate_CpoR = lambda T, A, B, C, D, E: _shomate_CpoR( T=T, A=A, B=B, C=C, D=D, E=E, units=units) [a, _] = curve_fit(adj_shomate_CpoR, T, np.array(CpoR)) a = np.append(a, [0., 0., 0.]) return a
def SPAEF(s, o):
    # remove NaNs
    s, o = filter_nan(s, o)
    bins = np.around(math.sqrt(len(o)), 0)
    # compute corr coeff
    alpha = np.corrcoef(s, o)[0, 1]
    # compute ratio of CV
    beta = variation(s) / variation(o)
    # compute zscore: mean=0, std=1
    o = zscore(o)
    s = zscore(s)
    # compute histograms
    hobs, binobs = np.histogram(o, bins)
    hsim, binsim = np.histogram(s, bins)
    # convert int to float, critical conversion for the result
    hobs = np.float64(hobs)
    hsim = np.float64(hsim)
    # find the overlap of the two histograms
    minima = np.minimum(hsim, hobs)
    # compute the fraction of intersection area to the observed histogram area
    # (histogram intersection / overlap index)
    gamma = np.sum(minima) / np.sum(hobs)
    # compute SPAEF from the three components
    spaef = 1 - np.sqrt((alpha - 1)**2 + (beta - 1)**2 + (gamma - 1)**2)
    return spaef, alpha, beta, gamma
def point_filter(p):
    # not near any existing points
    for q in existing_points:
        if (q[0] - p[0]) ** 2 + (q[1] - p[1]) ** 2 < r_sq:
            return False
    c, r = int(p[0]), int(p[1])
    # cannot be near the edge of the image
    if (c < side_thresh or r < side_thresh
            or cols - c < side_thresh or rows - r < side_thresh):
        return False
    # cannot be super bright or dark
    if not (0.02 < np.mean(image[r, c, :]) < 0.98):
        return False
    # cannot be on an edge
    r0, r1 = max(r - edge_window, 0), min(r + edge_window + 1, rows)
    c0, c1 = max(c - edge_window, 0), min(c + edge_window + 1, cols)
    if np.any(image_canny[r0:r1, c0:c1]):
        return False
    # chromaticity coefficient of variation cannot be too high
    # (cv = std / mean)
    chroma_cv = 0.5 * (
        variation(image_lab[r0:r1, c0:c1, 1], axis=None)
        + variation(image_lab[r0:r1, c0:c1, 2], axis=None)
    )
    return chroma_cv < 0.50
def read_output(ndata, ntimes, Nt, ngx, ngy):
    y = np.full((ndata, ntimes, ngx, ngy), 0.0)
    for i in range(1, ndata + 1):
        for j in range(1, ntimes + 1):
            y[i-1, j-1, :, :] = np.loadtxt("output/conc_{}_t_{}.dat".format(i, j))
    y = np.where(y > 0.0, y, 0.0)
    y0 = y[:, :Nt]
    # id0 = y0.nonzero()
    # y0 = y0[id0]
    y1 = y[:, Nt:]
    # id1 = y1.nonzero()
    # y1 = y1[id1]
    # y0_mean = np.average(y0)
    # y1_mean = np.average(y1)
    y_cov0 = variation(y0, axis=None)
    y_cov1 = variation(y1, axis=None)
    # print("cov0: {}".format(y_cov0))
    # print("cov1: {}".format(y_cov1))
    weight = y_cov0 / y_cov1
    weight = 5
    print("weight:{}".format(weight))
    with open("weight.txt", "w") as text_file:
        text_file.write("%f" % weight)
    return weight
def _fit_CpoR(T, CpoR):
    """Fit a[0]-a[4] coefficients given the dimensionless heat capacity data

    Parameters
    ----------
        T : (N,) `numpy.ndarray`_
            Temperatures in K
        CpoR : (N,) `numpy.ndarray`_
            Dimensionless heat capacity
    Returns
    -------
        a : (8,) `numpy.ndarray`_
            Lower coefficients of Shomate polynomial

    .. _`numpy.ndarray`: https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.ndarray.html
    """
    # If the Cp/R does not vary with temperature (occurs when no
    # vibrational frequencies are listed), return default values
    if (np.isclose(np.mean(CpoR), 0.) and np.isnan(variation(CpoR))) \
            or np.isclose(variation(CpoR), 0.) \
            or any([np.isnan(x) for x in CpoR]):
        return np.zeros(7)
    else:
        [a, _] = curve_fit(_shomate_CpoR, T, np.array(CpoR))
        a = np.append(a, [0., 0., 0.])
        return a
def get_score(a, maxvar):
    a1 = [float(x) for x in a[1].split(";")]
    a2 = [float(x) for x in a[2].split(";")]
    var1, var2, var3 = variation(a1), variation(a2), variation([sum(a1), sum(a2)])
    varscore = get_varscore(var1, var2, var3, maxvar)
    change = abs(float(a[3]))
    expression = (sum(a1) + sum(a2)) / (len(a1) + len(a2))
    return (change**2) * expression * varscore
def test_neg_inf(self):
    # Edge case that produces -inf: ddof equals the number of non-nan
    # values, the values are not constant, and the mean is negative.
    x1 = np.array([-3, -5])
    assert_equal(variation(x1, ddof=2), -np.inf)

    x2 = np.array([[np.nan, 1, -10, np.nan],
                   [-20, -3, np.nan, np.nan]])
    assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                 [-np.inf, -np.inf])
def test_variation_ddof(self):
    # test variation with delta degrees of freedom
    # regression test for gh-13341
    a = np.array([1, 2, 3, 4, 5])
    nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
    y = variation(a, ddof=1)
    nan_y = variation(nan_a, nan_policy="omit", ddof=1)
    assert_allclose(y, np.sqrt(5 / 2) / 3)
    assert y == nan_y
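# --- Added check (not part of the original test) ---
# The value asserted above follows directly from the definition with ddof=1:
# the sample std of [1, 2, 3, 4, 5] is sqrt(10/4) = sqrt(5/2), the mean is 3,
# so the coefficient of variation is sqrt(5/2) / 3 ≈ 0.527.
import numpy as np
a = np.array([1, 2, 3, 4, 5])
print(np.std(a, ddof=1) / np.mean(a))  # ≈ 0.527, equal to np.sqrt(5 / 2) / 3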
def test_mean_zero(self):
    # Check that `variation` returns inf for a sequence that is not
    # identically zero but whose mean is zero.
    x = np.array([10, -3, 1, -4, -4])
    y = variation(x)
    assert_equal(y, np.inf)

    x2 = np.array([x, -10 * x])
    y2 = variation(x2, axis=1)
    assert_equal(y2, [np.inf, np.inf])
def SPAEF(data1, data2, bins):
    A = np.corrcoef(data1, data2)[0, 1]
    B = variation(data1) / variation(data2)
    data1, data2 = zscore(data1), zscore(data2)
    h1, _ = np.histogram(data1, bins)
    h2, _ = np.histogram(data2, bins)
    h1, h2 = np.float64(h1), np.float64(h2)
    minima = np.minimum(h1, h2)
    C = np.sum(minima) / np.sum(h1)
    return 1 - np.sqrt((1 - A)**2 + (1 - B)**2 + (1 - C)**2)
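# --- Added usage sketch (not from the original sources) ---
# Calls the short SPAEF variant above on two hypothetical flattened fields.
# The imports are the module-level names the function relies on; the data and
# the sqrt(N) bin rule (borrowed from the longer SPAEF variant earlier in this
# section) are illustrative assumptions only.
import math
import numpy as np
from scipy.stats import variation, zscore

rng = np.random.default_rng(0)
sim = rng.normal(10.0, 2.0, 1000)   # hypothetical simulated values
obs = rng.normal(10.0, 2.5, 1000)   # hypothetical observed values
print(SPAEF(sim, obs, bins=int(round(math.sqrt(len(obs))))))  # closer to 1 is better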
def record_statistics(self):
    """ record data as system runs forward in time """
    wealth = []
    prices = []
    for firm in self.economy.firms_list:
        wealth.append(firm.wealth)
        prices.append(firm.price)
    self.wealth.append(sum(wealth))
    self.wealth_cv.append(stats.variation(wealth))
    self.prices_mean.append(np.mean(prices))
    self.prices_cv.append(stats.variation(prices))
def curvature_period_features(AMSD, vertices, signal_smooth, peaks, valleys):
    # mean and S.D. coefficient of variation of curvatures at vertices
    """
    mean of curvatures (d2x/dt2) at vertices
    S.D. of curvatures at vertices
    coefficient of variation of curvatures at vertices
    vertex counts/sec
    S.D. of vertex-to-vertex period
    coefficient of variation of vertex-to-vertex period
    count of mean crossings/sec (hysteresis = 25% of AMSD)
    """
    dx_dt = np.gradient(peaks)
    dy_dt = np.gradient(signal_smooth[peaks])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    peak_curvature = np.abs(d2x_dt2 * dy_dt - dx_dt * d2y_dt2) / (
        dx_dt * dx_dt + dy_dt * dy_dt)**1.5

    dx_dt = np.gradient(valleys)
    dy_dt = np.gradient(signal_smooth[valleys])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    valley_curvature = np.abs(d2x_dt2 * dy_dt - dx_dt * d2y_dt2) / (
        dx_dt * dx_dt + dy_dt * dy_dt)**1.5

    mean_curv_pos_over_mean_curv_neg = peak_curvature.mean() / valley_curvature.mean()

    dx_dt = np.gradient(vertices)
    dy_dt = np.gradient(signal_smooth[vertices])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    curvature = np.abs(d2x_dt2 * dy_dt - dx_dt * d2y_dt2) / (
        dx_dt * dx_dt + dy_dt * dy_dt)**1.5

    seconds = len(signal_smooth) / 2000.0  # 2000 Hz sample rate and signal is 4400 samples
    vertices_per_second = len(signal_smooth[vertices]) / seconds

    selected_period = np.array(vertices[::2])
    vertices_period = np.subtract(selected_period[1::], selected_period[:-1:])

    hysteresis = abs(0.25 * AMSD)
    mean = signal_smooth.mean()
    shifted_signal = signal_smooth - mean
    hysterisized_signal = hyst(shifted_signal, -hysteresis, hysteresis)
    zero_crossing_count = (np.diff(hysterisized_signal) != 0).sum()
    CTMXMN = zero_crossing_count / seconds

    return np.array([
        curvature.mean(), curvature.std(), variation(curvature),
        vertices_per_second, vertices_period.std(), variation(vertices_period),
        CTMXMN, mean_curv_pos_over_mean_curv_neg
    ])
def get_variation(img):
    path = images_path + img
    img = cv2.imread(path)
    hist_b = cv2.calcHist([img], [0], None, [256], [0, 256])
    hist_g = cv2.calcHist([img], [1], None, [256], [0, 256])
    hist_r = cv2.calcHist([img], [2], None, [256], [0, 256])
    var_b = variation(hist_b)
    var_g = variation(hist_g)
    var_r = variation(hist_r)
    return [var_r[0], var_g[0], var_b[0]]
def cal_vc(data):
    """
    Calculate variation coefficient
    Args:
        data: dataset
    Returns:
        variation coefficient score
    """
    from scipy import stats

    mats = np.zeros((1, len(data)))
    mats[0, 1] = sum(data)
    max_v = stats.variation(mats.ravel())
    min_v = 0
    rst = (stats.variation(data) - min_v) / (max_v - min_v)
    return rst
def rateSampleByVariation(chunks):
    """
    Rates an audio sample using the coefficient of variation of its
    short-term energy.
    """
    energy = [shortTermEnergy(chunk) for chunk in chunks]
    return stats.variation(energy)
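# --- Added usage sketch (not from the original sources) ---
# shortTermEnergy is not shown in this section, so a hypothetical stand-in
# (mean squared amplitude per chunk) is defined here purely for illustration;
# the synthetic "audio" chunks are likewise assumptions.
import numpy as np
from scipy import stats

def shortTermEnergy(chunk):
    # hypothetical stand-in: mean squared amplitude of the chunk
    return np.mean(np.square(chunk))

rng = np.random.default_rng(1)
chunks = [rng.normal(0.0, 0.1 * (i + 1), 2048) for i in range(10)]
print(rateSampleByVariation(chunks))  # higher => energy varies more across chunks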
def test_propagate_nan(self):
    # Check that the shape of the result is the same for inputs
    # with and without nans, cf gh-5817
    a = np.arange(8).reshape(2, -1).astype(float)
    a[1, 0] = np.nan
    v = variation(a, axis=1, nan_policy="propagate")
    assert_allclose(v, [np.sqrt(5 / 4) / 1.5, np.nan], atol=1e-15)
def compute_coef_var(image, x_start, x_end, y_start, y_end):
    """
    Compute coefficient of variation in a window of [x_start: x_end] and
    [y_start:y_end] within the image.
    """
    if x_start < 0:
        raise Exception('ERROR: x_start must be >= 0.')
    if y_start < 0:
        raise Exception('ERROR: y_start must be >= 0.')

    x_size, y_size = image.shape
    x_overflow = x_end > x_size
    y_overflow = y_end > y_size

    if x_overflow:
        raise Exception('ERROR: invalid parameters cause x window overflow.')
    if y_overflow:
        raise Exception('ERROR: invalid parameters cause y window overflow.')

    window = image[x_start:x_end, y_start:y_end]

    coef_var = variation(window, None)

    if not coef_var:  # dirty patch
        coef_var = 0.01
        # print "squared_coef was equal zero but replaced by %s" % coef_var
    if coef_var <= 0:
        raise Exception('ERROR: coefficient of variation cannot be zero.')

    return coef_var
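# --- Added usage sketch (not from the original sources) ---
# Exercises compute_coef_var on a small synthetic image; the array contents and
# window bounds are illustrative assumptions. The import provides the
# module-level `variation` name the function uses.
import numpy as np
from scipy.stats import variation

rng = np.random.default_rng(2)
image = rng.uniform(0.5, 1.5, size=(64, 64))   # hypothetical speckled image
print(compute_coef_var(image, x_start=10, x_end=17, y_start=10, y_end=17))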
def get_statistics(self, vec): expStats = { # "position": { "N": len(vec), "mean": np.mean(vec), "median": np.median(vec), "q1": np.percentile(vec, 25), "q3": np.percentile(vec, 75), # }, # "spread": { "range": np.ptp(vec), "variance": np.var(vec), "std": np.std(vec), "iqr": stats.iqr(vec), "mad": stats.median_absolute_deviation(vec), "cv": stats.variation(vec), "mad/median": stats.median_absolute_deviation(vec)/np.median(vec), "iqr/median": stats.iqr(vec)/np.median(vec), # }, # "distribution": { # "log_histogram": np.histogram(np.log(vec)) # } } for stat in expStats: expStats[stat] = round(expStats[stat], 2) return expStats
def _aggregate_data_per_lane(self): self.lanes_info=defaultdict(Info) for barcode_info in self.barcodes_info.values(): lane=barcode_info[ELEMENT_LANE] self.lanes_info[lane]+=barcode_info #add ELEMENT_PC_READ_IN_LANE to barcode info for barcode_info in self.barcodes_info.values(): nb_reads_lane = self.lanes_info[barcode_info[ELEMENT_LANE]][ELEMENT_NB_READS_PASS_FILTER] if nb_reads_lane: barcode_info[ELEMENT_PC_READ_IN_LANE]=float(barcode_info[ELEMENT_NB_READS_PASS_FILTER])/float(nb_reads_lane) else: barcode_info[ELEMENT_PC_READ_IN_LANE]='' #add ELEMENT_LANE_COEFF_VARIATION to lane_info nb_reads_per_lane = defaultdict(list) for barcode_info in self.barcodes_info.values(): if barcode_info[ELEMENT_BARCODE] != 'unknown' and barcode_info[ELEMENT_NB_READS_PASS_FILTER]: nb_reads_per_lane[barcode_info[ELEMENT_LANE]].append(barcode_info[ELEMENT_NB_READS_PASS_FILTER]) for lane in self.lanes_info: if nb_reads_per_lane.get(lane): self.lanes_info[lane][ELEMENT_LANE_COEFF_VARIATION] = variation(nb_reads_per_lane.get(lane)) #add ELEMENT_PC_READ_IN_LANE to barcode info for barcode_info in self.unexpected_barcode_info.values(): nb_reads_lane = self.lanes_info[barcode_info[ELEMENT_LANE]][ELEMENT_NB_READS_PASS_FILTER] if nb_reads_lane: barcode_info[ELEMENT_PC_READ_IN_LANE]=float(barcode_info[ELEMENT_NB_READS_PASS_FILTER])/float(nb_reads_lane) else: barcode_info[ELEMENT_PC_READ_IN_LANE]=''
def test_variation(self):
    """
    variation = samplestd/mean
    """
    ## y = stats.variation(self.shoes[0])
    ## assert_approx_equal(y,21.8770668)
    y = stats.variation(self.testcase)
    assert_approx_equal(y, 0.44721359549996, 10)
def MCV(data_df, N, columns, n = 4, S=1): data_array = np.array(data_df[columns]) mcv_array = data_array.copy() c = data_array.copy() c1 = data_array.copy() c1index = np.array([[i,i] for i in range(len(data_array))]) average = data_array.copy() broad = broadcasting_array(data_array, L=N+2, S=S) c[math.ceil(N/2):-math.ceil(N/2)]=np.array(variation(broad, axis = 1)) #luego se itera para reemplazar los primeros y últimos según los demás parámetros ''' N: posición en la que se identifica el transiente en real_time_disag el número de posiciones fijas debe ser mayor a en las que calcula el c1 index para que no se estanque el dataframe y pueda propagarse Esto es, c1 y c1index deben estar en otro ciclo ''' for i in range(0,len(data_df)): if (i < n) or (i > (len(data_df)- (math.ceil(N/2)) -1)): c[i] = np.fabs(data_df[columns].iloc[i]) + 10 #para asegurarse que estas posiciones nunca tengan el menor coeficiente de variación if (i < n) or (i >= (len(data_df)- (2*math.ceil(N/2)) -1)): #El límite superior es para asegurar propagación de la variable mcv_array[i] = data_df[columns].iloc[i] #v = data_df[columns].iloc[i] c1[i] = c[i] c1index[i] =i average[i] = data_df[columns].iloc[i] broad_c = broadcasting_array(c, L=N+2, S=S) c1[math.ceil(N/2):-math.ceil(N/2)] = np.min(broad_c, axis = 1) c1index[math.ceil(N/2):-math.ceil(N/2)] = -(math.ceil(N/2)) + np.argmin(broad_c, axis = 1) for i in range(N//2,len(data_df)-math.ceil(N/2)+1): c1index[i] = i + c1index[i] for i in range(0,n): #restaurar hasta n valores anteriores c[i] = np.fabs(data_df[columns].iloc[i]) + 10 #para asegurarse que estas posiciones nunca tengan el menos coeficiente de variación c1[i] = c[i] c1index[i] =i for i in range(len(data_df)- (math.ceil(N/2)) ,len(data_df)): c1[i] = c[i] c1index[i] =i broad_c1index = broadcasting_array(c1index, L=N+2, S=S) data_broadcast = [data_array[[int(broad_c1index[j][i][0]) for i in range(N+2)]] for j in range(len(broad_c1index))] average[math.ceil(N/2):-math.ceil(N/2)] = np.median(data_broadcast, axis = 1) for i in range(0, n): #restaurar hasta n valores anteriores average[i] = data_df[columns].iloc[i] for i in range(len(data_df)- (2*math.ceil(N/2)) -1,len(data_df)): average[i] = data_df[columns].iloc[i] df = pd.DataFrame(average, index= data_df.index, columns= columns) return df
def stats(values, return_list=False):
    """
    Accepts a list of values (not events) and processes the data.
    Will return the stats in a dictionary.
    """
    import numpy
    from numpy import mean, exp, median, std, log
    from scipy.stats import mode, variation, scoreatpercentile, skew, kurtosis

    stat_dict = {}
    n = float(len(values))
    keys = ['amean', 'gmean', 'median', 'mode', 'stdev', 'cv', 'iqr', 'rcv',
            'skew', 'kurt']
    stat_dict[keys[0]] = mean(values)
    stat_dict[keys[1]] = exp(mean([log(value) for value in values]))
    stat_dict[keys[2]] = median(values)
    stat_dict[keys[3]] = int(mode(values)[0])
    stat_dict[keys[4]] = std(values)
    stat_dict[keys[5]] = variation(values)
    stat_dict[keys[6]] = scoreatpercentile(values, per=75) - scoreatpercentile(values, per=25)
    stat_dict[keys[7]] = 0.75 * stat_dict['iqr'] / stat_dict['median']
    stat_dict[keys[8]] = skew(values)
    stat_dict[keys[9]] = kurtosis(values)
    if return_list:
        return [stat_dict[key] for key in keys]
    return stat_dict
def compute_profile(self): self.rec.label_contours(self.ji_intervals) distributions = {} for key, segments in self.rec.contour_labels.items(): distributions[key] = [] for indices in segments: distributions[key].extend(self.pitch_obj.pitch[indices[0]:indices[1]]) parameters = {} for interval, distribution in distributions.items(): distribution = np.array(distribution) #TODO: replace -10000 with whatever the bound is for invalid pitch values in cent scale distribution = distribution[distribution >= -10000] [n, be] = np.histogram(distribution, bins=1200) bc = (be[1:] + be[:-1])/2.0 peak_pos = bc[np.argmax(n)] peak_mean = float(np.mean(distribution)) peak_variance = float(variation(distribution)) peak_skew = float(skew(distribution)) peak_kurtosis = float(kurtosis(distribution)) pearson_skew = float(3.0 * (peak_mean - peak_pos) / np.sqrt(abs(peak_variance))) parameters[interval] = {"position": float(peak_pos), "mean": peak_mean, "amplitude": float(max(n)), "variance": peak_variance, "skew1": peak_skew, "skew2": pearson_skew, "kurtosis": peak_kurtosis} all_amps = [parameters[interval]["amplitude"] for interval in parameters.keys()] peak_amp_sum = sum(all_amps) for interval in parameters.keys(): parameters[interval]["amplitude"] = parameters[interval]["amplitude"]/peak_amp_sum self.intonation_profile = parameters
def smooth_curvatures(C, cvtarget, tolerance=10):
    """
    Smoothes a curvature signal until the coefficient of variation of its
    differenced curvatures is within tolerance percent.
    """
    from scipy.stats import variation

    smoothstep = int(len(C) / 100)
    window = smoothstep
    if window % 2 == 0:  # Window must be odd
        window = window + 1

    cv = 10000
    while abs((cv - cvtarget) / cv * 100) > tolerance:
        Cs = signal.savgol_filter(C, window_length=window, polyorder=3,
                                  mode='interp')
        cv = variation(np.diff(Cs))

        window = window + smoothstep
        if window % 2 == 0:  # Window must be odd
            window = window + 1

        if window > len(C) / 2:
            print('Could not find solution.')
            return Cs

    return Cs
def st_dev():
    from statistics import pstdev, mean, median, mode, pvariance
    from scipy import stats
    from matplotlib import pyplot as plt

    entry = []
    sizeplt = []
    count = 0
    size = int(input("Enter number of days: "))
    while count < size:
        val = int(input("Please enter {}. day cases: ".format(count + 1)))
        entry.append(val)
        count += 1

    print("Mean is: ", (mean(entry)))
    print("Variance is:", (pvariance(entry)))
    print("Standard Deviation is:", (pstdev(entry)))
    print("Mode is:", (mode(entry)))
    print("Coefficient of Variation:", stats.variation(entry))
    print("Z Scores are:", stats.zscore(entry))
    print("Median is:", (median(entry)))

    for z in range(1, len(entry) + 1):
        sizeplt.append(z)

    plt.plot(sizeplt, entry)
    plt.title("Turkey Covid-19 Daily Date Information")
    plt.xlabel("Day")
    plt.ylabel("Cases")
    plt.show()
def doDBSCAN(data):
    #mykmeans = KMeans(n_clusters=2)
    myDBSCAN = DBSCAN(eps=stats.variation(data) * 0.3,
                      min_samples=len(data) * 0.5).fit(data)
    n_clusters_ = len(set(myDBSCAN.labels_)) - (1 if -1 in myDBSCAN.labels_ else 0)
    print myDBSCAN.labels_
    print sum(myDBSCAN.labels_)
    print "nclusters:", n_clusters_
def get_stock_COV(): tickers = load_tickers('sp500.csv') tickers.append('SPY') # used for comparison, SPDR market coefficient_of_variations = [] errors = [] for ticker in tickers: past_year_prices = [] url = ( "https://financialmodelingprep.com/api/v3/historical-price-full/" + ticker + "?timeseries=365") response = urlopen(url) data = response.read().decode("utf-8") json_data = json.loads(data) try: print("CALCULATING COV FOR: ", ticker) for day_price in json_data['historical']: past_year_prices.append(day_price['close']) except: print("ISSUE WITH: ", ticker) errors.append(ticker) continue coefficient_of_variations.append([ticker, variation(past_year_prices)]) print(errors) print(coefficient_of_variations) pickle.dump(coefficient_of_variations, open("COV.p", "wb"))
def _str_for_pred_stats(y_test, y_pred, spf, prec):
    """
    Computes the predictive statistics for the given value arrays. It returns
    a string representation that corresponds to a table with field size `spf`.

    Args:
        y_test (ndarray, shape=(nvals,)): True values
        y_pred (ndarray, shape=(nvals,)): Predicted values
        spf (int): Space per table field.
        prec (int): Precision of floating point numbers.

    Returns:
        str: String representation for the statistics.
    """
    bias = np.mean(y_pred - y_test)
    cv = variation(y_pred)
    mu = STD_FACT * np.sqrt(cv**2 + bias**2)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    return f'{bias:^{spf}.{prec}f}|' \
           f'{rmse:^{spf}.{prec}f}|' \
           f'{mu:^{spf}.{prec}f}|' \
           f'{r2:^{spf}.{prec}f}|'
def test_variation(self):
    """
    variation = samplestd/mean
    """
    ## y = stats.variation(self.shoes[0])
    ## assert_approx_equal(y,21.8770668)
    y = stats.variation(self.testcase)
    assert_approx_equal(y, 0.44721359549996, 10)
def print_stats(datums):
    print 'Mean:', stats.tmean(datums)
    print 'Median:', stats.cmedian(datums)
    print 'Std Dev:', stats.tstd(datums)
    print 'Variation:', stats.variation(datums)
    print 'Kurtosis:', stats.kurtosis(datums, fisher=False)
    print 'Skewness:', stats.skew(datums)
def compute_coef_var(image, x_start, x_end, y_start, y_end):
    """
    Compute coefficient of variation in a window of [x_start: x_end] and
    [y_start:y_end] within the image.
    """
    assert x_start >= 0, 'ERROR: x_start must be >= 0.'
    assert y_start >= 0, 'ERROR: y_start must be >= 0.'

    x_size, y_size = image.shape
    x_overflow = x_end > x_size
    y_overflow = y_end > y_size

    assert not x_overflow, 'ERROR: invalid parameters cause x window overflow.'
    assert not y_overflow, 'ERROR: invalid parameters cause y window overflow.'

    window = image[x_start:x_end, y_start:y_end]

    coef_var = variation(window, None)

    if not coef_var:  # dirty patch
        coef_var = COEF_VAR_DEFAULT
        # print "squared_coef was equal zero but replaced by %s" % coef_var

    assert coef_var > 0, 'ERROR: coefficient of variation cannot be zero.'

    return coef_var
def readFpkmData(dataName, delmited): with open(dataName) as csvfile: data=[] reader = csv.reader(csvfile, delimiter=delmited) for row in reader: data.append(row) sampleList=[] geneDict={} cvDict={} for j in range(1,len(data[1])): sampleList.append({}) for i in range(1,len(data)): tempDatalist=[] maxdata=0 for j in range(1,len(data[i])): currentDataPoint=float(data[i][j]) tempDatalist.append(currentDataPoint) maxdata=max(tempDatalist) cvDict[data[i][0]]=variation(tempDatalist) if maxdata==0: maxdata=1. geneDict[data[i][0]]=[itemer/maxdata for itemer in tempDatalist] for j in range(0,len(data[i])-1): sampleList[j][str.upper(data[i][0])]=float(data[i][1])/maxdata return sampleList, geneDict, cvDict
def assess_fold_performance(self) -> None: """Calculate Coefficient of Variation of model performance of clusters for each iteration. ==Precondition==: - store_cluster_results has been called. """ # Get Fold Mean Accuracy for Plotting [Regression vs. Classification] if self._inputs.model_goal == "regression": def regression_error(x): """Return regression error between prediction and label""" return np.sqrt(((x.predictions - x.labels)**2)) self._inputs.df_test[ "pred_performance"] = self._inputs.df_test.apply( regression_error, axis=1) else: self._inputs.df_test["pred_performance"] = ( self._inputs.df_test.predictions == self._inputs.df_test.labels ) self.mean_performance = self._inputs.df_test["pred_performance"].mean() # PREPROCESSING: Get CV of Cluster Accuracies cluster_performances_flattened = np.array([]) cv_accuracy = np.array([]) for arr in self.cluster_performances: cluster_performances_flattened = np.append( cluster_performances_flattened, arr) cv_accuracy = np.append(cv_accuracy, variation(arr)) self.cv_performance = cv_accuracy self._cluster_performance_flattened = cluster_performances_flattened
def bluevar(self):
    """x=bluevar(): returns the variation in brightness within the blue
    channel of an RGB image"""
    if self.threed == True:
        blue = self.image[:, :, 2]
        flat = [x for sublist in blue for x in sublist]
        bluevar = variation(flat)
        return bluevar
    else:
        return float(0)
def read_energy(name):
    a = []
    f = open(name, 'r')
    for l in f:
        s = l.strip()
        if (s[0] != 'f'):
            a += [float(s.split(';')[2])]
    var = stats.variation(a)
    return [var] * 13
def barcode_variation_apply_fn(row, barcode_data, mapping):
    """
    :py:meth:`pandas.DataFrame.apply` function for calculating the
    coefficient of variation for a variant's barcodes.
    """
    bc_scores = barcode_data.ix[mapping.variants[row.name]]['score']
    bc_scores = bc_scores[np.invert(np.isnan(bc_scores))]
    cv = stats.variation(bc_scores)
    return pd.Series({'scored.unique.barcodes': len(bc_scores),
                      'barcode.cv': cv})
def main(argv): ### Put the arguments in variables. traceFilename = '' try: opts, args = getopt.getopt(argv,"hf:",["tracefile="]) except getopt.GetoptError: print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>' sys.exit() elif opt in ("-f", "--tracefile"): traceFilename = arg if traceFilename == '': print 'ERROR: Must specify a trace filename.' print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>' sys.exit(1) ### Create the interarrival dictionary interarrival = {} file = open(traceFilename + "-IndiceLeido", "a") ### Open the file in read mode, and parse each line to obtain the timestamp, ### the command, and the id. with open (traceFilename, "r") as traceFile: reader = csv.reader(traceFile, delimiter=' ') for line in reader: file.write(line[0]+'\n') if line[2] in interarrival: with open(traceFilename + "-interarrivals/it-" + line[2] + ".txt", "a") as myfile: myfile.write(str((float(line[0])*1000)-interarrival[line[2]][0])+'\n') interarrival[line[2]] = [(float(line[0])*1000)] file.close() ### Calculate the summary statistics for each key with more than 2 interarrivals. ### print 'id mean median mid-range gmean hmedian std iqr range mad coeficiente_variacion skewness kurtosis' ### + str(stats.hmean(v[2:])) + ' ' \ print 'id mean median mid-range gmean std iqr range mad coeficiente_variacion skewness kurtosis' for k, t in interarrival.iteritems(): if os.path.isfile(traceFilename + '-interarrivals/it-' + k + '.txt'): v = [float(line) for line in open(traceFilename + '-interarrivals/it-' + k + '.txt')] if len(v) > 1: print k + ' ' + str(numpy.mean(v)) + ' ' \ + str(numpy.median(v)) + ' ' \ + str((numpy.max(v)-numpy.min(v))/2) + ' ' \ + str(stats.mstats.gmean(v)) + ' ' \ + str(numpy.std(v)) + ' ' \ + str(numpy.subtract(*numpy.percentile(v, [75, 25]))) + ' ' \ + str(numpy.ptp(v)) + ' ' \ + str(mad(v)) + ' ' \ + str(stats.variation(v)) + ' ' \ + str(stats.skew(v)) + ' ' \ + str(stats.kurtosis(v))
def according_coefficient_variation_delete(data, features):
    waiting_to_delete = np.array(load_result("complex_value_features.csv"))
    waiting_to_delete = waiting_to_delete.reshape((waiting_to_delete.size,))
    #print(waiting_to_delete)
    indexs = get_known_features_index(features, waiting_to_delete)
    coefficient_variation_info = OrderedDict()
    for fea_pos in indexs:
        try:
            coefficient_variation_fea = stats.variation(data[:, fea_pos])
            coefficient_variation_info[features[fea_pos]] = coefficient_variation_fea
        except:
            pass
    return coefficient_variation_info
def extract(self, instance):
    assert(isinstance(instance, Instance))
    dndata = instance.eeg_data

    kurtosis = st.kurtosis(dndata, axis=1)
    skew = st.skew(dndata, axis=1)
    # coefficient of variation
    variation = st.variation(dndata, axis=1)

    # hstack will collapse all entries into one big vector
    features = np.hstack((kurtosis, skew, variation))
    self.assert_features(features)
    # features = a 1d ndarray
    return features
def get_hmsi(spikes, hmsi_bin, file_path, title=""):
    intervals = np.diff(spikes, n=1)
    intervals = intervals[np.abs(intervals - np.mean(intervals)) < 3 * np.std(intervals)]
    cv = stats.variation(intervals)  # np.sqrt(np.std(intervals)) / np.mean(intervals)
    numBins = 100
    fig = plt.figure()
    ax = fig.add_subplot(111)
    hmsi, bins, _ = ax.hist(intervals, numBins, color='green', alpha=0.8,
                            normed=True)
    ax.set_title("HMSI " + title)
    fig.savefig(file_path, dpi=500)
    plt.show(block=False)
    plt.close(fig)
    #hmsi, bins = np.histogram(intervals, numBins, density=True)
    return bins, hmsi, cv
def compute_statistics(self): price_expectations = dict((k, []) for k in range(0, self.kinds_of_capital)) for capital_firm in self.economy.capital_firms: if capital_firm.error_capital_price_expectation != None: self.errors_expectations_capital_price[capital_firm.capital_type].append(capital_firm.error_capital_price_expectation) price_expectations[capital_firm.capital_type].append(capital_firm.expected_price) for capital in xrange(self.kinds_of_capital): self.variation_price_expectation[capital] = stats.variation(price_expectations[capital]) for capital in xrange(self.kinds_of_capital): self.errors_expectations_capital_price_economy[capital] = np.mean(self.errors_expectations_capital_price[capital]) for goods_firm in self.economy.goods_firms: """ record the stock of different kinds of capital held by different capital firms in the economy""" for capital in xrange(self.kinds_of_capital): self.stock_capital[capital].append(goods_firm.capital_stock[capital]) for capital in xrange(self.kinds_of_capital): self.stock_capital_economy[capital] = np.sum(self.stock_capital[capital]) for capital in xrange(self.kinds_of_capital): self.mean_price_capital[capital] = np.mean(self.prices_capital[capital])
def weighting(window, cu=CU_DEFAULT):
    """
    Computes the weighting function for the Kuan filter using cu as the
    noise coefficient.
    """
    two_cu = cu * cu
    ci = variation(window, None)
    two_ci = ci * ci
    if not two_ci:  # dirty patch to avoid zero division
        two_ci = COEF_VAR_DEFAULT
    divisor = 1.0 + two_cu
    if not divisor:
        divisor = 0.0001
    if cu > ci:
        w_t = 0.0
    else:
        w_t = (1.0 - (two_cu / two_ci)) / divisor
    return w_t
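# --- Added usage sketch (not from the original sources) ---
# CU_DEFAULT and COEF_VAR_DEFAULT are module-level constants in the original
# filter code (they must exist before `weighting` is defined there); the values
# below are illustrative assumptions only. The weight approaches 1 when the
# window varies much more than the noise (ci >> cu) and drops to 0 when cu > ci.
import numpy as np
from scipy.stats import variation

CU_DEFAULT = 0.02        # assumed noise coefficient of variation
COEF_VAR_DEFAULT = 0.01  # assumed fallback when the window CV is zero

window = np.array([[100.0, 110.0,  95.0],
                   [105.0, 120.0,  98.0],
                   [102.0,  99.0, 101.0]])
print(weighting(window))  # ≈ 0.92 for this window with the assumed cu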
quit() means = [] COVs = [] FanoFactors = [] for key, value in dict.iteritems(): # print key, value bdf = pd.DataFrame(value) # print(bdf.shape) bdf1 = pd.DataFrame.transpose(bdf) df_sum = bdf1.sum(axis = 1) # print(bdf.shape, np.mean(df_sum), sp.variation(df_sum)) means.append(np.mean(df_sum)) COVs.append(sp.variation(df_sum)) FanoFactors.append((np.std(df_sum)**2)/np.mean(df_sum)) means_COV = np.column_stack((means, COVs, FanoFactors)) # print(means_COV) means_COV_df = pd.DataFrame(means_COV, index = dict.keys()) # means_COV_df['Barcodes', 'Experiments', 'States'] = [re.findall("\d+",key) for key in dict.keys()] means_COV_df['Barcodes'] = [int(re.findall("\d+",key)[0]) for key in dict.keys()] means_COV_df['Experiments'] = [int(re.findall("\d+",key)[1]) for key in dict.keys()] means_COV_df['States'] = [int(re.findall("\d+",key)[2]) for key in dict.keys()] # means_COV_df['Barcodes'] = [int(i[0]) for i in bd_nums] # means_COV_df['Experiments'] = [int(j[1]) for j in bd_nums] # means_COV_df['States'] = [int(k[2]) for k in bd_nums] means_COV_df.columns = ['Mean', 'COV', 'FanoFactor', 'Barcodes', 'Experiments', 'States'] # means_COV_df.sort_values(by='COV')
#for #print len(timeList) #print len(routerAverages) #plt.plot(timeList,routerAverages) varList = [] i = 1 variations = [] myTimeList = [] #timeList for average in routerAverages: varList.append(average) if i%(10)==0: variations.append(stats.variation(varList)) varList = [] myTimeList.append(i*100) i += 1 print variations #plt.xlabel('simTime') #plt.ylabel('router Average Util') #plt.show() plt.plot(myTimeList,variations) plt.xlabel('simTime Batches')
def parametrize_peaks(self, intervals, max_peakwidth=50, min_peakwidth=25, symmetric_bounds=True): """ Computes and stores the intonation profile of an audio recording. :param intervals: these will be the reference set of intervals to which peak positions correspond to. For each interval, the properties of corresponding peak, if exists, will be computed and stored as intonation profile. :param max_peakwidth: the maximum allowed width of the peak at the base for computing parameters of the distribution. :param min_peakwidth: the minimum allowed width of the peak at the base for computing parameters of the distribution. """ assert isinstance(self.pitch_obj.pitch, np.ndarray) valid_pitch = self.pitch_obj.pitch valid_pitch = [i for i in valid_pitch if i > -10000] valid_pitch = np.array(valid_pitch) parameters = {} for i in xrange(len(self.histogram.peaks["peaks"][0])): peak_pos = self.histogram.peaks["peaks"][0][i] #Set left and right bounds of the distribution. max_leftbound = peak_pos - max_peakwidth max_rightbound = peak_pos + max_peakwidth leftbound = max_leftbound rightbound = max_rightbound nearest_valleyindex = utils.find_nearest_index(self.histogram.peaks["valleys"][0], peak_pos) if peak_pos > self.histogram.peaks["valleys"][0][nearest_valleyindex]: leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex] if len(self.histogram.peaks["valleys"][0][nearest_valleyindex + 1:]) == 0: rightbound = peak_pos + max_peakwidth else: offset = nearest_valleyindex + 1 nearest_valleyindex = utils.find_nearest_index( self.histogram.peaks["valleys"][0][offset:], peak_pos) rightbound = self.histogram.peaks["valleys"][0][offset + nearest_valleyindex] else: rightbound = self.histogram.peaks["valleys"][0][nearest_valleyindex] if len(self.histogram.peaks["valleys"][0][:nearest_valleyindex]) == 0: leftbound = peak_pos - max_peakwidth else: nearest_valleyindex = utils.find_nearest_index( self.histogram.peaks["valleys"][0][:nearest_valleyindex], peak_pos) leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex] #In terms of x-axis, leftbound should be at least min_peakwidth # less than peak_pos, and at max max_peakwidth less than peak_pos, # and viceversa for the rightbound. 
if leftbound < max_leftbound: leftbound = max_leftbound elif leftbound > peak_pos - min_peakwidth: leftbound = peak_pos - min_peakwidth if rightbound > max_rightbound: rightbound = max_rightbound elif rightbound < peak_pos + min_peakwidth: rightbound = peak_pos + min_peakwidth #If symmetric bounds are asked for, then make the bounds symmetric if symmetric_bounds: if peak_pos - leftbound < rightbound - peak_pos: imbalance = (rightbound - peak_pos) - (peak_pos - leftbound) rightbound -= imbalance else: imbalance = (peak_pos - leftbound) - (rightbound - peak_pos) leftbound += imbalance #extract the distribution and estimate the parameters distribution = valid_pitch[valid_pitch >= leftbound] distribution = distribution[distribution <= rightbound] #print peak_pos, "\t", len(distribution), "\t", leftbound, "\t", rightbound interval_index = utils.find_nearest_index(intervals, peak_pos) interval = intervals[interval_index] _mean = float(np.mean(distribution)) _variance = float(variation(distribution)) _skew = float(skew(distribution)) _kurtosis = float(kurtosis(distribution)) pearson_skew = float(3.0 * (_mean - peak_pos) / np.sqrt(abs(_variance))) parameters[interval] = {"position": float(peak_pos), "mean": _mean, "amplitude": float(self.histogram.peaks["peaks"][1][i]), "variance": _variance, "skew1": _skew, "skew2": pearson_skew, "kurtosis": _kurtosis} self.intonation_profile = parameters
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf is 0 and mf is 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf is 0 and mf is 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf is 0 and mf is 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf is 0 and mf is 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf is 0 and mf is 0: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf is 0 and mf is 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf is 0 and mf is 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf is 0 and mf is 0: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf is 0 and mf is 0: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf is 0 and mf is 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda is 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif test_id.strip() == 
"ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two) ) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind( map(float, sample_one), map(float, sample_two), equal_var=args.equal_var ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, sample_one), map(float, 
sample_two), ddof=args.ddof, lambda_=args.lambda_ ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two) ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples ) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()
def run_simulation(self): self.parameter_values() increment_exponents_ratio = 0.005 first_exponent_list = np.arange(0.01, 0.5, increment_exponents_ratio) increment_volatility = 0.0025 volatility_list = np.arange(0.01, 0.25, increment_volatility) with open('stochastic_variance.csv', 'wb') as stochastic_variance: for volatility in volatility_list: print 'volatility', volatility self.parameters.goods_demand_volatility = volatility self.assign_run_parameters() self.run.initialize() self.run.create_economy() self.run.run_forward_in_time() self.run.compute_run_statistics() last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1) list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:] list_stock_ratio = np.array(list_stock_ratio) list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)] stock_variance = stats.variation(list_stock_ratio) list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:] list_price_ratio = np.array(list_price_ratio) list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)] price_variance = stats.variation(list_price_ratio) writer = csv.writer(stochastic_variance, delimiter=',') writer.writerow([volatility] + [stock_variance] + [price_variance]) with open('stock_price.csv', 'wb') as data_stock_price: for first_exponent in first_exponent_list: print 'first_exponent', first_exponent self.parameters.goods_demand_volatility = 0 self.parameters.goods_firm_exponents = [first_exponent, 0.5] self.assign_run_parameters() self.run.initialize() self.run.create_economy() self.run.run_forward_in_time() self.run.compute_run_statistics() last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1) list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:] list_stock_ratio = np.array(list_stock_ratio) list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)] stock_ratio = np.mean(list_stock_ratio[-last_ten_percent_time_steps:]) list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:] list_price_ratio = np.array(list_price_ratio) list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)] price_ratio = np.mean(list_price_ratio[-last_ten_percent_time_steps:]) ratio_exponents = first_exponent / 0.5 writer = csv.writer(data_stock_price, delimiter=',') writer.writerow([ratio_exponents] + [stock_ratio] + [price_ratio]) with open('stochastic_stock_price.csv', 'wb') as stochastic_stock_price: for first_exponent in first_exponent_list: self.parameters.goods_demand_volatility = 0.1 print 'stochastic first_exponent', first_exponent self.parameters.goods_firm_exponents = [first_exponent, 0.5] self.assign_run_parameters() self.run.initialize() self.run.create_economy() self.run.run_forward_in_time() self.run.compute_run_statistics() last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1) list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:] list_stock_ratio = np.array(list_stock_ratio) list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)] stock_ratio = np.mean(list_stock_ratio[-last_ten_percent_time_steps:]) list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:] list_price_ratio = np.array(list_price_ratio) list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)] price_ratio = np.mean(list_price_ratio[-last_ten_percent_time_steps:]) ratio_exponents = first_exponent / 0.5 writer = csv.writer(stochastic_stock_price, delimiter=',') writer.writerow([ratio_exponents] + [stock_ratio] + [price_ratio])
def descstats(data, cols=None, axis=0):
    '''
    Prints descriptive statistics for one or multiple variables.

    Parameters
    ----------
    data : numpy array
        `x` is the data
    cols : list, optional
        A list of the column numbers or field names (for a recarray) of variables.
        Default is all columns.
    axis : 1 or 0
        axis order of data. Default is 0 for column-ordered data.

    Examples
    --------
    A simple example:

    >>> descstats(data.exog, cols=['x_1', 'x_2', 'x_3'])
    '''
    x = np.array(data)  # or rather, the data we're interested in
    if cols is None:
#        if isinstance(x, np.recarray):
#            cols = np.array(len(x.dtype.names))
        if not isinstance(x, np.recarray) and x.ndim == 1:
            x = x[:, None]

    if x.shape[1] == 1:
        desc = '''
    ---------------------------------------------
    Univariate Descriptive Statistics
    ---------------------------------------------

    Var. Name   %(name)12s
    ----------
    Obs.          %(nobs)22i  Range                %(range)22s
    Sum of Wts.   %(sum)22s  Coeff. of Variation  %(coeffvar)22.4g
    Mode          %(mode)22.4g  Skewness             %(skewness)22.4g
    Repeats       %(nmode)22i  Kurtosis             %(kurtosis)22.4g
    Mean          %(mean)22.4g  Uncorrected SS       %(uss)22.4g
    Median        %(median)22.4g  Corrected SS         %(ss)22.4g
    Variance      %(variance)22.4g  Sum Observations     %(sobs)22.4g
    Std. Dev.     %(stddev)22.4g
    ''' % {'name': cols, 'sum': 'N/A', 'nobs': len(x),
           'mode': stats.mode(x)[0][0], 'nmode': stats.mode(x)[1][0],
           'mean': x.mean(), 'median': np.median(x),
           'range': '(' + str(x.min()) + ', ' + str(x.max()) + ')',
           'variance': x.var(), 'stddev': x.std(),
           'coeffvar': stats.variation(x), 'skewness': stats.skew(x),
           'kurtosis': stats.kurtosis(x), 'uss': stats.ss(x),
           'ss': stats.ss(x - x.mean()), 'sobs': np.sum(x)}
#    ''' % {'name': cols[0], 'sum': 'N/A', 'nobs': len(x[cols[0]]),
#           'mode': stats.mode(x[cols[0]])[0][0], 'nmode': stats.mode(x[cols[0]])[1][0],
#           'mean': x[cols[0]].mean(), 'median': np.median(x[cols[0]]),
#           'range': '(' + str(x[cols[0]].min()) + ', ' + str(x[cols[0]].max()) + ')',
#           'variance': x[cols[0]].var(), 'stddev': x[cols[0]].std(),
#           'coeffvar': stats.variation(x[cols[0]]), 'skewness': stats.skew(x[cols[0]]),
#           'kurtosis': stats.kurtosis(x[cols[0]]), 'uss': stats.ss(x[cols[0]]),
#           'ss': stats.ss(x[cols[0]] - x[cols[0]].mean()), 'sobs': np.sum(x[cols[0]])}

        desc += '''

    Percentiles
    -------------
    1  %% %12.4g
    5  %% %12.4g
    10 %% %12.4g
    25 %% %12.4g
    50 %% %12.4g
    75 %% %12.4g
    90 %% %12.4g
    95 %% %12.4g
    99 %% %12.4g
    ''' % tuple([stats.scoreatpercentile(x, per) for per in
                 (1, 5, 10, 25, 50, 75, 90, 95, 99)])

        t, p_t = stats.ttest_1samp(x, 0)
        M, p_M = sign_test(x)
        S, p_S = stats.wilcoxon(np.squeeze(x))

        desc += '''

    Tests of Location (H0: Mu0=0)
    -----------------------------
    Test                Statistic       Two-tailed probability
    -----------------+-----------------------------------------
    Student's t      |  t %7.5f   Pr > |t|   <%.4f
    Sign             |  M %8.2f   Pr >= |M|  <%.4f
    Signed Rank      |  S %8.2f   Pr >= |S|  <%.4f

    ''' % (t, p_t, M, p_M, S, p_S)
        # Should this be part of a 'descstats'?
        # In any event these should be split up, so that they can be called
        # individually and only returned together if someone calls summary
        # or something of the sort.

    elif x.shape[1] > 1:
        desc = '''
    Var. Name   |     Obs.        Mean    Std. Dev.           Range
    ------------+--------------------------------------------------------''' + \
            os.linesep
        # for recarrays with columns passed as names
        # if isinstance(cols[0], str):
        #     for var in cols:
        #         desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g %(range)20s" % {
        #             'name': var, 'obs': len(x[var]), 'mean': x[var].mean(),
        #             'stddev': x[var].std(),
        #             'range': '(' + str(x[var].min()) + ', ' + str(x[var].max()) + ')' + os.linesep}
        # else:
        for var in range(x.shape[1]):
            desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
%(range)20s" % {'name': var, 'obs': len(x[:, var]), 'mean': x[:, var].mean(),
                'stddev': x[:, var].std(),
                'range': '(' + str(x[:, var].min()) + ', ' + str(x[:, var].max()) + ')' + os.linesep}
    else:
        raise ValueError("data not understood")

    return desc
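A minimal, hypothetical call of descstats; it assumes sign_test is importable (e.g. from statsmodels) and an older SciPy that still ships stats.ss, since that helper was removed in later releases:

import numpy as np

rng = np.random.RandomState(0)
x = rng.normal(size=100)
print(descstats(x))                                # univariate report, incl. coefficient of variation
print(descstats(np.column_stack([x, 2 * x + 1])))  # per-column summary table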
def BasicSummary1(series):
    series_len = len(series)
    basiclist = [stats.skew(series), stats.skewtest(series)[1],
                 stats.kurtosis(series), stats.kurtosistest(series)[1],
                 stats.variation(series)]
    return np.round(pd.Series(basiclist), decimals=6)
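A small, hypothetical call; the returned pandas Series holds skewness, the skew-test p-value, kurtosis, the kurtosis-test p-value and the coefficient of variation, each rounded to six decimals:

import numpy as np
import pandas as pd
from scipy import stats

values = pd.Series(np.random.RandomState(1).normal(loc=5.0, scale=1.0, size=200))
print(BasicSummary1(values))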
parts = doHeading(1, Chapters[0], h1, parts)
para = Paragraph(u"My Text that I can write here or take it from somewhere like shown in the next paragraph.", style["Normal"])
parts.append(para)
parts = doHeading(1, Subchapters[1], h2, parts)
title = u"my first data"
para = Paragraph(Context[title], style["Normal"])
parts.append(para)
text = {}
text.update({"Standardabweichung": np.std(x)})
text.update({"Varianz": variation(x)})  # note: variation() is the coefficient of variation (std/mean), not the variance
text.update({"Schiefe": skew(x)})
text.update({"Kurtosis": kurtosis(x)})
print(Content[title])
thisImage = plotHist(Content[title], title, subname="", spec="", show=False, text=text,
                     Versuch=Chapters[0], path="", N=6)
factor = doc.width / thisImage.drawWidth
thisImage.drawHeight = thisImage.drawHeight * factor
thisImage.drawWidth = thisImage.drawWidth * factor
parts.append(thisImage)
para = Paragraph(u"Fig. " + str(doc.figCount) + title, caption)
parts.append(para)
def sharpe(x):
    # stats.variation returns std(x) / mean(x); guard against the zero-mean case
    v = stats.variation(x)
    if np.isinf(v):
        return 0
    else:
        return v
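For context, scipy.stats.variation is std/mean, so a zero-mean input typically yields an infinite value that the guard above maps to 0. A sketch, assuming the function is defined as shown:

import numpy as np
from scipy import stats

print(sharpe(np.array([1.0, 2.0, 3.0])))   # finite std/mean ratio
print(sharpe(np.array([-1.0, 1.0])))       # zero mean -> variation is inf -> returns 0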
def Convert2PdfReport(doc, parts, Daten, outpath, Fahrzeuge, Note=None, Carnames=None):  # ,outname,
    """ Plots Data and Graphics to a Portable Document File """
    numbers = [int(re.findall(r'\d+', item)[0]) for item in Fahrzeuge]
    idxs = [numbers.index(x) for x in sorted(numbers)]
    Fahrzeuge = [Fahrzeuge[this] for this in idxs]
    print("Fahrzeuge/Vergleiche:", Fahrzeuge)
    # print "Phaenomene:", Phaenomene
    if not Note == None:
        vMin = np.min(Note.Note)
        vMax = np.max(Note.Note)
        if Note.typ == "abs":
            lowmid = 3.5
            midhigh = 6.5
            leftc = "g"
            rightc = "m"
            leftct = colors.limegreen
            rightct = colors.pink
        elif Note.typ == "rel":
            lowmid = -1.5
            midhigh = 1.5
            leftc = "m"
            rightc = "g"
            leftct = colors.pink
            rightct = colors.limegreen
    for i, Fahrzeug in enumerate(Fahrzeuge):
        print(Daten.viewkeys())
        Vergleich = Daten[Fahrzeug]  # ,Meinungen
        title = u"Auswertung für " + str(Fahrzeug)
        if not Carnames == None:
            htitle = title + " " + str(Carnames[Fahrzeug])
        Means = []  # ()
        Stds = []  # ()
        Data = []
        celldata = [["" for k in range(7 + 2)] for m in range(7 + 1)]
        # celldata[0:-1][0] = [u"Heben", u"Nicken", u"Wanken", u"Werfen", u"(Mikro-) Stuckern", u"Stößigkeit"]  # Phe
        # celldata[0][0:-1] = ["", "Mittelwert", "Standardabweichung", "Minimum", "Maximum", "Stichprobenmenge"]
        parts = doHeading(Fahrzeuge[i], htitle, h1, parts)
        parts = doHeading(Fahrzeuge[i], u"Fahrzeug Übersicht", h2, parts)
        Phaenomene = []
        for j, key in enumerate(Vergleich.iterkeys()):
            Phaenomene.append(key)
            Phaenomen = Vergleich[key]
            # Means = Means.__add__((Phaenomen.mean,))
            # Stds = Stds.__add__((Phaenomen.std,))
            Means.append(Phaenomen.mean)
            Stds.append(Phaenomen.std / 2.)
            Data.append(Phaenomen)
            try:
                # print Phe[j]
                # print celldata
                try:
                    celldata[j + 1][0] = unicode(Phaenomene[j])
                except IndexError:
                    print("Error:")
                    # print celldata, Phaenomene[j]
                if not Phaenomen.len == 0:
                    try:
                        celldata[0][1] = "Mittelwert"
                        celldata[j + 1][1] = '%1.3f' % (Phaenomen.mean)
                        celldata[0][2] = "Standardabweichung"
                        celldata[j + 1][2] = '%1.3f' % (Phaenomen.std)
                        celldata[0][3] = "Minimum"
                        celldata[j + 1][3] = Phaenomen.min
                        celldata[0][4] = "Maximum"
                        celldata[j + 1][4] = Phaenomen.max
                        celldata[0][5] = "Stichprobenmenge"
                        celldata[j + 1][5] = Phaenomen.len
                    except:
                        pass
                else:
                    para = Paragraph(u"Zu " + unicode(Phaenomene[j]) + u": Keine Auswertung Möglich,", style["Normal"])
                    parts.append(para)
                    para = Paragraph("Anzahl Vergebener Noten:" + str(Phaenomen.len), style["Normal"])
                    parts.append(para)
            except LayoutError:
                print("Layout error detected, could not create the Document Template")
        # thisDrawing = barChart(Means, title + "Mittelwerte", Phaenomene, path=outpath, vMin=-4, vMax=4)
        thisDrawing = barHorizontal(Means[::-1], title + "Mittelwerte", Phaenomene[::-1], Stds[::-1],
                                    path=outpath, vMin=vMin, vMax=vMax, lowmid=lowmid, midhigh=midhigh,
                                    leftc=leftc, rightc=rightc)
        # relative:
        factor = (doc.width * 0.85) / thisDrawing.drawWidth
        thisDrawing.drawHeight = thisDrawing.drawHeight * factor
        thisDrawing.drawWidth = thisDrawing.drawWidth * factor
        parts.append(thisDrawing)
        para = Paragraph(u"Mittelwerte der Phänomene mit Standardabweichung", caption)
        parts.append(para)
        parts.append(Spacer(1, 12))
        mystyle = [
            ('LINEABOVE', (0, 0), (-1, 0), 1, colors.blue),
            ('LINEABOVE', (0, 1), (-1, 1), 1, colors.blue),
            ('LINEBEFORE', (1, 1), (1, -1), 1, colors.pink),
            ('LINEBELOW', (0, -1), (-1, -1), 1, colors.blue),
        ]
        for l, key in enumerate(Vergleich.iterkeys()):
            value = Vergleich[key].mean
            if (value >= vMin and value < lowmid):
                mystyle.append(('BACKGROUND', (1, l + 1), (1, l + 1), leftct))
            elif (value >= lowmid and value < midhigh):
                mystyle.append(('BACKGROUND', (1, l + 1), (1, l + 1), colors.khaki))
            elif (value >= midhigh and value <= vMax):
                mystyle.append(('BACKGROUND', (1, l + 1), (1, l + 1), rightct))
            else:
                pass
        t = Table(celldata, style=mystyle)  # colors.brown
        parts.append(t)
        parts.append(Spacer(1, 12))
        parts.append(PageBreak())
        parts = doHeading(Fahrzeuge[i], u"Histogramme der Phänomene", h2, parts)
        for m, data in enumerate(Data):
            if not data.len == 0:
                text = {}
                text.update({"Standardabweichung": data.std})
                text.update({"Varianz": variation(data.Event)})
                text.update({"Schiefe": skew(data.Event)})
                text.update({"Kurtosis": kurtosis(data.Event)})
                thisImage = plotHist(data.Event, Phaenomene[m], show=False, text=text, Versuch=title,
                                     path=outpath, N=Note.Note, Min=vMin, Max=vMax)
                # except:
                #     continue
                factor = (doc.width * 0.85) / thisImage.drawWidth
                thisImage.drawHeight = thisImage.drawHeight * factor
                thisImage.drawWidth = thisImage.drawWidth * factor
                parts = doHeading(Fahrzeuge[i], u"Phänomen " + unicode(Phaenomene[m]), h3, parts)
                # para = Paragraph(u"Phänomen " + str(Phe[idxs[m]]), style["Heading3"])
                # parts.append(para)
                parts.append(thisImage)
                parts.append(PageBreak())
        parts = doHeading(Fahrzeuge[i], u"Verbale Bemerkungen", h2, parts)
        for o, Phaenomen in enumerate(Phaenomene):
            if not len(Vergleich[Phaenomen].Text) == 0:
                parts = doHeading(Fahrzeuge[i], u"Probandenmeinung " + unicode(Phaenomen), h3, parts)
                # print Phaenomene[o], Meinungen[o]
                para = Paragraph(Vergleich[Phaenomen].Text, style["Normal"])
                parts.append(para)
        parts.append(PageBreak())
    plt.close('all')
    try:
        return parts
        # doc.build(parts)
        # doc.multiBuild(parts)
    except LayoutError:
        return LayoutError("there is an error with the Layout")
def generateFeatures(all_radar_features):
    features = []
    # Take aggregate statistics based on the radar quality index
    features.append(len(all_radar_features))  # No of Radars
    for i in range(0, len(all_radar_features[0])):
        if i in [0, 1, 2, 3, 4, 12]:  # Time based observations
            observations = [r[i] for r in all_radar_features]
            features.append(round(np.mean(observations), 2))
            features.append(round(np.std(observations), 2))
        elif i == 7:  # HydrometeorType
            # For each HydrometeorType, compute the weighted mean of the counts
            HydrometeorType = dict()
            for r in all_radar_features:
                for k in r[i].keys():
                    if float(k) in HydrometeorType:
                        HydrometeorType[float(k)] += (r[12] * r[i][k])
                    else:
                        HydrometeorType[float(k)] = (r[12] * r[i][k])
            for hm in range(0, 15):
                if float(hm) in HydrometeorType.keys():
                    features.append(HydrometeorType[hm])
                else:
                    features.append(0)
            # Add the most frequent HydrometeorType
            HydrometeorType = dict()
            for r in all_radar_features:
                for k in r[i].keys():
                    if float(k) in HydrometeorType:
                        HydrometeorType[float(k)] += (r[i][k])
                    else:
                        HydrometeorType[float(k)] = (r[i][k])
            most_frequent_meteor = sorted(HydrometeorType.items(), key=operator.itemgetter(1))
            if most_frequent_meteor:
                features.append(most_frequent_meteor[0][0])
            else:
                features.append("NaN")
        else:
            # Only compute the stats of radar values which aren't missing
            observations = [r[12] * float(r[i]) for r in all_radar_features if r[i] != "NaN"]
            if len(observations) > 0:
                features.append(round(np.mean(observations), 2))
                features.append(round(np.std(observations), 2))
                features.append(round(np.median(observations), 2))
                if np.mean(observations) > 0:
                    features.append(round(variation(observations), 2))  # Coefficient of variation
                else:
                    features.append("NaN")  # Coefficient of variation
            else:
                features.append("NaN")
                features.append("NaN")
                features.append("NaN")
                features.append("NaN")
    return features
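A hypothetical two-radar input to illustrate the record layout the function above expects (indices 0-4 and 12 numeric, index 7 a {hydrometeor type: count} dict, the remaining slots numeric or the string "NaN"); the values are invented, and numpy, operator and scipy.stats.variation are assumed to be imported as in the snippet:

radar_a = [1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, {"1": 3, "2": 1}, 0.5, "NaN", 7.0, 8.0, 0.9]
radar_b = [1.5, 2.5, 3.5, 4.5, 5.5, 11.0, 21.0, {"1": 2}, 0.6, 1.2, 7.5, 8.5, 0.8]
print(generateFeatures([radar_a, radar_b]))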