Example #1
 def test_variation(self):
     for n in self.get_n():
         x, y, xm, ym = self.generate_xy_sample(n)
         assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
                             decimal=12)
         assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
                             decimal=12)
def collect_cv_Alternative(spike_data):
    '''
    Input => Spike data for each cell
            [[spike_amp, spike_peak, spike_right_edge], ...]
            
    Output => Appended list of CV for all the cells
    '''
    a = spike_data
    amp_collect = []
    cv_collect = []

    for i in range(len(a)):
        if i > 0:  #Collect the previous list of amplitudes and calculate CV
            if amp_collect == []:  #the cell has empty array, no amplitudes are collected
                cv_collect.append(0)
            else:
                cv_collect.append(stats.variation(amp_collect))  #Calculate CV
                amp_collect = [
                ]  #Collect the next cell amplitudes with empty list
        if len(a[i]) != 0:
            for j in range(len(a[i])):  #Specified cell
                if a[i][j] != []:
                    for k in range(len(a[i][j])):
                        if a[i][j][k] != []:
                            for l in range(len(a[i][j][k])):
                                if l == 0:
                                    amp_collect.append(
                                        a[i][j][k]
                                        [l])  #Append the list with amplitudes
                                    if i == 24 and j == 4:
                                        cv_collect.append(
                                            stats.variation(amp_collect))

    return cv_collect
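For reference alongside these examples, a minimal sketch of what scipy.stats.variation computes (the array below is illustrative):

import numpy as np
from scipy import stats

x = np.array([2.0, 4.0, 6.0])
# coefficient of variation: population standard deviation divided by the mean
cv = stats.variation(x)
assert np.isclose(cv, np.std(x) / np.mean(x))
print(cv)  # ~0.408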
Example #3
File: nasa.py Project: jonlym/py_box
 def fit_CpoR(self, T, CpoR):
     """
     Fits parameters a1 - a6 using dimensionless heat capacity and temperature.
     Parameters
     ----------
         T - (N,) ndarray
             Temperatures (K) to fit the polynomial
         CpoR - (N,) ndarray
             Dimensionless heat capacities that correspond to T array
     """
     #If the Cp/R does not vary with temperature (occurs when no vibrational frequencies are listed)
     if (np.mean(CpoR) < 1e-6 and np.isnan(
             variation(CpoR))) or variation(CpoR) < 1e-3 or all(
                 np.isnan(CpoR)):
         self.T_mid = T[int(len(T) / 2)]
         self.a_low = np.array(7 * [0.])
         self.a_high = np.array(7 * [0.])
     else:
         max_R2 = -1
         R2 = np.zeros(len(T))
         for i, T_mid in enumerate(T):
             #Need at least 5 points to fit the polynomial
             if i > 5 and i < (len(T) - 6):
                 #Separate the temperature and heat capacities into low and high range
                 (R2[i], a_low, a_high) = self._get_CpoR_R2(T, CpoR, i)
         max_R2 = max(R2)
         max_i = np.where(max_R2 == R2)[0][0]
         (max_R2, a_low_rev, a_high_rev) = self._get_CpoR_R2(T, CpoR, max_i)
         empty_arr = np.array([0.] * 2)
         self.T_mid = T[max_i]
         self.a_low = np.concatenate((a_low_rev[::-1], empty_arr))
         self.a_high = np.concatenate((a_high_rev[::-1], empty_arr))
Example #4
File: shomate.py Project: alongd/pMuTT
def _fit_CpoR(T, CpoR, units):
    """Fit a[0]-a[4] coefficients given the dimensionless heat capacity data

    Parameters
    ----------
        T : (N,) `numpy.ndarray`_
            Temperatures in K
        CpoR : (N,) `numpy.ndarray`_
            Dimensionless heat capacity
        units : str
            Units corresponding to Shomate polynomial. Units should be supported
            by :class:`~pmutt.constants.R`.
    Returns
    -------
        a : (8,) `numpy.ndarray`_
            Lower coefficients of Shomate polynomial

    .. _`numpy.ndarray`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html
    """
    # If the Cp/R does not vary with temperature (occurs when no
    # vibrational frequencies are listed), return default values
    if (np.isclose(np.mean(CpoR), 0.) and np.isnan(variation(CpoR))) \
       or np.isclose(variation(CpoR), 0.) \
       or any([np.isnan(x) for x in CpoR]):
        return np.zeros(7)
    else:
        # Pass the unit set
        adj_shomate_CpoR = lambda T, A, B, C, D, E: _shomate_CpoR(
            T=T, A=A, B=B, C=C, D=D, E=E, units=units)
        [a, _] = curve_fit(adj_shomate_CpoR, T, np.array(CpoR))
        a = np.append(a, [0., 0., 0.])
        return a
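The lambda above only pins the units keyword so that curve_fit optimizes the remaining coefficients; below is a self-contained sketch of that wrapping pattern with an illustrative quadratic model (not the actual Shomate polynomial):

import numpy as np
from scipy.optimize import curve_fit

def model(T, a, b, c, scale=1.0):
    return scale * (a + b * T + c * T**2)

T = np.linspace(300.0, 1500.0, 50)
y = model(T, 1.0, 2e-3, -1e-7, scale=2.0)

# fix scale=2.0 so that curve_fit only optimizes a, b and c
wrapped = lambda T, a, b, c: model(T, a, b, c, scale=2.0)
popt, _ = curve_fit(wrapped, T, y)
print(popt)  # approximately [1.0, 2e-3, -1e-7]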
Example #5
def SPAEF(s, o):
    #remove NANs
    s, o = filter_nan(s, o)

    bins = np.around(math.sqrt(len(o)), 0)
    #compute corr coeff
    alpha = np.corrcoef(s, o)[0, 1]
    #compute ratio of CV
    beta = variation(s) / variation(o)
    #compute zscore mean=0, std=1
    o = zscore(o)
    s = zscore(s)
    #compute histograms
    hobs, binobs = np.histogram(o, bins)
    hsim, binsim = np.histogram(s, bins)
    #convert int to float, critical conversion for the result
    hobs = np.float64(hobs)
    hsim = np.float64(hsim)
    #find the overlapping of two histogram
    minima = np.minimum(hsim, hobs)
    #compute the fraction of intersection area to the observed histogram area, hist intersection/overlap index
    gamma = np.sum(minima) / np.sum(hobs)
    #compute SPAEF finally with three vital components
    spaef = 1 - np.sqrt((alpha - 1)**2 + (beta - 1)**2 + (gamma - 1)**2)

    return spaef, alpha, beta, gamma
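For orientation, a self-contained sketch of the three SPAEF components computed directly with numpy and scipy on synthetic data (the arrays and bin count here are illustrative assumptions):

import numpy as np
from scipy.stats import variation, zscore

rng = np.random.default_rng(0)
s = rng.normal(1.0, 0.20, 1000)   # simulated field (illustrative)
o = rng.normal(1.0, 0.25, 1000)   # observed field (illustrative)

alpha = np.corrcoef(s, o)[0, 1]            # correlation coefficient
beta = variation(s) / variation(o)         # ratio of coefficients of variation
bins = int(round(np.sqrt(len(o))))
hobs, _ = np.histogram(zscore(o), bins)
hsim, _ = np.histogram(zscore(s), bins)
gamma = np.minimum(hsim, hobs).sum() / hobs.sum()  # histogram overlap fraction

spaef = 1 - np.sqrt((alpha - 1)**2 + (beta - 1)**2 + (gamma - 1)**2)
print(spaef, alpha, beta, gamma)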
Example #6
    def point_filter(p):
        # not near any existing points
        for q in existing_points:
            if (q[0] - p[0]) ** 2 + (q[1] - p[1]) ** 2 < r_sq:
                return False

        c, r = int(p[0]), int(p[1])
        # cannot be near the edge of the image
        if (c < side_thresh or r < side_thresh or
                cols - c < side_thresh or rows - r < side_thresh):
            return False
        # cannot be super bright or dark
        if not (0.02 < np.mean(image[r, c, :]) < 0.98):
            return False
        # cannot be on an edge
        r0, r1 = max(r - edge_window, 0), min(r + edge_window + 1, rows)
        c0, c1 = max(c - edge_window, 0), min(c + edge_window + 1, cols)
        if np.any(image_canny[r0:r1, c0:c1]):
            return False
        # chromaticity coefficient of variation cannot be too high
        # (cv = std / mean)
        chroma_cv = 0.5 * (
            variation(image_lab[r0:r1, c0:c1, 1], axis=None) +
            variation(image_lab[r0:r1, c0:c1, 2], axis=None)
        )
        return chroma_cv < 0.50
def read_output(ndata, ntimes, Nt, ngx, ngy):
    y = np.full( (ndata,ntimes,ngx,ngy), 0.0)
    for i in range(1, ndata + 1):
        for j in range(1, ntimes + 1):
            y[i-1, j-1, :, :] = np.loadtxt("output/conc_{}_t_{}.dat".format(i,j))

    y = np.where(y>0.0,y,0.0)
    y0 = y[:,:Nt]
    # id0 = y0.nonzero()
    # y0 = y0[id0]

    y1 = y[:,Nt:]
    # id1 = y1.nonzero()
    # y1 = y1[id1]

    # y0_mean = np.average(y0)
    # y1_mean = np.average(y1)
    y_cov0 = variation(y0,axis=None)
    y_cov1 = variation(y1,axis=None)
    # print("cov0: {}".format(y_cov0))
    # print("cov1: {}".format(y_cov1))
    weight = y_cov0 / y_cov1
    weight = 5  # the computed CV ratio above is overridden with this fixed value
    print("weight:{}".format(weight))
    with open("weight.txt", "w") as text_file:
        text_file.write("%f" % weight)
    return weight
Example #8
File: shomate.py Project: himaghna/pMuTT
def _fit_CpoR(T, CpoR):
    """Fit a[0]-a[4] coefficients given the dimensionless heat capacity data

    Parameters
    ----------
        T : (N,) `numpy.ndarray`_
            Temperatures in K
        CpoR : (N,) `numpy.ndarray`_
            Dimensionless heat capacity
    Returns
    -------
        a : (8,) `numpy.ndarray`_
            Lower coefficients of Shomate polynomial

    .. _`numpy.ndarray`: https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.ndarray.html
    """
    # If the Cp/R does not vary with temperature (occurs when no
    # vibrational frequencies are listed), return default values
    if (np.isclose(np.mean(CpoR), 0.) and np.isnan(variation(CpoR))) \
       or np.isclose(variation(CpoR), 0.) \
       or any([np.isnan(x) for x in CpoR]):
        return np.zeros(7)
    else:
        [a, _] = curve_fit(_shomate_CpoR, T, np.array(CpoR))
        a = np.append(a, [0., 0., 0.])
        return a
Example #9
 def test_variation(self):
     for n in self.get_n():
         x, y, xm, ym = self.generate_xy_sample(n)
         assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
                             decimal=12)
         assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
                             decimal=12)
Example #10
def get_score(a, maxvar):
    a1 = [float(x) for x in a[1].split(";")]
    a2 = [float(x) for x in a[2].split(";")]
    var1, var2, var3 = variation(a1), variation(a2), variation([sum(a1), sum(a2)])
    varscore = get_varscore(var1, var2, var3, maxvar)
    change = abs(float(a[3]))
    expression = (sum(a1) + sum(a2))/(len(a1) + len(a2))
    
    return (change**2)*expression*varscore
Example #11
    def test_neg_inf(self):
        # Edge case that produces -inf: ddof equals the number of non-nan
        # values, the values are not constant, and the mean is negative.
        x1 = np.array([-3, -5])
        assert_equal(variation(x1, ddof=2), -np.inf)

        x2 = np.array([[np.nan, 1, -10, np.nan], [-20, -3, np.nan, np.nan]])
        assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                     [-np.inf, -np.inf])
Example #12
 def test_variation_ddof(self):
     # test variation with delta degrees of freedom
     # regression test for gh-13341
     a = np.array([1, 2, 3, 4, 5])
     nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
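     # For a = [1, 2, 3, 4, 5]: mean = 3 and the sample variance (ddof=1) is 2.5, so the expected CV is sqrt(5/2)/3 ≈ 0.527.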
     y = variation(a, ddof=1)
     nan_y = variation(nan_a, nan_policy="omit", ddof=1)
     assert_allclose(y, np.sqrt(5 / 2) / 3)
     assert y == nan_y
Example #13
    def test_mean_zero(self):
        # Check that `variation` returns inf for a sequence that is not
        # identically zero but whose mean is zero.
        x = np.array([10, -3, 1, -4, -4])
        y = variation(x)
        assert_equal(y, np.inf)

        x2 = np.array([x, -10 * x])
        y2 = variation(x2, axis=1)
        assert_equal(y2, [np.inf, np.inf])
def SPAEF(data1, data2, bins):
    A = np.corrcoef(data1, data2)[0, 1]
    B = variation(data1) / variation(data2)
    data1, data2 = zscore(data1), zscore(data2)
    h1, _ = np.histogram(data1, bins)
    h2, _ = np.histogram(data2, bins)
    h1, h2 = np.float64(h1), np.float64(h2)
    minima = np.minimum(h1, h2)
    
    C = np.sum(minima) / np.sum(h1)
    return 1 - np.sqrt((1 - A)**2 + (1 - B)**2 + (1 - C)**2)
Example #15
	def record_statistics(self):
		""" record data as system runs forward in time """
		wealth = []
		prices = []
		for firm in self.economy.firms_list:
			wealth.append(firm.wealth)
			prices.append(firm.price)
		self.wealth.append(sum(wealth))
		self.wealth_cv.append(stats.variation(wealth))
		self.prices_mean.append(np.mean(prices))
		self.prices_cv.append(stats.variation(prices))
def curvature_period_features(
    AMSD, vertices, signal_smooth, peaks, valleys
):  # mean, S.D., and coefficient of variation of curvatures at vertices
    """
			mean of curvatures (d2x/dt2) at vertices
			S.D. of curvatures at vertices
			coefficient of variation of curvatures at vertices
			vertex counts/sec
			S.D. of vertex-to-vertex period
			coefficient of variation of vertex-to-vertex period
			count of mean crossings/sec (hysteresis = 25% of AMSD)
	"""
    dx_dt = np.gradient(peaks)
    dy_dt = np.gradient(signal_smooth[peaks])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    peak_curvature = np.abs(d2x_dt2 * dy_dt - dx_dt * d2y_dt2) / (
        dx_dt * dx_dt + dy_dt * dy_dt)**1.5

    dx_dt = np.gradient(valleys)
    dy_dt = np.gradient(signal_smooth[valleys])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    valley_curvature = np.abs(d2x_dt2 * dy_dt - dx_dt * d2y_dt2) / (
        dx_dt * dx_dt + dy_dt * dy_dt)**1.5

    mean_curv_pos_over_mean_curv_neg = peak_curvature.mean(
    ) / valley_curvature.mean()

    dx_dt = np.gradient(vertices)
    dy_dt = np.gradient(signal_smooth[vertices])
    d2x_dt2 = np.gradient(dx_dt)
    d2y_dt2 = np.gradient(dy_dt)
    curvature = np.abs(d2x_dt2 * dy_dt -
                       dx_dt * d2y_dt2) / (dx_dt * dx_dt + dy_dt * dy_dt)**1.5
    seconds = len(signal_smooth
                  ) / 2000.0  # 2000hz sample rate and signal is 4400 samples
    vertices_per_second = len(signal_smooth[vertices]) / seconds
    selected_period = np.array(vertices[::2])
    vertices_period = np.subtract(selected_period[1::], selected_period[:-1:])
    hysteresis = abs(0.25 * AMSD)
    mean = signal_smooth.mean()
    shifted_signal = signal_smooth - mean
    hysterisized_signal = hyst(shifted_signal, -hysteresis, hysteresis)
    zero_crossing_count = (np.diff(hysterisized_signal) != 0).sum()
    CTMXMN = zero_crossing_count / seconds

    return np.array([
        curvature.mean(),
        curvature.std(),
        variation(curvature), vertices_per_second,
        vertices_period.std(),
        variation(vertices_period), CTMXMN, mean_curv_pos_over_mean_curv_neg
    ])
Example #17
def get_variation(img):
    path = images_path + img
    img = cv2.imread(path)
    hist_b = cv2.calcHist([img], [0], None, [256], [0, 256])
    hist_g = cv2.calcHist([img], [1], None, [256], [0, 256])
    hist_r = cv2.calcHist([img], [2], None, [256], [0, 256])

    var_b = variation(hist_b)
    var_g = variation(hist_g)
    var_r = variation(hist_r)

    return [var_r[0], var_g[0], var_b[0]]
Example #18
File: metric.py Project: jiyingsheng/aco
def cal_vc(data):
    """ Calculate variation coefficient 
    Args:
       data: dataset
    Returns:
       variation coefficient score 
    """
    from scipy import stats
    #
    mats = np.zeros((1,len(data)))
    mats[0,1] = sum(data)
    max_v = stats.variation(mats.ravel())
    min_v = 0
    rst =  (stats.variation(data) - min_v) / (max_v - min_v)
    return rst 
def rateSampleByVariation(chunks):
    """
    Rates an audio sample using the coefficient of variation of its short-term
    energy.
    """
    energy = [shortTermEnergy(chunk) for chunk in chunks]
    return stats.variation(energy)
Example #20
 def test_propagate_nan(self):
     # Check that the shape of the result is the same for inputs
     # with and without nans, cf gh-5817
     a = np.arange(8).reshape(2, -1).astype(float)
     a[1, 0] = np.nan
     v = variation(a, axis=1, nan_policy="propagate")
     assert_allclose(v, [np.sqrt(5 / 4) / 1.5, np.nan], atol=1e-15)
Example #21
def compute_coef_var(image, x_start, x_end, y_start, y_end):
    """
    Compute coefficient of variation in a window of [x_start: x_end] and
    [y_start:y_end] within the image.
    """
    if x_start < 0: raise Exception('ERROR: x_start must be >= 0.')
    if y_start < 0: raise Exception('ERROR: y_start must be >= 0.')

    x_size, y_size = image.shape
    x_overflow = x_end > x_size
    y_overflow = y_end > y_size

    if x_overflow:
        raise Exception('ERROR: invalid parameters cause x window overflow.')
    if y_overflow:
        raise Exception('ERROR: invalid parameters cause y window overflow.')

    window = image[x_start:x_end, y_start:y_end]

    coef_var = variation(window, None)

    if not coef_var:  # dirty patch
        coef_var = 0.01
        # print "squared_coef was equal zero but replaced by %s" % coef_var
    if coef_var <= 0:
        raise Exception('ERROR: coefficient of variation cannot be zero.')

    return coef_var
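A small, self-contained illustration of the variation(window, None) call used above (the window values are made up): axis=None ravels the window, so the coefficient of variation is taken over every pixel.

import numpy as np
from scipy.stats import variation

window = np.array([[1.0, 2.0],
                   [3.0, 4.0]])
coef_var = variation(window, None)  # axis=None: CV over the flattened window
assert np.isclose(coef_var, window.std() / window.mean())
print(coef_var)  # ~0.447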
 def get_statistics(self, vec):
     expStats = {
         # "position": {
         "N": len(vec),
         "mean": np.mean(vec),
         "median": np.median(vec),
         "q1": np.percentile(vec, 25),
         "q3": np.percentile(vec, 75),
         # },
         # "spread": {
         "range": np.ptp(vec),
         "variance": np.var(vec),
         "std": np.std(vec),
         "iqr": stats.iqr(vec),
         "mad": stats.median_absolute_deviation(vec),
         "cv": stats.variation(vec),
         "mad/median": stats.median_absolute_deviation(vec)/np.median(vec),
         "iqr/median": stats.iqr(vec)/np.median(vec),
         # },
         # "distribution": {
         # "log_histogram": np.histogram(np.log(vec))
         # }
     }
     for stat in expStats:
         expStats[stat] = round(expStats[stat], 2)
     return expStats
    def _aggregate_data_per_lane(self):
        self.lanes_info=defaultdict(Info)
        for barcode_info in self.barcodes_info.values():
            lane=barcode_info[ELEMENT_LANE]
            self.lanes_info[lane]+=barcode_info
        #add ELEMENT_PC_READ_IN_LANE to barcode info
        for barcode_info in self.barcodes_info.values():
            nb_reads_lane = self.lanes_info[barcode_info[ELEMENT_LANE]][ELEMENT_NB_READS_PASS_FILTER]
            if nb_reads_lane:
                barcode_info[ELEMENT_PC_READ_IN_LANE]=float(barcode_info[ELEMENT_NB_READS_PASS_FILTER])/float(nb_reads_lane)
            else:
                barcode_info[ELEMENT_PC_READ_IN_LANE]=''

        #add ELEMENT_LANE_COEFF_VARIATION to lane_info
        nb_reads_per_lane = defaultdict(list)
        for barcode_info in self.barcodes_info.values():
            if barcode_info[ELEMENT_BARCODE] != 'unknown' and barcode_info[ELEMENT_NB_READS_PASS_FILTER]:
                nb_reads_per_lane[barcode_info[ELEMENT_LANE]].append(barcode_info[ELEMENT_NB_READS_PASS_FILTER])
        for lane in self.lanes_info:
            if nb_reads_per_lane.get(lane):
                self.lanes_info[lane][ELEMENT_LANE_COEFF_VARIATION] = variation(nb_reads_per_lane.get(lane))

        #add ELEMENT_PC_READ_IN_LANE to barcode info
        for barcode_info in self.unexpected_barcode_info.values():
            nb_reads_lane = self.lanes_info[barcode_info[ELEMENT_LANE]][ELEMENT_NB_READS_PASS_FILTER]
            if nb_reads_lane:
                barcode_info[ELEMENT_PC_READ_IN_LANE]=float(barcode_info[ELEMENT_NB_READS_PASS_FILTER])/float(nb_reads_lane)
            else:
                barcode_info[ELEMENT_PC_READ_IN_LANE]=''
Example #24
    def test_variation(self):
        """
        variation = samplestd/mean """
##        y = stats.variation(self.shoes[0])
##        assert_approx_equal(y,21.8770668)
        y = stats.variation(self.testcase)
        assert_approx_equal(y,0.44721359549996, 10)
Example #25
def MCV(data_df, N, columns, n = 4, S=1):

    data_array = np.array(data_df[columns])
    mcv_array = data_array.copy()
    c = data_array.copy()
    c1 = data_array.copy()
    c1index = np.array([[i,i] for i in range(len(data_array))])
    average = data_array.copy()

    broad =  broadcasting_array(data_array, L=N+2, S=S)
    c[math.ceil(N/2):-math.ceil(N/2)]=np.array(variation(broad, axis = 1)) # then iterate to replace the first and last entries according to the other parameters

    '''
    N: position at which the transient is identified in real_time_disag
    The number of fixed positions must be larger than the ones used to compute the c1 index,
    so the dataframe does not stall and the value can propagate.
    That is, c1 and c1index must be handled in a separate loop.
    '''
    
    for i in range(0,len(data_df)):

        if (i < n) or (i > (len(data_df)- (math.ceil(N/2)) -1)): 
            c[i] =  np.fabs(data_df[columns].iloc[i]) + 10 # ensure these positions never have the smallest coefficient of variation

        if (i < n) or (i >= (len(data_df)- (2*math.ceil(N/2)) -1)): # the upper limit ensures the variable can propagate
            mcv_array[i] =  data_df[columns].iloc[i]
            #v = data_df[columns].iloc[i]
            c1[i] =  c[i]
            c1index[i] =i  
            average[i] =  data_df[columns].iloc[i]
   
    broad_c =  broadcasting_array(c, L=N+2, S=S)

    c1[math.ceil(N/2):-math.ceil(N/2)] = np.min(broad_c, axis = 1) 
    c1index[math.ceil(N/2):-math.ceil(N/2)] = -(math.ceil(N/2)) + np.argmin(broad_c, axis = 1)

    for i in range(N//2,len(data_df)-math.ceil(N/2)+1):
        c1index[i] = i + c1index[i] 
    
    for i in range(0,n): # restore up to n previous values
        c[i] =  np.fabs(data_df[columns].iloc[i]) + 10 # ensure these positions never have the smallest coefficient of variation
        c1[i] =  c[i]
        c1index[i] =i  
    
    for i in range(len(data_df)- (math.ceil(N/2)) ,len(data_df)): 
        c1[i] =  c[i]
        c1index[i] =i  

    broad_c1index =  broadcasting_array(c1index, L=N+2, S=S)
    data_broadcast = [data_array[[int(broad_c1index[j][i][0]) for i in range(N+2)]] for j in range(len(broad_c1index))]
    average[math.ceil(N/2):-math.ceil(N/2)] = np.median(data_broadcast, axis = 1)

    for i in range(0, n): # restore up to n previous values
        average[i] =  data_df[columns].iloc[i]

    for i in range(len(data_df)- (2*math.ceil(N/2)) -1,len(data_df)): 
        average[i] =  data_df[columns].iloc[i] 
    
    df = pd.DataFrame(average, index= data_df.index, columns= columns)

    return df
Example #26
def stats(values, return_list = False):
    """
    Accepts a list of values (not events) and processes the data. Will return the stats
    in a dictionary.
    """
    import numpy
    from numpy import mean, exp, median, std, log
    from scipy.stats import mode, variation, scoreatpercentile, skew, kurtosis
    stat_dict = {}

    n = float(len(values))    
    
    keys = ['amean','gmean','median','mode','stdev','cv','iqr','rcv','skew','kurt']    
    
    stat_dict[keys[0]] = mean(values)
    stat_dict[keys[1]] = exp(mean([log(value) for value in values]))
    stat_dict[keys[2]] = median(values)
    stat_dict[keys[3]] = int(mode(values)[0])
    stat_dict[keys[4]] = std(values)
    stat_dict[keys[5]] = variation(values)
    stat_dict[keys[6]] = scoreatpercentile(values,per = 75) - scoreatpercentile(values,per = 25)
    stat_dict[keys[7]] = 0.75*stat_dict['iqr'] / stat_dict['median']    
    stat_dict[keys[8]] = skew(values)
    stat_dict[keys[9]] = kurtosis(values)
    
    if return_list:
        return [stat_dict[key] for key in keys]
    return stat_dict
Example #27
    def compute_profile(self):
        self.rec.label_contours(self.ji_intervals)
        distributions = {}
        for key, segments in self.rec.contour_labels.items():
            distributions[key] = []
            for indices in segments:
                distributions[key].extend(self.pitch_obj.pitch[indices[0]:indices[1]])

        parameters = {}
        for interval, distribution in distributions.items():
            distribution = np.array(distribution)
            #TODO: replace -10000 with whatever the bound is for invalid pitch values in cent scale
            distribution = distribution[distribution >= -10000]
            [n, be] = np.histogram(distribution, bins=1200)
            bc = (be[1:] + be[:-1])/2.0
            peak_pos = bc[np.argmax(n)]
            peak_mean = float(np.mean(distribution))
            peak_variance = float(variation(distribution))
            peak_skew = float(skew(distribution))
            peak_kurtosis = float(kurtosis(distribution))
            pearson_skew = float(3.0 * (peak_mean - peak_pos) / np.sqrt(abs(peak_variance)))
            parameters[interval] = {"position": float(peak_pos),
                                    "mean": peak_mean,
                                    "amplitude": float(max(n)),
                                    "variance": peak_variance,
                                    "skew1": peak_skew,
                                    "skew2": pearson_skew,
                                    "kurtosis": peak_kurtosis}
        all_amps = [parameters[interval]["amplitude"] for interval in parameters.keys()]
        peak_amp_sum = sum(all_amps)
        for interval in parameters.keys():
            parameters[interval]["amplitude"] = parameters[interval]["amplitude"]/peak_amp_sum

        self.intonation_profile = parameters
Example #28
def smooth_curvatures(C, cvtarget, tolerance=10):
    """
    Smoothes a curvature signal until the coefficient of variation of its
    differenced curvatures is within tolerance percent.
    """
    from scipy.stats import variation

    smoothstep = int(len(C) / 100)
    window = smoothstep
    if window % 2 == 0:  # Window must be odd
        window = window + 1

    cv = 10000
    while abs((cv - cvtarget) / cv * 100) > tolerance:
        Cs = signal.savgol_filter(C,
                                  window_length=window,
                                  polyorder=3,
                                  mode='interp')
        cv = variation(np.diff(Cs))

        window = window + smoothstep
        if window % 2 == 0:  # Window must be odd
            window = window + 1

        if window > len(C) / 2:
            print('Could not find solution.')
            return Cs

    return Cs
Example #29
def st_dev():
    from statistics import pstdev, mean,median,mode, pvariance
    from scipy import stats
    from matplotlib import pyplot as plt
    entry=[]
    sizeplt=[]
    count=0
    size =int(input("Enter number of days: "))
    while count<size:
        val = int(input("Please enter {}. day cases: ".format(count+1)))
        entry.append(val)
        count += 1
    print("Mean is: ",(mean(entry)))
    print("Variance is:", (pvariance(entry)))
    print("Standard Deviation is:", (pstdev(entry)))
    print("Mode is:", (mode(entry)))
    print("Coefficient of Variation:",stats.variation(entry))
    print("Z Scores are:",stats.zscore(entry))
    print("Median is:", (median(entry)))
    for z in range(1,len(entry)+1):
        sizeplt.append(z)
    plt.plot(sizeplt,entry)
    plt.title("Turkey Covid-19 Daily Date Information")
    plt.xlabel("Day")
    plt.ylabel("Cases")
    plt.show()
Example #30
def doDBSCAN(data):
    #mykmeans = KMeans(n_clusters=2)
    myDBSCAN = DBSCAN(eps=stats.variation(data)*0.3, min_samples=len(data)*0.5).fit(data)
    n_clusters_ = len(set(myDBSCAN.labels_)) - (1 if -1 in myDBSCAN.labels_ else 0)
    print myDBSCAN.labels_
    print sum(myDBSCAN.labels_)
    print "nclusters:", n_clusters_
Example #31
def get_stock_COV():
    tickers = load_tickers('sp500.csv')
    tickers.append('SPY')  # used for comparison, SPDR market

    coefficient_of_variations = []
    errors = []
    for ticker in tickers:
        past_year_prices = []
        url = (
            "https://financialmodelingprep.com/api/v3/historical-price-full/" +
            ticker + "?timeseries=365")
        response = urlopen(url)
        data = response.read().decode("utf-8")
        json_data = json.loads(data)
        try:
            print("CALCULATING COV FOR: ", ticker)
            for day_price in json_data['historical']:
                past_year_prices.append(day_price['close'])
        except:
            print("ISSUE WITH: ", ticker)
            errors.append(ticker)
            continue

        coefficient_of_variations.append([ticker, variation(past_year_prices)])

    print(errors)
    print(coefficient_of_variations)
    pickle.dump(coefficient_of_variations, open("COV.p", "wb"))
Example #32
def _str_for_pred_stats(y_test, y_pred, spf, prec):
    """ Computes the predictive statistics for the given value arrays. It
    returns a string representation that corresponds to a table with field size
    `spf`.

    Args:
        y_test (ndarray, shape=(nvals,)): True values
        y_pred (ndarray, shape=(nvals,)): Predicted values
        spf (int): Space per table field.
        prec (int): Precision of floating point numbers.

    Returns:
        str: String representation for the statistics.

    """

    bias = np.mean(y_pred - y_test)
    cv = variation(y_pred)
    mu = STD_FACT * np.sqrt(cv**2 + bias**2)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    return f'{bias:^{spf}.{prec}f}|' \
           f'{rmse:^{spf}.{prec}f}|' \
           f'{mu:^{spf}.{prec}f}|' \
           f'{r2:^{spf}.{prec}f}|'
Example #33
 def test_variation(self):
     """
     variation = samplestd/mean """
     ##        y = stats.variation(self.shoes[0])
     ##        assert_approx_equal(y,21.8770668)
     y = stats.variation(self.testcase)
     assert_approx_equal(y, 0.44721359549996, 10)
Example #34
def print_stats(datums):
    print 'Mean:', stats.tmean(datums)
    print 'Median:', stats.cmedian(datums)
    print 'Std Dev:', stats.tstd(datums)
    print 'Variation:', stats.variation(datums)
    print 'Kurtosis:', stats.kurtosis(datums, fisher=False)
    print 'Skewness:', stats.skew(datums)
Example #35
def compute_coef_var(image, x_start, x_end, y_start, y_end):
    """
    Compute coefficient of variation in a window of [x_start: x_end] and
    [y_start:y_end] within the image.
    """
    assert x_start >= 0, 'ERROR: x_start must be >= 0.'
    assert y_start >= 0, 'ERROR: y_start must be >= 0.'

    x_size, y_size = image.shape
    x_overflow = x_end > x_size
    y_overflow = y_end > y_size

    assert not x_overflow, 'ERROR: invalid parameters cause x window overflow.'
    assert not y_overflow, 'ERROR: invalid parameters cause y window overflow.'

    window = image[x_start:x_end, y_start:y_end]

    coef_var = variation(window, None)

    if not coef_var:  # dirty patch
        coef_var = COEF_VAR_DEFAULT
#        print "squared_coef was equal zero but replaced by %s" % coef_var
    assert coef_var > 0, 'ERROR: coefficient of variation cannot be zero.'

    return coef_var
Example #36
def compute_coef_var(image, x_start, x_end, y_start, y_end):
    """
    Compute coefficient of variation in a window of [x_start: x_end] and
    [y_start:y_end] within the image.
    """
    assert x_start >= 0, 'ERROR: x_start must be >= 0.'
    assert y_start >= 0, 'ERROR: y_start must be >= 0.'

    x_size, y_size = image.shape
    x_overflow = x_end > x_size
    y_overflow = y_end > y_size

    assert not x_overflow, 'ERROR: invalid parameters cause x window overflow.'
    assert not y_overflow, 'ERROR: invalid parameters cause y window overflow.'

    window = image[x_start:x_end, y_start:y_end]

    coef_var = variation(window, None)

    if not coef_var:  # dirty patch
        coef_var = COEF_VAR_DEFAULT
#        print "squared_coef was equal zero but replaced by %s" % coef_var
    assert coef_var > 0, 'ERROR: coefficient of variation cannot be zero.'

    return coef_var
Example #37
def readFpkmData(dataName, delmited):
	with open(dataName) as csvfile:
		data=[]
		reader = csv.reader(csvfile, delimiter=delmited)
		for row in reader:
			data.append(row)
	sampleList=[]
	geneDict={}
	cvDict={}
	for j in range(1,len(data[1])):
		sampleList.append({})
	for i in range(1,len(data)):
		tempDatalist=[]
		maxdata=0
		for j in range(1,len(data[i])):
			currentDataPoint=float(data[i][j])
			tempDatalist.append(currentDataPoint)
		maxdata=max(tempDatalist)
		cvDict[data[i][0]]=variation(tempDatalist)
		if maxdata==0:
			maxdata=1.
		geneDict[data[i][0]]=[itemer/maxdata for itemer in tempDatalist]
		for j in range(0,len(data[i])-1):
			sampleList[j][str.upper(data[i][0])]=float(data[i][1])/maxdata
	return sampleList, geneDict, cvDict
Example #38
    def assess_fold_performance(self) -> None:
        """Calculate Coefficient of Variation of model performance of clusters
        for each iteration.

        ==Precondition==:
            - store_cluster_results has been called.
        """
        # Get Fold Mean Accuracy for Plotting [Regression vs. Classification]
        if self._inputs.model_goal == "regression":

            def regression_error(x):
                """Return regression error between prediction and label"""
                return np.sqrt(((x.predictions - x.labels)**2))

            self._inputs.df_test[
                "pred_performance"] = self._inputs.df_test.apply(
                    regression_error, axis=1)
        else:
            self._inputs.df_test["pred_performance"] = (
                self._inputs.df_test.predictions == self._inputs.df_test.labels
            )

        self.mean_performance = self._inputs.df_test["pred_performance"].mean()

        # PREPROCESSING: Get CV of Cluster Accuracies
        cluster_performances_flattened = np.array([])
        cv_accuracy = np.array([])
        for arr in self.cluster_performances:
            cluster_performances_flattened = np.append(
                cluster_performances_flattened, arr)
            cv_accuracy = np.append(cv_accuracy, variation(arr))

        self.cv_performance = cv_accuracy
        self._cluster_performance_flattened = cluster_performances_flattened
Example #39
 def bluevar(self):
     """x=bluevar(): returns the variation in brightness within the blue channel of an RGB image"""
     if self.threed==True:
         blue=self.image[:,:,2]
         flat = [x for sublist in blue for x in sublist]
         bluevar=variation(flat)
         return bluevar
     else:
         return float(0)
Example #40
File: main.py Project: lturtsamuel/myprj
def read_energy(name):
    a = []
    f = open(name, 'r')
    for l in f:
        s = l.strip()
        if(s[0] != 'f'):
            a += [float(s.split(';')[2])]
    var = stats.variation(a)

    return [var]*13
Example #41
def barcode_variation_apply_fn(row, barcode_data, mapping):
    """
    :py:meth:`pandas.DataFrame.apply` function for calculating the coefficient 
    of variation for a variant's barcodes.
    """
    bc_scores = barcode_data.ix[mapping.variants[row.name]]['score']
    bc_scores = bc_scores[np.invert(np.isnan(bc_scores))]
    cv = stats.variation(bc_scores)
    return pd.Series({'scored.unique.barcodes' : len(bc_scores), \
                      'barcode.cv' : cv})
def main(argv):
    ### Put the arguments in variables.
    traceFilename = ''
    try:
        opts, args = getopt.getopt(argv,"hf:",["tracefile="])
    except getopt.GetoptError:
        print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>'
            sys.exit()
        elif opt in ("-f", "--tracefile"):
            traceFilename = arg
    if traceFilename == '':
        print 'ERROR: Must specify a trace filename.'
        print 'scriptGetInterarrivalsSummaryStatistics.py -f <tracefile>'
        sys.exit(1)
    
    ### Create the interarrival dictionary
    interarrival = {}
    file = open(traceFilename + "-IndiceLeido", "a")

    ### Open the file in read mode, and parse each line to obtain the timestamp,
    ### the command, and the id.
    with open (traceFilename, "r") as traceFile:
        reader = csv.reader(traceFile, delimiter=' ')
        for line in reader:
            file.write(line[0]+'\n')
            if line[2] in interarrival:
                with open(traceFilename + "-interarrivals/it-" + line[2] + ".txt", "a") as myfile:
                    myfile.write(str((float(line[0])*1000)-interarrival[line[2]][0])+'\n')
            interarrival[line[2]] = [(float(line[0])*1000)]
    file.close()
    ### Calculate the summary statistics for each key with more than 2 interarrivals.
    ### print 'id mean median mid-range gmean hmedian std iqr range mad coeficiente_variacion skewness kurtosis'
    ###        + str(stats.hmean(v[2:])) + ' ' \
    print 'id mean median mid-range gmean std iqr range mad coeficiente_variacion skewness kurtosis'
    for k, t in interarrival.iteritems():
        if os.path.isfile(traceFilename + '-interarrivals/it-' + k + '.txt'):
            v = [float(line) for line in open(traceFilename + '-interarrivals/it-' + k + '.txt')]
            if len(v) > 1:
               print k + ' ' + str(numpy.mean(v)) + ' ' \
               + str(numpy.median(v)) + ' ' \
               + str((numpy.max(v)-numpy.min(v))/2) + ' ' \
               + str(stats.mstats.gmean(v)) + ' ' \
               + str(numpy.std(v)) + ' ' \
               + str(numpy.subtract(*numpy.percentile(v, [75, 25]))) + ' ' \
               + str(numpy.ptp(v)) + ' ' \
               + str(mad(v)) + ' ' \
               + str(stats.variation(v)) + ' ' \
               + str(stats.skew(v))  + ' ' \
               + str(stats.kurtosis(v))
Example #43
def according_coefficient_variation_delete(data, features):
	waiting_to_delete = np.array(load_result("complex_value_features.csv"))
	waiting_to_delete = waiting_to_delete.reshape((waiting_to_delete.size,))
	#print(waiting_to_delete)
	indexs = get_known_features_index(features, waiting_to_delete)
	coefficient_variation_info = OrderedDict()
	for fea_pos in indexs:
		try:
			coefficient_variation_fea = stats.variation(data[:, fea_pos])
			coefficient_variation_info[features[fea_pos]] = coefficient_variation_fea
		except:
			pass
	return coefficient_variation_info
    def extract(self, instance):
        assert(isinstance(instance, Instance))
        dndata = instance.eeg_data

        kurtosis = st.kurtosis(dndata, axis=1)
        skew = st.skew(dndata, axis=1)
        # coefficient of variation 
        variation = st.variation(dndata, axis=1)
        
        # hstack will collapse all entries into one big vector 
        features = np.hstack( (kurtosis, skew, variation) )
        self.assert_features(features)
        # features = a 1d ndarray 
        return features
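To make the shape of the stacked feature vector above concrete, a minimal sketch with a fake two-channel array (values are illustrative):

import numpy as np
from scipy import stats as st

dndata = np.array([[1.0, 2.0, 3.0, 4.0],
                   [2.0, 1.0, 4.0, 3.0]])  # two channels, four samples each
kurtosis = st.kurtosis(dndata, axis=1)
skew = st.skew(dndata, axis=1)
variation = st.variation(dndata, axis=1)   # one CV per channel (row)
features = np.hstack((kurtosis, skew, variation))
print(features.shape)  # (6,): kurtosis, skew and CV for each of the two channels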
def get_hmsi(spikes, hmsi_bin, file_path, title=""):
    intervals = np.diff( spikes, n=1 )
    intervals = intervals[ np.abs(intervals - np.mean(intervals)) < 3*np.std(intervals) ]
    cv = stats.variation(intervals) #  np.sqrt(np.std(intervals)) / np.mean(intervals) #
    numBins = 100   
    
    fig = plt.figure()
    ax = fig.add_subplot(111)

    hmsi, bins, _ = ax.hist(intervals, numBins, color='green', alpha=0.8, normed=True)

    ax.set_title("HMSI " + title)
    
    
    
    fig.savefig(file_path, dpi=500)
    plt.show(block=False)
    plt.close(fig)
    
    #hmsi, bins = np.histogram(intervals, numBins, density=True)

    return bins, hmsi, cv
Example #46
	def compute_statistics(self):
		price_expectations = dict((k, []) for k in range(0, self.kinds_of_capital))

		for capital_firm in self.economy.capital_firms:
			if capital_firm.error_capital_price_expectation != None:
				self.errors_expectations_capital_price[capital_firm.capital_type].append(capital_firm.error_capital_price_expectation)
			price_expectations[capital_firm.capital_type].append(capital_firm.expected_price)
		for capital in xrange(self.kinds_of_capital):
			self.variation_price_expectation[capital] = stats.variation(price_expectations[capital])
		
		for capital in xrange(self.kinds_of_capital):
			self.errors_expectations_capital_price_economy[capital] = np.mean(self.errors_expectations_capital_price[capital])

		for goods_firm in self.economy.goods_firms:
			""" record the stock of different kinds of capital held by different capital firms in the economy"""
			for capital in xrange(self.kinds_of_capital):
				self.stock_capital[capital].append(goods_firm.capital_stock[capital])

		for capital in xrange(self.kinds_of_capital):
			self.stock_capital_economy[capital] = np.sum(self.stock_capital[capital])
	

		for capital in xrange(self.kinds_of_capital):
			self.mean_price_capital[capital] = np.mean(self.prices_capital[capital])
Example #47
File: kuan.py Project: nanopony/pyradar
def weighting(window, cu=CU_DEFAULT):
    """
    Computes the weighting function for the Kuan filter using cu as the noise
    coefficient.
    """
    two_cu = cu * cu

    ci = variation(window, None)
    two_ci = ci * ci

    if not two_ci:  # dirty patch to avoid zero division
        two_ci = COEF_VAR_DEFAULT

    divisor = 1.0 + two_cu

    if not divisor:
        divisor = 0.0001

    if cu > ci:
        w_t = 0.0
    else:
        w_t = (1.0 - (two_cu / two_ci)) / divisor

    return w_t
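A minimal, self-contained sketch of the same Kuan weighting formula; the window values and cu below are illustrative assumptions, not values from the pyradar project:

import numpy as np
from scipy.stats import variation

window = np.array([[100.0, 110.0],
                   [95.0, 105.0]])  # hypothetical pixel window
cu = 0.02                           # assumed noise coefficient of variation
ci = variation(window, None)        # CV of the window (axis=None ravels it)
if cu > ci:
    w_t = 0.0
else:
    w_t = (1.0 - (cu * cu) / (ci * ci)) / (1.0 + cu * cu)
print(w_t)  # ~0.87 for these illustrative numbers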
quit()


means = []
COVs = []
FanoFactors = []
for key, value in dict.iteritems():
    # print key, value
    bdf = pd.DataFrame(value)
    # print(bdf.shape)
    bdf1 = pd.DataFrame.transpose(bdf)
    df_sum = bdf1.sum(axis = 1)
    # print(bdf.shape, np.mean(df_sum), sp.variation(df_sum))
    means.append(np.mean(df_sum))
    COVs.append(sp.variation(df_sum))
    FanoFactors.append((np.std(df_sum)**2)/np.mean(df_sum))


means_COV = np.column_stack((means, COVs, FanoFactors))
# print(means_COV)
means_COV_df = pd.DataFrame(means_COV, index = dict.keys())
# means_COV_df['Barcodes', 'Experiments', 'States'] = [re.findall("\d+",key) for key in dict.keys()]
means_COV_df['Barcodes'] = [int(re.findall("\d+",key)[0]) for key in dict.keys()]
means_COV_df['Experiments'] = [int(re.findall("\d+",key)[1]) for key in dict.keys()]
means_COV_df['States'] = [int(re.findall("\d+",key)[2]) for key in dict.keys()]
# means_COV_df['Barcodes'] = [int(i[0]) for i in bd_nums]
# means_COV_df['Experiments'] = [int(j[1]) for j in bd_nums]
# means_COV_df['States'] = [int(k[2]) for k in bd_nums]
means_COV_df.columns = ['Mean', 'COV', 'FanoFactor', 'Barcodes', 'Experiments', 'States']
# means_COV_df.sort_values(by='COV')
#for 
#print len(timeList)
#print len(routerAverages)

#plt.plot(timeList,routerAverages)
varList = []
i = 1
variations = []

myTimeList = []
#timeList
for average in routerAverages:
	varList.append(average)
	if i%(10)==0:
		variations.append(stats.variation(varList))
		varList = []
		myTimeList.append(i*100)
	i += 1



print variations

#plt.xlabel('simTime')
#plt.ylabel('router Average Util')
#plt.show()


plt.plot(myTimeList,variations)
plt.xlabel('simTime Batches')
Example #50
    def parametrize_peaks(self, intervals, max_peakwidth=50, min_peakwidth=25, symmetric_bounds=True):
        """
        Computes and stores the intonation profile of an audio recording.

        :param intervals: the reference set of intervals to which peak positions
         correspond. For each interval, the properties of the corresponding peak, if it exists,
         are computed and stored in the intonation profile.
        :param max_peakwidth: the maximum allowed width of the peak at the base for computing
        parameters of the distribution.
        :param min_peakwidth: the minimum allowed width of the peak at the base for computing
        parameters of the distribution.
        """
        assert isinstance(self.pitch_obj.pitch, np.ndarray)
        valid_pitch = self.pitch_obj.pitch
        valid_pitch = [i for i in valid_pitch if i > -10000]
        valid_pitch = np.array(valid_pitch)

        parameters = {}
        for i in xrange(len(self.histogram.peaks["peaks"][0])):
            peak_pos = self.histogram.peaks["peaks"][0][i]
            #Set left and right bounds of the distribution.
            max_leftbound = peak_pos - max_peakwidth
            max_rightbound = peak_pos + max_peakwidth
            leftbound = max_leftbound
            rightbound = max_rightbound
            nearest_valleyindex = utils.find_nearest_index(self.histogram.peaks["valleys"][0], peak_pos)
            if peak_pos > self.histogram.peaks["valleys"][0][nearest_valleyindex]:
                leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]
                if len(self.histogram.peaks["valleys"][0][nearest_valleyindex + 1:]) == 0:
                    rightbound = peak_pos + max_peakwidth
                else:
                    offset = nearest_valleyindex + 1
                    nearest_valleyindex = utils.find_nearest_index(
                        self.histogram.peaks["valleys"][0][offset:], peak_pos)
                    rightbound = self.histogram.peaks["valleys"][0][offset + nearest_valleyindex]
            else:
                rightbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]
                if len(self.histogram.peaks["valleys"][0][:nearest_valleyindex]) == 0:
                    leftbound = peak_pos - max_peakwidth
                else:
                    nearest_valleyindex = utils.find_nearest_index(
                        self.histogram.peaks["valleys"][0][:nearest_valleyindex], peak_pos)
                    leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]

            #In terms of the x-axis, leftbound should be at least min_peakwidth
            # less than peak_pos, and at most max_peakwidth less than peak_pos,
            # and vice versa for the rightbound.
            if leftbound < max_leftbound:
                leftbound = max_leftbound
            elif leftbound > peak_pos - min_peakwidth:
                leftbound = peak_pos - min_peakwidth

            if rightbound > max_rightbound:
                rightbound = max_rightbound
            elif rightbound < peak_pos + min_peakwidth:
                rightbound = peak_pos + min_peakwidth

            #If symmetric bounds are asked for, then make the bounds symmetric
            if symmetric_bounds:
                if peak_pos - leftbound < rightbound - peak_pos:
                    imbalance = (rightbound - peak_pos) - (peak_pos - leftbound)
                    rightbound -= imbalance
                else:
                    imbalance = (peak_pos - leftbound) - (rightbound - peak_pos)
                    leftbound += imbalance

            #extract the distribution and estimate the parameters
            distribution = valid_pitch[valid_pitch >= leftbound]
            distribution = distribution[distribution <= rightbound]
            #print peak_pos, "\t", len(distribution), "\t", leftbound, "\t", rightbound

            interval_index = utils.find_nearest_index(intervals, peak_pos)
            interval = intervals[interval_index]
            _mean = float(np.mean(distribution))
            _variance = float(variation(distribution))
            _skew = float(skew(distribution))
            _kurtosis = float(kurtosis(distribution))
            pearson_skew = float(3.0 * (_mean - peak_pos) / np.sqrt(abs(_variance)))
            parameters[interval] = {"position": float(peak_pos),
                                    "mean": _mean,
                                    "amplitude": float(self.histogram.peaks["peaks"][1][i]),
                                    "variance": _variance,
                                    "skew1": _skew,
                                    "skew2": pearson_skew,
                                    "kurtosis": _kurtosis}

        self.intonation_profile = parameters
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default="False",
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")

    args = parser.parse_args()
    infile = args.infile
    outfile = open(args.outfile, "w+")
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    sample0 = 0
    sample1 = 0
    sample2 = 0
    if args.sample_cols != None:
        sample0 = 1
        barlett_samples = []
        for sample in args.sample_cols.split(";"):
            barlett_samples.append(map(int, sample.split(",")))
    if args.sample_one_cols != None:
        sample1 = 1
        sample_one_cols = args.sample_one_cols.split(",")
    if args.sample_two_cols != None:
        sample_two_cols = args.sample_two_cols.split(",")
        sample2 = 1
    for line in open(infile):
        sample_one = []
        sample_two = []
        cols = line.strip().split("\t")
        if sample0 == 1:
            b_samples = columns_to_values(barlett_samples, line)
        if sample1 == 1:
            for index in sample_one_cols:
                sample_one.append(cols[int(index) - 1])
        if sample2 == 1:
            for index in sample_two_cols:
                sample_two.append(cols[int(index) - 1])
        if test_id.strip() == "describe":
            size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one))
            cols.append(size)
            cols.append(min_max)
            cols.append(mean)
            cols.append(uv)
            cols.append(bs)
            cols.append(bk)
        elif test_id.strip() == "mode":
            vals, counts = stats.mode(map(float, sample_one))
            cols.append(vals)
            cols.append(counts)
        elif test_id.strip() == "nanmean":
            m = stats.nanmean(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "kurtosistest":
            z_value, p_value = stats.kurtosistest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "itemfreq":
            freq = stats.itemfreq(map(float, sample_one))
            for list in freq:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "nanmedian":
            m = stats.nanmedian(map(float, sample_one))
            cols.append(m)
        elif test_id.strip() == "variation":
            ra = stats.variation(map(float, sample_one))
            cols.append(ra)
        elif test_id.strip() == "boxcox_llf":
            IIf = stats.boxcox_llf(imbda, map(float, sample_one))
            cols.append(IIf)
        elif test_id.strip() == "tiecorrect":
            fa = stats.tiecorrect(map(float, sample_one))
            cols.append(fa)
        elif test_id.strip() == "rankdata":
            r = stats.rankdata(map(float, sample_one), method=args.md)
            cols.append(r)
        elif test_id.strip() == "nanstd":
            s = stats.nanstd(map(float, sample_one), bias=args.bias)
            cols.append(s)
        elif test_id.strip() == "anderson":
            A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist)
            cols.append(A2)
            for list in critical:
                cols.append(list)
            cols.append(",")
            for list in sig:
                cols.append(list)
        elif test_id.strip() == "binom_test":
            p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p)
            cols.append(p_value)
        elif test_id.strip() == "gmean":
            gm = stats.gmean(map(float, sample_one), dtype=args.dtype)
            cols.append(gm)
        elif test_id.strip() == "hmean":
            hm = stats.hmean(map(float, sample_one), dtype=args.dtype)
            cols.append(hm)
        elif test_id.strip() == "kurtosis":
            k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
            cols.append(k)
        elif test_id.strip() == "moment":
            n_moment = stats.moment(map(float, sample_one), n=args.n)
            cols.append(n_moment)
        elif test_id.strip() == "normaltest":
            k2, p_value = stats.normaltest(map(float, sample_one))
            cols.append(k2)
            cols.append(p_value)
        elif test_id.strip() == "skew":
            skewness = stats.skew(map(float, sample_one), bias=args.bias)
            cols.append(skewness)
        elif test_id.strip() == "skewtest":
            z_value, p_value = stats.skewtest(map(float, sample_one))
            cols.append(z_value)
            cols.append(p_value)
        elif test_id.strip() == "sem":
            s = stats.sem(map(float, sample_one), ddof=args.ddof)
            cols.append(s)
        elif test_id.strip() == "zscore":
            z = stats.zscore(map(float, sample_one), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "signaltonoise":
            s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof)
            cols.append(s2n)
        elif test_id.strip() == "percentileofscore":
            p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind)
            cols.append(p)
        elif test_id.strip() == "bayes_mvs":
            c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha)
            cols.append(c_mean)
            cols.append(c_var)
            cols.append(c_std)
        elif test_id.strip() == "sigmaclip":
            c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n)
            cols.append(c)
            cols.append(c_low)
            cols.append(c_up)
        elif test_id.strip() == "kstest":
            d, p_value = stats.kstest(
                map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
            )
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "chi2_contingency":
            chi2, p, dof, ex = stats.chi2_contingency(
                map(float, sample_one), correction=args.correction, lambda_=args.lambda_
            )
            cols.append(chi2)
            cols.append(p)
            cols.append(dof)
            cols.append(ex)
        elif test_id.strip() == "tmean":
            if nf == 0 and mf == 0:
                mean = stats.tmean(map(float, sample_one))
            else:
                mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(mean)
        elif test_id.strip() == "tmin":
            if mf == 0:
                min = stats.tmin(map(float, sample_one))
            else:
                min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive)
            cols.append(min)
        elif test_id.strip() == "tmax":
            if nf == 0:
                max = stats.tmax(map(float, sample_one))
            else:
                max = stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
            cols.append(max)
        elif test_id.strip() == "tvar":
            if nf == 0 and mf == 0:
                var = stats.tvar(map(float, sample_one))
            else:
                var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(var)
        elif test_id.strip() == "tstd":
            if nf == 0 and mf == 0:
                std = stats.tstd(map(float, sample_one))
            else:
                std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(std)
        elif test_id.strip() == "tsem":
            if nf == 0 and mf == 0:
                s = stats.tsem(map(float, sample_one))
            else:
                s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
            cols.append(s)
        elif test_id.strip() == "scoreatpercentile":
            if nf == 0 and mf == 0:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
                )
            else:
                s = stats.scoreatpercentile(
                    map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
                )
            for list in s:
                cols.append(list)
        elif test_id.strip() == "relfreq":
            if nf == 0 and mf == 0:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
            else:
                rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
            for list in rel:
                cols.append(list)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "binned_statistic":
            if nf == 0 and mf == 0:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
                )
            else:
                st, b_edge, b_n = stats.binned_statistic(
                    map(float, sample_one),
                    map(float, sample_two),
                    statistic=args.statistic,
                    bins=args.b,
                    range=(mf, nf),
                )
            cols.append(st)
            cols.append(b_edge)
            cols.append(b_n)
        elif test_id.strip() == "threshold":
            if nf == 0 and mf == 0:
                o = stats.threshold(map(float, sample_one), newval=args.new)
            else:
                o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trimboth":
            o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
            for list in o:
                cols.append(list)
        elif test_id.strip() == "trim1":
            t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
            for list in t1:
                cols.append(list)
        elif test_id.strip() == "histogram":
            if nf == 0 and mf == 0:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
            else:
                hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
            cols.append(hi)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "cumfreq":
            if nf == 0 and mf == 0:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
            else:
                cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
            cols.append(cum)
            cols.append(low_range)
            cols.append(binsize)
            cols.append(ex)
        elif test_id.strip() == "boxcox_normmax":
            if nf == 0 and mf == 0:
                ma = stats.boxcox_normmax(map(float, sample_one))
            else:
                ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
            cols.append(ma)
        elif test_id.strip() == "boxcox":
            if imbda == 0:
                box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
                cols.append(box)
                cols.append(ma)
                cols.append(ci)
            else:
                box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
                cols.append(box)
        elif test_id.strip() == "histogram2":
            h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
            for list in h2:
                cols.append(list)
        elif test_id.strip() == "ranksums":
            z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
            cols.append(z_statistic)
            cols.append(p_value)
        elif test_id.strip() == "ttest_1samp":
            t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
            for list in t:
                cols.append(list)
            for list in prob:
                cols.append(list)
        elif test_id.strip() == "ansari":
            AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
            cols.append(AB)
            cols.append(p_value)
        elif test_id.strip() == "linregress":
            slope, intercept, r_value, p_value, stderr = stats.linregress(
                map(float, sample_one), map(float, sample_two)
            )
            cols.append(slope)
            cols.append(intercept)
            cols.append(r_value)
            cols.append(p_value)
            cols.append(stderr)
        elif test_id.strip() == "pearsonr":
            cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
            cols.append(cor)
            cols.append(p_value)
        elif test_id.strip() == "pointbiserialr":
            r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
            cols.append(r)
            cols.append(p_value)
        elif test_id.strip() == "ks_2samp":
            d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
            cols.append(d)
            cols.append(p_value)
        elif test_id.strip() == "mannwhitneyu":
            mw_stats_u, p_value = stats.mannwhitneyu(
                map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "zmap":
            z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            for list in z:
                cols.append(list)
        elif test_id.strip() == "ttest_ind":
            mw_stats_u, p_value = stats.ttest_ind(
                map(float, sample_one), map(float, sample_two), equal_var=args.equal_var
            )
            cols.append(mw_stats_u)
            cols.append(p_value)
        elif test_id.strip() == "ttest_rel":
            t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(t)
            cols.append(prob)
        elif test_id.strip() == "mood":
            z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
            cols.append(z)
            cols.append(p_value)
        elif test_id.strip() == "shapiro":
            W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
            cols.append(W)
            cols.append(p_value)
            for list in a:
                cols.append(list)
        elif test_id.strip() == "kendalltau":
            k, p_value = stats.kendalltau(
                map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
            )
            cols.append(k)
            cols.append(p_value)
        elif test_id.strip() == "entropy":
            s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
            cols.append(s)
        elif test_id.strip() == "spearmanr":
            if sample2 == 1:
                rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
            else:
                rho, p_value = stats.spearmanr(map(float, sample_one))
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "wilcoxon":
            if sample2 == 1:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one),
                    map(float, sample_two),
                    zero_method=args.zero_method,
                    correction=args.correction,
                )
            else:
                T, p_value = stats.wilcoxon(
                    map(float, sample_one), zero_method=args.zero_method, correction=args.correction
                )
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "chisquare":
            if sample2 == 1:
                rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
            else:
                rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
            cols.append(rho)
            cols.append(p_value)
        elif test_id.strip() == "power_divergence":
            if sample2 == 1:
                stat, p_value = stats.power_divergence(
                    map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
                )
            else:
                stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "theilslopes":
            if sample2 == 1:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
            else:
                mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
            cols.append(mpe)
            cols.append(met)
            cols.append(lo)
            cols.append(up)
        elif test_id.strip() == "combine_pvalues":
            if sample2 == 1:
                stat, p_value = stats.combine_pvalues(
                    map(float, sample_one), method=args.med, weights=map(float, sample_two)
                )
            else:
                stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
            cols.append(stat)
            cols.append(p_value)
        elif test_id.strip() == "obrientransform":
            ob = stats.obrientransform(*b_samples)
            for list in ob:
                elements = ",".join(map(str, list))
                cols.append(elements)
        elif test_id.strip() == "f_oneway":
            f_value, p_value = stats.f_oneway(*b_samples)
            cols.append(f_value)
            cols.append(p_value)
        elif test_id.strip() == "kruskal":
            h, p_value = stats.kruskal(*b_samples)
            cols.append(h)
            cols.append(p_value)
        elif test_id.strip() == "friedmanchisquare":
            fr, p_value = stats.friedmanchisquare(*b_samples)
            cols.append(fr)
            cols.append(p_value)
        elif test_id.strip() == "fligner":
            xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(xsq)
            cols.append(p_value)
        elif test_id.strip() == "bartlett":
            T, p_value = stats.bartlett(*b_samples)
            cols.append(T)
            cols.append(p_value)
        elif test_id.strip() == "levene":
            w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
            cols.append(w)
            cols.append(p_value)
        elif test_id.strip() == "median_test":
            stat, p_value, m, table = stats.median_test(
                ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
            )
            cols.append(stat)
            cols.append(p_value)
            cols.append(m)
            cols.append(table)
            for list in table:
                elements = ",".join(map(str, list))
                cols.append(elements)
        outfile.write("%s\n" % "\t".join(map(str, cols)))
    outfile.close()
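
A hypothetical restructuring note: the long if/elif chain above dispatches on the test name one branch at a time; the same idea can be expressed as a table-driven dispatch. The sketch below is illustrative only (names and column handling are not part of the original tool) and covers a few one-sample tests that take no extra arguments.

from scipy import stats

# Hypothetical dispatch table: test id -> callable returning the output columns.
ONE_SAMPLE_TESTS = {
    "variation":    lambda x: [stats.variation(x)],
    "sem":          lambda x: [stats.sem(x)],
    "skewtest":     lambda x: list(stats.skewtest(x)),      # [z_value, p_value]
    "kurtosistest": lambda x: list(stats.kurtosistest(x)),  # [z_value, p_value]
    "normaltest":   lambda x: list(stats.normaltest(x)),    # [k2, p_value]
}

def run_one_sample_test(test_id, sample_one):
    values = list(map(float, sample_one))
    return ONE_SAMPLE_TESTS[test_id.strip()](values)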
예제 #52
0
	def run_simulation(self):
		self.parameter_values()

		increment_exponents_ratio = 0.005
		first_exponent_list = np.arange(0.01, 0.5, increment_exponents_ratio)
		increment_volatility = 0.0025
		volatility_list = np.arange(0.01, 0.25, increment_volatility)


		with open('stochastic_variance.csv', 'w', newline='') as stochastic_variance:
			for volatility in volatility_list:
				print('volatility', volatility)
			
				self.parameters.goods_demand_volatility = volatility
				self.assign_run_parameters()
				self.run.initialize()
				self.run.create_economy()
				self.run.run_forward_in_time()
				self.run.compute_run_statistics()
				last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1)
				list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:]
				list_stock_ratio = np.array(list_stock_ratio)
				
				list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)]
				stock_variance = stats.variation(list_stock_ratio)
				
				list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:]
				list_price_ratio = np.array(list_price_ratio)
				list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)]
				price_variance = stats.variation(list_price_ratio)
				writer = csv.writer(stochastic_variance, delimiter=',')
				writer.writerow([volatility] + [stock_variance] + [price_variance])


		with open('stock_price.csv', 'w', newline='') as data_stock_price:
			for first_exponent in first_exponent_list:
				print('first_exponent', first_exponent)
		
				self.parameters.goods_demand_volatility = 0
				self.parameters.goods_firm_exponents = [first_exponent, 0.5]
				self.assign_run_parameters()
				self.run.initialize()
				self.run.create_economy()
				self.run.run_forward_in_time()
				self.run.compute_run_statistics()
				last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1)
				list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:]
				list_stock_ratio = np.array(list_stock_ratio)
				list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)]
				stock_ratio = np.mean(list_stock_ratio[-last_ten_percent_time_steps:])
				
				list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:]
				list_price_ratio = np.array(list_price_ratio)
				list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)]
				price_ratio = np.mean(list_price_ratio[-last_ten_percent_time_steps:])
				
				ratio_exponents = first_exponent / 0.5
				writer = csv.writer(data_stock_price, delimiter=',')
				writer.writerow([ratio_exponents] + [stock_ratio] + [price_ratio])


		with open('stochastic_stock_price.csv', 'w', newline='') as stochastic_stock_price:
			for first_exponent in first_exponent_list:
				self.parameters.goods_demand_volatility = 0.1
				print('stochastic first_exponent', first_exponent)
			
				self.parameters.goods_firm_exponents = [first_exponent, 0.5]
				self.assign_run_parameters()
				self.run.initialize()
				self.run.create_economy()
				self.run.run_forward_in_time()
				self.run.compute_run_statistics()
				last_ten_percent_time_steps = int(self.parameters.time_steps * 0.1)
				list_stock_ratio = self.run.capital_stock_ratio[-last_ten_percent_time_steps:]
				list_stock_ratio = np.array(list_stock_ratio)
				list_stock_ratio = list_stock_ratio[~np.isnan(list_stock_ratio)]
				stock_ratio = np.mean(list_stock_ratio[-last_ten_percent_time_steps:])
				
				list_price_ratio = self.run.capital_price_ratio[-last_ten_percent_time_steps:]
				list_price_ratio = np.array(list_price_ratio)
				list_price_ratio = list_price_ratio[~np.isnan(list_price_ratio)]
				price_ratio = np.mean(list_price_ratio[-last_ten_percent_time_steps:])
				
				ratio_exponents = first_exponent / 0.5
				writer = csv.writer(stochastic_stock_price, delimiter=',')
				writer.writerow([ratio_exponents] + [stock_ratio] + [price_ratio])
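
The three loops above repeat the same post-processing step: take the last ten percent of a time series, drop NaNs, and summarise it. A small helper capturing that step might look like the sketch below (the helper name is hypothetical and not part of the original simulation class).

import numpy as np
from scipy import stats

def tail_variation(series, tail_fraction=0.1):
    # Coefficient of variation over the trailing fraction of a series, ignoring NaNs.
    values = np.asarray(series, dtype=float)
    n_tail = int(len(values) * tail_fraction)
    tail = values[-n_tail:]
    tail = tail[~np.isnan(tail)]
    return stats.variation(tail)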
예제 #53
0
def descstats(data, cols=None, axis=0):
    '''
    Prints descriptive statistics for one or multiple variables.

    Parameters
    ----------
    data : numpy array
        The data to be described.

    cols : list, optional
        A list of column numbers or field names (for a recarray) of the
        variables to describe. Default is all columns.

    axis : 1 or 0
        Axis order of the data. Default is 0 for column-ordered data.

    Examples
    --------
    >>> descstats(data.exog, cols=['x_1', 'x_2', 'x_3'])

    '''

    x = np.array(data)  # or rather, the data we're interested in
    if cols is None:
#       if isinstance(x, np.recarray):
#            cols = np.array(len(x.dtype.names))
        if not isinstance(x, np.recarray) and x.ndim == 1:
            x = x[:,None]

    if x.shape[1] == 1:
        desc = '''
    ---------------------------------------------
    Univariate Descriptive Statistics
    ---------------------------------------------

    Var. Name   %(name)12s
    ----------
    Obs.          %(nobs)22i  Range                  %(range)22s
    Sum of Wts.   %(sum)22s  Coeff. of Variation     %(coeffvar)22.4g
    Mode          %(mode)22.4g  Skewness                %(skewness)22.4g
    Repeats       %(nmode)22i  Kurtosis                %(kurtosis)22.4g
    Mean          %(mean)22.4g  Uncorrected SS          %(uss)22.4g
    Median        %(median)22.4g  Corrected SS            %(ss)22.4g
    Variance      %(variance)22.4g  Sum Observations        %(sobs)22.4g
    Std. Dev.     %(stddev)22.4g
    ''' % {'name': cols, 'sum': 'N/A', 'nobs': len(x), 'mode': \
    stats.mode(x)[0][0], 'nmode': stats.mode(x)[1][0], \
    'mean': x.mean(), 'median': np.median(x), 'range': \
    '('+str(x.min())+', '+str(x.max())+')', 'variance': \
    x.var(), 'stddev': x.std(), 'coeffvar': \
    stats.variation(x), 'skewness': stats.skew(x), \
    'kurtosis': stats.kurtosis(x), 'uss': stats.ss(x),\
    'ss': stats.ss(x-x.mean()), 'sobs': np.sum(x)}

#    ''' % {'name': cols[0], 'sum': 'N/A', 'nobs': len(x[cols[0]]), 'mode': \
#    stats.mode(x[cols[0]])[0][0], 'nmode': stats.mode(x[cols[0]])[1][0], \
#    'mean': x[cols[0]].mean(), 'median': np.median(x[cols[0]]), 'range': \
#    '('+str(x[cols[0]].min())+', '+str(x[cols[0]].max())+')', 'variance': \
#    x[cols[0]].var(), 'stddev': x[cols[0]].std(), 'coeffvar': \
#    stats.variation(x[cols[0]]), 'skewness': stats.skew(x[cols[0]]), \
#    'kurtosis': stats.kurtosis(x[cols[0]]), 'uss': stats.ss(x[cols[0]]),\
#    'ss': stats.ss(x[cols[0]]-x[cols[0]].mean()), 'sobs': np.sum(x[cols[0]])}

        desc+= '''

    Percentiles
    -------------
    1  %%          %12.4g
    5  %%          %12.4g
    10 %%          %12.4g
    25 %%          %12.4g

    50 %%          %12.4g

    75 %%          %12.4g
    90 %%          %12.4g
    95 %%          %12.4g
    99 %%          %12.4g
    ''' % tuple([stats.scoreatpercentile(x,per) for per in (1,5,10,25,
                50,75,90,95,99)])
        t,p_t=stats.ttest_1samp(x,0)
        M,p_M=sign_test(x)
        S,p_S=stats.wilcoxon(np.squeeze(x))

        desc+= '''

    Tests of Location (H0: Mu0=0)
    -----------------------------
    Test                Statistic       Two-tailed probability
    -----------------+-----------------------------------------
    Student's t      |  t %7.5f   Pr > |t|   <%.4f
    Sign             |  M %8.2f   Pr >= |M|  <%.4f
    Signed Rank      |  S %8.2f   Pr >= |S|  <%.4f

    ''' % (t,p_t,M,p_M,S,p_S)
# Should this be part of a 'descstats'
# in any event these should be split up, so that they can be called
# individually and only returned together if someone calls summary
# or something of the sort

    elif x.shape[1] > 1:
        desc ='''
    Var. Name   |     Obs.        Mean    Std. Dev.           Range
    ------------+--------------------------------------------------------'''+\
            os.linesep

# for recarrays with columns passed as names
#        if isinstance(cols[0],str):
#            for var in cols:
#                desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
#%(range)20s" %  {'name': var, 'obs': len(x[var]), 'mean': x[var].mean(),
#        'stddev': x[var].std(), 'range': '('+str(x[var].min())+', '\
#                +str(x[var].max())+')'+os.linesep}
#        else:
        for var in range(x.shape[1]):
                desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
%(range)20s" % {'name': var, 'obs': len(x[:,var]), 'mean': x[:,var].mean(),
                'stddev': x[:,var].std(), 'range': '('+str(x[:,var].min())+', '+\
                str(x[:,var].max())+')'+os.linesep}
    else:
        raise ValueError, "data not understood"

    return desc
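
A minimal usage sketch for descstats (the random data is made up for illustration; the function itself relies on the older scipy/statsmodels APIs it imports, such as sign_test and stats.ss):

import numpy as np

x = np.random.randn(200)        # single variable -> full univariate summary
print(descstats(x))

X = np.random.randn(200, 3)     # several columns -> compact per-column table
print(descstats(X))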
예제 #54
0
def BasicSummary1(series):
	series_len = len(series)
	basiclist = [stats.skew(series), stats.skewtest(series)[1],
	             stats.kurtosis(series), stats.kurtosistest(series)[1],
	             stats.variation(series)]
	return np.round(pd.Series(basiclist),decimals=6)
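
A small usage sketch (the input is made up; numpy, pandas and scipy.stats are assumed to be imported as np, pd and stats, as in the snippet above):

import numpy as np

series = np.random.randn(500)
summary = BasicSummary1(series)
# Five values: skew, skewtest p-value, kurtosis, kurtosistest p-value, variation.
print(summary)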
예제 #55
0
    parts = doHeading(1,Chapters[0], h1, parts)
    
    para = Paragraph(u"My Text that I can write here or take it from somewhere like shown in the next paragraph.", style["Normal"])
    parts.append(para)
    
    parts = doHeading(1,Subchapters[1], h2,parts)
    
    title = u"my first data"
    
    para = Paragraph(Context[title], style["Normal"])
    parts.append(para)

    text = {}
    
    text.update({"Standardabweichung":np.std(x)})
    text.update({"Varianz":variation(x)})
    text.update({"Schiefe":skew(x)})
    text.update({"Kurtosis":kurtosis(x)})
    
    print( Content[title] )
    
    thisImage = plotHist(Content[title],title,subname="",spec="",show=False,text=text,Versuch=Chapters[0],path="",N=6)

    factor = doc.width/thisImage.drawWidth
    thisImage.drawHeight = thisImage.drawHeight * factor
    thisImage.drawWidth  = thisImage.drawWidth  * factor
    
    parts.append(thisImage)
    
    para = Paragraph(u"Fig. " + str(doc.figCount) + title, caption)
    parts.append(para)
예제 #56
0
def sharpe(x):
    # Despite the name, this returns scipy.stats.variation (the coefficient of
    # variation, std/mean); infinite results are mapped to 0 below.
    v = stats.variation(x)
    if np.isinf(v):
        return 0
    else:
        return v
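
For reference, scipy.stats.variation is std/mean; a conventional Sharpe-style quantity is the reciprocal, mean/std. A tiny illustrative check (not part of the original snippet):

import numpy as np
from scipy import stats

x = np.array([1.0, 2.0, 3.0, 4.0])
cv = stats.variation(x)          # std/mean, what sharpe() above returns
ratio = np.mean(x) / np.std(x)   # mean/std, the reciprocal
assert np.isclose(cv * ratio, 1.0)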
예제 #57
0
def Convert2PdfReport(doc,parts,Daten,outpath,Fahrzeuge,Note=None,Carnames=None): #,outname,
    """
    Plots Data and Graphics to a Portable Document File
    """ 
    

    numbers = [int(re.findall(r'\d+', item)[0]) for item in Fahrzeuge]
    idxs = [numbers.index(x) for x in sorted(numbers)]
    
       
    Fahrzeuge = [Fahrzeuge[this] for this in idxs]
    print( "Fahrzeuge/Vergleiche:",Fahrzeuge )
    #print "Phaenomene:",Phaenomene

    if Note is not None:
        vMin=np.min(Note.Note)
        vMax=np.max(Note.Note)
        if Note.typ == "abs":
            lowmid=3.5
            midhigh=6.5
            leftc="g"
            rightc="m"
            leftct=colors.limegreen
            rightct=colors.pink
            
        elif Note.typ == "rel":
            lowmid=-1.5
            midhigh=1.5
            leftc="m"
            rightc="g"
            leftct=colors.pink
            rightct=colors.limegreen
            

    for i,Fahrzeug in enumerate(Fahrzeuge):
        print( Daten.viewkeys() )
        Vergleich = Daten[Fahrzeug] #,Meinungen
        title = u"Auswertung für " + str(Fahrzeug)
        if Carnames is not None:
            htitle = title + " " + str(Carnames[Fahrzeug])
        else:
            htitle = title
        
        Means = []#()
        Stds = []#()
        Data = []
          
        celldata = [["" for k in range(7+2)] for m in range(7+1)]    
        
        #celldata[0:-1][0] = [u"Heben",u"Nicken",u"Wanken",u"Werfen",u"(Mikro-) Stuckern",u"Stößigkeit"] #Phe
        #celldata[0][0:-1] = ["","Mittelwert","Standardabweichung","Minimum","Maximum","Stichprobenmenge"]
        
        parts = doHeading(Fahrzeuge[i],htitle, h1, parts)
     
        parts = doHeading(Fahrzeuge[i],u"Fahrzeug Übersicht", h2,parts)
        Phaenomene = []
        for j,key  in enumerate(Vergleich.iterkeys()):
            Phaenomene.append(key)
            Phaenomen = Vergleich[key]
            #Means = Means.__add__( (Phaenomen.mean,) )
           # Stds = Stds.__add__( (Phaenomen.std,) )
            Means.append(Phaenomen.mean)
            Stds.append(Phaenomen.std/2.)
            Data.append(Phaenomen)
            
            try:
                #print Phe[j]
                #print celldata
                try:
                    celldata[j+1][0] = unicode(Phaenomene[j])
                except IndexError:
                    print( "Error:" )
                    #print celldata, Phaenomene[j]
                
                if not Phaenomen.len == 0:
                    try:
                        celldata[0][1] = "Mittelwert"
                        celldata[j+1][1] = '%1.3f' % (Phaenomen.mean)
                        celldata[0][2] = "Standardabweichung"
                        celldata[j+1][2] = '%1.3f' % (Phaenomen.std)
                        celldata[0][3] = "Minimum"
                        celldata[j+1][3] = Phaenomen.min
                        celldata[0][4] = "Maximum"
                        celldata[j+1][4] = Phaenomen.max
                        celldata[0][5] = "Stichprobenmenge"
                        celldata[j+1][5] = Phaenomen.len
                    except:
                        pass
                    
                else:
                    
                    para = Paragraph(u"Zu "+unicode(Phaenomene[j])+u": Keine Auswertung Möglich,", style["Normal"])
                    parts.append(para)
                    
                    para = Paragraph("Anzahl Vergebener Noten:" + str(Phaenomen.len), style["Normal"])
                    parts.append(para)                
                
            except LayoutError:
                print( "Layout error detected, could not create the Document Template" )
                
        #thisDrawing = barChart(Means,title+"Mittelwerte",Phaenomene,path=outpath,vMin=-4,vMax=4)
                
        thisDrawing = barHorizontal(Means[::-1],title+"Mittelwerte",Phaenomene[::-1],Stds[::-1],path=outpath,vMin=vMin,vMax=vMax,lowmid=lowmid,midhigh=midhigh,leftc=leftc,rightc=rightc) # relative: 
        
        factor = (doc.width*0.85)/thisDrawing.drawWidth
        thisDrawing.drawHeight = thisDrawing.drawHeight * factor
        thisDrawing.drawWidth  = thisDrawing.drawWidth  * factor
        
        parts.append(thisDrawing)
        
        para = Paragraph(u"Mittelwerte der Phänomene mit Standardabweichung", caption)
        parts.append(para)
        
        parts.append(Spacer(1, 12))
        
        mystyle=[ 
                    ('LINEABOVE',(0,0),(-1,0),1,colors.blue),
                    ('LINEABOVE',(0,1),(-1,1),1,colors.blue),
                    ('LINEBEFORE',(1,1),(1,-1),1,colors.pink),
                    ('LINEBELOW',(0,-1),(-1,-1),1,colors.blue),]
            
        for l,key  in enumerate(Vergleich.iterkeys()):
            value = Vergleich[key].mean
            if ( value >= vMin and value < lowmid ):
                mystyle.append(('BACKGROUND',(1,l+1),(1,l+1),leftct))
            elif ( value >= lowmid and value < midhigh ):
                mystyle.append(('BACKGROUND',(1,l+1),(1,l+1),colors.khaki))
            elif ( value >= midhigh and value <= vMax ):
                mystyle.append(('BACKGROUND',(1,l+1),(1,l+1), rightct))
            else:
                pass
        
        t=Table(celldata, style=mystyle)
        #colors.brown
        parts.append(t)
        
        parts.append(Spacer(1, 12))
        parts.append(PageBreak())
        parts = doHeading(Fahrzeuge[i],u"Histogramme der Phänomene", h2,parts)
        
        for m,data in enumerate(Data):
            if not data.len == 0:
                
                text = {}
                
                text.update({"Standardabweichung":data.std})
                text.update({"Varianz":variation(data.Event)})
                text.update({"Schiefe":skew(data.Event)})
                text.update({"Kurtosis":kurtosis(data.Event)})
                
                thisImage = plotHist(data.Event,Phaenomene[m],show=False,text=text,Versuch=title,path=outpath,N=Note.Note,Min=vMin,Max=vMax)
                #except:
                #    continue

                factor = (doc.width*0.85)/thisImage.drawWidth
                thisImage.drawHeight = thisImage.drawHeight * factor
                thisImage.drawWidth  = thisImage.drawWidth  * factor
                
                parts = doHeading(Fahrzeuge[i],u"Phänomen " + unicode(Phaenomene[m]), h3,parts)
                #para = Paragraph(u"Phänomen " + str(Phe[idxs[m]]), style["Heading3"])
                #parts.append(para)
                
                parts.append(thisImage)
        parts.append(PageBreak())
        parts = doHeading(Fahrzeuge[i],u"Verbale Bemerkungen", h2,parts)
        
        for o,Phaenomen in enumerate(Phaenomene):
        
            if not len(Vergleich[Phaenomen].Text) == 0:
                parts = doHeading(Fahrzeuge[i],u"Probandenmeinung " + unicode(Phaenomen) , h3,parts)
                #print Phaenomene[o], Meinungen[o]
                para = Paragraph(Vergleich[Phaenomen].Text, style["Normal"])
                parts.append(para)         
        
        parts.append(PageBreak())
        
    plt.close('all')        
        
    try:
        
        return parts
        #doc.build(parts)
        #doc.multiBuild(parts)

    except LayoutError:
        return LayoutError("there is an error with the Layout")
예제 #58
0
def generateFeatures(all_radar_features):
	features = [];					 

	####################Take aggregate statistics based on the radar quality index
	features.append(len(all_radar_features)) # No of Radars;

	for i in range(0, len(all_radar_features[0])):
		if i in [0, 1, 2, 3, 4, 12]:
			# Time based observations
			observations = [r[i] for r in all_radar_features];	 
			features.append(round(np.mean(observations), 2)); 
			features.append(round(np.std(observations), 2)); 

		elif i == 7:
			#HydrometeorType
			#For each of the HydrometeorType, compute the weighted mean of the counts
			HydrometeorType = dict();

			for r in all_radar_features:
				for k in r[i].keys():
					if float(k) in HydrometeorType:
						HydrometeorType[float(k)] += (r[12] * r[i][k])
					else:
						HydrometeorType[float(k)]  = (r[12] * r[i][k])

			for hm in range(0, 15):
				if float(hm) in HydrometeorType.keys():
					features.append(HydrometeorType[hm]);
				else:
					features.append(0);

			#Add the most frequent HydrometeorType
			HydrometeorType = dict();
			for r in all_radar_features:
				for k in r[i].keys():
					if float(k) in HydrometeorType:
						HydrometeorType[float(k)] += (r[i][k])
					else:
						HydrometeorType[float(k)]  = (r[i][k])

			most_frequent_meteor = sorted(HydrometeorType.items(), key=operator.itemgetter(1), reverse=True)
			if most_frequent_meteor:
				features.append(most_frequent_meteor[0][0]);
			else:
				features.append("NaN");

		else:
			#Only compute the stats of radar values which aren't missing
			observations = [r[12] * float(r[i]) for r in all_radar_features if r[i] != "NaN"];

			if len(observations) > 0:
				features.append(round(np.mean(observations), 2)); 
				features.append(round(np.std(observations), 2));  
				features.append(round(np.median(observations), 2));
				if np.mean(observations) > 0:
					features.append(round(variation(observations), 2)); #Coefficient of variations
				else:
					features.append("NaN"); #Coefficient of variations
			else:
				features.append("NaN"); 
				features.append("NaN");
				features.append("NaN");
				features.append("NaN");

	
	return features;
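
A minimal sketch of the quality-weighted aggregation used above, on made-up per-radar rows (the row layout here is simplified to (value, quality_index) and is not the original feature format):

import numpy as np
from scipy.stats import variation

radar_rows = [(2.0, 0.9), (3.5, 0.7), (1.0, 0.4)]   # (observation, radar quality index)

weighted = [q * v for v, q in radar_rows]
aggregate = [round(np.mean(weighted), 2),
             round(np.std(weighted), 2),
             round(np.median(weighted), 2),
             round(variation(weighted), 2) if np.mean(weighted) > 0 else "NaN"]
print(aggregate)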