def test_unbiased_HMM(precision=2):
    n_rv, n_sample = 10, 100
    n_scenario = 500
    data = np.random.rand(n_rv, n_sample)

    # original statistics
    tgt_moments = np.zeros((n_rv, 4))
    tgt_moments[:, 0] = data.mean(axis=1)
    tgt_moments[:, 1] = data.std(axis=1, ddof=1)
    tgt_moments[:, 2] = spstats.skew(data, axis=1, bias=False)
    tgt_moments[:, 3] = spstats.kurtosis(data, axis=1, bias=False)
    tgt_corrs = np.corrcoef(data)

    t0 = time()
    py_scenarios = HMM(tgt_moments, tgt_corrs, n_scenario, bias=False)
    print ("python unbiased HMM (n_rv, n_scenario):({}, {}) {:.4f} secs".format(
        n_rv, n_scenario, time()-t0))

    t1 = time()
    c_scenarios = c_HMM(tgt_moments, tgt_corrs, n_scenario, bias=False)
    print ("c unbiased HMM (n_rv, n_scenario):({}, {}) {:.4f} secs".format(
        n_rv, n_scenario, time()-t1))

    for scenarios in (py_scenarios,  c_scenarios):
        # scenarios statistics
        res_moments = np.zeros((n_rv, 4))
        res_moments[:, 0] = scenarios.mean(axis=1)
        res_moments[:, 1] = scenarios.std(axis=1, ddof=1)
        res_moments[:, 2] = spstats.skew(scenarios, axis=1, bias=False)
        res_moments[:, 3] = spstats.kurtosis(scenarios, axis=1, bias=False)
        res_corrs = np.corrcoef(scenarios)

        np.testing.assert_array_almost_equal(tgt_moments, res_moments, precision)
        np.testing.assert_array_almost_equal(tgt_corrs, res_corrs, precision)
def main():

    parser = argparse.ArgumentParser(description='Simulate and plot graphs of human turnover steps.')
    parser.add_argument('step', type=int,
                        help='number of simulation steps')
    parser.add_argument('p', type=float,
                        help='model parameter p')
    parser.add_argument('K', type=float,
                        help='model parameter K')
    parser.add_argument('stage', type=str,
                       help='wake or sleep')
    parser.add_argument('--stepchart', default=False, action="store_true",
                        help='if record and plot step chart')

    args = parser.parse_args()

    sim = TurnoverModel(args.step, args.p, args.K, args.stage, record_step_chart=args.stepchart)
    sim.save_fluctuation()
    sim.save_interval_angle_dist()
    sim.save_interval_ccdf()
    if args.stepchart:
        sim.save_step_chart()

    print('===== turnover intervals =====')
    print(sim.turnover_intervals)
    print('===== turnover times,intervals,angles =====')
    print(list(zip(sim.turnover_times, sim.turnover_intervals, sim.turnover_angles)))
    print('===== alpha s,l =====')
    print(sim.calced_alpha_s.get(), sim.calced_alpha_l.get())
    print('===== skew of log10(tau) =====')
    print(skew([log10(i) for i in sim.turnover_intervals]))
def iskew(i):
    print(i)
    ikmap_NL = kmapNL(i)
    ikmap_NOISY = kmapNOISY(i)
    skewness_NL = [skew(WLanalysis.smooth(ikmap_NL, ismooth).flatten() ) for ismooth in sigmaG_arr*PPA_NL] 
    skewness_NOISY = [skew(WLanalysis.smooth(ikmap_NOISY, ismooth).flatten() ) for ismooth in sigmaG_arr*PPA_NOISY]
    return [skewness_NL, skewness_NOISY]
Example #4
def smerodatna_odchylka(data, min=0, max=0, plot=True):
    #
    #   Computes the standard deviation. If min and max are not set, it is
    #       computed over the whole array; otherwise only over the values between min and max.
    #
    #   in 'data'   - array of data
    #   in 'min'    - minimum value in the array to consider
    #   in 'max'    - maximum value in the array to consider
    #   in 'plot'   - whether to draw a plot
    #
    #   out 'out'   - standard deviation
    #


    data = np.array(data)

    if min == 0 and max == 0:
        average = np.mean(data)
        median = np.median(data)
        standardDeviation=np.std(data)
        kurtosis = stats.kurtosis(data)
        skewness = stats.skew(data)
    else:
        crop = np.array([])
        for x in data:
            if min < x < max:
                crop=np.append(crop,x)
        average = np.mean(crop)
#        modus = stats.mode(crop)
#        modus = statistics.mode(crop)         !!!!!
        median = np.median(crop)
        standardDeviation=np.std(crop)
        kurtosis = stats.kurtosis(crop)
        skewness = stats.skew(crop)

    if plot:

        plt.figure()
        plt.axvspan(float(min), float(max), alpha=0.3, color='k')
        plt.axvspan(average-standardDeviation, average+standardDeviation, alpha=0.4, color='b')
        plt.axvspan(average+standardDeviation, average+standardDeviation+standardDeviation, alpha=0.4, color='r')
        plt.axvspan(average-standardDeviation, average-standardDeviation-standardDeviation, alpha=0.4, color='r')
        plt.axvline(x=median, linewidth=2, color='r')
        plt.axvline(x=average, linewidth=2, color='g')
        #plt.axvline(x=modus[0], linewidth=2, color='b')
        plt.hist(data, int(1.0 + 3.3 * math.log(np.shape(data)[0])), facecolor='green', alpha=0.75)
        plt.text(average, 10, "std: "+ str(standardDeviation),
                bbox={'facecolor':'green', 'alpha':0.75, 'pad':10})
        plt.show(block=False)


    print "___________________________________________________________"
    print "výběr hodnot od ", float(min), " po ", float(max)
    print "průměr: ", average
    print "median: ", median
    print "smerodatn odchylka je: ", standardDeviation
    print "spicatost: ", kurtosis
    print "sikmost: ", skewness

    return standardDeviation
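A minimal usage sketch for smerodatna_odchylka, assuming numpy, scipy.stats, matplotlib.pyplot and math are imported as in the snippet above; the data here is synthetic:

import numpy as np

# 1000 draws from a standard normal; restrict the statistics to values in (-2, 2)
# and skip the plot so the call also works in a headless environment
values = np.random.normal(loc=0.0, scale=1.0, size=1000)
sd = smerodatna_odchylka(values, min=-2, max=2, plot=False)
print("standard deviation of the selection:", sd)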
def getLabelImFeats(lsim,center,orgim):
    """Compute object geometry features.

    Parameters
    ----------
    lsim:
        Segmented binary image
    center:
        Center coordinate(x,y) of the object
    orgim:
        Original image

    """
    
    label_img = skimage.measure.label(lsim)
    regions = regionprops(label_img)
    index = label_img[center[0],center[1]]-1

    # direct features
    Area = regions[index].area
    CentralMoments = regions[index].moments_central
    Eccentricity = regions[index].eccentricity
    Perimeter = regions[index].perimeter

    skewx=np.mean(stats.skew(lsim, axis=0, bias=True))
    skewy=np.mean(stats.skew(lsim, axis=1, bias=True))
    
    # derived features
    compact = Area/Perimeter**2
    skewness = np.sqrt(skewx**2 + skewy**2)
    cen_skew = getCentSkewness(label_img,Area, index,regions[index].centroid)
    numBranch = getRBSTim(label_img,orgim)

    return np.hstack((Area, Eccentricity, Perimeter, compact, skewness, cen_skew, numBranch))
def get_feature(fname):
    #b,_ = librosa.load(fname, res_type = 'kaiser_fast')
    b,_ = librosa.load(fname, res_type = 'kaiser_fast')
    try:
        mfcc = np.mean(librosa.feature.mfcc(y = b,n_mfcc=60).T,axis=0)
        mels = np.mean(librosa.feature.melspectrogram(b, sr = SAMPLE_RATE).T,axis = 0)
        stft = np.abs(librosa.stft(b))
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr = SAMPLE_RATE).T,axis = 0)
        contrast=np.mean(librosa.feature.spectral_contrast(S=stft, sr=SAMPLE_RATE).T,axis=0)
        tonnetz=np.mean(librosa.feature.tonnetz(librosa.effects.harmonic(b), sr = SAMPLE_RATE).T,axis = 0)
        ft2 = librosa.feature.zero_crossing_rate(b)[0]
        ft3 = librosa.feature.spectral_rolloff(b)[0]
        ft4 = librosa.feature.spectral_centroid(b)[0]
        ft5 = librosa.feature.spectral_contrast(b)[0]
        ft6 = librosa.feature.spectral_bandwidth(b)[0]
        ft2_trunc = np.hstack([np.mean(ft2),np.std(ft2), skew(ft2), np.max(ft2), np.min(ft2)])
        ft3_trunc = np.hstack([np.mean(ft3),np.std(ft3), skew(ft3), np.max(ft3), np.min(ft3)])
        ft4_trunc = np.hstack([np.mean(ft4),np.std(ft4), skew(ft4), np.max(ft4), np.min(ft4)])
        ft5_trunc = np.hstack([np.mean(ft5),np.std(ft5), skew(ft5), np.max(ft5), np.min(ft5)])
        ft6_trunc = np.hstack([np.mean(ft6),np.std(ft6), skew(ft6), np.max(ft6), np.min(ft6)])
        return pd.Series(np.hstack((mfcc,mels,chroma,contrast,tonnetz,ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))
        #d = np.hstack([mfcc,mels,chroma,contrast,tonnetz,ft2_trunc,ft3_trunc,ft4_trunc,ft5_trunc,ft6_trunc])
        #features = np.empty((0,238))
        #d = np.vstack([features,d])
    except:
        print('bad file')
        return pd.Series([0]*238)   
def colorMoment(im): 
    """Calculates the 2nd and 3rd color moments of the input image and returns values in a list."""
    #The first color moment is the mean. This is already considered as a metric for 
    #the red, green, and blue channels, so this is not included here. 
    #Only the 2nd and 3rd moments will be calculated here. 
    
    newIm = matplotlib.colors.rgb_to_hsv(im) #convert to HSV space 
     
    #Pull out each channel from the image to analyze separately. 
    HChannel = newIm[:,:,0]
    SChannel = newIm[:,:,1]
    VChannel = newIm[:,:,2]
    
    #2nd moment = standard deviation. 
    Hstd = numpy.std(HChannel) 
    Sstd = numpy.std(SChannel) 
    Vstd = numpy.std(VChannel) 
    
    #3rd Moment = "Skewness". Calculate the skew, which gives an array.
    #Then take the mean of that array to get a single value for each channel. 
    Hskew = numpy.mean(skew(HChannel))
    Sskew = numpy.mean(skew(SChannel))
    Vskew = numpy.mean(skew(VChannel))
    
    
    return [Hstd, Sstd, Vstd, Hskew, Sskew, Vskew] #return all of the metrics. 
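A minimal usage sketch for colorMoment, assuming numpy, matplotlib.colors and scipy.stats.skew are imported as in the snippet above; the random image only stands in for real input:

import numpy

# synthetic 64x64 RGB image with channel values in [0, 1], as rgb_to_hsv expects
im = numpy.random.rand(64, 64, 3)
Hstd, Sstd, Vstd, Hskew, Sskew, Vskew = colorMoment(im)
print(Hstd, Sstd, Vstd, Hskew, Sskew, Vskew)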

    
    
    
    
def skewness_sqrt(ny_housing):
    skewness_SpLiv1 = skew(ny_housing['SalePrice'])
    skewness_grLiv1 = skew(ny_housing['GrLivArea'])
    ny_housing['SalePrice'] = np.sqrt(ny_housing['SalePrice'])
    ny_housing['GrLivArea'] = np.sqrt(ny_housing['GrLivArea'])
    skewness_SpLiv2 = skew(ny_housing['SalePrice'])
    skewness_grLiv2 = skew(ny_housing['GrLivArea'])
    return skewness_grLiv2,skewness_SpLiv2
def skewness_log(data):
    skewness_SpLiv1 = skew(data['SalePrice'])
    skewness_grLiv1 = skew(data['GrLivArea'])
    data['SalePrice'] = np.log(data['SalePrice'])
    data['GrLivArea'] = np.log(data['GrLivArea'])
    skewness_SpLiv2 = skew(data['SalePrice'])
    skewness_grLiv2 = skew(data['GrLivArea'])
    return skewness_grLiv2,skewness_SpLiv2
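A self-contained sketch of the log transform on right-skewed columns; the DataFrame below is synthetic and only stands in for the housing data used above:

import numpy as np
import pandas as pd
from scipy.stats import skew

# lognormal columns mimic the right skew of SalePrice and GrLivArea
data = pd.DataFrame({
    'SalePrice': np.random.lognormal(mean=12.0, sigma=0.4, size=500),
    'GrLivArea': np.random.lognormal(mean=7.0, sigma=0.3, size=500),
})
print('skew before:', skew(data['SalePrice']), skew(data['GrLivArea']))
print('skew after :', skewness_log(data))  # log of a lognormal is normal, so skew drops to ~0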
Example #10
def single_file_featurization(wavfile):
    '''
    INPUT:
    row of dataframe with 'audio_slice_name' as the filename of the audio sample

    OUTPUT:
    feature vector for audio sample

    Function for dataframe apply for extracting each audio sample into a feature vector
    of mfcc coefficients
    '''

    # print statements to update the progress of the processing
    try:
        # load the raw audio .wav file as a matrix using librosa
        wav_mat, sr = lr.load(wavfile, sr=sample_rate)

        # create the spectrogram using the predefined variables for mfcc extraction
        S = lr.feature.melspectrogram(wav_mat, sr=sr, n_mels=n_filters, fmax=sr/2, n_fft=window, hop_length=hop)

        # using the pre-defined spectrogram, extract the mfcc coefficients
        mfcc = lr.feature.mfcc(S=lr.logamplitude(S), n_mfcc=25)

        # calculate the first and second derivatives of the mfcc coefficients to detect changes and patterns
        mfcc_delta = lr.feature.delta(mfcc)
        mfcc_delta = mfcc_delta.T
        mfcc_delta2 = lr.feature.delta(mfcc, order=2)
        mfcc_delta2 = mfcc_delta2.T
        mfcc = mfcc.T

        # combine the mfcc coefficients and their derivatives in a column stack for analysis
        total_mfcc = np.column_stack((mfcc, mfcc_delta, mfcc_delta2))

        # use the average of each column to condense into a feature vector
        # this makes each sample uniform regardless of the length of original the audio sample
        # the following features are extracted
        # - avg of mfcc, first derivative, second derivative
        # - var of mfcc, first derivative, second derivative
        # - max of mfcc
        # - min of mfcc
        # - median of mfcc
        # - skew of mfcc
        # - kurtosis of mfcc
        avg_mfcc = np.mean(total_mfcc, axis=0)
        var_mfcc = np.var(total_mfcc, axis=0)
        max_mfcc = np.max(mfcc, axis=0)
        min_mfcc = np.min(mfcc, axis=0)
        med_mfcc = np.median(mfcc, axis=0)
        skew_mfcc = skew(mfcc, axis=0)
        kurt_mfcc = kurtosis(mfcc, axis=0)

        # combine into one vector and append to the total feature matrix
        return np.concatenate((avg_mfcc, var_mfcc, max_mfcc, min_mfcc, med_mfcc, skew_mfcc, kurt_mfcc))
    except:
        print "Uhmmm something bad happened"
        return np.zeros(7)
Example #11
 def test_skew(self):
     
     # Using the scipy.stats definition which is optimized and unittested
     data = [[0,1,2,3,4,45,18,56,24,56], [1,1,1,1,56,78,23,23]]
     expt = []
     expt.append(stats.skew(data[0]))
     expt.append(stats.skew(data[1]))
     resulting_vals = skew(data)
     self.assertTrue(np.array_equal(np.array(expt),
                                                 np.array(resulting_vals)))
Example #12
    def test_skew(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)

            r = stats.skew(x)
            rm = stats.mstats.skew(xm)
            assert_almost_equal(r, rm, 10)

            r = stats.skew(y)
            rm = stats.mstats.skew(ym)
            assert_almost_equal(r, rm, 10)
def stats_plots(V, labelsin, title=None):
  """
  4 plots of basic statistical properties. IC = intraclass correlation, 
  or the noise sources between the groups.
  """
  import scipy.stats as stats
  colors = ['darkkhaki', 'royalblue', 'forestgreen','tomato']
  var = [np.var(i) for i in V]
  skew = [stats.skew(i) for i in V]
  kurt = [stats.kurtosis(i) for i in V]
  uniq = list(set(labelsin))
  v_sort = [[] for u in uniq] # Make a blank list, preparing for IC
  v_means = [[] for u in uniq] # v_means is a list of list of means for each cell of each type
  v_var, v_skew, v_kurt = [[] for u in uniq], [[] for u in uniq], [[] for u in uniq]
  for v in range(len(V)):
    i = uniq.index(labelsin[v])
    v_sort[i].append(V[v])
    v_means[i].append(np.mean(V[v]))
    v_var[i].append(np.var(V[v]))
    v_skew[i].append(stats.skew(V[v]))
    v_kurt[i].append(stats.kurtosis(V[v]))
  # ic = var_between^2 / (var_between^2 + var_within^2)  
  ic = []
  for v in range(len(uniq)):
    I = np.var(v_means[v])**2 / \
        (np.var(v_means[v])**2 + sum([np.var(i) for i in v_sort[v]])**2)
    ic.append([I])
  print(ic)
  group_means = [np.mean(k) for k in v_means] # group_means are the master means (only 4)
  master_ic = np.var(group_means)**2 / \
              (np.var(group_means)**2 + sum([np.var(i) for i in v_means])**2)
  print('Master IC for this set: %.5f' %master_ic)
  ## Plotting stuff
  fig = plt.figure()
  axs = [fig.add_subplot(221), fig.add_subplot(222), 
         fig.add_subplot(223), fig.add_subplot(224)]
  titles = ['Variance', 'Skew', 'Kurtosis', 'Intraclass correlation']
  plot_vars = [v_var, v_skew, v_kurt, ic]
  for a in axs: # For each plot
    for u in range(len(uniq)): # For each cell type
      a.scatter(np.ones(len(plot_vars[axs.index(a)][u]))*u, plot_vars[axs.index(a)][u], 
                c=colors[u], s=80, edgecolor='k', alpha=0.6)
      if axs.index(a) == 3:
        a.set_yticks([0,0.12,0.24])
      else:
        a.locator_params(axis='y', nbins=4)
      a.set_xticks([])
      a.set_title(titles[axs.index(a)])
  # Legend and title
  #patches = [mpatches.Patch(color=colors[u], label=uniq[u]) for u in range(len(uniq))]
  #plt.legend(handles=patches, loc=5)
  if title is not None:
    plt.suptitle(title, fontsize=20)
  plt.show()
Example #14
 def test_skewness(self):
     """
     sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/
         ((sqrt(var(testmathworks)*4/5))**3)/5
     """
     y = stats.skew(self.testmathworks)
     assert_approx_equal(y,-0.29322304336607,10)
     y = stats.skew(self.testmathworks,bias=0)
     assert_approx_equal(y,-0.437111105023940,10)
     y = stats.skew(self.testcase)
     assert_approx_equal(y,0.0,10)
Example #15
def feature_skewness(svmfile):
    X, y = load_svmlight_file(svmfile, zero_based = False, query_id = False)
    m, n = X.shape
    for i in range(n):
        x = np.array(X[:,i].todense())[:,0]
        ecdf = ECDF(x)
        s1 = skew(x)
        s2 = skew(np.log2(x+1))
        s3 = skew(ecdf(x))
        if np.abs(s1) < np.abs(s2):
            print "%d %f -> %f or %f" % (i+1, s1, s2, s3)
        else:
            print "[!] %d %f -> %f or %f" % (i+1, s1, s2, s3)
Example #16
def mcnoise(data, noise_std, n, noise_scaling=1.):
    """
    Parameters
    ----------
    data : ndarray
        Array of data.
    noise_std : float
        Standard deviation of the noise
    n : int
        Number of repetition
    noise_scaling: float
        Scaling factor for noise

    Returns
    -------
    variance, variance error, skewness, skewness error, kurtosis, kurtosis error

    """
    noise_arr = np.random.normal(0, noise_std, (n, data.size)) * noise_scaling
    var_sample = np.var(data + noise_arr, axis=1)
    skew_sample = skew(data + noise_arr, axis=1)
    kurt_sample = kurtosis(data + noise_arr, axis=1)
    var_val = np.mean(var_sample)
    skew_val = np.mean(skew_sample)
    kurt_val = np.mean(kurt_sample)
    var_err = np.std(var_sample)
    skew_err = np.std(skew_sample)
    kurt_err = np.std(kurt_sample)
    return var_val, var_err, skew_val, skew_err, kurt_val, kurt_err
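A brief usage sketch for mcnoise, assuming numpy, scipy.stats.skew and scipy.stats.kurtosis are imported as in the function above; the signal is synthetic:

import numpy as np

# a smooth synthetic signal with 200 Monte Carlo noise realizations
signal = np.sin(np.linspace(0.0, 10.0, 1000))
var_val, var_err, skew_val, skew_err, kurt_val, kurt_err = mcnoise(
    signal, noise_std=0.05, n=200)
print(var_val, var_err, skew_val, skew_err, kurt_val, kurt_err)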
Example #17
def _fitstart(self, x):
    '''example method, method of moment estimator as starting values

    Parameters
    ----------
    x : array
        data for which the parameters are estimated

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    This example was written for the gamma distribution, but not verified
    with literature

    '''
    loc = np.min([x.min(),0])
    a = 4/stats.skew(x)**2
    scale = np.std(x) / np.sqrt(a)
    return (a, loc, scale)
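A standalone sketch of the same method-of-moments start values for the gamma case: the skewness of a gamma(a) distribution is 2/sqrt(a), so a is roughly 4/skew(x)**2, and since the variance is a*scale**2, scale is roughly std(x)/sqrt(a). The helper name below is hypothetical; it just restates the estimator outside the class:

import numpy as np
from scipy import stats

def gamma_moment_start(x):
    # same estimator as _fitstart above: shape from the skewness, scale from the std
    loc = np.min([x.min(), 0])
    a = 4.0 / stats.skew(x) ** 2
    scale = np.std(x) / np.sqrt(a)
    return a, loc, scale

sample = stats.gamma.rvs(a=3.0, scale=2.0, size=5000)
print(gamma_moment_start(sample))  # roughly (3, 0, 2)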
Example #18
    def compute_profile(self):
        self.rec.label_contours(self.ji_intervals)
        distributions = {}
        for key, segments in self.rec.contour_labels.items():
            distributions[key] = []
            for indices in segments:
                distributions[key].extend(self.pitch_obj.pitch[indices[0]:indices[1]])

        parameters = {}
        for interval, distribution in distributions.items():
            distribution = np.array(distribution)
            #TODO: replace -10000 with whatever the bound is for invalid pitch values in cent scale
            distribution = distribution[distribution >= -10000]
            [n, be] = np.histogram(distribution, bins=1200)
            bc = (be[1:] + be[:-1])/2.0
            peak_pos = bc[np.argmax(n)]
            peak_mean = float(np.mean(distribution))
            peak_variance = float(variation(distribution))
            peak_skew = float(skew(distribution))
            peak_kurtosis = float(kurtosis(distribution))
            pearson_skew = float(3.0 * (peak_mean - peak_pos) / np.sqrt(abs(peak_variance)))
            parameters[interval] = {"position": float(peak_pos),
                                    "mean": peak_mean,
                                    "amplitude": float(max(n)),
                                    "variance": peak_variance,
                                    "skew1": peak_skew,
                                    "skew2": pearson_skew,
                                    "kurtosis": peak_kurtosis}
        all_amps = [parameters[interval]["amplitude"] for interval in parameters.keys()]
        peak_amp_sum = sum(all_amps)
        for interval in parameters.keys():
            parameters[interval]["amplitude"] = parameters[interval]["amplitude"]/peak_amp_sum

        self.intonation_profile = parameters
def test_cont_basic_slow():
    # same as above for slow distributions
    for distname, arg in distcont[:]:
        if distname not in distslow: continue
        distfn = getattr(stats, distname)
        np.random.seed(765456)
        sn = 1000
        rvs = distfn.rvs(size=sn,*arg)
        sm = rvs.mean()
        sv = rvs.var()
        skurt = stats.kurtosis(rvs)
        sskew = stats.skew(rvs)
        m,v = distfn.stats(*arg)
        yield check_sample_meanvar_, distfn, arg, m, v, sm, sv, sn, distname + \
              'sample mean test'
        # the sample skew kurtosis test has known failures, not very good distance measure
        #yield check_sample_skew_kurt, distfn, arg, sskew, skurt, distname
        yield check_moment, distfn, arg, m, v, distname
        yield check_cdf_ppf, distfn, arg, distname
        yield check_sf_isf, distfn, arg, distname
        yield check_pdf, distfn, arg, distname
        yield check_pdf_logpdf, distfn, arg, distname
        yield check_cdf_logcdf, distfn, arg, distname
        yield check_sf_logsf, distfn, arg, distname
        #yield check_oth, distfn, arg # is still missing
        if distname in distmissing:
            alpha = 0.01
            yield check_distribution_rvs, distname, arg, alpha, rvs
def jarque_bera(resids):
    """
    Calculate residual skewness, kurtosis, and do the JB test for normality

    Parameters
    -----------
    resids : array-like

    Returns
    -------
    JB, JBpv, skew, kurtosis

    JB = n/6*(S^2 + (K-3)^2/4)

    JBpv is the Chi^2 two-tail probability value

    skew is the measure of skewness

    kurtosis is the measure of kurtosis

    """
    resids = np.asarray(resids)
    # Calculate residual skewness and kurtosis
    skew = stats.skew(resids)
    kurtosis = 3 + stats.kurtosis(resids)

    # Calculate the Jarque-Bera test for normality
    JB = (resids.shape[0]/6) * (skew**2 + (1/4)*(kurtosis-3)**2)
    JBpv = stats.chi2.sf(JB, 2)

    return JB, JBpv, skew, kurtosis
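A quick usage sketch, assuming numpy and scipy.stats are imported as np and stats; for approximately normal residuals the JB statistic should be small and the p-value large:

import numpy as np

resids = np.random.normal(size=500)
JB, JBpv, S, K = jarque_bera(resids)
print("JB=%.3f  p=%.3f  skew=%.3f  kurtosis=%.3f" % (JB, JBpv, S, K))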
 def computeDMCurveStatScores(self):
     """
     Computes and returns summary statistics of the candidate DM curve.
     
     Parameters:
     N/A
     
     Returns:
     A list containing [mean, stdev, skew, kurtosis] of the DM curve data.
     
     """
     
     try:
         bins=[]
         bins=self.profileOps.getDMCurveData(self.rawdata,self.profileIndex)
         
         mn = mean(bins)
         stdev = std(bins)
         skw = skew(bins)
         kurt = kurtosis(bins)
         
         stats = [mn,stdev,skw,kurt]
         return stats  
     
     except Exception as e: # catch *all* exceptions
         print "Error getting DM curve stat scores from PHCX file\n\t", sys.exc_info()[0]
         print self.format_exception(e)
         raise Exception("DM curve stat score extraction exception")
         return []
Example #22
 def grid_color_stat(patient_grid_1_color):
     shape_stats = np.zeros(4)
     shape_stats[0] = np.mean(patient_grid_1_color.flatten())
     shape_stats[1] = np.std(patient_grid_1_color.flatten())
     shape_stats[2] = skew(patient_grid_1_color.flatten())
     shape_stats[3] = kurtosis(patient_grid_1_color.flatten())
     return shape_stats
Example #23
 def test_rolling_skew(self):
     try:
         from scipy.stats import skew
     except ImportError:
         raise nose.SkipTest('no scipy')
     self._check_moment_func(moments.rolling_skew,
                             lambda x: skew(x, bias=False))
Example #24
def perf_stats(
        returns,
        returns_style='compound',
        return_as_dict=False,
        period=DAILY):
    """Calculates various performance metrics of a strategy, for use in
    plotting.show_perf_stats.

    Parameters
    ----------
    returns : pd.Series
        Daily returns of the strategy, noncumulative.
         - See full explanation in tears.create_full_tear_sheet.
    returns_style : str, optional
       See annual_return's style
    return_as_dict : boolean, optional
       If True, returns the computed metrics in a dictionary.
    period : str, optional
        - defines the periodicity of the 'returns' data for purposes of
        annualizing. Can be 'monthly', 'weekly', or 'daily'
        - defaults to 'daily'.

    Returns
    -------
    dict / pd.DataFrame
        Performance metrics.

    """

    all_stats = OrderedDict()
    all_stats['annual_return'] = annual_return(
        returns,
        style=returns_style, period=period)
    all_stats['annual_volatility'] = annual_volatility(returns, period=period)
    all_stats['sharpe_ratio'] = sharpe_ratio(
        returns,
        returns_style=returns_style, period=period)
    all_stats['calmar_ratio'] = calmar_ratio(
        returns,
        returns_style=returns_style, period=period)
    all_stats['stability'] = stability_of_timeseries(returns)
    all_stats['max_drawdown'] = max_drawdown(returns)
    all_stats['omega_ratio'] = omega_ratio(returns)
    all_stats['sortino_ratio'] = sortino_ratio(returns)
    # TODO: The information_ratio method requires
    # a second argument for benchmark returns.
    # Setting information_ratio to NaN until
    # benchmark returns are added as an argument
    # to this method.
    all_stats['information_ratio'] = np.nan
    all_stats['skewness'] = stats.skew(returns)
    all_stats['kurtosis'] = stats.kurtosis(returns)
    if return_as_dict:
        return all_stats
    else:
        all_stats_df = pd.DataFrame(
            index=list(all_stats.keys()),
            data=list(all_stats.values()))
        all_stats_df.columns = ['perf_stats']
        return all_stats_df
 def computeProfileStatScores(self):
     """
     Builds the scores using raw profile intensity data only. Returns the scores.
     
     Parameters:
     N/A
     
     Returns:
     A list containing [mean, stdev, skew, kurtosis] of the profile intensities.
     """
     
     try:
         
         bins =[] 
         for intensity in self.profile:
             bins.append(float(intensity))
         
         mn = mean(bins)
         stdev = std(bins)
         skw = skew(bins)
         kurt = kurtosis(bins)
         
         stats = [mn,stdev,skw,kurt]
         return stats
     
     except Exception as e: # catch *all* exceptions
         print "Error getting Profile stat scores from PHCX file\n\t", sys.exc_info()[0]
         print self.format_exception(e)
         raise Exception("Profile stat score extraction exception")
         return []
Example #26
def calc_statistics(x):
    n = x.shape[0]  # number of samples

    # manual computation of the first four moments
    m = 0
    m2 = 0
    m3 = 0
    m4 = 0
    for t in x:
        m += t
        m2 += t*t
        m3 += t**3
        m4 += t**4
    m /= n
    m2 /= n
    m3 /= n
    m4 /= n

    mu = m
    sigma = np.sqrt(m2 - mu*mu)
    skew = (m3 - 3*mu*m2 + 2*mu**3) / sigma**3
    kurtosis = (m4 - 4*mu*m3 + 6*mu*mu*m2 - 4*mu**3*mu + mu**4) / sigma**4 - 3
    print('manually computed mean, std, skewness, kurtosis:', mu, sigma, skew, kurtosis)

    # verify with library functions
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    skew = stats.skew(x)
    kurtosis = stats.kurtosis(x)
    return mu, sigma, skew, kurtosis
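A short usage sketch, assuming numpy and scipy.stats are imported as np and stats; for a large standard normal sample the four values should be close to 0, 1, 0 and 0:

import numpy as np

x = np.random.randn(100000)
mu, sigma, skewness, kurt = calc_statistics(x)
print(mu, sigma, skewness, kurt)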
def get_stat_feature(fname):
    #b,_ = librosa.load(fname, res_type = 'kaiser_fast')
    b,_ = librosa.load(fname, res_type = 'kaiser_fast')
    try:
        #basic statistical features
        length = len(b)
        mean = np.mean(b)
        minimum = np.min(b)
        maximum = np.max(b)
        std = np.std(b)
        rms = np.sqrt(np.mean(b**2))
        kurt = kurtosis(b)
        Skew = skew(b)
        #Audio length feature
        data,samp_rate = librosa.effects.trim(b,top_db = 40)
        len_init = len(data) 
        ratio_init = len_init/length
        splits = librosa.effects.split(b, top_db=40)
        if len(splits) > 1:
            b = np.concatenate([b[x[0]:x[1]] for x in splits]) 
        len_final = len(b) 
        ratio_final = len_final/length
        #return pd.Series([mean,minimum,maximum,std,rms,kurt,Skew,len_init,ratio_init,len_final,ratio_final])
        return pd.Series(np.hstack((mean,minimum,maximum,std,rms,kurt,Skew,len_init,ratio_init,len_final,ratio_final)))
    except:
        print("Bad file at {}".format(fname))
        return pd.Series([0]*11)      
Example #28
def test_distribution(data, mask=None):
    logger.info("Testing distribution.")
    data = data.reshape(data.shape[0],
                        reduce(lambda x, y: x * y, data.shape[1:4]))
    if mask is not None:
        mask_idx = np.where(mask.flatten() == 1)[0].tolist()
        data = data[:, mask_idx]
    k = kurtosis(data, axis=0)
    s = skew(data, axis=0)

    logger.info("Proportion voxels k <= -1: %.2f"
                % (len(np.where(k <= -1)[0].tolist()) * 1. / data.shape[1]))
    logger.info("Proportion voxels -1 < k < 1: %.2f"
                % (len(np.where(np.logical_and(k > -1, k < 1))[0].tolist())
                   * 1. / data.shape[1]))
    logger.info("Proportion voxels 1 < k < 2: %.2f"
                % (len(np.where(np.logical_and(k >= 1, k < 2))[0].tolist())
                   * 1. / data.shape[1]))
    logger.info("Proportion voxels 2 < k < 3: %.2f"
                % (len(np.where(np.logical_and(k >= 2, k < 3))[0].tolist())
                   * 1. / data.shape[1]))
    logger.info("Proportion voxels k >= 3: %.2f"
                % (len(np.where(k >= 3)[0].tolist()) * 1. / data.shape[1]))

    values = len(np.unique(data))
    if (values * 1. / reduce(lambda x, y: x * y, data.shape) < 10e-4):
        logger.warn("Quantization probable (%d unique values out of %d)."
                    % (values, reduce(lambda x, y: x * y, data.shape)))
    logger.info("Number of unique values in data: %d" % values)

    logger.info("Krutosis k: %.2f (%.2f std) and skew s: %.2f (%.2f std)"
                % (k.mean(), k.std(), s.mean(), s.std()))
Example #29
def statistical_spectrum_descriptors(spectrogram):
    """
    Statistical Spectrum Descriptors of the STFT.

    Parameters
    ----------
    spectrogram : numpy array
        Magnitude spectrogram.

    Returns
    -------
    statistical_spectrum_descriptors : dict
        Statistical spectrum descriptors of the spectrogram.

    References
    ----------
    .. [1] Thomas Lidy and Andreas Rauber,
           "Evaluation of Feature Extractors and Psycho-acoustic
           Transformations for Music Genre Classification",
           Proceedings of the 6th International Conference on Music Information
           Retrieval (ISMIR), 2005.

    """
    from scipy.stats import skew, kurtosis
    return {'mean': np.mean(spectrogram, axis=0),
            'median': np.median(spectrogram, axis=0),
            'variance': np.var(spectrogram, axis=0),
            'skewness': skew(spectrogram, axis=0),
            'kurtosis': kurtosis(spectrogram, axis=0),
            'min': np.min(spectrogram, axis=0),
            'max': np.max(spectrogram, axis=0)}
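A minimal usage sketch with a synthetic magnitude spectrogram (frames x frequency bins) standing in for a real STFT:

import numpy as np

# 200 frames x 1024 frequency bins of non-negative magnitudes
spec = np.abs(np.random.randn(200, 1024))
ssd = statistical_spectrum_descriptors(spec)
print(sorted(ssd.keys()))
print(ssd['mean'].shape)  # one value per frequency bin -> (1024,)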
Example #30
def get_skews(_list):

    skews = []
    for i in _list:
        skews.append(stats.skew(i))
    
    return skews
Example #31
 def plot_trace(self, wplot, proctype, wroi, color):
     if wplot == 1:
         wp = self.p1
     else:
         wp = self.p2
     if proctype == 0 or proctype == 2:
         # motSVD
         if proctype == 0:
             ir = 0
         else:
             ir = wroi + 1
         cmap = cm.get_cmap("hsv")
         nc = min(10, self.motSVDs[ir].shape[1])
         cmap = (255 * cmap(np.linspace(0, 0.2, nc))).astype(int)
         norm = (self.motSVDs[ir][:, 0]).std()
         tr = (self.motSVDs[ir][:, :10]**2).sum(axis=1)**0.5 / norm
         for c in np.arange(0, nc, 1, int)[::-1]:
             pen = pg.mkPen(tuple(cmap[c, :]),
                            width=1)  #, style=QtCore.Qt.DashLine)
             tr2 = self.motSVDs[ir][:, c] / norm
             tr2 *= np.sign(skew(tr2))
             wp.plot(tr2, pen=pen)
         pen = pg.mkPen(color)
         wp.plot(tr, pen=pen)
         wp.setRange(yRange=(-3, 3))
     elif proctype == 1:
         pup = self.pupil[wroi]
         pen = pg.mkPen(color, width=2)
         pp = wp.plot(zscore(pup['area_smooth']) * 2, pen=pen)
         if 'com_smooth' in pup:
             pupcom = pup['com_smooth'].copy()
         else:
             pupcom = pup['com'].copy()
         pupcom -= pupcom.mean(axis=0)
         norm = pupcom.std()
         pen = pg.mkPen((155, 255, 155), width=1, style=QtCore.Qt.DashLine)
         py = wp.plot(pupcom[:, 0] / norm * 2, pen=pen)
         pen = pg.mkPen((0, 100, 0), width=1, style=QtCore.Qt.DashLine)
         px = wp.plot(pupcom[:, 1] / norm * 2, pen=pen)
         tr = np.concatenate((zscore(pup['area_smooth'])[np.newaxis, :] * 2,
                              pupcom[:, 0][np.newaxis, :] / norm * 2,
                              pupcom[:, 1][np.newaxis, :] / norm * 2),
                             axis=0)
         lg = wp.addLegend(offset=(0, 0))
         lg.addItem(pp, "<font color='white'><b>area</b></font>")
         lg.addItem(py, "<font color='white'><b>ypos</b></font>")
         lg.addItem(px, "<font color='white'><b>xpos</b></font>")
     elif proctype == 3:
         tr = zscore(self.blink[wroi])
         pen = pg.mkPen(color, width=2)
         wp.plot(tr, pen=pen)
     elif proctype == 4:
         running = self.running[wroi]
         running *= np.sign(running.mean(axis=0))
         running -= running.min()
         running /= running.max()
         running *= 16
         running -= 8
         wp.plot(running[:, 0], pen=color)
         wp.plot(running[:, 1], pen=color)
         tr = running.T
     return tr
Example #32
plt.show()
sns.distplot(df_train['TotalBsmtSF'], bins=50, fit=norm)
plt.show()
sns.distplot(df_train['GarageArea'], bins=50, fit=norm)
plt.show()
sns.distplot(df_train[:train_len]['SalePrice'], bins=50, fit=norm)
plt.show()

# We can see that some variables and the target have pronounced skewness.
# To normalize this and make model training easier,
# we will apply a log transform to the variables that show this characteristic.

df_train['SalePrice'][:train_len] = np.log1p(df_train['SalePrice'][:train_len])

skewness = df_train.select_dtypes(
    exclude='object').apply(lambda x: stats.skew(x))
skewness = skewness[abs(skewness) > 0.6]
skewed_features = skewness.index
df_train[skewed_features] = np.log1p(df_train[skewed_features])

# Gerar dummies das categoricas
df_train = pd.get_dummies(df_train)

# 1o treinamento
from sklearn.linear_model import LinearRegression, LassoCV

train_set = df_train[:train_len]
test_set = df_train[train_len:].drop('SalePrice', axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(
    train_set.drop('SalePrice', axis=1), train_set.SalePrice)
Example #33
    lbl.fit(list(all_data[c].values)) 
    all_data[c] = lbl.transform(list(all_data[c].values))
       
print('Shape all_data: {}'.format(all_data.shape))

##### Add a new, additional feature #####

all_data['TotalSF'] = all_data['TotalBsmtSF'] + all_data['1stFlrSF'] + all_data['2ndFlrSF']

##### Identify the columns that are numeric #####

numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index

# Check the skew of all numerical features

skewed_feats = all_data[numeric_feats].apply(lambda x: skew(x.dropna())).sort_values(ascending=False)
print("\nSkew in numerical features: \n")
skewness = pd.DataFrame({'Skew' :skewed_feats})
skewness.head(10)

##### Checking which measures have to be Box-Cox transformed ######

skewness = skewness[abs(skewness) > 0.75]
print("There are {} skewed numerical features to Box Cox transform".format(skewness.shape[0]))

from scipy.special import boxcox1p
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    #all_data[feat] += 1
    all_data[feat] = boxcox1p(all_data[feat], lam)
Example #34
df2 = pd.read_csv(path2)

# Impute missing values with mean
df2 = df2.replace("?","NaN")

mean_imputer = Imputer(missing_values='NaN',strategy='mean',axis=0)

df2['normalized-losses'] = mean_imputer.fit_transform(df2[['normalized-losses']])

df2['horsepower'] = mean_imputer.fit_transform(df2[['horsepower']])
# Skewness of numeric features

num_cols = df2._get_numeric_data().columns

for num_col in num_cols:
    if skew(df2[num_col].values)>1:
        print(num_col)
        df2[num_col]= np.sqrt(df2[num_col])

print(df2.head())

cat_cols = list(set(df2.columns)- set(num_cols))

# Label encode 
label_encoder  = LabelEncoder()

for cat_col in cat_cols:
        df2[cat_col]= label_encoder.fit_transform(df2[cat_col])

df2['area']=df2['height']*df2['width']
    upper_bound = np.mean(data) + (np.percentile(data, confidence_level) *
                                   standard_error)
    lower_bound = np.mean(data) - (np.percentile(data, confidence_level) *
                                   standard_error)
    return lower_bound, upper_bound


data = read_data('nerve.txt')
bootstrap_sample = bootstrap(data, num_of_simulation=10000)

median = np.median(bootstrap_sample, axis=1)
plt.figure(0)
plt.hist(median, bins=10, label='Median (bootstrap)')
plt.savefig('median.png')

skewness = stats.skew(bootstrap_sample, axis=1)
plt.figure(1)
plt.hist(skewness, bins=50, label='Skewness (bootstrap)')
plt.savefig('skewness.png')

# Basic bootstrap confidence interval
print("Median CI (Basic) =", confidence_interval(median,
                                                 confidence_level=0.95))
print("Skewness CI (Basic) =",
      confidence_interval(skewness, confidence_level=0.95))

# Bootstrap-t confidence interval
print("Median CI (Bootstrap-t) =", bootstrap_t(median, confidence_level=0.95))
print("Skewness CI (Bootstrap-t) =",
      bootstrap_t(skewness, confidence_level=0.95))
alldata.shape

#create new data
train_new = alldata[alldata['SalePrice'].notnull()]
test_new = alldata[alldata['SalePrice'].isnull()]

print('Train', train_new.shape)
print('----------------')
print('Test', test_new.shape)

#get numeric features
numeric_features = [f for f in train_new.columns if train_new[f].dtype != object]

#transform the numeric features using log(x + 1)
from scipy.stats import skew
skewed = train_new[numeric_features].apply(lambda x: skew(x.dropna().astype(float)))
skewed = skewed[skewed > 0.75]
skewed = skewed.index
train_new[skewed] = np.log1p(train_new[skewed])
test_new[skewed] = np.log1p(test_new[skewed])
del test_new['SalePrice']

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(train_new[numeric_features])
scaled = scaler.transform(train_new[numeric_features])

for i, col in enumerate(numeric_features):
       train_new[col] = scaled[:,i]

numeric_features.remove('SalePrice')
Example #37
plt.title('Area distribution')

fig = plt.figure()
res = stats.probplot(train['area'], plot=plt)
plt.show()

y_train = train.area.values

print("Skewness: %f" % train['area'].skew())
print("Kurtosis: %f" % train['area'].kurt())

numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index

# Check the skew of all numerical features
skewed_feats = all_data[numeric_feats].apply(
    lambda x: skew(x.dropna())).sort_values(ascending=False)
skewness = pd.DataFrame({'Skewed Features': skewed_feats})
print(skewness.head())

skewness = skewness[abs(skewness) > 0.75]
print("There are {} skewed numerical features to Box Cox transform".format(
    skewness.shape[0]))

from scipy.special import boxcox1p
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    all_data[feat] = boxcox1p(all_data[feat], lam)
    all_data[feat] += 1

all_data = pd.get_dummies(all_data)
def getBorderFeatures(img, nim):
    """genear descriptores basados en bordes No de lineas rectas, cantidad de bordes img es una imagen png 256x256 cargada con cv2

    :param img: 256x256 RGB intensity normalice image readed with OCV

    :param nim: image flattern in a one dimention vecto

    :return: dictionary with some border features: number of lines, numbre of border pixels in image
    """
    # first, for the image without resizing

    imgColor = nim
    img2 = remove_transparency(imgColor, 0)

    # Detect edges using Canny
    img_edged = cv2.Canny(img2, 100, 200)

    bordes = img_edged.sum()
    # Count the straight lines using HoughLines
    lines = cv2.HoughLines(
        img_edged, 1, np.pi / 180, 64
    )  # threshold: normally between a quarter and half of the image width W in pixels
    try:
        if lines.any():
            lines = len(lines)
        else:
            lines = 0
    except:
        lines = 0
    # Then for the shrunken (32x32) image
    img = cv2.GaussianBlur(img, (3, 3), 0)
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    imgColor = cv2.normalize(img,
                             None,
                             alpha=0,
                             beta=256,
                             norm_type=cv2.NORM_MINMAX,
                             dtype=cv2.CV_32F)
    imgGray = cv2.cvtColor(imgColor, cv2.COLOR_BGR2GRAY)
    imgGray = imgGray.astype('uint8')

    std = imgGray.std(axis=0).std(axis=0)
    media = imgGray.mean(axis=0).std(axis=0)
    ventana = int(math.sqrt(imgGray.size) / 4 + 1)

    # Detect the edges using Canny
    img2 = remove_transparency(imgColor, 0)
    img_edged = cv2.Canny(img2, 100, 200)

    bordes32 = img_edged.sum()

    # Binarize the image with an adaptive threshold, leaving an image "edge" with a simplified texture
    umbral = cv2.adaptiveThreshold(imgGray, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, ventana, 2 * std)

    # Binarize the image using Otsu's method
    blur = cv2.GaussianBlur(imgGray, (3, 3), 0)
    ret3, th3 = cv2.threshold(blur, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Detect and count straight lines
    lines32 = cv2.HoughLines(img_edged, 1, np.pi / 180, 16)
    try:
        if lines32.any():
            lines32 = len(lines32)
        else:
            lines32 = 0

    except:
        lines32 = 0

    simetriaImagenBinarizada = skew(th3).sum()
    curtosisImagenBinarizada = kurtosis(th3).sum()
    sumaUmbral = umbral.sum()

    return {
        'lines': lines,
        'bordes': bordes,
        'lines32': lines32,
        'bordes32': bordes32,
        'simetriaImagenBinarizada': simetriaImagenBinarizada,
        'curtosisImagenBinarizada': curtosisImagenBinarizada,
        'sumaUmbral': sumaUmbral
    }
Example #39
def section_action(request):
    __analysis = []
    context = {}
    __grades = ''
    __domain = request.get_host()

    __location = ''
    __teachers = ''
    __college = ''
    __department = ''
    __course = ''
    __course_code = ''
    __section = ''
    __message = ''
    __mean = 0
    __std = 0
    __min = 0
    __max = 0
    __skewness = 0
    __correlation = 0
    __show__result = 0
    __histogramfile = ''

    __mids = []
    __finals = []
    __totals = []
    __line = 7

    try:
        sem.acquire()
        print("The semaphore is locked")
        __grades = request.FILES['grades']

        if request.method == 'GET':
            raise Exception('Internal Error')

        if request.method == 'POST':

            # upload the file
            _grades_uploaded_file = 'data/upload/sections/' + str(__grades)
            if not os.path.exists('data/upload/sections/'):
                os.makedirs('data/upload/sections/')

            with open(_grades_uploaded_file, 'wb+') as destination:
                for chunk in __grades.chunks():
                    destination.write(chunk)

            # read the content of the uploaded file
            workbook = xl.open_workbook(_grades_uploaded_file, on_demand=True)
            worksheet = workbook.sheet_by_index(0)
            try:
                ____tmp = worksheet.cell_value(6, 5)
            except IndexError:
                ____tmp = ''

            __section = int(worksheet.cell_value(4, 1))
            __location = worksheet.cell_value(0, 1)

            if __section == '':
                raise Exception('Unable to read the section from the excel file !!!')
            if __location == '':
                raise Exception('Unable to read the location from the excel file !!!')

            __newfilename = 'data/upload/sections/section_' + str(__section) + _grades_uploaded_file[-4:]
            os.rename(_grades_uploaded_file, __newfilename)

            __section_obj = None
            __actualSemester = Semester.objects.get(semester_isInUse=True)
            for _mytest in Section.objects.all():
                if _mytest.section_department.department_location.college_location.location_name_ar == __location \
                        and _mytest.section_code == __section \
                        and _mytest.section_semester == __actualSemester:
                    __section_obj = _mytest
                    print('Section found with id = ' + str(_mytest.section_id))
                    __location = _mytest.section_department.department_location.college_location.location_name
                    __college = _mytest.section_department.department_location.college_name
                    __department = _mytest.section_department.department_name
                    __course = _mytest.section_course.course_name
                    __course_code = _mytest.section_course.course_code
                    for _teach in _mytest.section_teachers.all():
                        __teachers = __teachers + ' ' + _teach.teacher_name_ar
                    break

            if ____tmp == '':  # grades without mids
                while True:
                    try:
                        __student = worksheet.cell_value(__line, 0)
                        if worksheet.cell_value(__line, 2) != '':
                            __finals.append(int(worksheet.cell_value(__line, 2)))
                        if worksheet.cell_value(__line, 3) != '':
                            __totals.append(int(worksheet.cell_value(__line, 3)))
                        __line += 1
                    except IndexError:
                        break
            else:
                while True:
                    try:
                        __student = worksheet.cell_value(__line, 0)
                        if worksheet.cell_value(__line, 2) != '':
                            __mids.append(int(worksheet.cell_value(__line, 2)))
                        if worksheet.cell_value(__line, 3) != '':
                            __finals.append(int(worksheet.cell_value(__line, 3)))
                        if worksheet.cell_value(__line, 4) != '':
                            __totals.append(int(worksheet.cell_value(__line, 4)))
                        __line += 1
                    except IndexError:
                        break

            # debug data

            # print('grades = ' + str(__grades))
            # print('Section = ' + str(__section))
            # print('MIDs = ' + str(__mids))
            # print('Finals = ' + str(__finals))
            # print('Totals = ' + str(__totals))

            # compute statistics about the course grades

            if __section_obj == None:
                raise Exception('Unable to recognise the section in the database !!!')

            if __section != '' and __section_obj != None:
                __message = 'The grade Excel file was well loaded'
                __mean = float("{0:.4f}".format(statistics.mean(__totals)))
                __std = float("{0:.4f}".format(statistics.stdev(__totals)))
                __skewness = float("{0:.4f}".format(skew(__totals, bias=False)))
                if len(__mids) == 0:
                    __correlation = -99.99
                else:
                    __correlation = float("{0:.4f}".format(pearsonr(__mids, __finals)[0]))
                __min = min(__totals)
                __max = max(__totals)

                # plot the histogram

                a = np.array(__totals)
                # Fit a normal distribution to the data:
                mu, std = norm.fit(a)
                number = a.size

                # Plot the histogram.
                plt.hist(a, bins=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], density=True, color='#607c8e',
                         edgecolor='black',
                         rwidth=0.8)
                # Plot the PDF.
                x = np.linspace(0, 100, 100)
                p = norm.pdf(x, mu, std)
                plt.plot(x, p, 'k', linewidth=3)
                title = "Histogram: Section = %d,  Number of Students = %d" % (__section, number)
                plt.title(title)

                __histogramfile = "data/media/histogram_section" + str(__section) + ".png"
                plt.savefig(__histogramfile)
                plt.close()

                print("mean : " + str(__mean))
                print("std : " + str(__std))
                print("skewness : " + str(__skewness))
                print("correlation : " + str(__correlation))

                __show__result = 1
                # save the section analysis
                __grades_data = {}
                __grades_data['mids'] = __mids
                __grades_data['finals'] = __finals
                __grades_data['totals'] = __totals
                str_grades = str(__grades_data)  # inverse dict2 = eval(str1)

                try:
                    obj = SectionDocRequest.objects.get(section=__section_obj)
                    print("--------> Updating the section data with id= " + str(obj.section_doc_id))
                except SectionDocRequest.DoesNotExist:
                    obj = SectionDocRequest()
                    print("--------> Creating new section data")

                obj.doc_correlation = __correlation
                obj.doc_explanation = ''
                obj.doc_max = __max
                obj.doc_mean = __mean
                obj.doc_min = __min
                obj.doc_skewness = __skewness
                obj.doc_std_deviation = __std
                obj.histogram = __histogramfile
                obj.student_grades = str_grades
                obj.section = __section_obj

                obj.save()

                # get the server domain
                if request.is_secure():
                    __domain = 'https://' + request.get_host()
                else:
                    __domain = 'http://' + request.get_host()
                __security = request.is_secure()



    except MultiValueDictKeyError:
        __message = 'Please fill all the form.'
    except ValueError as e:
        __message = 'Please use the grade file provided by the registration portal (Academia) without any change.'
        print(str(e))

    except Exception as e:
        __message = str(e)
        print(str(e))
    finally:
        sem.release()
        print("The semaphore was released")

    if len(__mids) == 0:
        __correlation = 'N/A'


    context = {
        'show_result': __show__result,
        'message': __message,
        'mean': __mean,
        'std': __std,
        'skewness': __skewness,
        'correlation': __correlation,
        'min': __min,
        'max': __max,
        'histogram': __histogramfile,
        'domain': __domain,
        'section': __section,
        'location': __location,
        'college': __college,
        'department': __department,
        'course': __course,
        'course_code': __course_code,
        'teachers': __teachers,
    }
    __results = analysis(context)
    context['analysis'] = __results



    del __grades
    del __section
    del __message
    del __mean
    del __std
    del __min
    del __max
    del __skewness
    del __correlation
    del __show__result
    del __histogramfile
    del __domain

    return render(request, 'section_result.html', context=context)
Example #40
def skew(X):
    ''' skewness for each variable in a segmented time series '''
    return stats.skew(X, axis=1)
Example #41
def get_features(df_, fs):
    """
    Calculates features to be used for ECG signal quality classification
    
    Parameters
    ----------
    df_: pandas dataframe
        Dataframe of ECG data, must contain the following columns: 
        processed, r_peaks, and beats

    fs: float
        Sampling rate of signal (must be in Hertz)
        
     
    Returns
    -------
    df: pandas dataframe
        Dataframe appended with computed features
    """

    df = pd.DataFrame.copy(df_)

    print('Computing features...')
    # features from statistics of magnitude of ECG signal
    df.loc[:, 'f_stddev'] = df.processed.apply(lambda x: np.nanstd(x))
    df.loc[:, 'f_kurtosis'] = df.processed.apply(lambda x: kurtosis(x))
    df.loc[:, 'f_skewness'] = df.processed.apply(lambda x: skew(x))
    df.loc[:, 'f_rms'] = df.processed.apply(lambda x: rms(x))
    df.loc[:, 'f_energy'] = df.processed.apply(lambda x: sig_energy(x))

    # features from power spectrum of signal
    df.loc[:, 'f_relpower'] = df.processed.apply(lambda x: rel_power(x, fs))
    df.loc[:, 'f_relbasepower'] = df.processed.apply(lambda x: rel_power(
        x, fs, num_freqbounds=(1, 40), denom_freqbounds=(0, 40)))
    fbins, fmax = 10, 10
    powspec_vals = np.vstack(
        df.processed.apply(
            lambda x: power_spec(x, fs, bins=fbins, fmax=fmax)).values)
    for i in range(fbins):
        df.loc[:, 'f_powspec' + str(i)] = list(powspec_vals[:, i])

    # features from physiological parameters
    df.loc[:, 'f_rpeakcount'] = df.r_peaks.map(len)
    df.loc[:, 'f_nhr'] = df.processed.apply(lambda x: normal_hr(x, fs))
    df.loc[:, 'f_hrv'] = df.r_peaks.apply(lambda x: heart_rate_var(x, fs))
    df.loc[:, 'f_rtor'] = df.r_peaks.apply(lambda x: rtor_duration(x, fs))
    df.loc[:,
           'f_sumbe'] = df.beats.apply(lambda x: sum_beat_energy(np.array(x)))

    df.loc[:, 'f_pca'] = 0
    df.loc[df.beats.map(len) > 0, 'f_pca'] = df.beats[
        df.beats.map(len) > 0].apply(lambda x: pca_feature(np.array(x)))

    #    df.loc[:, 'f_mbe'] = 0
    df.loc[df.beats.map(len) > 0, 'f_mbe'] = df.beats[
        df.beats.map(len) > 0].apply(lambda x: mean_beat_energy(np.array(x)))

    df.loc[:, 'f_maxminbeat'] = 0
    df.loc[df.beats.map(len) > 0, 'f_maxminbeat'] = df.beats[
        df.beats.map(len) > 0].apply(lambda x: maxmin_beat(np.array(x)))

    print('Done!')

    return df
Example #42
def myskew(l):
    if len(l) == 0:
        return 0
    ret = skew(l)
    return ret
def skewness(RR):
    return skew(RR)
Example #44
def compute_var_skew(data):

    data = data[data.notnull()]
    skewness = skew(data)

    return skewness
Example #45
import matplotlib.pyplot as plt
import statistics
from scipy.stats import skew, kurtosis


randomNums = np.random.normal(scale=3, size=1000)
randomInts = np.round(randomNums)
axis = np.arange(start=min(randomInts), stop = max(randomInts) + 1)
plt.hist(randomInts, bins = axis)

srednia = np.mean(randomInts)
mediana = np.median(randomInts)
dominanta = statistics.mode(randomInts)
od_st = np.std(randomInts)
wariancja = statistics.variance(randomInts)
skosnosc = skew(randomInts)
kurtoza = kurtosis(randomInts)

print("Średnia: ",srednia)
print("Mediana: ",mediana)
print("Dominanta: ",dominanta)
print("Odchylenie ",od_st)
print("Wariancja ",wariancja)
print("Skośność ",skosnosc)
print("Kurtoza ",kurtoza)





Example #46
def variableProfile(df, colName, varType="cat", outlierChk=True):
    df = df.copy()

    if (varType == "num"):
        print(df[colName].describe(), end="\n\n")

        plt.figure(figsize=(20, 15))
        plt.subplot(3, 1, 1)
        plt.hist(df[colName], color='lightblue', edgecolor='black', alpha=0.7)
        plt.xlabel(colName)

        plt.figure(figsize=(20, 15))
        plt.subplot(3, 1, 2)
        sns.kdeplot(df[colName])
        plt.xlabel(colName)

        plt.figure(figsize=(20, 15))
        plt.subplot(3, 1, 3)
        sns.boxplot(x=df[colName], color='lightblue')

        print(colName + " Skewness = " +
              str(round(stats.skew(df[colName][pd.notnull(df[colName])]), 3)),
              end="\n\n")

        plt.show()

        Q1, Q2, IQR, Lower_Whisker, Upper_Whisker = outlierIdenti(df[colName])
        upperOutCnt = sum(df[colName] > Upper_Whisker)
        lowerOutCnt = sum(df[colName] < Lower_Whisker)

        if (outlierChk == True):
            if ((upperOutCnt > 0) | (lowerOutCnt > 0)):

                print(printFormat.RED + "Outliers present in " + colName +
                      printFormat.END,
                      end="\n\n")

                print(colName + " IQR = " + str(round(IQR, 3)), end="\n\n")
                print(colName + " Lower outlier threshold = " +
                      str(round(Lower_Whisker, 3)),
                      end="\n\n")
                print(
                    colName +
                    " Count of observations below lower outlier threshold = " +
                    str(round(lowerOutCnt, 3)),
                    end="\n\n")
                print(
                    colName +
                    " Lower of observations below lower outlier threshold = " +
                    str(round((lowerOutCnt / df.shape[0]) * 100, 2)),
                    end="\n\n")
                print(colName + " Upper outlier threshold = " +
                      str(round(Upper_Whisker, 3)),
                      end="\n\n")
                print(
                    colName +
                    " Count of observations over upper outlier threshold = " +
                    str(round(upperOutCnt, 3)),
                    end="\n\n")
                print(
                    colName +
                    " Percentage of observations over upper outlier threshold = " +
                    str(round((upperOutCnt / df.shape[0]) * 100, 2)),
                    end="\n\n\n\n")

                outlierTreatOptions(df, colName,
                                    "Quantile-based Flooring and Capping")
                outlierTreatOptions(df, colName, "median")
                outlierTreatOptions(df, colName, "mean")
            else:
                print(printFormat.GREEN + "No outliers present in " + colName +
                      printFormat.END,
                      end="\n\n")

    if (varType == "cat"):
        print(freqTab(df, colName), end="\n\n")
        plt.figure(figsize=(15, 7))
        plt.subplot(1, 1, 1)
        plt.hist(df[colName], color='lightblue', edgecolor='black', alpha=0.7)
        plt.xlabel(colName)

        plt.show()
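The helpers referenced above (outlierIdenti, freqTab, printFormat, outlierTreatOptions) are not shown here, so the following is only a self-contained sketch of the whisker logic the numeric branch presumably relies on: skewness plus 1.5 * IQR outlier thresholds on synthetic, right-skewed data.

import numpy as np
from scipy import stats

values = np.random.lognormal(mean=0.0, sigma=0.7, size=1000)
q1, q3 = np.percentile(values, [25, 75])
iqr = q3 - q1
lower_whisker, upper_whisker = q1 - 1.5 * iqr, q3 + 1.5 * iqr
print("skewness:", round(stats.skew(values), 3))
print("count above upper whisker:", int(np.sum(values > upper_whisker)))
print("count below lower whisker:", int(np.sum(values < lower_whisker)))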
def stats_calculate_all(x, stat_config):
    """Пресметка на статистиките од дадената листа x, врз основа на stat_config
    вредностите.

    :param x: листа на временската серија на податоци
    :type x: list(float)
    :param stat_config: листа со имиња на статистики кои треба да се пресметаат
    :type stat_config: list(str)
    :return: листа со пресметаните статистики според редоследот од stat_config
    :rtype: list
    """
    assert len(set(stat_config).difference(['len', 'min', 'max', 'range', 'mean', 'hmean',
                                            'gmean', 'var', 'std', 'skew', 'kurtosis',
                                            'median', 'mode', 'energy', 'energy_sample', 'snr'])) == 0

    x_array = np.array(x)
    n = len(x)
    if n == 0:
        # keep the return type consistent with the non-empty case below
        return [0 for _ in range(len(stat_config))]

    min_value = np.min(x_array)
    if min_value < 1:
        offset = 1 + np.abs(min_value)
    else:
        offset = 0
    max_value = np.max(x_array)

    values = []
    for stat_name in stat_config:
        if stat_name == 'len':
            values.append(n)
        elif stat_name == 'min':
            values.append(min_value)
        elif stat_name == 'max':
            values.append(max_value)
        elif stat_name == 'range':
            range_value = max_value - min_value
            values.append(range_value)
        elif stat_name == 'mean':
            mean_value = np.mean(x_array)
            values.append(mean_value)
        elif stat_name == 'hmean':
            hmean_value = sp.hmean(x_array + offset)
            values.append(hmean_value)
        elif stat_name == 'gmean':
            gmean_value = sp.gmean(x_array + offset)
            values.append(gmean_value)
        elif stat_name == 'var':
            std_value = np.std(x_array)
            var_value = std_value ** 2
            values.append(var_value)
        elif stat_name == 'std':
            std_value = np.std(x_array)
            values.append(std_value)
        elif stat_name == 'skew':
            skew_value = sp.skew(x_array)
            values.append(skew_value)
        elif stat_name == 'kurtosis':
            kurtosis_value = sp.kurtosis(x_array)
            values.append(kurtosis_value)
        elif stat_name == 'median':
            median_value = np.median(x_array)
            values.append(median_value)
        elif stat_name == 'mode':
            mode_value = sp.mode(x_array)[0][0]
            values.append(mode_value)
        elif stat_name == 'energy':
            energy_value = np.sum(x_array ** 2)
            values.append(energy_value)
        elif stat_name == 'energy_sample':
            energy_sample_value = np.sum(x_array ** 2) / n
            values.append(energy_sample_value)
        elif stat_name == 'snr':
            mean_value = np.mean(x_array)
            std_value = np.std(x_array)
            snr_value = 0.0
            if std_value != 0:
                snr_value = mean_value / std_value
            values.append(snr_value)

    return values
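Example call on a made-up series, assuming (as the function body implies) that np is numpy and sp is scipy.stats in the surrounding module:

import numpy as np
import scipy.stats as sp

series = [3.1, 2.8, 3.5, 4.2, 2.9, 3.8, 5.0]
config = ['len', 'mean', 'std', 'skew', 'kurtosis', 'energy']
print(dict(zip(config, stats_calculate_all(series, config))))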
Example #48
def scanpy_hubness_analysis(
    adata,
    do_norm,
    norm_scale,
    do_log,
    do_pca,
    n_neighbors,
    metric,
    weighted,  # weighted adjmat for louvain/leiden clustering ?
    seed,
    n_comps=50,
    retained_cells_idx=None,
):

    results_dict = {}
    results_dict["params"] = dict(
        do_norm=do_norm,
        norm_scale=norm_scale,
        do_log=do_log,
        do_pca=do_pca,
        n_neighbors=n_neighbors,
        metric=metric,
        weighted=weighted,
        seed=seed,
        n_comps=n_comps,
    )

    start = time.time()

    ### preprocess, prepare clustering input ###
    if retained_cells_idx is None:
        retained_cells_idx = range(len(adata.X))

    if type(do_norm) is str:
        adata.X = scipy.sparse.csr_matrix(adata.X)

        if do_norm == "seurat":
            recipe_seurat(adata, do_log, norm_scale)
            print(f"\t\tseurat norm retained {adata.X.shape[1]} genes")
        elif do_norm == "zheng17":
            recipe_zheng17(adata, do_log, norm_scale, n_top_genes=5000)
            print(f"\t\tzheng norm retained {adata.X.shape[1]} genes")
        elif do_norm == "duo":
            recipe_duo(adata, do_log, renorm=norm_scale)
            print(f"\t\tduo norm retained {adata.X.shape[1]} genes")
        else:
            raise ValueError("do_norm not in 'duo', seurat', 'zheng17'")

    if scipy.sparse.issparse(adata.X):
        adata.X = adata.X.toarray()

    if do_log and not (type(do_norm) is str):
        print("\t\tlog_transformed data")
        sc.pp.log1p(adata)

    if do_pca:
        use_rep = "X_pca"
        sc.pp.pca(adata,
                  n_comps=min(adata.X.shape[1] - 1,
                              min(len(adata.X) - 1, n_comps)))
        X = adata.obsm["X_pca"]
        res_key = results_dict["X_pca"] = {}
    else:
        # already computed pca
        use_rep = "X_pca"
        X = adata.obsm["X_pca"]
        res_key = results_dict["X_pca"] = {}

    skews = {}
    # scanpy
    for method in ["umap", "gauss"]:
        # compute neighbors
        try:
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
            )
        except Exception:
            # retry with knn=False (dense, Gaussian-kernel graph) if the kNN call fails
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
                knn=False,
            )

        skews[method] = skew(adata.obsp["connectivities"].sum(axis=0).flat)
    print("\t\t\tScoring:", round((time.time() - start) / 60, 2), "mn")

    return skews
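A standalone sketch of the quantity returned above: roughly, the skewness of the column sums of the neighbour-graph connectivity matrix (a common hubness score). This uses sklearn's kneighbors_graph on random data instead of scanpy, purely to illustrate the computation; it is not the original pipeline.

import numpy as np
from scipy.stats import skew
from sklearn.neighbors import kneighbors_graph

X = np.random.rand(500, 20)
conn = kneighbors_graph(X, n_neighbors=15, mode='connectivity')
hubness = skew(np.asarray(conn.sum(axis=0)).ravel())
print("hubness (skew of in-degree):", round(float(hubness), 3))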
Example #49
def get_statistical_features(v, field_type, field_general_type):
    r = OrderedDict([(f['name'], None)
                     for f in field_c_statistical_features_list +
                     field_q_statistical_features_list])

    if not len(v):
        return r
    if field_general_type == 'c':
        r['list_entropy'] = list_entropy(v)

        value_lengths = [len(x) for x in v]
        r['mean_value_length'] = np.mean(value_lengths)
        r['median_value_length'] = np.median(value_lengths)
        r['min_value_length'] = np.min(value_lengths)
        r['max_value_length'] = np.max(value_lengths)
        r['std_value_length'] = np.std(value_lengths)
        r['percentage_of_mode'] = (pd.Series(v).value_counts().max() / len(v))

    if field_general_type in 'q':
        sample_mean = np.mean(v)
        sample_median = np.median(v)
        sample_var = np.var(v)
        sample_min = np.min(v)
        sample_max = np.max(v)
        sample_std = np.std(v)
        q1, q25, q75, q99 = np.percentile(v, [1, 25, 75, 99])
        iqr = q75 - q25

        r['mean'] = sample_mean
        r['normalized_mean'] = sample_mean / sample_max
        r['median'] = sample_median
        r['normalized_median'] = sample_median / sample_max

        r['var'] = sample_var
        r['std'] = sample_std
        r['coeff_var'] = (sample_std / sample_mean) if sample_mean else None
        r['min'] = sample_min
        r['max'] = sample_max
        r['range'] = r['max'] - r['min']
        r['normalized_range'] = (r['max'] - r['min']) / \
            sample_mean if sample_mean else None

        r['entropy'] = entropy(v)
        r['gini'] = gini(v)
        r['q25'] = q25
        r['q75'] = q75
        r['med_abs_dev'] = np.median(np.absolute(v - sample_median))
        r['avg_abs_dev'] = np.mean(np.absolute(v - sample_mean))
        r['quant_coeff_disp'] = (q75 - q25) / (q75 + q25)
        r['skewness'] = skew(v)
        r['kurtosis'] = kurtosis(v)
        r['moment_5'] = moment(v, moment=5)
        r['moment_6'] = moment(v, moment=6)
        r['moment_7'] = moment(v, moment=7)
        r['moment_8'] = moment(v, moment=8)
        r['moment_9'] = moment(v, moment=9)
        r['moment_10'] = moment(v, moment=10)

        # Outliers
        outliers_15iqr = np.logical_or(v < (q25 - 1.5 * iqr), v >
                                       (q75 + 1.5 * iqr))
        outliers_3iqr = np.logical_or(v < (q25 - 3 * iqr), v > (q75 + 3 * iqr))
        outliers_1_99 = np.logical_or(v < q1, v > q99)
        outliers_3std = np.logical_or(v < (sample_mean - 3 * sample_std), v >
                                      (sample_mean + 3 * sample_std))
        r['percent_outliers_15iqr'] = np.sum(outliers_15iqr) / len(v)
        r['percent_outliers_3iqr'] = np.sum(outliers_3iqr) / len(v)
        r['percent_outliers_1_99'] = np.sum(outliers_1_99) / len(v)
        r['percent_outliers_3std'] = np.sum(outliers_3std) / len(v)

        r['has_outliers_15iqr'] = np.any(outliers_15iqr)
        r['has_outliers_3iqr'] = np.any(outliers_3iqr)
        r['has_outliers_1_99'] = np.any(outliers_1_99)
        r['has_outliers_3std'] = np.any(outliers_3std)

        # Statistical Distribution
        if len(v) >= 8:
            normality_k2, normality_p = normaltest(v)
            r['normality_statistic'] = normality_k2
            r['normality_p'] = normality_p
            r['is_normal_5'] = (normality_p < 0.05)
            r['is_normal_1'] = (normality_p < 0.01)

    return r
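Field-type helpers such as list_entropy and gini are not shown above, so here is a self-contained illustration of just the plain numpy/scipy part of the quantitative ('q') branch, run on synthetic exponential data:

import numpy as np
from scipy.stats import skew, kurtosis, moment

v = np.random.exponential(scale=2.0, size=1000)
q1, q25, q75, q99 = np.percentile(v, [1, 25, 75, 99])
iqr = q75 - q25
features = {
    'skewness': skew(v),
    'kurtosis': kurtosis(v),
    'moment_5': moment(v, moment=5),
    'percent_outliers_15iqr': np.mean((v < q25 - 1.5 * iqr) | (v > q75 + 1.5 * iqr)),
    'percent_outliers_1_99': np.mean((v < q1) | (v > q99)),
}
print({k: round(float(val), 4) for k, val in features.items()})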
Example #50
 def condition(X):
     return abs(skew(X)) > threshold
question1_vectors = np.zeros((df.shape[0], 300))
for i, q in enumerate(tqdm_notebook(df.question1.values)):
    question1_vectors[i, :] = sent2vec(q)

question2_vectors  = np.zeros((df.shape[0], 300))
for i, q in enumerate(tqdm_notebook(df.question2.values)):
    question2_vectors[i, :] = sent2vec(q)

df['cosine_distance'] = [cosine(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['cityblock_distance'] = [cityblock(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['jaccard_distance'] = [jaccard(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['canberra_distance'] = [canberra(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['euclidean_distance'] = [euclidean(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['minkowski_distance'] = [minkowski(x, y, 3) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['braycurtis_distance'] = [braycurtis(x, y) for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors))]
df['skew_q1vec'] = [skew(x) for x in np.nan_to_num(question1_vectors)]
df['skew_q2vec'] = [skew(x) for x in np.nan_to_num(question2_vectors)]
df['kur_q1vec'] = [kurtosis(x) for x in np.nan_to_num(question1_vectors)]
df['kur_q2vec'] = [kurtosis(x) for x in np.nan_to_num(question2_vectors)]

df['is_duplicate'].value_counts()

df.isnull().sum()

df.drop(['question1', 'question2'], axis=1, inplace=True)
df = df[pd.notnull(df['cosine_distance'])]
df = df[pd.notnull(df['jaccard_distance'])]

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix  
Example #52
def main(test,
         folder,
         scan,
         reduced_intensity,
         reduced_q,
         temperature=None,
         structure_factor=None,
         y=None,
         ramping=False,
         scatter=None,
         background=None,
         pooled=False):
    """
    Processes data sets, create statistical fits, and outputs plots.
    =============
    --VARIABLES--
    test:               Type of liquid: "Water", "Ethanol", "Dodecane"
    folder:             Save location of the processed data set.
    scan:               Specific scan under the type of test.
    reduced_intensity   Intensity profiles reduced down to the cropped q.
    reduced_q           Cropped q range.
    temperature         Nozzle temperature.
    structure_factor    Structure factor profiles (water only).
    y                   Vertical location in spray (IJ only).
    ramping             Ramping IJ case (True/False).
    pooled              Ramping IJ pooled case (True/False)
    """

    prfl_fld = create_folder('{0}/{1}/Profiles/'.format(folder, scan))
    stats_fld = create_folder('{0}/{1}/Statistics/'.format(folder, scan))
    plt_fld = create_folder('{0}/{1}/Plots/'.format(folder, scan))
    tests_fld = create_folder('{0}/{1}/Tests/'.format(folder, scan))
    curves_fld = create_folder('{0}/{1}/Curves/'.format(folder, scan))

    if structure_factor is not None:
        sf_fld = create_folder('{0}/{1}/Structure Factor/'.format(
            folder, scan))

    if 'IJ' in str(scan) and 'Ethanol' in test:
        pinned_pts = np.abs(reduced_q - 1.40).argmin()
    elif 'IJ' in str(scan) and 'Water' in test:
        pinned_pts = np.abs(reduced_q - 2.79).argmin()
    else:
        # Find pinned points in the curves (least variation)
        intensity_std = np.std(reduced_intensity, axis=0)
        pinned_pts = find_peaks(-intensity_std)[0]
        # Find the minimum peak only (throw away every other valley)
        pinned_pts = pinned_pts[np.argmin(intensity_std[pinned_pts])]

    pinned_q = reduced_q[pinned_pts]

    # Designate fit_var
    if pooled:
        index = np.linspace(1, len(reduced_intensity), len(reduced_intensity))
        fit_var = index
        data_label = ''
    else:
        if y is not None:
            fit_var = y
            data_label = ' mm'
            np.savetxt('{0}/positions.txt'.format(prfl_fld), y)
            np.savetxt('{0}/positions.txt'.format(stats_fld), y)
        elif temperature is not None:
            fit_var = temperature
            data_label = ' K'
            np.savetxt('{0}/temperature.txt'.format(prfl_fld), temperature)
            np.savetxt('{0}/temperature.txt'.format(stats_fld), temperature)

    # Save images if scatter and background arrays are passed
    if scatter is not None:
        img_fld = create_folder('{0}/{1}/Images/'.format(folder, scan))
        saveimage(img_fld, fit_var, scatter, background)

    # Save intensities in tests_fld
    [
        np.savetxt('{0}/{1:03.0f}.txt'.format(tests_fld, i), x)
        for i, x in enumerate(reduced_intensity)
    ]

    profile('peak', fit_var, [np.max(x) for x in reduced_intensity], prfl_fld,
            stats_fld, test, plt_fld)
    profile('peakq', fit_var,
            [reduced_q[np.argmax(x)] for x in reduced_intensity], prfl_fld,
            stats_fld, test, plt_fld)
    profile('aratio', fit_var, [
        np.trapz(x[:pinned_pts], reduced_q[:pinned_pts]) /
        np.trapz(x[pinned_pts:], reduced_q[pinned_pts:])
        for x in reduced_intensity
    ], prfl_fld, stats_fld, test, plt_fld)
    profile('mean', fit_var, [np.mean(x) for x in reduced_intensity], prfl_fld,
            stats_fld, test, plt_fld)
    profile('var', fit_var, [np.var(x) for x in reduced_intensity], prfl_fld,
            stats_fld, test, plt_fld)
    profile('skew', fit_var, [stats.skew(x) for x in reduced_intensity],
            prfl_fld, stats_fld, test, plt_fld)
    profile('kurt', fit_var, [stats.kurtosis(x) for x in reduced_intensity],
            prfl_fld, stats_fld, test, plt_fld)
    profile('pca', fit_var, pca(reduced_intensity), prfl_fld, stats_fld, test,
            plt_fld)
    profile_peakq = np.loadtxt('{0}/profile_peakq.txt'.format(prfl_fld))

    rr = np.array([(x - min(fit_var)) / (max(fit_var) - min(fit_var))
                   for x in fit_var])
    bb = np.array([
        1 - (x - min(fit_var)) / (max(fit_var) - min(fit_var)) for x in fit_var
    ])

    for i, _ in enumerate(reduced_intensity):
        # Create intensity plots with q values of interest highlighted
        plt.figure()
        plt.plot(reduced_q,
                 reduced_intensity[i],
                 linestyle='-',
                 color=(rr[i], 0, bb[i]),
                 linewidth=2.0,
                 label='{0:0.1f}{1}'.format(fit_var[i], data_label))
        plt.axvline(x=profile_peakq[i],
                    linestyle='--',
                    color='C1',
                    label='peakq = {0:0.2f}'.format(profile_peakq[i]))
        plt.legend(loc='upper right')
        plt.xlabel('q (Å$^{-1}$)')
        plt.ylabel('Intensity (a.u.)')
        plt.autoscale(enable=True, axis='x', tight=True)
        plt.minorticks_on()
        plt.tick_params(which='both', direction='in')
        plt.title(test + ' Curves')
        plt.tight_layout()
        plt.savefig('{0}/curves_{1:0.1f}.png'.format(curves_fld, fit_var[i]))
        plt.close()

    if structure_factor is not None:
        for i, _ in enumerate(structure_factor):
            np.savetxt(
                '{0}/{1:02d}_{2}K.txt'.format(
                    tests_fld, i,
                    '{0:0.2f}'.format(temperature[i]).replace('.', 'p')),
                structure_factor[i])

            plt.figure()
            plt.plot(reduced_q,
                     structure_factor[i],
                     linestyle='-',
                     color=(rr[i], 0, bb[i]),
                     linewidth=2.0,
                     label='{0:0.1f}{1}'.format(temperature[i], data_label))
            plt.legend(loc='upper right')
            plt.xlabel('q (Å$^{-1}$)')
            plt.ylabel('Structure Factor (a.u.)')
            plt.autoscale(enable=True, axis='x', tight=True)
            plt.minorticks_on()
            plt.tick_params(which='both', direction='in')
            plt.title(test + ' Curves')
            plt.tight_layout()
            plt.savefig('{0}/{1}curves_{2:0.1f}.png'.format(
                sf_fld, test, temperature[i]))
            plt.close()

    np.savetxt('{0}/{1}/q_range.txt'.format(folder, scan), reduced_q)
    if pooled:
        np.savetxt('{0}/{1}/temperature.txt'.format(folder, scan), temperature)
        np.savetxt('{0}/{1}/positions.txt'.format(folder, scan), y)
    else:
        if temperature is not None:
            np.savetxt('{0}/{1}/temperature.txt'.format(folder, scan),
                       temperature)
        elif y is not None:
            np.savetxt('{0}/{1}/positions.txt'.format(folder, scan), y)

    if 'IJ' not in str(scan):
        # Standard deviation plot of all intensities
        plt.figure()
        plt.plot(reduced_q, intensity_std, linewidth=2.0)
        plt.xlabel('q (Å$^{-1}$)')
        plt.ylabel('SD(Intensity) (a.u.)')
        plt.axvline(x=pinned_q, color='k', linestyle='--')
        plt.text(pinned_q,
                 0.6 * np.mean(intensity_std),
                 'q = {0:02.2f}'.format(pinned_q),
                 horizontalalignment='center',
                 bbox=dict(facecolor='white', alpha=1.0))
        plt.title('Scan {0}'.format(scan))
        plt.tight_layout()
        plt.savefig('{0}/stdev.png'.format(plt_fld))
        plt.close()

    # Superimposed intensity plot
    plt.figure()
    [
        plt.plot(reduced_q, x, color=(rr[i], 0, bb[i]))
        for i, x in enumerate(reduced_intensity)
    ]
    plt.xlabel('q (Å$^{-1}$)')
    plt.ylabel('Intensity (a.u.)')
    plt.axvline(x=pinned_q, color='k', linestyle='--')
    plt.text(pinned_q,
             0.5,
             'q = {0:02.2f}'.format(pinned_q),
             horizontalalignment='center',
             bbox=dict(facecolor='white', alpha=1.0))
    plt.title('Scan {0}'.format(scan))
    plt.tight_layout()
    plt.savefig('{0}/superimposedcurves.png'.format(plt_fld))
    plt.close()

    # Save the calibration data sets and log the date/time processing was done
    if temperature is not None and ramping is False and 'IJ' not in str(scan):
        with open('{0}/{1}/{1}_data.pckl'.format(folder, scan), 'wb') as f:
            pickle.dump([temperature, reduced_q, reduced_intensity], f)
        with open('{0}/{1}/{1}_log.txt'.format(folder, scan), 'a+') as f:
            f.write(datetime.now().strftime("\n%d-%b-%Y %I:%M:%S %p"))

    # Save the ethanol (cold/ambient/hot) and water impinging jet data sets
    # and log the date/time processing was done
    elif y is not None and ramping is False:
        with open('{0}/{1}/{1}_data.pckl'.format(folder, scan), 'wb') as f:
            pickle.dump([y, reduced_q, reduced_intensity], f)
        with open('{0}/{1}/{1}_log.txt'.format(folder, scan), 'a+') as f:
            f.write(datetime.now().strftime("\n%d-%b-%Y %I:%M:%S %p"))

    # Save the ethanol ramping impinging jet data set and log the date/time
    # processing was done
    elif ramping is True:
        with open(
                folder + '/' + str(scan) + '/' + str(scan).rsplit('/')[-1] +
                '_data.pckl', 'wb') as f:
            pickle.dump([temperature, y, reduced_q, reduced_intensity], f)
        with open(
                folder + '/' + str(scan) + '/' + str(scan).rsplit('/')[-1] +
                '_log.txt', 'a+') as f:
            f.write(datetime.now().strftime("\n%d-%b-%Y %I:%M:%S %p"))
Example #53
    def add_pheno_group(self, ct, mask, chest_region, chest_type, pheno_name):
        """For a given mask, this function computes all phenotypes corresponding
        to the masked structure and adds them to the dataframe with the
        'add_pheno' method

        Parameters
        ----------
        ct : array, shape ( X, Y, Z )
            The 3D CT image array

        mask : boolean array, shape ( X, Y, Z )
            Boolean mask where True values indicate presence of the structure
            of interest

        chest_region : string
            Name of the chest region in the (region, type) key used to populate
            the dataframe

        chest_type : string
            Name of the chest region in the (region, type) key used to populate
            the dataframe

        pheno_name : string
            Name of the phenotype used to populate the dataframe

        References
        ----------
        1. Schneider et al, 'Correlation between CT numbers and tissue
        parameters needed for Monte Carlo simulations of clinical dose
        distributions'
        """
        assert pheno_name in self.pheno_names_, "Invalid phenotype name"
        #print "Region: %s, Type: %s, Pheno: %s" % \
        #    (chest_region, chest_type, pheno_name)
        pheno_val = None
        mask_sum = np.sum(mask)
        if pheno_name == 'LAA950':
            pheno_val = float(np.sum(ct[mask] <= -950.)) / mask_sum
        elif pheno_name == 'LAA910':
            pheno_val = float(np.sum(ct[mask] <= -910.)) / mask_sum
        elif pheno_name == 'LAA856':
            pheno_val = float(np.sum(ct[mask] <= -856.)) / mask_sum
        elif pheno_name == 'HAA700':
            pheno_val = float(np.sum(ct[mask] >= -700.)) / mask_sum
        elif pheno_name == 'HAA600':
            pheno_val = float(np.sum(ct[mask] >= -600)) / mask_sum
        elif pheno_name == 'HAA500':
            pheno_val = float(np.sum(ct[mask] >= -500)) / mask_sum
        elif pheno_name == 'HAA250':
            pheno_val = float(np.sum(ct[mask] >= -250)) / mask_sum
        elif pheno_name == 'Perc15':
            pheno_val = np.percentile(ct[mask], 15)
        elif pheno_name == 'Perc10':
            pheno_val = np.percentile(ct[mask], 10)
        elif pheno_name == 'HUMean':
            pheno_val = np.mean(ct[mask])
        elif pheno_name == 'HUStd':
            pheno_val = np.std(ct[mask])
        elif pheno_name == 'HUKurtosis':
            pheno_val = kurtosis(ct[mask], bias=False, fisher=True)
        elif pheno_name == 'HUSkewness':
            pheno_val = skew(ct[mask], bias=False)
        elif pheno_name == 'HUMode':
            min_val = np.min(ct[mask])
            pheno_val = np.argmax(np.bincount(ct[mask] + np.abs(min_val))) - \
                np.abs(min_val)
        elif pheno_name == 'HUMedian':
            pheno_val = np.median(ct[mask])
        elif pheno_name == 'HUMin':
            pheno_val = np.min(ct[mask])
        elif pheno_name == 'HUMax':
            pheno_val = np.max(ct[mask])
        elif pheno_name == 'HUMean500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = np.mean(hus)
        elif pheno_name == 'HUStd500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = np.std(hus)
        elif pheno_name == 'HUKurtosis500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0]:
                pheno_val = kurtosis(hus, bias=False, fisher=True)
        elif pheno_name == 'HUSkewness500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = skew(hus, bias=False)
        elif pheno_name == 'HUMode500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                min_val = np.min(hus)
                pheno_val = np.argmax(np.bincount(hus + np.abs(min_val))) - \
                    np.abs(min_val)
        elif pheno_name == 'HUMedian500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = np.median(hus)
        elif pheno_name == 'HUMin500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = np.min(hus)
        elif pheno_name == 'HUMax500':
            hus = ct[np.logical_and(mask, ct <= -500)]
            if hus.shape[0] > 0:
                pheno_val = np.max(hus)
        elif pheno_name == 'Volume':
            pheno_val = np.prod(self._spacing) * float(mask_sum)
        elif pheno_name == 'Mass':
            # This quantity is computed in a piecewise linear form according
            # to the prescription presented in ref. [1]. Mass is computed in
            # grams. First compute the contribution in HU interval from -98
            # and below.
            pheno_val = 0.0
            HU_tmp = ct[np.logical_and(mask, ct < -98)].clip(-1000)
            if HU_tmp.shape[0] > 0:
                m = (1.21e-3 - 0.93) / (-1000 + 98)
                b = 1.21e-3 + 1000 * m
                pheno_val += np.sum((m*HU_tmp + b)*\
                    np.prod(self._spacing)*0.001)

            # Now compute the mass contribution in the interval [-98, 18] HU.
            # Note the in the original paper, the interval is defined from
            # -98HU to 14HU, but we extend in slightly here so there are no
            # gaps in coverage. The values we report in the interval [14, 23]
            # should be viewed as approximate.
            HU_tmp = ct[np.logical_and(np.logical_and(mask, ct >= -98),
                                       ct <= 18)]
            if HU_tmp.shape[0] > 0:
                pheno_val += np.sum((1.018 + 0.893*HU_tmp/1000.0)*\
                    np.prod(self._spacing)*0.001)

            # Compute the mass contribution in the interval (18, 100]
            HU_tmp = ct[np.logical_and(np.logical_and(mask, ct > 18),
                                       ct <= 100)]
            if HU_tmp.shape[0] > 0:
                pheno_val += np.sum((1.003 + 1.169*HU_tmp/1000.0)*\
                    np.prod(self._spacing)*0.001)

            # Compute the mass contribution in the interval > 100
            HU_tmp = ct[np.logical_and(mask, ct > 100)]
            if HU_tmp.shape[0] > 0:
                pheno_val += np.sum((1.017 + 0.592*HU_tmp/1000.0)*\
                    np.prod(self._spacing)*0.001)

        if pheno_val is not None:
            self.add_pheno([chest_region, chest_type], pheno_name, pheno_val)
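The 'Mass' branch above applies the piecewise HU-to-density mapping from ref. [1]. Below is a hedged, standalone sketch of that mapping, with the coefficients copied from the branches above; the voxel spacing is a made-up example and is assumed to be in mm so the mass comes out in grams.

import numpy as np

def hu_to_density_g_per_cm3(hu):
    """Piecewise linear density (g/cm^3) as a function of HU, matching the branches above."""
    hu = np.clip(np.asarray(hu, dtype=float), -1000, None)
    m = (1.21e-3 - 0.93) / (-1000 + 98)
    b = 1.21e-3 + 1000 * m
    return np.where(hu < -98, m * hu + b,
           np.where(hu <= 18, 1.018 + 0.893 * hu / 1000.0,
           np.where(hu <= 100, 1.003 + 1.169 * hu / 1000.0,
                               1.017 + 0.592 * hu / 1000.0)))

spacing_mm = (0.6, 0.6, 0.75)                 # hypothetical voxel spacing
voxel_volume_cm3 = np.prod(spacing_mm) * 0.001
hu_values = np.array([-900.0, -500.0, -50.0, 40.0, 200.0])
print("mass (g):", float(np.sum(hu_to_density_g_per_cm3(hu_values) * voxel_volume_cm3)))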
Example #54
        for chain in model.get_list():
            xvalues = []
            yvalues = []
            zvalues = []
            for residue in chain.get_list():
                if residue.has_id("CA"):
                    ca = residue["CA"]
                    #print(ca.get_coord())
                    temp = ca.get_coord()
                    #print(temp[0])
                    xvalues.append(temp[0])
                    yvalues.append(temp[1])
                    zvalues.append(temp[2])

            #print(chain)
            xskew = skew(xvalues)
            yskew = skew(yvalues)
            zskew = skew(zvalues)
            avg = (xskew + yskew + zskew) / 3
            #print(avg)
            skewchain.append(avg)
    print("chainwise average : ", numpy.mean(skewchain))
    #idea 1.2: (xi - meanx)^3+(yi - meany)^3+(zi - meanz)^3/sdx+sdy+sdz

    #idea 2: measure skewness for all CA atoms present
    xvalues = []
    yvalues = []
    zvalues = []
    for model in structure.get_list():
        for chain in model.get_list():
            for residue in chain.get_list():
Example #55
 def this_skew(x):
     if len(x) < 3:
         return np.nan
     return skew(x, bias=False)
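The length guard suggests this helper is meant for per-group application, for example with a pandas groupby; a hypothetical, self-contained example (assuming this_skew is available at module scope) is:

import numpy as np
import pandas as pd
from scipy.stats import skew

frame = pd.DataFrame({'grp': ['a', 'a', 'a', 'b', 'b'],
                      'val': [1.0, 2.0, 5.0, 3.0, 4.0]})
print(frame.groupby('grp')['val'].apply(this_skew))  # group 'b' has < 3 rows -> NaN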
 
- First I'll transform the skewed numeric features by taking log(feature + 1) - this will make the features more normal
- Create dummy variables for the categorical features
- Replace the numeric missing values (NaNs) with the mean of their respective columns

matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)
prices = pd.DataFrame({"price":train["SalePrice"], "log(price + 1)":np.log1p(train["SalePrice"])})
prices.hist()

#log transform the target:
train["SalePrice"] = np.log1p(train["SalePrice"])

#log transform skewed numeric features:
numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index

skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna())) #compute skewness
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index

all_data[skewed_feats] = np.log1p(all_data[skewed_feats])

all_data = pd.get_dummies(all_data)

#filling NA's with the mean of the column:
all_data = all_data.fillna(all_data.mean())

#creating matrices for sklearn:
X_train = all_data[:train.shape[0]]
X_test = all_data[train.shape[0]:]
y = train.SalePrice
Example #57
#Check remaning missing values if any
data_features.isnull().sum()[data_features.isnull().sum() > 0].sort_values(
    ascending=False)

data_features['MSSubClass'].unique()
data_features['YrSold'] = data_features['YrSold'].astype(str)
data_features['OverallCond'] = data_features['OverallCond'].astype(str)
data_features['MSSubClass'] = data_features['MSSubClass'].astype(str)
data_features['MoSold'] = data_features['MoSold'].astype(str)
aa = list(data_features.select_dtypes(include=['object']).columns)

numerical_features = data_features.select_dtypes(exclude=["object"]).columns
num_feat = data_features[numerical_features]
print("Numerical features : " + str(len(numerical_features)))
skewness = num_feat.apply(lambda x: skew(x))
skewness = skewness[abs(skewness) > 1]
skewness.sort_values(ascending=False)
from scipy.special import boxcox1p
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    num_feat[feat] = boxcox1p(num_feat[feat],
                              stats.boxcox_normmax(num_feat[feat] + 1))
    data_features[feat] = boxcox1p(
        data_features[feat], stats.boxcox_normmax(data_features[feat] + 1))

#label encoding to some ordering categorical variable
from sklearn.preprocessing import LabelEncoder
cols = ('FireplaceQu', 'BsmtQual', 'BsmtCond', 'GarageQual', 'GarageCond',
        'ExterQual', 'ExterCond', 'HeatingQC', 'PoolQC', 'KitchenQual',
def skewness_log(data): 
    data_trans=data.copy()
    data_trans['GrLivArea']=np.log(data_trans['GrLivArea'])
    data_trans['SalePrice']=np.log(data_trans['SalePrice'])
    return(skew(data_trans['GrLivArea']),skew(data_trans['SalePrice']))
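An illustrative call on synthetic, log-normal columns (the real `data` would be the housing DataFrame implied by the surrounding snippet); both returned skews should be close to 0 after the log transform.

import numpy as np
import pandas as pd
from scipy.stats import skew

data = pd.DataFrame({
    'GrLivArea': np.random.lognormal(mean=7.3, sigma=0.4, size=1000),
    'SalePrice': np.random.lognormal(mean=12.0, sigma=0.4, size=1000),
})
print(skewness_log(data))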
Example #59
def masks_and_traces(ops, stat_manual, stat_orig):
    ''' main extraction function
        inputs: ops and stat
        creates cell and neuropil masks and extracts traces
        returns: F (ROIs x time), Fneu (ROIs x time), F_chan2, Fneu_chan2, ops, stat
        F_chan2 and Fneu_chan2 will be empty if no second channel
    '''
    if 'aspect' in ops:
        dy, dx = int(ops['aspect'] * 10), 10
    else:
        d0 = ops['diameter']
        dy, dx = (d0, d0) if isinstance(d0, int) else d0
    t0 = time.time()
    # Concatenate stat so a good neuropil function can be formed
    stat_all = stat_manual.copy()
    for n in range(len(stat_orig)):
        stat_all.append(stat_orig[n])
    stat_all = roi_stats(stat_all, dy, dx, ops['Ly'], ops['Lx'])
    cell_masks = [
        masks.create_cell_mask(stat,
                               Ly=ops['Ly'],
                               Lx=ops['Lx'],
                               allow_overlap=ops['allow_overlap'])
        for stat in stat_all
    ]
    cell_pix = masks.create_cell_pix(stat_all, Ly=ops['Ly'], Lx=ops['Lx'])
    manual_roi_stats = stat_all[:len(stat_manual)]
    manual_cell_masks = cell_masks[:len(stat_manual)]
    manual_neuropil_masks = masks.create_neuropil_masks(
        ypixs=[stat['ypix'] for stat in manual_roi_stats],
        xpixs=[stat['xpix'] for stat in manual_roi_stats],
        cell_pix=cell_pix,
        inner_neuropil_radius=ops['inner_neuropil_radius'],
        min_neuropil_pixels=ops['min_neuropil_pixels'],
    )
    print('Masks made in %0.2f sec.' % (time.time() - t0))

    F, Fneu, F_chan2, Fneu_chan2, ops = extract_traces_from_masks(
        ops, manual_cell_masks, manual_neuropil_masks)

    # compute activity statistics for classifier
    npix = np.array([stat_orig[n]['npix']
                     for n in range(len(stat_orig))]).astype('float32')
    for n in range(len(manual_roi_stats)):
        manual_roi_stats[
            n]['npix_norm'] = manual_roi_stats[n]['npix'] / np.mean(
                npix[:100])  # What if there are less than 100 cells?
        manual_roi_stats[n]['compact'] = 1
        manual_roi_stats[n]['footprint'] = 2
        manual_roi_stats[n]['manual'] = 1  # Add manual key

    # subtract neuropil and compute skew, std from F
    dF = F - ops['neucoeff'] * Fneu
    sk = stats.skew(dF, axis=1)
    sd = np.std(dF, axis=1)

    for n in range(F.shape[0]):
        manual_roi_stats[n]['skew'] = sk[n]
        manual_roi_stats[n]['std'] = sd[n]
        manual_roi_stats[n]['med'] = [
            np.mean(manual_roi_stats[n]['ypix']),
            np.mean(manual_roi_stats[n]['xpix'])
        ]

    dF = F - ops['neucoeff'] * Fneu
    spks = oasis(F=dF,
                 batch_size=ops['batch_size'],
                 tau=ops['tau'],
                 fs=ops['fs'])

    return F, Fneu, F_chan2, Fneu_chan2, spks, ops, manual_roi_stats
Example #60
def course_action(request):
    context = {}

    __course_id = int(request.POST['course'])
    __course_obj = None

    __section_objects = []

    __sections = []
    __message = ''
    __mean = 0
    __std = 0
    __min = 0
    __max = 0
    __skewness = 0
    __ttest_annova_type = ''
    __ttest_annova_value = 0
    __ttest_annova_sig = 0
    __correlation = 0
    __show__result = 0
    __histogramfile = ''
    __mids = []
    __finals = []
    __totals = []
    __domain = ''
    __course = ''
    counter = 0
    __nbr_sections = 0
    __found_section = 0
    try:
        sem.acquire()
        print("The semaphore is locked")

        if request.method == 'GET':
            raise Exception('Internal Error')

        if request.method == 'POST':
            __course_obj = Course.objects.get(course_id=__course_id)
            print('Dealing with course ' + __course_obj.course_name_ar)
            __course = __course_obj.course_name_ar

            for _section in Section.objects.all():
                if _section.section_course.course_id == __course_id:
                    __section_objects.append(_section)
                    __nbr_sections += 1
            print('Dealing with  ' + str(__nbr_sections) + ' sections : ' + str(__section_objects))

            for _section in __section_objects:
                for _report in SectionDocRequest.objects.all():
                    if _report.section.section_code == _section.section_code:
                        __sections.append(_report)
                        __found_section += 1
                        __data = eval(_report.student_grades)
                        for grade in __data['mids']:
                            __mids.append(grade)
                        for grade in __data['finals']:
                            __finals.append(grade)
                        for grade in __data['totals']:
                            __totals.append(grade)
                        break
            print('Dealing with  ' + str(__found_section) + ' section reports: ' + str(__sections))

            if __nbr_sections != __found_section:
                raise Exception('Some sections need to be analysed first')

            # compute statistics about the course grades
            __mean = float("{0:.4f}".format(statistics.mean(__totals)))
            __std = float("{0:.4f}".format(statistics.stdev(__totals)))
            __skewness = float("{0:.4f}".format(skew(__totals, bias=False)))
            if len(__mids) == 0:
                __correlation = -99.99
            else:
                __correlation = float("{0:.4f}".format(pearsonr(__mids, __finals)[0]))
            __min = min(__totals)
            __max = max(__totals)

            if __nbr_sections == 2:
                # T-Test
                __ttest_annova_type = 'T-Test'
                _total1 = eval(__sections[0].student_grades)['totals']
                _total2 = eval(__sections[1].student_grades)['totals']
                res = scipy.stats.ttest_ind(_total1, _total2)

                __ttest_annova_value = float("{0:.4f}".format(res.statistic))
                __ttest_annova_sig = float("{0:.4f}".format(res.pvalue))

            else:
                # ANOVA for three to five sections
                __ttest_annova_type = 'ANOVA'
                if 3 <= len(__sections) <= 5:
                    __section_totals = [eval(s.student_grades)['totals'] for s in __sections]
                    __f_stat, __f_pvalue = scipy.stats.f_oneway(*__section_totals)
                    __ttest_annova_value = float("{0:.4f}".format(__f_stat))
                    __ttest_annova_sig = float("{0:.4f}".format(__f_pvalue))
                else:
                    raise Exception('To be implemented : managing more than 5 sections per course !!!!')

            print("mean : " + str(__mean))
            print("std : " + str(__std))
            print("skewness : " + str(__skewness))
            print("__ttest_annova_type : " + str(__ttest_annova_type))
            print("__ttest_annova_value : " + str(__ttest_annova_value))
            print("__ttest_annova_sig : " + str(__ttest_annova_sig))

            # plot the histogram

            a = np.array(__totals)
            # Fit a normal distribution to the data:
            mu, std = norm.fit(a)
            number = a.size

            # Plot the histogram.
            plt.hist(a, bins=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], density=True, color='#607c8e',
                     edgecolor='black',
                     rwidth=0.8)
            # Plot the PDF.
            x = np.linspace(0, 100, 100)
            p = norm.pdf(x, mu, std)
            plt.plot(x, p, 'k', linewidth=3)
            title = "Histogram for course: " + __course_obj.course_name + ",  N=%d" % (number)
            plt.title(title)

            __histogramfile = "data/media/histogram_course_" + str(__course) + ".png"
            plt.savefig(__histogramfile)
            plt.close()

            print("mean : " + str(__mean))
            print("std : " + str(__std))
            print("skewness : " + str(__skewness))
            print("correlation : " + str(__correlation))

            __show__result = 1

            try:
                obj = CourseDocRequest.objects.get(course=__course_obj)
                print("--------> Updating the course report data with id= " + str(obj.course_doc_id))
            except CourseDocRequest.DoesNotExist:
                obj = CourseDocRequest()
                obj.course = __course_obj
                print("--------> Creating new course report data")

            obj.doc_correlation = __correlation
            obj.doc_explanation = ''
            obj.doc_max = __max
            obj.doc_mean = __mean
            obj.doc_min = __min
            obj.doc_skewness = __skewness
            obj.doc_std_deviation = __std
            obj.histogram = __histogramfile
            obj.doc_ttest_annova_sig = __ttest_annova_sig
            obj.doc_ttest_annova_value = __ttest_annova_value
            obj.doc_ttest_annova_type = __ttest_annova_type

            obj.save()

            # get the server domain
            if request.is_secure():
                __domain = 'https://' + request.get_host()
            else:
                __domain = 'http://' + request.get_host()
            __security = request.is_secure()



    except Exception as e:
        __message = e.__str__()

    finally:
        sem.release()
        print("The semaphore was released")

    if len(__mids) == 0:
        __correlation = 'N/A'

    context = {
        'ttest_annova_sig': __ttest_annova_sig,
        'ttest_annova_type': __ttest_annova_type,
        'ttest_annova_value': __ttest_annova_value,
        'show_result': __show__result,
        'message': __message,
        'mean': __mean,
        'std': __std,
        'skewness': __skewness,
        'correlation': __correlation,
        'min': __min,
        'max': __max,
        'histogram': __histogramfile,
        'domain': __domain,
        'course': __course,
        'sections': __sections,
    }

    __results = analysis(context)
    context['analysis'] = __results

    del __course
    del __message
    del __mean
    del __std
    del __min
    del __max
    del __skewness
    del __correlation
    del __show__result
    del __histogramfile
    del __domain
    del __sections

    return render(request, 'course_result.html', context=context)