def eddyStats(DI) :
    """Write summary statistics and PDFs of columns 5 (L) and 6 (y0) of the runtime logs.

    Every line of the files matching ``DI['cdir'] + "runtime/runtime_*"`` that
    starts, after optional spaces, with a digit 1-9 is parsed as a row of
    whitespace-separated numbers.  Column 5 is taken as ``L`` and column 6 as
    ``y0`` (presumably eddy size and eddy position -- confirm against the
    runtime file format).

    The file ``DI['pdir'] + "eddyStats.dat"`` is written with
      * neddies: number of matched rows divided (integer division) by the
        number of runtime files,
      * mean and standard deviation of L and y0,
      * 100-bin PDFs of y0 and log10(L) as returned by ``compute_pdf``.

    Parameters
    ----------
    DI : dict
        Must provide the keys 'cdir' and 'pdir'; both are used as string
        prefixes, so they are expected to end with a path separator.

    Raises
    ------
    ValueError
        If no runtime line matches, or if the matched rows do not all have the
        same number of numeric columns.
    """
    import re   # local import: only used here, in place of the former shell grep

    # Read the logs directly instead of piping them through `grep` in a shell:
    # this works on Python 3 (no bytes/str mismatch) and with any path characters.
    runtime_files = sorted(glob.glob(DI['cdir']+"runtime/runtime_*"))
    data_line = re.compile(r' *[1-9]')      # same selection as grep '^ *[1-9]'

    rows = []
    for rname in runtime_files :
        with open(rname) as ifile :
            rows.extend(line.split() for line in ifile if data_line.match(line))

    if not rows :
        raise ValueError("In eddyStats: no data lines found in " + DI['cdir'] + "runtime/runtime_*")

    arr = np.array(rows, dtype=float)

    L  = arr[:,5]
    y0 = arr[:,6]
    logL = np.log10(L)

    # rows per runtime file; integer division keeps this an int on Python 2 and 3
    neddies = np.size(L) // len(runtime_files)
    Lavg  = np.mean(L)
    Lrms  = np.std(L)
    y0avg = np.mean(y0)
    y0rms = np.std(y0)

    nbins = 100
    P_y0,   y0_bins   = compute_pdf(y0,   np.min(y0),   np.max(y0),   nbins)
    P_logL, logL_bins = compute_pdf(logL, np.min(logL), np.max(logL), nbins)

    fname = DI['pdir']+"eddyStats.dat"
    with open(fname, 'w') as ofile :
        ofile.write("# neddies = %i\n" %(neddies))
        ofile.write("# Lavg    = %f\n" %(Lavg))
        ofile.write("# Lrms    = %f\n" %(Lrms))
        ofile.write("# y0avg   = %f\n" %(y0avg))
        ofile.write("# y0rms   = %f\n" %(y0rms))
        ofile.write(commentHdr+" y0, PDF_y0, log10L, PDF_log10L\n")
        for i in range(nbins) :
            ofile.write("%12.5e  %12.5e  %12.5e  %12.5e \n" %(y0_bins[i],P_y0[i],logL_bins[i],P_logL[i]))
    def set_test_dists(self, test_fnames):
        """Rebuild the per-file test histograms and PDFs.

        ``test_fnames`` is indexed by each class in ``preferences.CLASSES`` and
        yields the ``.npy`` file names of that class.  For every
        (class, file, observable) combination one histogram is stored in
        ``self.test_histograms`` and one PDF in ``self.test_pdfs``.
        """
        # Pass 1: start from fresh containers on every call, because one
        # histogram/PDF is kept per test file and stale entries must not survive.
        for cls in preferences.CLASSES:
            hists_of_cls = {}
            pdfs_of_cls = {}
            self.test_histograms[cls] = hists_of_cls
            self.test_pdfs[cls] = pdfs_of_cls
            for fname in test_fnames[cls]:
                hists_of_cls[fname] = {}
                pdfs_of_cls[fname] = {}
                for obs in preferences.OBSERVABLES:
                    hists_of_cls[fname][obs] = data_tools.initialize_histogram(obs)
                    pdfs_of_cls[fname][obs] = []

        # Pass 2: one histogram per observable for each individual test file.
        for cls in preferences.CLASSES:
            for fname in test_fnames[cls]:
                loaded = np.load(fname)
                part_A, part_B = data_tools.extract_individual_data(loaded)
                observed = data_tools.compute_observables(part_A, part_B)
                file_hists = self.test_histograms[cls][fname]
                for obs in preferences.OBSERVABLES:
                    file_hists[obs] = data_tools.compute_histogram_1D(obs, observed[obs])

        # Pass 3: turn every histogram into its PDF.
        for cls in preferences.CLASSES:
            for fname in test_fnames[cls]:
                file_hists = self.test_histograms[cls][fname]
                file_pdfs = self.test_pdfs[cls][fname]
                for obs in preferences.OBSERVABLES:
                    file_pdfs[obs] = data_tools.compute_pdf(obs, file_hists[obs])
 def set_train_dists(self, train_fnames):
     """Rebuild the per-class training histograms and PDFs.

     ``train_fnames`` is indexed by each class in ``preferences.CLASSES`` and
     yields the ``.npy`` file names of that class.  The histograms of all
     files of a class are summed per observable into
     ``self.train_histograms[class][observable]``; the matching PDF is stored
     in ``self.train_pdfs[class][observable]``.
     """
     # Reset first: the histograms are accumulated with += below, so every
     # training run has to start from freshly initialized ones.
     for cls in preferences.CLASSES:
         self.train_histograms[cls] = {
             obs: data_tools.initialize_histogram(obs)
             for obs in preferences.OBSERVABLES
         }
         self.train_pdfs[cls] = {}

     # Accumulate the histogram of every training file into its class total.
     for cls in preferences.CLASSES:
         class_hists = self.train_histograms[cls]
         for fname in train_fnames[cls]:
             loaded = np.load(fname)
             part_A, part_B = data_tools.extract_individual_data(loaded)
             observed = data_tools.compute_observables(part_A, part_B)
             for obs in preferences.OBSERVABLES:
                 class_hists[obs] += data_tools.compute_histogram_1D(obs, observed[obs])

     # Convert the accumulated histograms into PDFs.
     for cls in preferences.CLASSES:
         class_hists = self.train_histograms[cls]
         class_pdfs = self.train_pdfs[cls]
         for obs in preferences.OBSERVABLES:
             class_pdfs[obs] = data_tools.compute_pdf(obs, class_hists[obs])
Example #4
    def train(self, train_fnames):
        """Build the 1D training PDFs for every observable and class.

        ``train_fnames`` is indexed by each class in ``preferences.CLASSES`` and
        yields the ``.npy`` file names of that class.  The histograms of all
        files of a class are summed per observable in a local table, and the
        resulting PDFs are stored in ``self.train_pdfs1D[observable][class]``.
        """

        train_histograms1D = {}
        # initialize empty histograms
        for o in preferences.OBSERVABLES:
            train_histograms1D[o], self.train_pdfs1D[o] = {}, {}
            for c in preferences.CLASSES:
                train_histograms1D[o][c] = data_tools.initialize_histogram(o)

        # compute histograms for each class
        for c in preferences.CLASSES:
            for file_path in train_fnames[c]:
                data = np.load(file_path)
                data_A, data_B = data_tools.extract_individual_data(data)
                obs_data = data_tools.compute_observables(data_A, data_B)
                for o in preferences.OBSERVABLES:
                    # accumulate this file's histogram into the class total
                    train_histograms1D[o][c] += data_tools.compute_histogram_1D(o, obs_data[o])

        # convert the accumulated histograms into PDFs
        for o in preferences.OBSERVABLES:
            for c in preferences.CLASSES:
                self.train_pdfs1D[o][c] = data_tools.compute_pdf(
                    o, train_histograms1D[o][c])
Example #5
if __name__ == "__main__":
    # Script entry point: build one histogram and one PDF per
    # (observable, raw class) pair from the files returned by
    # file_tools.get_data_fnames, then report the elapsed wall-clock time.

    start_time = time.time()
    data_fnames = file_tools.get_data_fnames('../data/gender_compositions/')

    # One empty histogram per (observable, class); the PDF table starts empty.
    histograms1D = {
        o: {c: data_tools.initialize_histogram(o) for c in preferences.CLASSES_RAW}
        for o in preferences.OBSERVABLES
    }
    pdfs1D = {o: {} for o in preferences.OBSERVABLES}

    # Accumulate the histogram of every data file into its class total.
    for c in preferences.CLASSES_RAW:
        for file_path in data_fnames[c]:
            data_A, data_B = data_tools.extract_individual_data(np.load(file_path))
            obs_data = data_tools.compute_observables(data_A, data_B)
            for o in preferences.OBSERVABLES:
                histograms1D[o][c] += data_tools.compute_histogram_1D(o, obs_data[o])

    # Convert the accumulated histograms into PDFs.
    for o in preferences.OBSERVABLES:
        for c in preferences.CLASSES_RAW:
            pdfs1D[o][c] = data_tools.compute_pdf(o, histograms1D[o][c])

    elapsed_time = time.time() - start_time
    print('\nTime elapsed  %2.2f sec' %elapsed_time)
Example #6
def get_pdfs(DI, favre=False, nbins=60):
    """Compute weighted PDFs of velocity and dissipation for every dump time.

    For each file ``DI['cdir'] + "data_py/data_py_NNNNN.npy"`` the function
    evaluates, with ``compute_wpdf``, the PDFs of uvel, vvel, wvel, of
    ``diss = dvisc*(du/dx)**2`` and of ``log10(diss)``.  The results are
    written to ``DI['pdir']`` as pdfs_uvel.dat, pdfs_vvel.dat, pdfs_wvel.dat,
    pdfs_TKEdiss.dat and pdfs_logTKEdiss.dat; the first column holds the bins
    and each following column holds the PDF of one dump time.

    Parameters
    ----------
    DI : dict
        Must provide the keys 'cdir' and 'pdir' (used as string prefixes).
    favre : bool, optional
        If true, the cell weights are additionally multiplied by rho.
    nbins : int, optional
        Number of bins of every PDF.

    Raises
    ------
    ValueError
        If no data file is found, if the variables "pos" or "posf" are missing,
        or if ``favre`` is true but there is no "rho" variable.
    """

    dataFiles = glob.glob(DI['cdir']+"data_py/data_*.npy")
    ntimes    = len(dataFiles)
    if ntimes == 0 :
        # without any file the bin arrays written below would never be assigned
        raise ValueError("In get_pdfs: no data files found in " + DI['cdir'] + "data_py/")

    varNames  = get_dataHeaderVars(DI)
    times     = get_inputFileParameter(DI, ("dumpTimes",))         # times or ypositions if spatial
    times     = times[0:ntimes]    # limit times to number of dataFiles, not all listed dumpTimes
    cCoord    = get_inputFileParameter(DI, ("params", "cCoord"))
    umax      = get_inputFileParameter(DI, ("initParams", "vel_max"))
    umin      = get_inputFileParameter(DI, ("initParams", "vel_min"))

    # v and w are binned over +/- 5% of the u velocity range
    vwlim      = 0.05*np.abs(umax-umin)
    vmin, vmax = -vwlim, vwlim
    wmin, wmax = -vwlim, vwlim

    def _column(name, required) :
        # Column index of variable `name`; -1 if it is absent and not required.
        try :
            return varNames.index(name)
        except ValueError :
            if required :
                raise ValueError("In get_pdfs: no " + name + " variable found")
            return -1

    iposf  = _column("posf",  True)
    ipos   = _column("pos",   True)
    # NOTE(review): as in the original code, a missing optional variable maps to
    # index -1, which silently selects the LAST data column below -- confirm
    # whether uvel/vvel/wvel/dvisc should be mandatory instead.
    irho   = _column("rho",   False)
    idvisc = _column("dvisc", False)
    iuvel  = _column("uvel",  False)
    ivvel  = _column("vvel",  False)
    iwvel  = _column("wvel",  False)

    if irho == -1 and favre :
        raise ValueError("In get_pdfs: favre is true, but there is no rho in data file")

    P_uvel    = np.zeros([nbins,ntimes])
    P_vvel    = np.zeros([nbins,ntimes])
    P_wvel    = np.zeros([nbins,ntimes])
    P_diss    = np.zeros([nbins,ntimes])
    P_logDiss = np.zeros([nbins,ntimes])

    timeLabels = []

    for itime in range(ntimes) :

        timeLabels.append('"P(t='+'{:.2e}'.format(times[itime])+' s)" ')
        fname = DI['cdir']+"data_py/data_py_" + "{0:0>5}".format(itime) + ".npy"
        data = np.load(fname)

        print("Processing time # %i of %i" %(itime+1, ntimes))
        x     = data[:,ipos]
        xf    = data[:,iposf]
        # NOTE(review): x[-1]+(xf[-1]-x[-1]) evaluates to xf[-1] (up to round-off),
        # so the last cell gets zero width and weight -- confirm whether
        # x[-1]+(x[-1]-xf[-1]) was intended.
        xf    = np.append(xf,x[-1]+(xf[-1]-x[-1]))
        uvel  = data[:,iuvel]
        vvel  = data[:,ivvel]
        wvel  = data[:,iwvel]
        dvisc = data[:,idvisc]

        # cell weights from the face positions raised to the power cCoord
        wt1 = np.abs(np.abs(xf[1:])**cCoord - np.abs(xf[0:-1])**cCoord)
        i = np.where(xf[1:] * xf[0:-1] < 0)[0]      # cells whose two faces have opposite sign
        wt1[i] = np.abs(np.abs(xf[i+1])**cCoord + np.abs(xf[i])**cCoord)
        # the crossing between realizations is computed correctly, so we don't need to mask end points
        wt = wt1*data[:,irho] if favre else wt1

        # velocity PDFs use only cells with uvel more than 0.01% of the range above umin
        j = np.where(  ( uvel - umin ) > 0.0001*(umax-umin) )
        P_uvel[:,itime] , uvel_bins = compute_wpdf(uvel[j], wt[j], umin, umax, nbins)
        P_vvel[:,itime] , vvel_bins = compute_wpdf(vvel[j], wt[j], vmin, vmax, nbins)
        P_wvel[:,itime] , wvel_bins = compute_wpdf(wvel[j], wt[j], wmin, wmax, nbins)

        # dissipation computation
        uvel  = np.append(uvel,uvel[-1])  # get same number of entries as dx
        dvisc = np.append(dvisc,dvisc[-1])
        dx = (xf[1:] - xf[0:-1])
        du = ( uvel[1:] - uvel[0:-1] )
        dvisc = 0.5 * ( dvisc[1:] + dvisc[0:-1] )
        diss = dvisc * ( du / dx )**2.0 # uses dx from cell centers and averaged dvisc
        # don't compute dissipation at crossing and don't count points with negligible dissipation
        j = np.where( np.logical_and( dx > 0.0 , diss > 10**(-6) ) )
        P_diss[:,itime] , diss_bins = compute_wpdf(diss[j], wt[j], 0, 1000, nbins)
        P_logDiss[:,itime] , logDiss_bins = compute_wpdf(np.log10(diss[j]), wt[j], -6, 8, nbins)

    dumpTimesString = ''.join(timeLabels)

    # (file name, header label, bins, PDFs); the bins are those of the last dump time
    outputs = (
        ('pdfs_uvel.dat',       " u[m/s]   ",         uvel_bins,    P_uvel),
        ('pdfs_vvel.dat',       " v[m/s]   ",         vvel_bins,    P_vvel),
        ('pdfs_wvel.dat',       " w[m/s]   ",         wvel_bins,    P_wvel),
        ('pdfs_TKEdiss.dat',    " TKEdiss   ",        diss_bins,    P_diss),
        ('pdfs_logTKEdiss.dat', " log10(TKEdiss)   ", logDiss_bins, P_logDiss),
    )
    for oname, label, bins, pdfs in outputs :
        var  = np.vstack([bins,pdfs.T]).T
        head = label + dumpTimesString
        np.savetxt(DI['pdir']+oname, var, header=head, fmt="%15.8e ", comments=commentHdr)
def get_dxpdfs(DI, nbins=60):
    """Compute the PDF of log10(cell size) for every dump time and write dxpdfs.dat.

    For each file ``DI['cdir'] + "data_py/data_py_NNNNN.npy"`` the data is split
    into realizations wherever the face position decreases.  The cell sizes of
    all realizations are collected and the PDF of ``log10(dx)`` between
    ``log10(dxmin*L)`` and ``log10(dxmax*L)`` is evaluated with ``compute_pdf``.
    The result is written to ``DI['pdir'] + "dxpdfs.dat"``; the first column
    holds the bins and each following column holds one dump time.

    Parameters
    ----------
    DI : dict
        Must provide the keys 'cdir' and 'pdir' (used as string prefixes).
    nbins : int, optional
        Number of bins of every PDF.

    Raises
    ------
    ValueError
        If no data file is found or the variables "pos" or "posf" are missing.
    """

    dataFiles = glob.glob(DI['cdir']+"data_py/data_*.npy")
    ntimes    = len(dataFiles)
    if ntimes == 0 :
        # without any file there are no bins to write
        raise ValueError("In get_dxpdfs: no data files found in " + DI['cdir'] + "data_py/")

    varNames  = get_dataHeaderVars(DI)
    times     = get_inputFileParameter(DI, ("dumpTimes",))         # times or ypositions if spatial
    times     = times[0:ntimes]    # limit times to number of dataFiles, not all listed dumpTimes
    dxmin     = get_inputFileParameter(DI, ("params", "dxmin"))
    dxmax     = get_inputFileParameter(DI, ("params", "dxmax"))
    L         = get_inputFileParameter(DI, ("params", "domainLength"))

    # dxmin and dxmax are scaled by the domain length before being used as PDF limits
    dxmin *= L
    dxmax *= L

    try :
        iposf = varNames.index("posf")
    except ValueError :
        raise ValueError("In get_dxpdfs: no posf variable found")
    try :
        ipos = varNames.index("pos")
    except ValueError :
        raise ValueError("In get_dxpdfs: no pos variable found")

    dxpdfs = np.zeros((nbins, ntimes))

    for itime in range(ntimes) :

        fname = DI['cdir']+"data_py/data_py_" + "{0:0>5}".format(itime) + ".npy"
        data_all = np.load(fname)
        posf_all = data_all[:,iposf]      # use the looked-up column, not a hard-coded index

        # a decrease of the face position marks the start of the next realization
        breaks  = np.where(posf_all[1:]-posf_all[0:-1] < 0.0)[0]
        istarts = np.insert(breaks + 1, 0, 0)
        iends   = np.append(breaks, len(posf_all)-1)
        nrlz    = len(istarts)  # some sims end early so compute nrlz for each time

        # seeded with an empty float64 array so the concatenated result is float64
        dx_parts = [np.empty(0)]
        for irlz in range(nrlz) :
            print("Processing time # %i of %i; for realization %i of %i" %(itime+1, ntimes, irlz+1, nrlz))
            data = data_all[istarts[irlz]:iends[irlz]+1, :]
            x  = data[:,ipos]
            xf = data[:,iposf]
            # NOTE(review): x[-1]+(xf[-1]-x[-1]) evaluates to xf[-1] (up to round-off),
            # so the last cell of each realization gets (near) zero width -- confirm
            # whether x[-1]+(x[-1]-xf[-1]) was intended.
            xf = np.append(xf,x[-1]+(xf[-1]-x[-1]))
            dx_parts.append(xf[1:] - xf[0:-1])
        dx = np.concatenate(dx_parts)

        dxpdfs[:,itime], bins = compute_pdf(np.log10(dx), np.log10(dxmin), np.log10(dxmax), nbins)

    # the bins are those returned for the last dump time
    bdx = np.vstack([bins,dxpdfs.T]).T

    head = " log(dx)[m]     "
    for i,t in enumerate(times) :
        hi = "loc_" + str(i+2) + "_" + str(t)
        head = head + hi.ljust(22)      # pad each column label to 22 characters
    np.savetxt(DI['pdir']+'dxpdfs.dat', bdx, header=head, fmt="%15.8e ", comments=commentHdr)