def eddyStats(DI):
    """Write summary statistics and PDFs of the eddy records found in the runtime logs.

    Every file matching ``DI['cdir'] + "runtime/runtime_*"`` is scanned for
    record lines, i.e. lines whose first non-space character is a digit 1-9
    (the same selection as ``grep '^ *[1-9]'``). Each record is parsed as a
    row of whitespace-separated numbers; column 5 is taken as ``L`` and
    column 6 as ``y0`` (presumably eddy size and eddy location -- confirm
    against the runtime file format).

    The mean and standard deviation of ``L`` and ``y0``, the number of records
    per runtime file, and 100-bin PDFs of ``y0`` and ``log10(L)`` are written
    to ``DI['pdir'] + "eddyStats.dat"``.

    Parameters
    ----------
    DI : dict
        Must provide ``'cdir'`` (case directory prefix) and ``'pdir'``
        (output directory prefix); both are used by plain string
        concatenation, so they are expected to end with a path separator.

    Raises
    ------
    ValueError
        If no record lines are found, or the records do not all have the
        same number of columns.
    """
    # Read the files directly instead of shelling out to grep: no shell
    # quoting problems, and no bytes/str mismatch on Python 3.
    runtime_files = sorted(glob.glob(DI['cdir'] + "runtime/runtime_*"))

    first_digits = tuple("123456789")
    rows = []
    for path in runtime_files:
        with open(path) as ifile:
            for line in ifile:
                if line.lstrip(" ").startswith(first_digits):
                    rows.append(line.split())

    if not rows:
        raise ValueError("eddyStats: no eddy records found in "
                         + DI['cdir'] + "runtime/runtime_*")

    arr = np.array(rows, dtype=float)

    L = arr[:, 5]
    y0 = arr[:, 6]

    # Integer division: the value is written with %i below, so this matches
    # the truncated count on both Python 2 and Python 3.
    neddies = np.size(L) // len(runtime_files)
    Lavg = np.mean(L)
    Lrms = np.std(L)
    y0avg = np.mean(y0)
    y0rms = np.std(y0)

    nbins = 100
    logL = np.log10(L)
    P_y0, y0_bins = compute_pdf(y0, np.min(y0), np.max(y0), nbins)
    P_logL, logL_bins = compute_pdf(logL, np.min(logL), np.max(logL), nbins)

    fname = DI['pdir'] + "eddyStats.dat"
    with open(fname, 'w') as ofile:
        ofile.write("# neddies = %i\n" % (neddies))
        ofile.write("# Lavg = %f\n" % (Lavg))
        ofile.write("# Lrms = %f\n" % (Lrms))
        ofile.write("# y0avg = %f\n" % (y0avg))
        ofile.write("# y0rms = %f\n" % (y0rms))
        ofile.write("#\n")
        ofile.write(commentHdr + " y0, PDF_y0, log10L, PDF_log10L\n")
        for row in zip(y0_bins, P_y0, logL_bins, P_logL):
            ofile.write("%12.5e %12.5e %12.5e %12.5e \n" % row)
def set_test_dists(self, test_fnames):
    """Rebuild the per-file histograms and pdfs of the test set.

    ``test_fnames`` maps each class in ``preferences.CLASSES`` to an iterable
    of ``.npy`` file names. For every (class, file, observable) triple a
    histogram is stored in ``self.test_histograms`` and the pdf derived from
    it in ``self.test_pdfs``. Both containers are reset for every class on
    each call, because one histogram/pdf is kept per test file.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Phase 1: start from empty per-file containers.
    for cls in classes:
        self.test_histograms[cls] = {}
        self.test_pdfs[cls] = {}
        for fname in test_fnames[cls]:
            self.test_histograms[cls][fname] = {
                obs: data_tools.initialize_histogram(obs)
                for obs in observables
            }
            self.test_pdfs[cls][fname] = {obs: [] for obs in observables}

    # Phase 2: one histogram per observable for each test file.
    for cls in classes:
        for fname in test_fnames[cls]:
            data_A, data_B = data_tools.extract_individual_data(np.load(fname))
            observed = data_tools.compute_observables(data_A, data_B)
            file_histograms = self.test_histograms[cls][fname]
            for obs in observables:
                file_histograms[obs] = data_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Phase 3: turn every histogram into its pdf.
    for cls in classes:
        for fname in test_fnames[cls]:
            file_histograms = self.test_histograms[cls][fname]
            file_pdfs = self.test_pdfs[cls][fname]
            for obs in observables:
                file_pdfs[obs] = data_tools.compute_pdf(
                    obs, file_histograms[obs])
def set_train_dists(self, train_fnames):
    """Rebuild the per-class training histograms and pdfs.

    ``train_fnames`` maps each class in ``preferences.CLASSES`` to an iterable
    of ``.npy`` file names. The histograms of all files of a class are summed
    per observable into ``self.train_histograms[class][observable]``, and the
    pdf derived from each sum is stored in ``self.train_pdfs``. Because the
    histograms are accumulated, they are re-initialized on every call.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Phase 1: fresh accumulators for every class/observable pair.
    for cls in classes:
        self.train_histograms[cls] = {}
        self.train_pdfs[cls] = {}
        for obs in observables:
            self.train_histograms[cls][obs] = data_tools.initialize_histogram(obs)

    # Phase 2: add the contribution of every training file.
    for cls in classes:
        class_histograms = self.train_histograms[cls]
        for fname in train_fnames[cls]:
            data_A, data_B = data_tools.extract_individual_data(np.load(fname))
            observed = data_tools.compute_observables(data_A, data_B)
            for obs in observables:
                class_histograms[obs] += data_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Phase 3: convert the accumulated histograms to pdfs.
    for cls in classes:
        for obs in observables:
            self.train_pdfs[cls][obs] = data_tools.compute_pdf(
                obs, self.train_histograms[cls][obs])
def train(self, train_fnames):
    """Fit the 1D training pdfs from the given training files.

    ``train_fnames`` maps each class in ``preferences.CLASSES`` to an iterable
    of ``.npy`` file names. Histograms are accumulated per observable and per
    class over all files, and the resulting pdfs are stored in
    ``self.train_pdfs1D[observable][class]``.
    """
    classes = preferences.CLASSES
    observables = preferences.OBSERVABLES

    # Fresh accumulators, keyed [observable][class].
    accumulated = {}
    for obs in observables:
        accumulated[obs] = {}
        self.train_pdfs1D[obs] = {}
        for cls in classes:
            accumulated[obs][cls] = data_tools.initialize_histogram(obs)

    # Add the histogram of every training file to its class accumulator.
    for cls in classes:
        for path in train_fnames[cls]:
            data_A, data_B = data_tools.extract_individual_data(np.load(path))
            observed = data_tools.compute_observables(data_A, data_B)
            for obs in observables:
                accumulated[obs][cls] += data_tools.compute_histogram_1D(
                    obs, observed[obs])

    # Convert the accumulated histograms to pdfs.
    for obs in observables:
        for cls in classes:
            self.train_pdfs1D[obs][cls] = data_tools.compute_pdf(
                obs, accumulated[obs][cls])
if __name__ == "__main__":
    # Script entry point: accumulate 1D histograms of every observable for
    # each raw class, convert them to pdfs, plot them, and report the run time.
    start_time = time.time()

    data_fnames = file_tools.get_data_fnames('../data/gender_compositions/')

    # Empty accumulators, keyed [observable][class].
    histograms1D = {}
    pdfs1D = {}
    for obs in preferences.OBSERVABLES:
        histograms1D[obs] = {}
        pdfs1D[obs] = {}
        for cls in preferences.CLASSES_RAW:
            histograms1D[obs][cls] = data_tools.initialize_histogram(obs)

    # Add the histogram of every data file to its class accumulator.
    for cls in preferences.CLASSES_RAW:
        for file_path in data_fnames[cls]:
            data_A, data_B = data_tools.extract_individual_data(np.load(file_path))
            obs_data = data_tools.compute_observables(data_A, data_B)
            for obs in preferences.OBSERVABLES:
                histograms1D[obs][cls] += data_tools.compute_histogram_1D(
                    obs, obs_data[obs])

    # Convert the accumulated histograms to pdfs.
    for obs in preferences.OBSERVABLES:
        for cls in preferences.CLASSES_RAW:
            pdfs1D[obs][cls] = data_tools.compute_pdf(obs, histograms1D[obs][cls])

    plot_pdf(pdfs1D)

    elapsed_time = time.time() - start_time
    print('\nTime elapsed %2.2f sec' %elapsed_time)
def get_pdfs(DI, favre=False, nbins=60):
    """Compute PDFs of the velocity components and of the dissipation per dump time.

    For each ``data_py/data_py_<itime>.npy`` file under ``DI['cdir']`` the
    cell data are binned with ``compute_wpdf`` using per-cell weights built
    from the face positions (multiplied by density when ``favre`` is true).
    One column per dump time is written, next to the bin column, to five
    files in ``DI['pdir']``: ``pdfs_uvel.dat``, ``pdfs_vvel.dat``,
    ``pdfs_wvel.dat``, ``pdfs_TKEdiss.dat`` and ``pdfs_logTKEdiss.dat``.

    The cell-size PDF is not produced here; see ``get_dxpdfs``.

    Parameters
    ----------
    DI : dict
        Must provide ``'cdir'`` and ``'pdir'`` (path prefixes used by plain
        string concatenation); it is also handed to the input-file helpers.
    favre : bool, optional
        If true, weight each cell additionally by its ``rho`` value.
    nbins : int, optional
        Number of bins of every PDF.

    Raises
    ------
    ValueError
        If the data header has no ``posf`` or ``pos`` variable, or if
        ``favre`` is true but there is no ``rho`` variable.
    """

    def column(name, required=False):
        # Index of ``name`` in the data header; -1 when an optional variable
        # is absent. Only ValueError (name not in list) is handled, so other
        # failures are no longer silently swallowed.
        try:
            return varNames.index(name)
        except ValueError:
            if required:
                raise ValueError("In basic_stats: no " + name + " variable found")
            return -1

    #--------------------------------------------------------------------------------------------

    dataFiles = glob.glob(DI['cdir'] + "data_py/data_*.npy")
    ntimes = len(dataFiles)
    varNames = get_dataHeaderVars(DI)
    times = get_inputFileParameter(DI, ("dumpTimes",))     # times or ypositions if spatial
    times = times[0:ntimes]    # limit times to number of dataFiles, not all listed dumpTimes

    cCoord = get_inputFileParameter(DI, ("params", "cCoord"))
    umax = get_inputFileParameter(DI, ("initParams", "vel_max"))
    umin = get_inputFileParameter(DI, ("initParams", "vel_min"))

    # v and w are binned on a symmetric range of +-5% of the u velocity span
    vmin = -0.05*np.abs(umax-umin)
    vmax = 0.05*np.abs(umax-umin)
    wmin = -0.05*np.abs(umax-umin)
    wmax = 0.05*np.abs(umax-umin)

    iposf = column("posf", required=True)
    ipos = column("pos", required=True)
    irho = column("rho")
    # NOTE(review): if any of the four variables below is missing, its index
    # is -1 and the last data column is silently used in its place. Confirm
    # whether that should be an error instead.
    idvisc = column("dvisc")
    iuvel = column("uvel")
    ivvel = column("vvel")
    iwvel = column("wvel")

    if irho == -1 and favre:
        raise ValueError("In basic_stats: favre is true, but there is no rho in data file")

    P_uvel = np.zeros([nbins, ntimes])
    P_vvel = np.zeros([nbins, ntimes])
    P_wvel = np.zeros([nbins, ntimes])
    P_diss = np.zeros([nbins, ntimes])
    P_logDiss = np.zeros([nbins, ntimes])

    timeLabels = []

    #--------------------------------------------------------------------------------------------

    for itime in range(ntimes):

        timeLabels.append('"P(t=' + '{:.2e}'.format(times[itime]) + ' s)" ')

        fname = DI['cdir'] + "data_py/data_py_" + "{0:0>5}".format(itime) + ".npy"
        data = np.load(fname)

        print("Processing time # %i of %i" %(itime+1, ntimes))

        x = data[:, ipos]
        xf = data[:, iposf]
        # append one closing face so that xf has one more entry than x
        xf = np.append(xf, x[-1] + (xf[-1] - x[-1]))
        uvel = data[:, iuvel]
        vvel = data[:, ivvel]
        wvel = data[:, iwvel]
        dvisc = data[:, idvisc]

        # cell weight: | |xf_right|^cCoord - |xf_left|^cCoord |; for a cell
        # whose faces have opposite signs the two terms are added instead
        wt1 = np.abs(np.abs(xf[1:])**cCoord - np.abs(xf[0:-1])**cCoord)
        i = np.where(xf[1:] * xf[0:-1] < 0)[0]
        wt1[i] = np.abs(np.abs(xf[i+1])**cCoord + np.abs(xf[i])**cCoord)
        # the crossing between realizations is computed correctly, so we
        # don't need to mask end points
        wt = wt1 * data[:, irho] if favre else wt1

        #--------------

        # only keep cells whose u velocity is noticeably above umin
        j = np.where((uvel - umin) > 0.0001*(umax-umin))
        P_uvel[:, itime], uvel_bins = compute_wpdf(uvel[j], wt[j], umin, umax, nbins)
        P_vvel[:, itime], vvel_bins = compute_wpdf(vvel[j], wt[j], vmin, vmax, nbins)
        P_wvel[:, itime], wvel_bins = compute_wpdf(wvel[j], wt[j], wmin, wmax, nbins)

        # dissipation computation
        uvel = np.append(uvel, uvel[-1])          # get same number of entries as dx
        dvisc = np.append(dvisc, dvisc[-1])
        dx = xf[1:] - xf[0:-1]
        du = uvel[1:] - uvel[0:-1]
        dvisc = 0.5 * (dvisc[1:] + dvisc[0:-1])
        diss = dvisc * (du / dx)**2.0             # uses dx from cell centers and averaged dvisc

        # don't compute dissipation at crossing and don't count points with
        # negligible dissipation
        j = np.where(np.logical_and(dx > 0.0, diss > 10**(-6)))
        P_diss[:, itime], diss_bins = compute_wpdf(diss[j], wt[j], 0, 1000, nbins)
        P_logDiss[:, itime], logDiss_bins = compute_wpdf(np.log10(diss[j]), wt[j], -6, 8, nbins)

    dumpTimesString = ''.join(timeLabels)

    #--------------

    outputs = (
        ('pdfs_uvel.dat',       uvel_bins,    P_uvel,    " u[m/s] "),
        ('pdfs_vvel.dat',       vvel_bins,    P_vvel,    " v[m/s] "),
        ('pdfs_wvel.dat',       wvel_bins,    P_wvel,    " w[m/s] "),
        ('pdfs_TKEdiss.dat',    diss_bins,    P_diss,    " TKEdiss "),
        ('pdfs_logTKEdiss.dat', logDiss_bins, P_logDiss, " log10(TKEdiss) "),
    )
    for outName, binValues, pdf, label in outputs:
        # first column: bins; remaining columns: one PDF per dump time
        var = np.vstack([binValues, pdf.T]).T
        head = label + dumpTimesString
        np.savetxt(DI['pdir'] + outName, var, header=head, fmt="%15.8e ", comments=commentHdr)
def get_dxpdfs(DI, nbins=60):
    """Compute the PDF of log10(cell size) at every dump time and save it.

    Each ``data_py/data_py_<itime>.npy`` file under ``DI['cdir']`` holds all
    realizations stacked row-wise. Realizations are separated wherever the
    ``posf`` column decreases from one row to the next. The cell sizes of all
    realizations are pooled and binned with ``compute_pdf`` between
    ``log10(dxmin*domainLength)`` and ``log10(dxmax*domainLength)``. The
    result (bin column followed by one PDF column per dump time) is written
    to ``DI['pdir'] + 'dxpdfs.dat'``.

    Parameters
    ----------
    DI : dict
        Must provide ``'cdir'`` and ``'pdir'`` (path prefixes used by plain
        string concatenation); it is also handed to the input-file helpers.
    nbins : int, optional
        Number of bins of every PDF.

    Raises
    ------
    ValueError
        If the data header has no ``posf`` or ``pos`` variable.
    """

    #--------------------------------------------------------------------------------------------

    dataFiles = glob.glob(DI['cdir'] + "data_py/data_*.npy")
    ntimes = len(dataFiles)
    varNames = get_dataHeaderVars(DI)
    times = get_inputFileParameter(DI, ("dumpTimes",))     # times or ypositions if spatial
    times = times[0:ntimes]    # limit times to number of dataFiles, not all listed dumpTimes

    dxmin = get_inputFileParameter(DI, ("params", "dxmin"))
    dxmax = get_inputFileParameter(DI, ("params", "dxmax"))
    L = get_inputFileParameter(DI, ("params", "domainLength"))
    dxmin *= L
    dxmax *= L

    # list.index raises ValueError for a missing name; nothing else is caught
    try:
        iposf = varNames.index("posf")
    except ValueError:
        raise ValueError("In basic_stats: no posf variable found")
    try:
        ipos = varNames.index("pos")
    except ValueError:
        raise ValueError("In basic_stats: no pos variable found")

    dxpdfs = np.zeros((nbins, ntimes))

    #--------------------------------------------------------------------------------------------

    for itime in range(ntimes):

        fname = DI['cdir'] + "data_py/data_py_" + "{0:0>5}".format(itime) + ".npy"
        data_all = np.load(fname)

        # Use the looked-up posf column (not a hard-coded column 1) so the
        # realization boundaries follow the same variable as the cell sizes.
        posf_all = data_all[:, iposf]
        breaks = np.where(posf_all[1:] - posf_all[0:-1] < 0.0)[0]
        istarts = np.insert(breaks + 1, 0, 0)
        iends = np.append(breaks, len(posf_all) - 1)
        nrlz = len(istarts)       # some sims end early so compute nrlz for each time

        dxParts = []
        for irlz, (i_s, i_e) in enumerate(zip(istarts, iends)):

            print("Processing time # %i of %i; for realization %i of %i" %(itime+1, ntimes, irlz+1, nrlz))

            data = data_all[i_s:i_e+1, :]
            x = data[:, ipos]
            xf = data[:, iposf]
            # NOTE(review): x[-1] + (xf[-1] - x[-1]) is algebraically xf[-1],
            # so the last cell width of each realization is ~0 -- confirm
            # whether x[-1] + (x[-1] - xf[-1]) was intended.
            xf = np.append(xf, x[-1] + (xf[-1] - x[-1]))
            dxParts.append(xf[1:] - xf[0:-1])

        # pool all realizations once instead of growing an array per realization
        dx = np.concatenate(dxParts)

        #--------------

        dxpdfs[:, itime], bins = compute_pdf(np.log10(dx), np.log10(dxmin), np.log10(dxmax), nbins)

    bdx = np.vstack([bins, dxpdfs.T]).T

    #--------------

    # one 22-character-wide column label per dump time (longer labels are not truncated)
    head = " log(dx)[m] " + "".join(
        ("loc_" + str(i+2) + "_" + str(t)).ljust(22) for i, t in enumerate(times)
    )

    np.savetxt(DI['pdir'] + 'dxpdfs.dat', bdx, header=head, fmt="%15.8e ", comments=commentHdr)