def graph_experimental_data(DATA_DIR, OUTPUT_DIR):
    """Plot all experimental conversion curves on one figure and export each
    dataset as a csv under <OUTPUT_DIR>/experimental.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory
    """
    # make output directory
    DIR = os.path.join(OUTPUT_DIR, 'experimental')
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    # full conversion range: the raw data is plotted unfiltered
    low = 0.0
    high = 1.0
    # get the csv data in a list
    Csvs = get_data(DATA_DIR)
    fig = plt.figure()
    # all temperatures are collected on a single graph
    Plot = os.path.join(DIR, 'experimental_conversion.' + graph_format)
    timeUnits = ''  # fallback so the xlabel below is valid even with no data files
    for Csv in Csvs:
        df = pd.read_csv(Csv)  # read data file
        conversion, time, temperature = read_filtrated_datafile(df, low, high)
        # read variable units
        timeUnits, tempUnits = read_units(df)
        plt.scatter(time, conversion, s=10, label=str(temperature) + tempUnits)
        # export experimental data
        data = {
            'time': time,
            'conversion': conversion,
            'temperature': temperature,
            'temperature_units': tempUnits
        }
        export_df = pd.DataFrame(data)
        # BUG FIX: derive the export name from the csv path itself instead of
        # indexing a separate os.listdir() list, whose ordering/content is not
        # guaranteed to match the paths returned by get_data()
        csv_name = os.path.basename(Csv).split('.csv')[0] + '_experimental_conversion.csv'
        export_df.to_csv(os.path.join(DIR, csv_name), index=False)
    plt.xlim(0, )  # left limit only; right limit autoscales
    plt.ylim(0, 1.0)
    plt.legend()
    plt.ylabel('conversion')
    plt.xlabel('time [' + timeUnits + ']')
    plt.tight_layout()
    plt.savefig(Plot, format=graph_format, dpi=graph_dpi)
    plt.close()  # to avoid memory warnings
def measures2heatmaps(OUTPUT_DIR):
    """Render a heatmap for every accuracy csv produced by the three
    regression stages under OUTPUT_DIR."""
    # the three regression output directories
    ext = [
        'integral_regression', 'differential_regression',
        'conversion_regression'
    ]
    # loop over directories
    for DIR in ext:
        # set working directory
        WDIR = os.path.join(OUTPUT_DIR, DIR)
        # get all csvs in the directory
        Csvs = get_data(WDIR)
        # loop over the csv files in this DIR
        for Csv in Csvs:
            df = pd.read_csv(Csv)
            # BUG FIX: name the heatmap after the csv file itself; pairing the
            # get_data() order with a filtered os.listdir() order by index is
            # fragile and can mislabel heatmaps
            fname = os.path.basename(Csv).split('.csv')[0]
            df2heatmap(WDIR, df, fname)
def export_experimental_reaction(DATA_DIR, OUTPUT_DIR, pdeg, npoints):
    """Export the experimental reaction rate (polynomial-smoothed and
    numerical) for every data file to <OUTPUT_DIR>/experimental.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory
    pdeg       : degree of the polynomial fitted to the conversion data
    npoints    : number of polynomial interpolation points
    """
    # make output directory
    DIR = os.path.join(OUTPUT_DIR, 'experimental')
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    # full conversion range
    low = 0.0
    high = 1.0
    # get the csv data in a list
    Csvs = get_data(DATA_DIR)
    for Csv in Csvs:
        # get experimental conversion
        df = pd.read_csv(Csv)
        conversion, time, temperature = read_filtrated_datafile(df, low, high)
        # experimental reaction rate from polynomial conversion
        dadt_polynomial = data2Polrate(Csv, low, high, pdeg, npoints)
        # experimental reaction rate from actual conversion
        dadt_numerical = data2Exprate(Csv, low, high)
        data = {
            'time': time,
            'dadt_polynomial': dadt_polynomial,
            'dadt_numerical': dadt_numerical,
            'temperature': temperature
        }
        rate_df = pd.DataFrame(data)
        # BUG FIX: derive the export name from the csv path itself instead of
        # indexing a separate os.listdir() list, whose ordering/content is not
        # guaranteed to match the paths returned by get_data()
        csv_name = os.path.basename(Csv).split('.csv')[0] + '_reaction_rate.csv'
        rate_df.to_csv(os.path.join(DIR, csv_name), index=False)
def criteria2desicionIndex(DATA_DIR, OUTPUT_DIR, measure, fitExp):
    """Combine, per experimental file, the accuracy measures of the four
    fitting stages into a per-model L2-norm decision index and export it to
    <OUTPUT_DIR>/desicion.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory holding the regression results
    measure    : name of the accuracy column to combine (e.g. 'mse')
    fitExp     : if True use the experimental-rate fit errors, otherwise the
                 polynomial-rate fit errors
    """

    def findMeasure(df, measure, modelname):
        # return the value for a particular measure and model
        return df[df['model'] == modelname][measure].iloc[0]

    def df2erarray(df, measure, models):
        # return all the measure values for all models
        err = np.zeros(len(models))
        for i_model, modelname in enumerate(models):
            err[i_model] = findMeasure(df, measure, modelname)
        return err

    # output directory for desicions
    ODIR = os.path.join(OUTPUT_DIR, 'desicion')
    if not os.path.exists(ODIR):
        os.makedirs(ODIR)
    # get names of the experimental data files
    # BUG FIX: previously os.listdir(DATA) — it silently relied on a
    # module-level global instead of the DATA_DIR parameter
    bnames = [f.split('.csv')[0] for f in os.listdir(DATA_DIR)]
    # the candidate accuracy csvs are the same for every data file:
    # scan the three regression directories once, outside the loop
    conv_dir_csvs = get_data(os.path.join(OUTPUT_DIR, 'conversion_regression'))
    convReg_csvs = [f for f in conv_dir_csvs if 'conversion_regression_accuracy' in f]
    exprateFit_csvs = [f for f in conv_dir_csvs if 'experimental_rate_fit_accuracy' in f]
    polrateFit_csvs = [f for f in conv_dir_csvs if 'polynomial_rate_fit_accuracy' in f]
    int_dir_csvs = get_data(os.path.join(OUTPUT_DIR, 'integral_regression'))
    interateReg_csvs = [f for f in int_dir_csvs if 'integral_regression_accuracy' in f]
    diff_dir_csvs = get_data(os.path.join(OUTPUT_DIR, 'differential_regression'))
    diffrateReg_csvs = [f for f in diff_dir_csvs if 'differential_regression_accuracy' in f]
    # loop over files (temperatures)
    for bn in bnames:
        # paths of the accuracy csvs belonging to this data file
        convReg_csv = [i for i in convReg_csvs if bn in i][0]
        exprateFit_csv = [i for i in exprateFit_csvs if bn in i][0]
        polrateFit_csv = [i for i in polrateFit_csvs if bn in i][0]
        interateReg_csv = [i for i in interateReg_csvs if bn in i][0]
        diffrateReg_csv = [i for i in diffrateReg_csvs if bn in i][0]
        # get the modelnames
        models = pd.read_csv(convReg_csv)['model'].tolist()
        # measure values per fitting stage
        convReg_err = df2erarray(pd.read_csv(convReg_csv), measure, models)
        if fitExp:
            rateFit_err = df2erarray(pd.read_csv(exprateFit_csv), measure, models)
        else:
            rateFit_err = df2erarray(pd.read_csv(polrateFit_csv), measure, models)
        interateReg_err = df2erarray(pd.read_csv(interateReg_csv), measure, models)
        diffrateReg_err = df2erarray(pd.read_csv(diffrateReg_csv), measure, models)
        # per-model error vectors: one measure value per fitting stage
        error_data = tuple(zip(convReg_err, rateFit_err, interateReg_err, diffrateReg_err))
        # base vector (ideal values: zero error in every stage)
        b = np.zeros(len(error_data[0]))
        # calculate the L2 norm of the error vector for each model
        L2_norm = np.zeros(len(models))
        for i_model in range(len(models)):
            a = np.array(error_data[i_model])
            L2_norm[i_model] = np.linalg.norm(a - b)
        # assemble and export the decision index, best model first
        measure_key = measure + '_L2_norm'
        data = {'model': models, measure_key: L2_norm}
        des_df = pd.DataFrame(data)
        des_df.sort_values(by=[measure_key], inplace=True)
        # export csv
        Expname = os.path.join(ODIR, bn + '_' + measure + '_desicion.csv')
        des_df.to_csv(Expname, index=False)
# DIRECTORIES MAIN_DIR = os.getcwd() # current working directory DATA = os.path.join(MAIN_DIR, 'data') # data directory OUTPUT = os.path.join(MAIN_DIR, 'output') # output directory # limit conversion fraction low = 0.05 high = 0.95 # polynomial degree and interpolation points for the polynomial fit of the experimental conversion fraction pdeg = 9 npoints = 1000 # get data files Csvs = get_data(DATA) # plot and export solely the experimental data graph_experimental_data(DATA, OUTPUT) # perform linear regression on the integral rate experimental data data2integralFit(DATA, OUTPUT, modelNames, low, high) # perform non-linear regression on the exact conversion data2conversionFit(DATA, OUTPUT, modelNames, low, high) # perform non-linear regression on the differential rate experimental data data2differentialFit(DATA, OUTPUT, modelNames, low, high) # export reaction rate data export_experimental_reaction(DATA, OUTPUT, pdeg, npoints)
def ratedata2Fit(DATA_DIR, OUTPUT_DIR, modelNames, low, high, pdeg, npoints, fitExp):
    """Fit every kinetic model to the experimental conversion data, validate
    the modeled differential reaction rate against the experimental rate, and
    export the per-model accuracy measures.

    DATA_DIR   : directory containing data
    OUTPUT_DIR : output directory
    modelNames : names of the kinetic models to evaluate
    low        : lower limit for conversion fraction
    high       : upper limit for conversion fraction
    pdeg       : polynomial degree for the conversion fit
    npoints    : number of polynomial interpolation points
    fitExp     : if True validate against the numerical experimental rate,
                 otherwise against the polynomial-smoothed rate
    """
    # make output directory
    DIR = os.path.join(OUTPUT_DIR, 'conversion_regression')
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    # get csvs
    Csvs = get_data(DATA_DIR)
    # filenames
    fnames = os.listdir(DATA_DIR)
    for indx, Csv in enumerate(Csvs):
        # get dataframe
        df = pd.read_csv(Csv)
        # get experimental conversion
        conversion, time, temperature = read_filtrated_datafile(df, low, high)
        # read variable units
        timeUnits, tempUnits = read_units(df)
        # experimental reaction rate from polynomial conversion
        dadt_polynomial = data2Polrate(Csv, low, high, pdeg, npoints)
        # experimental reaction rate from actual conversion
        dadt_numerical = data2Exprate(Csv, low, high)
        # accuracy criteria
        ss_res = []       # sum of square residuals (ideal = 0)
        mse = []          # mean square error (ideal = 0)
        res_AEr = []      # residuals absolute error (ideal = 0)
        res_REr = []      # residuals relative error (ideal = 0)
        k_arrhenius = []  # Arrhenius rate constant
        # loop over all models
        for modelName in modelNames:
            # pick up a model
            model = Model(modelName)
            if modelName not in ['D2', 'D4']:
                # regress the conversion curve directly, then evaluate the
                # modeled differential rate on the fitted conversion
                y = conversion
                k, yfit = conversionRegression(time, conversion, modelName)
                dadt_model = np.array([k * model.f(a) for a in yfit])
                yfit = dadt_model
            else:
                # D2/D4 have no closed-form a(t): regress the integral rate
                # g(a) = k*t and evaluate the rate on the experimental points
                y = np.array([model.g(c) for c in conversion])
                k, yfit = integralRateRegression(time, conversion, modelName)
                dadt_model = np.array([k * model.f(a) for a in conversion])
                yfit = dadt_model
            # reference rate used for validation
            if fitExp:
                y = dadt_numerical
            else:
                y = dadt_polynomial
            # calculate validation errors
            ss_res.append(ssRes(y, yfit))
            mse.append(MSE(y, yfit))
            res_AEr.append(resAEr(y, yfit))
            res_REr.append(resREr(y, yfit))
            k_arrhenius.append(k)
        # export regression accuracy data
        # BUG FIX: k_arrhenius was collected but never exported, although the
        # graphing stage expects a 'k_arrhenius' column in the accuracy csvs
        error_data = {
            'model': modelNames,
            'k_arrhenius': k_arrhenius,
            'ss_res': ss_res,
            'mse': mse,
            'resAEr': res_AEr,
            'resREr': res_REr,
            'temperature': temperature,
            'temperature_units': tempUnits
        }
        df = pd.DataFrame(error_data)
        prefix = fnames[indx].split('.csv')[0]
        if fitExp:
            df.to_csv(os.path.join(DIR, prefix + '_experimental_rate_fit_accuracy.csv'), index=False)
        else:
            df.to_csv(os.path.join(DIR, prefix + '_polynomial_rate_fit_accuracy.csv'), index=False)
def rateFitGraphs(DATA_DIR, OUTPUT_DIR, low, high, pdeg, npoints, fitExp):
    """Plot, per data file and model, the experimental reaction rate against
    the modeled reaction rate and save the figures under
    <OUTPUT_DIR>/conversion_regression/png/rate_fit/<basename>/.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory
    low, high  : conversion fraction limits
    pdeg       : polynomial degree for the conversion fit
    npoints    : number of interpolation points for the fitted curves
    fitExp     : if True plot the numerical experimental rate, otherwise the
                 polynomial-smoothed rate
    """
    # get names (without format suffix) of the data csv files
    # bnames : base names
    bnames = [f.split('.csv')[0] for f in os.listdir(DATA_DIR)]
    # paths of the experimental data csv files
    data_Csvs = get_data(DATA_DIR)
    # metrics directory
    METRICS_DIR = os.path.join(OUTPUT_DIR, 'conversion_regression')
    # paths of the metrics from the conversion regression
    metrics_Csvs = get_data(METRICS_DIR)
    # filter proper csvs
    metrics_Csvs = [f for f in metrics_Csvs if 'conversion_regression_accuracy' in f]
    # zip data files and metrics (sorted into matching order)
    data = sortOnData(bnames, data_Csvs)
    metrics = sortOnData(bnames, metrics_Csvs)
    data_and_metrics = list(zip(data, metrics))
    # loop over all data
    for i_csv, csv in enumerate(data_and_metrics):
        # make directory for the graphs
        DIR = os.path.join(METRICS_DIR, 'png')
        DIR = os.path.join(DIR, 'rate_fit')
        GRAPH_DIR = os.path.join(DIR, bnames[i_csv])
        if not os.path.exists(GRAPH_DIR):
            os.makedirs(GRAPH_DIR)
        # data dataframe
        data_df = pd.read_csv(csv[0])
        # metrics dataframe
        metrics_df = pd.read_csv(csv[1])
        # data
        conversion, time, temperature = read_filtrated_datafile(data_df, low, high)
        # read variable units
        timeUnits, tempUnits = read_units(data_df)
        # experimental reaction rate from polynomial conversion
        dadt_polynomial = data2Polrate(csv[0], low, high, pdeg, npoints)
        # experimental reaction rate from actual conversion
        dadt_numerical = data2Exprate(csv[0], low, high)
        modelNames = metrics_df['model'].tolist()
        ks = metrics_df['k_arrhenius'].to_numpy()
        # choose the experimental reaction rate to plot against
        if fitExp:
            y = dadt_numerical
        else:
            y = dadt_polynomial
        x = time
        # loop over models
        for i_model, modelName in enumerate(modelNames):
            # pick up a model
            model = Model(modelName)
            # choose the corresponding arrhenius rate constant
            k = ks[i_model]
            if modelName not in ['D2', 'D4']:
                # integrate the model on a dense grid, then evaluate the
                # modeled differential reaction rate on it
                tfit = np.linspace(time[0], time[-1], num=npoints)
                yfit = np.array([model.alpha(t, k) for t in tfit])
                dadt_model = np.array([k * model.f(a) for a in yfit])
                yfit = dadt_model
                xfit = tfit
            else:
                # BUG FIX: D2/D4 previously fell through with xfit/yfit
                # undefined (NameError on the first such model) or stale from
                # the previous iteration; mirror ratedata2Fit and evaluate the
                # rate on the experimental conversion points
                dadt_model = np.array([k * model.f(a) for a in conversion])
                yfit = dadt_model
                xfit = time
            # export a graph for the fitting of the reaction rate
            fig = plt.figure()
            if fitExp:
                ext = '_experimental_rate_fit.'
            else:
                ext = '_polynomial_rate_fit.'
            fname = modelName + '_' + bnames[i_csv] + ext + graph_format
            Plot = os.path.join(GRAPH_DIR, fname)
            plt.scatter(x, y, s=10, label='experimental')
            plt.plot(xfit, yfit, lw=lwidth, label=modelName)
            plt.legend()
            plt.ylabel(r'reaction rate')
            plt.xlabel('time [' + timeUnits + ']')
            plt.tight_layout()
            plt.savefig(Plot, format=graph_format, dpi=graph_dpi)
            plt.close()  # to avoid memory warnings
def integralRegressionGraphs(DATA_DIR, OUTPUT_DIR, low, high, npoints):
    """Plot the experimental integral rate g(a) versus time together with the
    fitted straight line k*t for every model/data-file pair, saving the
    figures under <OUTPUT_DIR>/integral_regression/png/<basename>/.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory
    low, high  : conversion fraction limits
    npoints    : number of points on the fitted line
    """
    # base names (no .csv suffix) of the experimental data files
    base_names = [f.split('.csv')[0] for f in os.listdir(DATA_DIR)]
    # pair each data csv with its integral-regression metrics csv,
    # sorted into the same order as base_names
    METRICS_DIR = os.path.join(OUTPUT_DIR, 'integral_regression')
    pairs = list(zip(sortOnData(base_names, get_data(DATA_DIR)),
                     sortOnData(base_names, get_data(METRICS_DIR))))
    for bname, (data_csv, metrics_csv) in zip(base_names, pairs):
        # one graph directory per data file
        GRAPH_DIR = os.path.join(METRICS_DIR, 'png', bname)
        if not os.path.exists(GRAPH_DIR):
            os.makedirs(GRAPH_DIR)
        data_df = pd.read_csv(data_csv)
        metrics_df = pd.read_csv(metrics_csv)
        # experimental data and units
        conversion, time, temperature = read_filtrated_datafile(data_df, low, high)
        timeUnits, tempUnits = read_units(data_df)
        # fitted Arrhenius rate constants, one per model
        rate_constants = metrics_df['k_arrhenius'].to_numpy()
        for name, k in zip(metrics_df['model'].tolist(), rate_constants):
            model = Model(name)
            # experimental integral reaction rate
            g_exp = np.array([model.g(a) for a in conversion])
            # straight-line fit g(a) = k*t on a dense time grid
            tfit = np.linspace(time[0], time[-1], num=npoints)
            # export a graph for the fitting of the integral reaction rate
            fig = plt.figure()
            fname = name + '_' + bname + '_integral_regression.' + graph_format
            plt.scatter(time, g_exp, s=10, label='experimental')
            plt.plot(tfit, k * tfit, lw=lwidth, label=r'kt')
            plt.legend()
            plt.ylabel(r'g (a)')
            plt.xlabel('time [' + timeUnits + ']')
            plt.tight_layout()
            plt.savefig(os.path.join(GRAPH_DIR, fname), format=graph_format, dpi=graph_dpi)
            plt.close()  # to avoid memory warnings
def integral_isoconversional(DATA_DIR, OUTPUT_DIR, low, high):
    """Model-free (isoconversional) analysis.

    Interpolates the time needed to reach fixed conversion levels at every
    temperature, then fits the logarithmic Arrhenius equation
    ln(t) = Ea/(R*T) + ln[g(a)/A] across temperatures at each conversion
    level, and exports the activation enthalpies to
    <OUTPUT_DIR>/isoconversional/isoconversional_energy.csv.

    DATA_DIR   : directory containing the experimental data csv files
    OUTPUT_DIR : root output directory
    low, high  : conversion fraction limits for the isoconversional grid
    """

    def interpolate_time(time, conversion, interpolated_conversion):
        # Return the time at the requested conversion levels using
        # nearest-neighbour interpolation (with extrapolation at the edges).
        # BUG FIX: the parameters were previously overwritten with the
        # enclosing-scope variables of the same values, making them dead.
        interpol = interp1d(conversion, time, kind='nearest',
                            fill_value="extrapolate")
        return interpol(interpolated_conversion)

    def isoconversional_enthalpy(time, temperature):
        # Linear regression of ln(t) on 1/T for a single conversion level.
        # Returns ([Ea in kJ/mol, mean squared error of the fit], intercept
        # ln[g(a)A]).
        R = 8.31446261815324  # gas constant, J K-1 mol-1
        x = (1.0 / temperature).reshape((-1, 1))
        y = np.log(time)
        # linear regression for the logarithmic Arrhenius equation
        regr = LinearRegression()
        regr.fit(x, y)
        y_pred = regr.predict(x)
        Ea = regr.coef_[0] * R * 1.0e-3  # slope = Ea/R -> kJ mol-1
        gA = regr.intercept_
        mse = mean_squared_error(y, y_pred)
        # NOTE(review): an unused R2 (computed with swapped r2_score
        # arguments) was removed here
        return [Ea, mse], gA

    Csvs = get_data(DATA_DIR)
    # fixed grid of conversion levels shared by all temperatures
    npoints = 10
    interpolated_conversion = np.linspace(low, high, npoints)
    isoconversional_data = {'conversion': interpolated_conversion}
    for csv in Csvs:
        df = pd.read_csv(csv)  # read a data file
        conversion, time, temperature = read_filtrated_datafile(df, low, high)
        # temperature column converted to Kelvin if given in Celsius
        if df['temperature units'][0] == 'C':
            theta = df['temperature'].to_numpy()
            T = Celsius2Kelvin(theta)
        else:
            T = df['temperature'].to_numpy()
        # each file is treated as isothermal: the first entry labels the run
        # (assumption carried over from the data layout — verify upstream)
        temperature = T[0]
        # get time at the specified interpolated_conversion points
        interpolated_time = interpolate_time(time, conversion, interpolated_conversion)
        isoconversional_data.update({str(temperature): interpolated_time})
    df = pd.DataFrame.from_dict(isoconversional_data)
    # conversion levels and temperatures (every column except 'conversion')
    y = df['conversion'].to_numpy()
    temperature = np.array([float(i) for i in df.columns.values if i != 'conversion'])
    Ea = []         # activation energy (kJ/mol)
    intercept = []  # intercept ln[A g(a)]
    MSE = []        # mean squared error of each fit
    # one Arrhenius regression per conversion level (row of df)
    for i in range(df.shape[0]):
        # times to reach this conversion level at every temperature
        time = df.iloc[i, 1::].to_numpy()
        enthalpy, gA = isoconversional_enthalpy(time, temperature)
        Ea.append(enthalpy[0])
        MSE.append(enthalpy[1])
        intercept.append(gA)
    isoconversional_data = {
        'activation_enthalpy': Ea,
        'std': MSE,
        'intercept': intercept,
        'conversion': y
    }
    df = pd.DataFrame.from_dict(isoconversional_data)
    # make output directory
    DIR = os.path.join(OUTPUT_DIR, 'isoconversional')
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    df.to_csv(os.path.join(DIR, 'isoconversional_energy.csv'), index=False)