def column_of_data(data_file_path, start, column, end="-1", units=""):
    """Extract one column of data from a tab-delimited ProCoDA data file.

    Parameters
    ----------
    data_file_path : string
        File path. If the file is in the working directory, then the file
        name is sufficient.
    start : int
        Index of the first row of data to extract (inclusive).
    column : int or string
        int: Index of the column that you want to extract. Column 0 is time.
        The first data column is column 1.
        string: Name of the column header that you want to extract.
    end : int or None, optional
        Row index at which extraction stops (exclusive, ordinary slice
        semantics). The default of -1 therefore stops just before the final
        row; pass None to read through the last row.
    units : string, optional
        The units you want to apply to the data, e.g. 'mg/L'.
        Defaults to "" which indicates no units.

    Returns
    -------
    numpy array
        The selected rows converted to numbers, multiplied by ``u(units)``
        when ``units`` is not empty.

    Raises
    ------
    KeyError
        If ``column`` is a header name that is not present in the file.

    Examples
    --------
    column_of_data("Reactor_data.txt", 0, 1, None, "mg/L")

    """
    start = int(start)
    # None means "no upper limit"; anything else is coerced like the start.
    end = None if end is None else int(end)

    df = pd.read_csv(data_file_path, delimiter='\t')

    # Both ways of naming a column honour the same start:end row window.
    if isinstance(column, int):
        selected = df.iloc[start:end, column]
    else:
        selected = df[column].iloc[start:end]

    data = np.array(pd.to_numeric(selected))
    if units == "":
        return data
    return data * u(units)
def Solver_AD_Pe(t_data, C_data, theta_guess, C_bar_guess):
    """Fit Tracer_AD_Pe(t_seconds, t_bar, C_bar, Pe) to reactor tracer data
    by bounded non-linear least squares.

    The first and last samples of both input arrays are discarded before
    fitting, and every fitted parameter is bounded below by 0.01.

    :param t_data: Array of times with units
    :type t_data: float list
    :param C_data: Array of tracer concentration data with units
    :type C_data: float list
    :param theta_guess: Estimate of time spent in one CMFR with units.
    :type theta_guess: float
    :param C_bar_guess: Estimate of average concentration with units ((mass of tracer)/(volume of one CMFR))
    :type C_bar_guess: float

    :return: namedtuple ``Reactor_results`` of

        * **theta** (*float*) - Residence time in seconds
        * **C_bar** (*float*) - Average concentration with same units as C_bar_guess
        * **Pe** (*float*) - Peclet number that best fits the data
    """
    # Dropping the time=0 sample avoids a divide-by-zero in the model.
    # NOTE(review): the [1:-1] slice also discards the final sample -
    # confirm whether that is intended.
    t_trimmed = t_data[1:-1]
    C_trimmed = C_data[1:-1]

    concentration_units = str(C_bar_guess.units)

    # Starting point for the optimiser: the caller's two estimates plus a
    # Peclet number of 5.
    initial_guess = [
        theta_guess.to(u.s).magnitude,
        C_bar_guess.magnitude,
        5,
    ]

    best_fit, _covariance = curve_fit(
        Tracer_AD_Pe,
        (t_trimmed.to(u.s)).magnitude,
        C_trimmed.magnitude,
        initial_guess,
        bounds=(0.01, np.inf),
    )
    theta_fit, C_bar_fit, Pe_fit = best_fit

    Reactor_results = collections.namedtuple('Reactor_results', 'theta C_bar Pe')
    return Reactor_results(
        theta=theta_fit * u.s,
        C_bar=C_bar_fit * u(concentration_units),
        Pe=Pe_fit,
    )
def Solver_CMFR_N(t_data, C_data, theta_guess, C_bar_guess):
    """Fit Tracer_CMFR_N(t_seconds, t_bar, C_bar, N) to reactor tracer data
    by non-linear least squares.

    :param t_data: Array of times with units
    :type t_data: float list
    :param C_data: Array of tracer concentration data with units
    :type C_data: float list
    :param theta_guess: Estimate of time spent in one CMFR with units.
    :type theta_guess: float
    :param C_bar_guess: Estimate of average concentration with units ((mass of tracer)/(volume of one CMFR))
    :type C_bar_guess: float

    :return: namedtuple ``Reactor_results`` of

        * **theta** (*float*) - Residence time in seconds
        * **C_bar** (*float*) - Average concentration with same units as C_bar_guess
        * **N** (*float*) - Number of CMFRs in series that best fit the data
    """
    concentration_units = str(C_bar_guess.units)

    # The optimiser starts from the caller's two estimates and a single
    # reactor in series, which is assumed close enough to converge.
    initial_guess = [
        theta_guess.to(u.s).magnitude,
        C_bar_guess.magnitude,
        1,
    ]

    best_fit, _covariance = curve_fit(
        Tracer_CMFR_N,
        (t_data.to(u.s)).magnitude,
        C_data.magnitude,
        initial_guess,
    )
    theta_fit, C_bar_fit, N_fit = best_fit

    Reactor_results = collections.namedtuple('Reactor_results', 'theta C_bar N')
    return Reactor_results(
        theta=theta_fit * u.s,
        C_bar=C_bar_fit * u(concentration_units),
        N=N_fit,
    )
def column_of_data(path, start, column, end=None, units=""):
    """Extract the numeric entries of one column of a ProCoDA data file.

    Note: Column 0 is time. The first data column is column 1.

    Entries in the selected rows that cannot be parsed as numbers are
    dropped; the remaining entries are returned as numbers.

    :param path: The file path of the ProCoDA data file
    :type path: string
    :param start: Index of first row of data to extract, inclusive
    :type start: int
    :param end: Index of last row of data to extract until, exclusive. Defaults to extracting all rows.
    :type end: int, optional
    :param column: Index or label of the column that you want to extract
    :type column: int or string
    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless).
    :type units: string, optional

    :return: The column of data
    :rtype: numpy.ndarray in units of [units]

    :Examples:

    .. code-block:: python

        data = column_of_data("Reactor_data.txt", 0, 1, units="mg/L")
    """
    df = pd.read_csv(path, delimiter='\t')

    if isinstance(column, int):
        data = df.iloc[start:end, column]
    else:
        data = df.iloc[start:end][column]

    # Keep the *converted* values: filtering the raw column would leave
    # numeric-looking strings as strings whenever the column has mixed content.
    numeric = pd.to_numeric(data, errors='coerce')
    return np.array(numeric[numeric.notnull()]) * u(units)
def column_of_data(path, start, column, end="-1", units=""):
    """Extract one column of data from a tab-delimited ProCoDA data file.

    Note: Column 0 is time. The first data column is column 1.

    :param path: The file path of the ProCoDA data file. If the file is in the working directory, then the file name is sufficient.
    :type path: string
    :param start: Index of first row of data to extract from the data file (inclusive)
    :type start: int
    :param end: Row index at which extraction stops (exclusive, ordinary slice semantics). The default of -1 therefore stops just before the final row; pass None to read through the last row.
    :type end: int or None, optional
    :param column: Index of the column that you want to extract OR name of the column header that you want to extract
    :type column: int or string
    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless)
    :type units: string, optional

    :return: The selected rows converted to numbers, multiplied by ``u(units)`` when ``units`` is not empty.
    :rtype: numpy.array

    :raises KeyError: if ``column`` is a header name that is not present in the file

    :Examples:

    .. code-block:: python

        data = column_of_data("Reactor_data.txt", 0, 1, None, "mg/L")
    """
    start = int(start)
    # None means "no upper limit"; anything else is coerced like the start.
    end = None if end is None else int(end)

    df = pd.read_csv(path, delimiter='\t')

    # Both ways of naming a column honour the same start:end row window.
    if isinstance(column, int):
        selected = df.iloc[start:end, column]
    else:
        selected = df[column].iloc[start:end]

    data = np.array(pd.to_numeric(selected))
    if units == "":
        return data
    return data * u(units)
def Solver_AD_Pe(t_data, C_data, theta_guess, C_bar_guess):
    """Fit Tracer_AD_Pe(t_seconds, t_bar, C_bar, Pe) to reactor tracer data
    by non-linear least squares.

    Parameters
    ----------
    t_data : float list
        Array of times with units
    C_data : float list
        Array of tracer concentration data with units
    theta_guess : float
        Estimate of time spent in one CMFR with units.
    C_bar_guess : float
        Estimate of average concentration with units
        (Mass of tracer)/(volume of one CMFR)

    Returns
    -------
    namedtuple ``Reactor_results``
        theta : float
            residence time in seconds
        C_bar : float
            average concentration with same units as C_bar_guess
        Pe : float
            peclet number that best fits the data

    """
    concentration_units = str(C_bar_guess.units)

    # Starting point for the optimiser: the caller's two estimates plus a
    # Peclet number of 5.
    initial_guess = [
        theta_guess.to(u.s).magnitude,
        C_bar_guess.magnitude,
        5,
    ]

    best_fit, _covariance = curve_fit(
        Tracer_AD_Pe,
        (t_data.to(u.s)).magnitude,
        C_data.magnitude,
        initial_guess,
    )
    theta_fit, C_bar_fit, Pe_fit = best_fit

    Reactor_results = collections.namedtuple('Reactor_results', 'theta C_bar Pe')
    return Reactor_results(
        theta=theta_fit * u.s,
        C_bar=C_bar_fit * u(concentration_units),
        Pe=Pe_fit,
    )
def perform_function_on_state(func, dates, state, column, units="", path="", extension=".xls"):
    """Apply ``func`` to the data recorded during each instance of a state.

    For every date, the files ``path + "statelog " + date + extension`` and
    ``path + "datalog " + date + extension`` are read as tab-delimited text.
    An instance of the state starts at the time logged next to ``state`` in
    the state log and ends at the time of the following state-log entry. If
    the state is the last entry of a file, the instance runs to the end of
    that day's data and is continued with the data at the start of the next
    date, up to that file's first state-log entry.

    Note: Column 0 is time. The first data column is column 1.

    :param func: A function that will be applied to data from each instance of the state
    :type func: function
    :param dates: A single date or list of dates for which data was recorded, formatted "M-D-YYYY"
    :type dates: string or string list
    :param state: The state ID number for which data should be extracted
    :type state: int
    :param column: Index of the column that you want to extract OR header of the column that you want to extract
    :type column: int or string
    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless)
    :type units: string, optional
    :param path: Prefix prepended to the file names (include the trailing separator). Defaults to "" (working directory).
    :type path: string, optional
    :param extension: The file extension of the tab delimited file. Defaults to ".xls" if no argument is passed in
    :type extension: string, optional

    :requires: func takes in a list of data with units and outputs the correct units

    :return: The outputs of the given function for each instance of the given state. When ``units`` is given, the result carries the units returned by ``func``.
    :rtype: numpy.array

    :raises KeyError: if ``column`` is a header that is not present in a data file

    :Examples:

    .. code-block:: python

        def avg_with_units(lst):
            num = np.size(lst)
            acc = 0
            for i in lst:
                acc = i + acc

            return acc / num

        data_avgs = perform_function_on_state(avg_with_units, ["6-19-2013", "6-20-2013"], 1, 28, "mL/s")
    """
    data_agg = []
    overnight = False   # the previous file ended while still in `state`
    tail_open = False   # ...and data_agg[-1] holds that unfinished instance

    if not isinstance(dates, list):
        dates = [dates]

    for d in dates:
        state_file = path + "statelog " + d + extension
        data_file = path + "datalog " + d + extension

        states = np.array(pd.read_csv(state_file, delimiter='\t'))
        data_frame = pd.read_csv(data_file, delimiter='\t')

        # A header name has to be resolved while the labels still exist;
        # the plain array built below can only be indexed by position.
        if isinstance(column, int):
            column_idx = column
        else:
            column_idx = data_frame.columns.get_loc(column)
        data = np.array(data_frame)
        n_rows = np.size(data[:, 0])

        # An instance starts at each log entry for `state` and ends at the
        # time of the log entry that follows it.
        state_start_idx = states[:, 1] == state
        state_start = states[state_start_idx, 0]
        state_end_idx = np.append([False], state_start_idx[:-1])
        state_end = states[state_end_idx, 0]

        if overnight:
            # Carry-over from the previous file: it lasts from the start of
            # this file until the first logged state change.
            state_start = np.insert(state_start, 0, 0)
            state_end = np.insert(state_end, 0, states[0, 0])

        ends_in_state = bool(state_start_idx[-1])
        if ends_in_state:
            # No later log entry closes the final instance, so it lasts
            # until the last recorded time stamp of this file.
            state_end = np.append(state_end, data[-1, 0])

        n_instances = np.size(state_start)
        last_appended = -1
        for i in range(n_instances):
            first_row = None
            last_row = n_rows   # used when no sample lies beyond the end time
            for j in range(n_rows):
                if first_row is None and data[j, 0] > state_start[i]:
                    first_row = j
                if data[j, 0] > state_end[i]:
                    last_row = j - 1
                    break
            if first_row is None:
                # Nothing was recorded after this instance began.
                continue

            # The slice end is exclusive, so the row just before the first
            # later-than-end sample is left out (retained from the original
            # implementation). max() keeps an empty window from turning
            # into a negative slice bound.
            c = data[first_row:max(first_row, last_row), column_idx]

            if overnight and i == 0 and tail_open:
                data_agg[-1] = np.append(data_agg[-1], c)
            else:
                data_agg.append(c)
            last_appended = i

        overnight = ends_in_state
        tail_open = ends_in_state and last_appended == n_instances - 1

    output = np.zeros(len(data_agg))

    if units == "":
        for i, chunk in enumerate(data_agg):
            output[i] = func(chunk)
        return output

    result_units = None
    for i, chunk in enumerate(data_agg):
        result = func(chunk * u(units))
        output[i] = result.magnitude
        result_units = result.units

    if result_units is None:
        # No instance was found, so func never ran; fall back to the
        # requested input units for the empty result.
        return output * u(units)
    return output * result_units
def read_state(dates, state, column, units="", path="", extension=".xls"):
    """Read ProCoDA files and return the time vector and data column recorded
    while the process was in the given state.

    For every date, the files ``path + "statelog " + date + extension`` and
    ``path + "datalog " + date + extension`` are read as tab-delimited text.
    An instance of the state starts at the time logged next to ``state`` in
    the state log and ends at the time of the following state-log entry. If
    the state is the last entry of a file, the instance runs to the end of
    that day's data and is continued with the data at the start of the next
    date, up to that file's first state-log entry. All instances are
    concatenated in the order they are found.

    Note: Column 0 is time. The first data column is column 1.

    :param dates: A single date or list of dates for which data was recorded, formatted "M-D-YYYY"
    :type dates: string or string list
    :param state: The state ID number for which data should be extracted
    :type state: int
    :param column: Index of the column that you want to extract OR header of the column that you want to extract
    :type column: int or string
    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless)
    :type units: string, optional
    :param path: Prefix prepended to the file names (include the trailing separator). Defaults to "" (working directory).
    :type path: string, optional
    :param extension: The file extension of the tab delimited file. Defaults to ".xls" if no argument is passed in
    :type extension: string, optional

    :return: time (numpy.array) - Times corresponding to the data, in days, offset by the zero-based position of the date in ``dates`` and measured from the time stamp in the second row of the first data file
    :return: data (numpy.array) - Data in the given column during the given state, with units when ``units`` is given

    :raises KeyError: if ``column`` is a header that is not present in a data file

    :Examples:

    .. code-block:: python

        time, data = read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s")
    """
    data_agg = []
    day = 0
    first_day = True
    overnight = False   # the previous file ended while still in `state`

    if not isinstance(dates, list):
        dates = [dates]

    for d in dates:
        state_file = path + "statelog " + d + extension
        data_file = path + "datalog " + d + extension

        states = np.array(pd.read_csv(state_file, delimiter='\t'))
        data_frame = pd.read_csv(data_file, delimiter='\t')

        # A header name has to be resolved while the labels still exist;
        # the plain array built below can only be indexed by position.
        if isinstance(column, int):
            column_idx = column
        else:
            column_idx = data_frame.columns.get_loc(column)
        data = np.array(data_frame)
        n_rows = np.size(data[:, 0])

        # An instance starts at each log entry for `state` and ends at the
        # time of the log entry that follows it.
        state_start_idx = states[:, 1] == state
        state_start = states[state_start_idx, 0]
        state_end_idx = np.append([False], state_start_idx[:-1])
        state_end = states[state_end_idx, 0]

        if overnight:
            # Carry-over from the previous file: it lasts from the start of
            # this file until the first logged state change.
            state_start = np.insert(state_start, 0, 0)
            state_end = np.insert(state_end, 0, states[0, 0])

        ends_in_state = bool(state_start_idx[-1])
        if ends_in_state:
            # No later log entry closes the final instance, so it lasts
            # until the last recorded time stamp of this file.
            state_end = np.append(state_end, data[-1, 0])

        if first_day:
            # NOTE(review): the time origin is taken from the second data
            # row, not the first - confirm that this is intended.
            start_time = data[1, 0]
            first_day = False

        for i in range(np.size(state_start)):
            first_row = None
            last_row = n_rows   # used when no sample lies beyond the end time
            for j in range(n_rows):
                if first_row is None and data[j, 0] > state_start[i]:
                    first_row = j
                if data[j, 0] > state_end[i]:
                    last_row = j - 1
                    break
            if first_row is None:
                # Nothing was recorded after this instance began.
                continue

            # The slice end is exclusive, so the row just before the first
            # later-than-end sample is left out (retained from the original
            # implementation). max() keeps an empty window from turning
            # into a negative slice bound.
            last_row = max(first_row, last_row)
            t = data[first_row:last_row, 0] + day - start_time
            c = data[first_row:last_row, column_idx]

            # Everything is stacked into one table at the end, so a
            # carried-over chunk simply follows the chunk it continues.
            data_agg.append(np.vstack((t, c)).T)

        day += 1
        overnight = ends_in_state

    if data_agg:
        stacked = np.vstack(data_agg)
    else:
        # The state never occurred: return empty vectors instead of failing.
        stacked = np.empty((0, 2))

    if units != "":
        return stacked[:, 0] * u.day, stacked[:, 1] * u(units)
    else:
        return stacked[:, 0] * u.day, stacked[:, 1]
def average_state(dates, state, column, units="", path="", extension=".tsv"):
    """Return the average of the data recorded during each instance of a
    state in the given ProCoDA files.

    For every date, the files ``path + "statelog " + date + extension`` and
    ``path + "datalog " + date + extension`` are read as tab-delimited text.
    An instance of the state starts at the time logged next to ``state`` in
    the state log and ends at the time of the following state-log entry. If
    the state is the last entry of a file, the instance runs to the end of
    that day's data and is continued with the data at the start of the next
    date, up to that file's first state-log entry.

    Note: Column 0 is time. The first data column is column 1.

    :param dates: A single date or list of dates for which data was recorded, formatted "M-D-YYYY"
    :type dates: string or string list
    :param state: The state ID number for which data should be extracted
    :type state: int
    :param column: Index of the column that you want to extract OR header of the column that you want to extract
    :type column: int or string
    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless)
    :type units: string, optional
    :param path: Prefix prepended to the file names (include the trailing separator). Defaults to "" (working directory).
    :type path: string, optional
    :param extension: The file extension of the tab delimited file. Defaults to ".tsv"
    :type extension: string, optional

    :return: A list of averages for each instance of the given state
    :rtype: float list

    :raises KeyError: if ``column`` is a header that is not present in a data file

    :Examples:

    .. code-block:: python

        data_avgs = average_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s")
    """
    data_agg = []
    overnight = False   # the previous file ended while still in `state`
    tail_open = False   # ...and data_agg[-1] holds that unfinished instance

    if not isinstance(dates, list):
        dates = [dates]

    for d in dates:
        state_file = path + "statelog " + d + extension
        data_file = path + "datalog " + d + extension

        states = np.array(pd.read_csv(state_file, delimiter='\t'))
        data_frame = pd.read_csv(data_file, delimiter='\t')

        # A header name has to be resolved while the labels still exist;
        # the plain array built below can only be indexed by position.
        if isinstance(column, int):
            column_idx = column
        else:
            column_idx = data_frame.columns.get_loc(column)
        data = np.array(data_frame)
        n_rows = np.size(data[:, 0])

        # An instance starts at each log entry for `state` and ends at the
        # time of the log entry that follows it.
        state_start_idx = states[:, 1] == state
        state_start = states[state_start_idx, 0]
        state_end_idx = np.append([False], state_start_idx[:-1])
        state_end = states[state_end_idx, 0]

        if overnight:
            # Carry-over from the previous file: it lasts from the start of
            # this file until the first logged state change.
            state_start = np.insert(state_start, 0, 0)
            state_end = np.insert(state_end, 0, states[0, 0])

        ends_in_state = bool(state_start_idx[-1])
        if ends_in_state:
            # No later log entry closes the final instance, so it lasts
            # until the last recorded time stamp of this file.
            state_end = np.append(state_end, data[-1, 0])

        n_instances = np.size(state_start)
        last_appended = -1
        for i in range(n_instances):
            first_row = None
            last_row = n_rows   # used when no sample lies beyond the end time
            for j in range(n_rows):
                if first_row is None and data[j, 0] > state_start[i]:
                    first_row = j
                if data[j, 0] > state_end[i]:
                    last_row = j - 1
                    break
            if first_row is None:
                # Nothing was recorded after this instance began.
                continue

            # The slice end is exclusive, so the row just before the first
            # later-than-end sample is left out (retained from the original
            # implementation). max() keeps an empty window from turning
            # into a negative slice bound.
            c = data[first_row:max(first_row, last_row), column_idx]

            if overnight and i == 0 and tail_open:
                data_agg[-1] = np.append(data_agg[-1], c)
            else:
                data_agg.append(c)
            last_appended = i

        overnight = ends_in_state
        tail_open = ends_in_state and last_appended == n_instances - 1

    averages = np.zeros(len(data_agg))
    for i, chunk in enumerate(data_agg):
        averages[i] = np.average(chunk)

    if units != "":
        return averages * u(units)
    else:
        return averages
def perform_function_on_state(func, dates, state, column, units="", path="", extension=".xls"):
    """Apply ``func`` to the data recorded during each instance of a state.

    For every date, the files ``path + "statelog " + date + extension`` and
    ``path + "datalog " + date + extension`` are read as tab-delimited text.
    An instance of the state starts at the time logged next to ``state`` in
    the state log and ends at the time of the following state-log entry. If
    the state is the last entry of a file, the instance runs to the end of
    that day's data and is continued with the data at the start of the next
    date, up to that file's first state-log entry.

    Parameters
    ----------
    func : function
        A function which will be applied to data from each instance of the
        state
    dates : string (list)
        A list of dates or single date for which data was recorded, in
        the form "M-D-Y"
    state : int
        The state ID number for which data should be extracted
    column : int or string
        int: Index of the column that you want to extract. Column 0 is time.
        The first data column is column 1.
        string: Name of the column header that you want to extract
    units : string, optional
        The units you want to apply to the data, e.g. 'mg/L'.
        Defaults to "" which indicates no units
    path : string, optional
        Prefix prepended to the file names (include the trailing separator).
        Defaults to the current directory if no argument is passed in
    extension : string, optional
        The file extension of the tab delimited file. Defaults to ".xls" if
        no argument is passed in

    Returns
    -------
    numpy array
        The outputs of the given function for each instance of the given
        state. When ``units`` is given, the result carries the units
        returned by ``func``.

    Raises
    ------
    KeyError
        If ``column`` is a header that is not present in a data file.

    Requires
    --------
    func takes in a list of data with units and outputs the correct units

    Examples
    --------
    def avg_with_units(lst):
        num = np.size(lst)
        acc = 0
        for i in lst:
            acc = i + acc

        return acc / num

    data_avgs = perform_function_on_state(avg_with_units, ["6-19-2013", "6-20-2013"], 1, 28, "mL/s")

    """
    data_agg = []
    overnight = False   # the previous file ended while still in `state`
    tail_open = False   # ...and data_agg[-1] holds that unfinished instance

    if not isinstance(dates, list):
        dates = [dates]

    for d in dates:
        state_file = path + "statelog " + d + extension
        data_file = path + "datalog " + d + extension

        states = np.array(pd.read_csv(state_file, delimiter='\t'))
        data_frame = pd.read_csv(data_file, delimiter='\t')

        # A header name has to be resolved while the labels still exist;
        # the plain array built below can only be indexed by position.
        if isinstance(column, int):
            column_idx = column
        else:
            column_idx = data_frame.columns.get_loc(column)
        data = np.array(data_frame)
        n_rows = np.size(data[:, 0])

        # An instance starts at each log entry for `state` and ends at the
        # time of the log entry that follows it.
        state_start_idx = states[:, 1] == state
        state_start = states[state_start_idx, 0]
        state_end_idx = np.append([False], state_start_idx[:-1])
        state_end = states[state_end_idx, 0]

        if overnight:
            # Carry-over from the previous file: it lasts from the start of
            # this file until the first logged state change.
            state_start = np.insert(state_start, 0, 0)
            state_end = np.insert(state_end, 0, states[0, 0])

        ends_in_state = bool(state_start_idx[-1])
        if ends_in_state:
            # No later log entry closes the final instance, so it lasts
            # until the last recorded time stamp of this file.
            state_end = np.append(state_end, data[-1, 0])

        n_instances = np.size(state_start)
        last_appended = -1
        for i in range(n_instances):
            first_row = None
            last_row = n_rows   # used when no sample lies beyond the end time
            for j in range(n_rows):
                if first_row is None and data[j, 0] > state_start[i]:
                    first_row = j
                if data[j, 0] > state_end[i]:
                    last_row = j - 1
                    break
            if first_row is None:
                # Nothing was recorded after this instance began.
                continue

            # The slice end is exclusive, so the row just before the first
            # later-than-end sample is left out (retained from the original
            # implementation). max() keeps an empty window from turning
            # into a negative slice bound.
            c = data[first_row:max(first_row, last_row), column_idx]

            if overnight and i == 0 and tail_open:
                data_agg[-1] = np.append(data_agg[-1], c)
            else:
                data_agg.append(c)
            last_appended = i

        overnight = ends_in_state
        tail_open = ends_in_state and last_appended == n_instances - 1

    output = np.zeros(len(data_agg))

    if units == "":
        for i, chunk in enumerate(data_agg):
            output[i] = func(chunk)
        return output

    result_units = None
    for i, chunk in enumerate(data_agg):
        result = func(chunk * u(units))
        output[i] = result.magnitude
        result_units = result.units

    if result_units is None:
        # No instance was found, so func never ran; fall back to the
        # requested input units for the empty result.
        return output * u(units)
    return output * result_units
def read_state(dates, state, column, units="", path="", extension=".xls"):
    """Read ProCoDA files and return the time vector and data column recorded
    while the process was in the given state.

    For every date, the files ``path + "statelog " + date + extension`` and
    ``path + "datalog " + date + extension`` are read as tab-delimited text.
    An instance of the state starts at the time logged next to ``state`` in
    the state log and ends at the time of the following state-log entry. If
    the state is the last entry of a file, the instance runs to the end of
    that day's data and is continued with the data at the start of the next
    date, up to that file's first state-log entry. All instances are
    concatenated in the order they are found.

    Parameters
    ----------
    dates : string (list)
        A list of dates or single date for which data was recorded, in
        the form "M-D-Y"
    state : int
        The state ID number for which data should be extracted
    column : int or string
        int: Index of the column that you want to extract. Column 0 is time.
        The first data column is column 1.
        string: Name of the column header that you want to extract
    units : string, optional
        The units you want to apply to the data, e.g. 'mg/L'.
        Defaults to "" which indicates no units
    path : string, optional
        Prefix prepended to the file names (include the trailing separator).
        Defaults to the current directory if no argument is passed in
    extension : string, optional
        The file extension of the tab delimited file. Defaults to ".xls" if
        no argument is passed in

    Returns
    -------
    time : numpy array
        Times corresponding to the data, in days, offset by the zero-based
        position of the date in ``dates`` and measured from the time stamp
        in the second row of the first data file
    data : numpy array
        Data in the given column during the given state, with units when
        ``units`` is given

    Raises
    ------
    KeyError
        If ``column`` is a header that is not present in a data file.

    Examples
    --------
    time, data = read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s")

    """
    data_agg = []
    day = 0
    first_day = True
    overnight = False   # the previous file ended while still in `state`

    if not isinstance(dates, list):
        dates = [dates]

    for d in dates:
        state_file = path + "statelog " + d + extension
        data_file = path + "datalog " + d + extension

        states = np.array(pd.read_csv(state_file, delimiter='\t'))
        data_frame = pd.read_csv(data_file, delimiter='\t')

        # A header name has to be resolved while the labels still exist;
        # the plain array built below can only be indexed by position.
        if isinstance(column, int):
            column_idx = column
        else:
            column_idx = data_frame.columns.get_loc(column)
        data = np.array(data_frame)
        n_rows = np.size(data[:, 0])

        # An instance starts at each log entry for `state` and ends at the
        # time of the log entry that follows it.
        state_start_idx = states[:, 1] == state
        state_start = states[state_start_idx, 0]
        state_end_idx = np.append([False], state_start_idx[:-1])
        state_end = states[state_end_idx, 0]

        if overnight:
            # Carry-over from the previous file: it lasts from the start of
            # this file until the first logged state change.
            state_start = np.insert(state_start, 0, 0)
            state_end = np.insert(state_end, 0, states[0, 0])

        ends_in_state = bool(state_start_idx[-1])
        if ends_in_state:
            # No later log entry closes the final instance, so it lasts
            # until the last recorded time stamp of this file.
            state_end = np.append(state_end, data[-1, 0])

        if first_day:
            # NOTE(review): the time origin is taken from the second data
            # row, not the first - confirm that this is intended.
            start_time = data[1, 0]
            first_day = False

        for i in range(np.size(state_start)):
            first_row = None
            last_row = n_rows   # used when no sample lies beyond the end time
            for j in range(n_rows):
                if first_row is None and data[j, 0] > state_start[i]:
                    first_row = j
                if data[j, 0] > state_end[i]:
                    last_row = j - 1
                    break
            if first_row is None:
                # Nothing was recorded after this instance began.
                continue

            # The slice end is exclusive, so the row just before the first
            # later-than-end sample is left out (retained from the original
            # implementation). max() keeps an empty window from turning
            # into a negative slice bound.
            last_row = max(first_row, last_row)
            t = data[first_row:last_row, 0] + day - start_time
            c = data[first_row:last_row, column_idx]

            # Everything is stacked into one table at the end, so a
            # carried-over chunk simply follows the chunk it continues.
            data_agg.append(np.vstack((t, c)).T)

        day += 1
        overnight = ends_in_state

    if data_agg:
        stacked = np.vstack(data_agg)
    else:
        # The state never occurred: return empty vectors instead of failing.
        stacked = np.empty((0, 2))

    if units != "":
        return stacked[:, 0] * u.day, stacked[:, 1] * u(units)
    else:
        return stacked[:, 0] * u.day, stacked[:, 1]