def set_parameter_values(watertxt_data, name, values):
    """
    Replace the data of a named parameter and refresh its summary statistics.

    Parameter names are compared on the text that precedes the first "("
    (with surrounding whitespace stripped), so a units suffix such as
    "(cfs)" does not have to be supplied or match.

    Parameters
    ----------
    watertxt_data : dictionary
        Dictionary holding data found in WATER output text file.
    name : string
        String name of parameter
    values : numpy array
        Array of data values

    Returns
    -------
    watertxt_data : dictionary
        The same dictionary, with every matching parameter updated in place.
        It is returned unchanged when no parameter matches.
    """
    for entry in watertxt_data["parameters"]:
        entry_label = entry["name"].split("(")[0].strip()
        wanted_label = name.split("(")[0].strip()

        # skip parameters whose base name differs from the requested one
        if entry_label != wanted_label:
            continue

        # stats are unpacked before anything is written to the entry
        new_mean, new_max, new_min = helpers.compute_simple_stats(data=values)

        replacements = (
            ("data", values),
            ("mean", new_mean),
            ("max", new_max),
            ("min", new_min),
        )
        for key, replacement in replacements:
            entry[key] = replacement

    return watertxt_data
def set_parameter_values(watertxt_data, name, values):
    """
    Replace the data of a named parameter and refresh its summary statistics.

    Parameter names are compared on the text that precedes the first "("
    (with surrounding whitespace stripped), so a units suffix such as
    "(cfs)" does not have to be supplied or match.

    Parameters
    ----------
    watertxt_data : dictionary
        Dictionary holding data found in WATER output text file.
    name : string
        String name of parameter
    values : numpy array
        Array of data values

    Returns
    -------
    watertxt_data : dictionary
        The same dictionary, with every matching parameter updated in place.
        It is returned unchanged when no parameter matches.
    """
    for entry in watertxt_data["parameters"]:
        entry_label = entry["name"].split("(")[0].strip()
        wanted_label = name.split("(")[0].strip()

        # skip parameters whose base name differs from the requested one
        if entry_label != wanted_label:
            continue

        # stats are unpacked before anything is written to the entry
        new_mean, new_max, new_min = helpers.compute_simple_stats(data=values)

        replacements = (
            ("data", values),
            ("mean", new_mean),
            ("max", new_max),
            ("min", new_min),
        )
        for key, replacement in replacements:
            entry[key] = replacement

    return watertxt_data
def add_parameter(watertxt_data, name, param_data):
    """
    Add a parameter to the list of existing parameters in watertxt_data and
    the list of column names. The new parameter is appended to the end of
    the existing parameter list.

    Parameters
    ----------
    watertxt_data : dictionary
        Dictionary holding data found in WATER output text file.
    name : string
        String name of parameter
    param_data : numpy array
        Array of parameter data values

    Returns
    -------
    watertxt_data : dictionary
        Updated dictionary with the new parameter added.

    Notes
    -----
    The new parameter's "index" is one greater than the largest index among
    the existing parameters, or 0 when there are no parameters yet.
    """
    # compute simple stats before mutating anything, so that a failure here
    # cannot leave column_names and parameters out of sync
    param_mean, param_max, param_min = helpers.compute_simple_stats(data=param_data)

    # find the next free index; max() raises ValueError on an empty sequence,
    # so an empty parameter list is handled explicitly
    indices = [parameter["index"] for parameter in watertxt_data["parameters"]]
    next_index = max(indices) + 1 if indices else 0

    # add name to column names
    watertxt_data["column_names"].append(name)

    # add to parameter list
    watertxt_data["parameters"].append({
        "name": name,
        "index": next_index,
        "data": param_data,
        "mean": param_mean,
        "max": param_max,
        "min": param_min,
    })

    return watertxt_data
def add_parameter(watertxt_data, name, param_data):
    """
    Add a parameter to the list of existing parameters in watertxt_data and
    the list of column names. The new parameter is appended to the end of
    the existing parameter list.

    Parameters
    ----------
    watertxt_data : dictionary
        Dictionary holding data found in WATER output text file.
    name : string
        String name of parameter
    param_data : numpy array
        Array of parameter data values

    Returns
    -------
    watertxt_data : dictionary
        Updated dictionary with the new parameter added.

    Notes
    -----
    The new parameter's "index" is one greater than the largest index among
    the existing parameters, or 0 when there are no parameters yet.
    """
    # compute simple stats before mutating anything, so that a failure here
    # cannot leave column_names and parameters out of sync
    param_mean, param_max, param_min = helpers.compute_simple_stats(data=param_data)

    # find the next free index; max() raises ValueError on an empty sequence,
    # so an empty parameter list is handled explicitly
    indices = [parameter["index"] for parameter in watertxt_data["parameters"]]
    next_index = max(indices) + 1 if indices else 0

    # add name to column names
    watertxt_data["column_names"].append(name)

    # add to parameter list
    watertxt_data["parameters"].append({
        "name": name,
        "index": next_index,
        "data": param_data,
        "mean": param_mean,
        "max": param_max,
        "min": param_min,
    })

    return watertxt_data
def read_file_in(filestream):
    r"""
    Read and process a WATER \*.txt file. Finds any parameter and its
    respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    The "parameters" key contains a list of dictionaries containing the
    parameters found in the data file. On return, "dates" and each
    parameter's "data" are numpy arrays, and each parameter's "mean", "max"
    and "min" hold the values returned by helpers.compute_simple_stats.

    A parameter's "index" is its position within the column header (after
    the leading "Date" column), so repeated column names still map to their
    own columns.

    See Also
    --------
    create_parameter : Create new dictionary to hold parameter data
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file, compiled once up front.
    # NOTE(review): the "user" pattern was previously "******", which is not
    # a valid regular expression (re raises "nothing to repeat") and has no
    # group 2. "(User:)\t(.+)" mirrors the sibling header patterns -- confirm
    # the exact label against a real WATER output file.
    patterns = {
        "user": re.compile("(User:)\t(.+)"),
        "date_created": re.compile("(Date:)\t(.+)"),
        "stationid": re.compile("(StationID:)\t(.+)"),
        "column_names": re.compile("(Date)\t(.+)"),
        "data_row": re.compile("([0-9]{1,2}/[0-9]{1,2}/[0-9]{4})\t(.+)"),
    }

    # initialize a dictionary to hold all the data of interest
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    # process file
    for line in data_file:
        # find match
        match_user = patterns["user"].search(line)
        match_date_created = patterns["date_created"].search(line)
        match_stationid = patterns["stationid"].search(line)
        match_column_names = patterns["column_names"].search(line)
        match_data_row = patterns["data_row"].search(line)

        # if match is found, add it to data dictionary
        if match_user:
            data["user"] = match_user.group(2)

        if match_date_created:
            data["date_created"] = match_date_created.group(2)

        if match_stationid:
            data["stationid"] = match_stationid.group(2)

        if match_column_names:
            data["column_names"] = match_column_names.group(2).split("\t")

            # create a dictionary for each column_name (excluding "Date");
            # enumerate gives every column its own position, whereas
            # list.index() would return the first position for repeated names
            for index, name in enumerate(data["column_names"]):
                parameter = create_parameter(
                    name=name,
                    index=index,
                    data=[],
                    mean=None,
                    max=None,
                    min=None,
                )
                data["parameters"].append(parameter)

        if match_data_row:
            # add date to data dictionary
            date = get_date(date_str=match_data_row.group(1))
            data["dates"].append(date)

            # split the row once rather than once per parameter
            row_values = match_data_row.group(2).split("\t")

            for parameter in data["parameters"]:
                value = helpers.convert_to_float(
                    value=row_values[parameter["index"]],
                    helper_str="parameter {} on {}".format(
                        parameter["name"], date.strftime("%Y-%m-%d_%H.%M")
                    ),
                )
                parameter["data"].append(float(value))

    # convert the date list to a numpy array
    data["dates"] = np.array(data["dates"])

    # convert each parameter data list in data["parameters"] to a numpy array
    # and compute mean, max, and min
    for parameter in data["parameters"]:
        parameter["data"] = np.array(parameter["data"])

        param_mean, param_max, param_min = helpers.compute_simple_stats(
            data=parameter["data"]
        )

        parameter["mean"] = param_mean
        parameter["max"] = param_max
        parameter["min"] = param_min

    # return data
    return data
def read_file_in(filestream):
    r"""
    Read and process a WATER \*.txt file. Finds any parameter and its
    respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    The "parameters" key contains a list of dictionaries containing the
    parameters found in the data file. On return, "dates" and each
    parameter's "data" are numpy arrays, and each parameter's "mean", "max"
    and "min" hold the values returned by helpers.compute_simple_stats.

    A parameter's "index" is its position within the column header (after
    the leading "Date" column), so repeated column names still map to their
    own columns.

    See Also
    --------
    create_parameter : Create new dictionary to hold parameter data
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file, compiled once up front.
    # NOTE(review): the "user" pattern was previously "******", which is not
    # a valid regular expression (re raises "nothing to repeat") and has no
    # group 2. "(User:)\t(.+)" mirrors the sibling header patterns -- confirm
    # the exact label against a real WATER output file.
    patterns = {
        "user": re.compile("(User:)\t(.+)"),
        "date_created": re.compile("(Date:)\t(.+)"),
        "stationid": re.compile("(StationID:)\t(.+)"),
        "column_names": re.compile("(Date)\t(.+)"),
        "data_row": re.compile("([0-9]{1,2}/[0-9]{1,2}/[0-9]{4})\t(.+)"),
    }

    # initialize a dictionary to hold all the data of interest
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    # process file
    for line in data_file:
        # find match
        match_user = patterns["user"].search(line)
        match_date_created = patterns["date_created"].search(line)
        match_stationid = patterns["stationid"].search(line)
        match_column_names = patterns["column_names"].search(line)
        match_data_row = patterns["data_row"].search(line)

        # if match is found, add it to data dictionary
        if match_user:
            data["user"] = match_user.group(2)

        if match_date_created:
            data["date_created"] = match_date_created.group(2)

        if match_stationid:
            data["stationid"] = match_stationid.group(2)

        if match_column_names:
            data["column_names"] = match_column_names.group(2).split("\t")

            # create a dictionary for each column_name (excluding "Date");
            # enumerate gives every column its own position, whereas
            # list.index() would return the first position for repeated names
            for index, name in enumerate(data["column_names"]):
                parameter = create_parameter(
                    name=name,
                    index=index,
                    data=[],
                    mean=None,
                    max=None,
                    min=None,
                )
                data["parameters"].append(parameter)

        if match_data_row:
            # add date to data dictionary
            date = get_date(date_str=match_data_row.group(1))
            data["dates"].append(date)

            # split the row once rather than once per parameter
            row_values = match_data_row.group(2).split("\t")

            for parameter in data["parameters"]:
                value = helpers.convert_to_float(
                    value=row_values[parameter["index"]],
                    helper_str="parameter {} on {}".format(
                        parameter["name"], date.strftime("%Y-%m-%d_%H.%M")
                    ),
                )
                parameter["data"].append(float(value))

    # convert the date list to a numpy array
    data["dates"] = np.array(data["dates"])

    # convert each parameter data list in data["parameters"] to a numpy array
    # and compute mean, max, and min
    for parameter in data["parameters"]:
        parameter["data"] = np.array(parameter["data"])

        param_mean, param_max, param_min = helpers.compute_simple_stats(
            data=parameter["data"]
        )

        parameter["mean"] = param_mean
        parameter["max"] = param_max
        parameter["min"] = param_min

    # return data
    return data