def _get_invited_customers(self):
    """Return customers within ``self.radius`` km of the office.

    Reads one JSON-encoded customer record per line from
    'invited_customers/customers.txt', runs each record through
    ``self._parse_customer``, and keeps those whose distance from the
    office (via ``self._get_distance_from_office``) is strictly less
    than ``self.radius``.

    Returns
    -------
    list
        Customer entries formatted by ``self._format_customer``.
    """
    invited_customers = []
    # context manager guarantees the file handle is closed even when a
    # line fails to parse (the original leaked the handle)
    with open('invited_customers/customers.txt', 'r') as customers_file:
        for line in customers_file:
            customer = json.loads(line)
            self._parse_customer(customer)
            lat = convert_to_float(customer['latitude'])
            lng = convert_to_float(customer['longitude'])
            distance = self._get_distance_from_office(lat, lng)
            customers_log.debug('User Id: {}, Distance: {} km'.format(
                customer['user_id'], distance))
            # strictly-less-than comparison preserved from the original
            if distance < self.radius:
                invited_customers.append(
                    self._format_customer(customer, distance))
    return invited_customers
def read_file_in(filestream):
    r"""
    Read and process a WATER \*.txt file. Finds any parameter and its
    respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    The "parameters" key contains a list of dictionaries containing the
    parameters found in the data file.

    See Also
    --------
    create_parameter : Create new dictionary to hold parameter data
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous
    patterns = {
        # NOTE(review): "******" is not a valid regular expression
        # ("nothing to repeat" at position 0) and looks like a redacted
        # username pattern -- confirm the intended pattern before use
        "user": "******",
        "date_created": r"(Date:)\t(.+)",
        "stationid": r"(StationID:)\t(.+)",
        "column_names": r"(Date)\t(.+)",
        "data_row": r"([0-9]{1,2}/[0-9]{1,2}/[0-9]{4})\t(.+)",
    }

    # initialize a dictionary to hold all the data of interest
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    # process file
    for line in data_file:
        # find match
        match_user = re.search(pattern=patterns["user"], string=line)
        match_date_created = re.search(pattern=patterns["date_created"],
                                       string=line)
        match_stationid = re.search(pattern=patterns["stationid"],
                                    string=line)
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found, add it to data dictionary
        if match_user:
            data["user"] = match_user.group(2)

        if match_date_created:
            data["date_created"] = match_date_created.group(2)

        if match_stationid:
            data["stationid"] = match_stationid.group(2)

        if match_column_names:
            data["column_names"] = match_column_names.group(2).split("\t")
            # create a dictionary for each column_name (excluding "Date");
            # enumerate() gives each column its true position -- the
            # original list.index(name) returned the FIRST occurrence and
            # mis-indexed duplicate column names
            for index, name in enumerate(data["column_names"]):
                parameter = create_parameter(name=name, index=index,
                                             data=[], mean=None,
                                             max=None, min=None)
                data["parameters"].append(parameter)

        if match_data_row:
            # add date to data dictionary
            date = get_date(date_str=match_data_row.group(1))
            data["dates"].append(date)

            # split the row once instead of once per parameter
            row_values = match_data_row.group(2).split("\t")
            for parameter in data["parameters"]:
                value = helpers.convert_to_float(
                    value=row_values[parameter["index"]],
                    helper_str="parameter {} on {}".format(
                        parameter["name"],
                        date.strftime("%Y-%m-%d_%H.%M")))
                parameter["data"].append(float(value))

    # convert the date list to a numpy array
    data["dates"] = np.array(data["dates"])

    # convert each parameter data list in data["parameters"] to a numpy
    # array and compute mean, max, and min
    for parameter in data["parameters"]:
        parameter["data"] = np.array(parameter["data"])
        param_mean, param_max, param_min = helpers.compute_simple_stats(
            data=parameter["data"])
        parameter["mean"] = param_mean
        parameter["max"] = param_max
        parameter["min"] = param_min

    # return data
    return data
def read_file_in(filestream):
    r"""
    Read and process a WATER \*.txt file. Finds any parameter and its
    respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    The "parameters" key contains a list of dictionaries containing the
    parameters found in the data file.

    See Also
    --------
    create_parameter : Create new dictionary to hold parameter data
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous
    patterns = {
        # NOTE(review): "******" is not a valid regular expression
        # ("nothing to repeat" at position 0) and looks like a redacted
        # username pattern -- confirm the intended pattern before use
        "user": "******",
        "date_created": r"(Date:)\t(.+)",
        "stationid": r"(StationID:)\t(.+)",
        "column_names": r"(Date)\t(.+)",
        "data_row": r"([0-9]{1,2}/[0-9]{1,2}/[0-9]{4})\t(.+)",
    }

    # initialize a dictionary to hold all the data of interest
    data = {
        "user": None,
        "date_created": None,
        "stationid": None,
        "column_names": None,
        "dates": [],
        "parameters": [],
    }

    # process file
    for line in data_file:
        # find match
        match_user = re.search(pattern=patterns["user"], string=line)
        match_date_created = re.search(pattern=patterns["date_created"],
                                       string=line)
        match_stationid = re.search(pattern=patterns["stationid"],
                                    string=line)
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found, add it to data dictionary
        if match_user:
            data["user"] = match_user.group(2)

        if match_date_created:
            data["date_created"] = match_date_created.group(2)

        if match_stationid:
            data["stationid"] = match_stationid.group(2)

        if match_column_names:
            data["column_names"] = match_column_names.group(2).split("\t")
            # create a dictionary for each column_name (excluding "Date");
            # enumerate() gives each column its true position -- the
            # original list.index(name) returned the FIRST occurrence and
            # mis-indexed duplicate column names
            for index, name in enumerate(data["column_names"]):
                parameter = create_parameter(name=name, index=index,
                                             data=[], mean=None,
                                             max=None, min=None)
                data["parameters"].append(parameter)

        if match_data_row:
            # add date to data dictionary
            date = get_date(date_str=match_data_row.group(1))
            data["dates"].append(date)

            # split the row once instead of once per parameter
            row_values = match_data_row.group(2).split("\t")
            for parameter in data["parameters"]:
                value = helpers.convert_to_float(
                    value=row_values[parameter["index"]],
                    helper_str="parameter {} on {}".format(
                        parameter["name"],
                        date.strftime("%Y-%m-%d_%H.%M")))
                parameter["data"].append(float(value))

    # convert the date list to a numpy array
    data["dates"] = np.array(data["dates"])

    # convert each parameter data list in data["parameters"] to a numpy
    # array and compute mean, max, and min
    for parameter in data["parameters"]:
        parameter["data"] = np.array(parameter["data"])
        param_mean, param_max, param_min = helpers.compute_simple_stats(
            data=parameter["data"])
        parameter["mean"] = param_mean
        parameter["max"] = param_max
        parameter["min"] = param_min

    # return data
    return data
def read_file_in(filestream):
    r"""
    Read and process a delta \*.txt file. Returns a dictionary with
    keys named as the column header names found in the file.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "Model": string of model name,
        "Scenario": string of scenario name,
        "Target": string of scenario name,
        "Variable": string of variable name,
        "Tile": list of tile numbers,
        "January": array of delta values for each tile,
        ...
        "December": array of delta values for each tile,
    }
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous.  The original model-name class was
    # "[a-zA-z0-9-]": the "A-z" range also matched "[", "\", "]", "^",
    # "_" and "`" -- fixed to the intended "A-Z"
    patterns = {
        "column_names": r"(Model.+)",
        "data_row": r"([a-zA-Z0-9-]+)\t(\w+)\t(\d+)\t(.+)",
    }

    # initialize a temporary dictionary to hold data of interest
    initial_data = {"column_names": None, "parameters": []}

    # process file
    for line in data_file:
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found add it to data dictionary
        if match_column_names:
            initial_data["column_names"] = \
                match_column_names.group(0).split("\t")
            # enumerate() gives each column its true position -- the
            # original list.index(name) mis-indexed duplicate names
            for index, name in enumerate(initial_data["column_names"]):
                initial_data["parameters"].append({
                    "name": name,
                    "index": index,
                    "data": [],
                })

        if match_data_row:
            # split the row once instead of once per parameter
            row_values = match_data_row.group(0).split("\t")
            for parameter in initial_data["parameters"]:
                parameter["data"].append(row_values[parameter["index"]])

    # format data into a dictionary; remove duplicate text values from
    # certain column_names, and dynamically create keys with column names
    data = {}
    duplicate_value_columns = ["Model", "Scenario", "Target", "Variable"]

    for parameter in initial_data["parameters"]:
        if parameter["name"] in duplicate_value_columns:
            # duplicate values, so just get first value
            parameter["data"] = parameter["data"][0]
        elif parameter["name"] == "Tile":
            pass  # leave Tile values as strings
        else:
            for i in range(len(parameter["data"])):
                # ensure that value is a float
                value = helpers.convert_to_float(
                    value=parameter["data"][i],
                    helper_str="parameter {}".format(parameter["name"]))
                parameter["data"][i] = float(value)

        # populate data dictionary with clean data sets
        data[parameter["name"]] = parameter["data"]

    # return data
    return data
def read_file_in(filestream):
    r"""
    Read and process a delta \*.txt file. Returns a dictionary with
    keys named as the column header names found in the file.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    data = {
        "Model": string of model name,
        "Scenario": string of scenario name,
        "Target": string of scenario name,
        "Variable": string of variable name,
        "Tile": list of tile numbers,
        "January": array of delta values for each tile,
        ...
        "December": array of delta values for each tile,
    }
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous.  The original model-name class was
    # "[a-zA-z0-9-]": the "A-z" range also matched "[", "\", "]", "^",
    # "_" and "`" -- fixed to the intended "A-Z"
    patterns = {
        "column_names": r"(Model.+)",
        "data_row": r"([a-zA-Z0-9-]+)\t(\w+)\t(\d+)\t(.+)",
    }

    # initialize a temporary dictionary to hold data of interest
    initial_data = {"column_names": None, "parameters": []}

    # process file
    for line in data_file:
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found add it to data dictionary
        if match_column_names:
            initial_data["column_names"] = \
                match_column_names.group(0).split("\t")
            # enumerate() gives each column its true position -- the
            # original list.index(name) mis-indexed duplicate names
            for index, name in enumerate(initial_data["column_names"]):
                initial_data["parameters"].append({
                    "name": name,
                    "index": index,
                    "data": [],
                })

        if match_data_row:
            # split the row once instead of once per parameter
            row_values = match_data_row.group(0).split("\t")
            for parameter in initial_data["parameters"]:
                parameter["data"].append(row_values[parameter["index"]])

    # format data into a dictionary; remove duplicate text values from
    # certain column_names, and dynamically create keys with column names
    data = {}
    duplicate_value_columns = ["Model", "Scenario", "Target", "Variable"]

    for parameter in initial_data["parameters"]:
        if parameter["name"] in duplicate_value_columns:
            # duplicate values, so just get first value
            parameter["data"] = parameter["data"][0]
        elif parameter["name"] == "Tile":
            pass  # leave Tile values as strings
        else:
            for i in range(len(parameter["data"])):
                # ensure that value is a float
                value = helpers.convert_to_float(
                    value=parameter["data"][i],
                    helper_str="parameter {}".format(parameter["name"]))
                parameter["data"][i] = float(value)

        # populate data dictionary with clean data sets
        data[parameter["name"]] = parameter["data"]

    # return data
    return data
def read_file_in(filestream):
    r"""
    Read and process a water use \*.txt file. Finds any parameter and
    its respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Notes
    -----
    expected = {
        "huc12": list of string ids,
        "newhydroid": list of string ids,
        "AqGwWL": list of float water use values,
        "CoGwWL": list of float water use values,
        ...
    }
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous.  "{0,12}" replaces the original
    # "{,12}" -- same meaning, but "{0,12}" is the documented, portable
    # spelling of the bounded quantifier
    patterns = {
        "months": r"(#)\s([JFMASOND].+)",
        "units": r"(#)\s(Units:)(.+)",
        "column_names": r"(huc12)\t(.+)",
        "data_row": r"(^[0-9]{0,12})\t(.+)",
    }

    # initialize a temporary dictionary to hold data of interest
    initial_data = {"months": None, "units": None,
                    "column_names": None, "parameters": []}

    # process file
    for line in data_file:
        line = line.strip()

        # find match
        match_months = re.search(pattern=patterns["months"], string=line)
        match_units = re.search(pattern=patterns["units"], string=line)
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found add it to data dictionary
        if match_months:
            initial_data["months"] = match_months.group(2).strip()

        if match_units:
            initial_data["units"] = match_units.group(3).strip()

        if match_column_names:
            initial_data["column_names"] = \
                match_column_names.group(0).split("\t")
            # enumerate() gives each column its true position -- the
            # original list.index(name) mis-indexed duplicate names
            for index, name in enumerate(initial_data["column_names"]):
                initial_data["parameters"].append({
                    "name": name,
                    "index": index,
                    "data": [],
                })

        if match_data_row:
            # split the row once instead of once per parameter
            row_values = match_data_row.group(0).split("\t")
            for parameter in initial_data["parameters"]:
                parameter["data"].append(row_values[parameter["index"]])

    # format data into a dictionary; dynamically create keys with
    # column names
    data = {"months": initial_data["months"],
            "units": initial_data["units"],
            "column_names": initial_data["column_names"]}

    string_columns = ["huc12", "newhydroid"]
    for parameter in initial_data["parameters"]:
        if parameter["name"] in string_columns:
            pass  # leave values as strings
        else:
            for i in range(len(parameter["data"])):
                # ensure that value is a float
                value = helpers.convert_to_float(
                    value=parameter["data"][i],
                    helper_str="parameter {}".format(parameter["name"]))
                parameter["data"][i] = float(value)

        # populate data dictionary with clean data sets
        data[parameter["name"]] = parameter["data"]

    # return data
    return data
def read_factor_file_in(filestream):
    r"""
    Read and process a water use factor \*.txt file. Finds any
    parameter and its respective data.

    Parameters
    ----------
    filestream : file object
        A python file object that contains an open data file.

    Returns
    -------
    data : dictionary
        Returns a dictionary containing data found in data file.

    Raises
    ------
    IOError
        If a column name is not one of the known water factor types.

    Notes
    -----
    expected = {
        "AqGwWL": float water use factor value,
        "CoGwWL": float water use factor value,
        ...
    }
    """
    # read all the lines in the filestream
    data_file = filestream.readlines()

    # regular expression patterns in data file; raw strings make the
    # backslash escapes unambiguous.  The original header pattern used
    # "[aA-zZ]", a garbled class whose "A-z" range also matched "[",
    # "\", "]", "^", "_" and "`" -- fixed to the intended "[a-zA-Z]"
    patterns = {
        "column_names": r"(^[a-zA-Z].+)",
        "data_row": r"(^[0-9].+)",
    }

    # initialize a temporary dictionary to hold data of interest
    initial_data = {"column_names": None, "parameters": []}

    # process file
    for line in data_file:
        line = line.strip()

        # find match
        match_column_names = re.search(pattern=patterns["column_names"],
                                       string=line)
        match_data_row = re.search(pattern=patterns["data_row"],
                                   string=line)

        # if match is found add it to data dictionary
        if match_column_names:
            initial_data["column_names"] = \
                match_column_names.group(0).split("\t")
            # enumerate() gives each column its true position -- the
            # original list.index(name) mis-indexed duplicate names
            for index, name in enumerate(initial_data["column_names"]):
                initial_data["parameters"].append({
                    "name": name,
                    "index": index,
                    "data": None,
                })

        if match_data_row:
            # split the row once instead of once per parameter
            row_values = match_data_row.group(0).split("\t")
            for parameter in initial_data["parameters"]:
                parameter["data"] = row_values[parameter["index"]]

    # format data into a dictionary; dynamically create keys with
    # column names
    data = {"column_names": initial_data["column_names"]}
    for parameter in initial_data["parameters"]:
        # ensure that value is a float
        value = helpers.convert_to_float(
            value=parameter["data"],
            helper_str="parameter {}".format(parameter["name"]))
        parameter["data"] = float(value)

        # populate data dictionary with clean data sets
        data[parameter["name"]] = parameter["data"]

    # known water factor column names; anything else means the wrong
    # file was supplied
    water_factor_type_list = ['AqGwWL', 'CoGwWL', 'DoGwWL', 'InGwWL',
                              'IrGwWL', 'LvGwWL', 'MiGwWL', 'ReGwWL',
                              'TeGwWL', 'WsGwWL',
                              'AqSwWL', 'CoSwWL', 'InSwWL', 'IrSwWL',
                              'LvSwWL', 'MiSwWL', 'TeSwWL', 'WsSwWL',
                              'InGwRT', 'InSwRT', 'STswRT', 'WSgwRT',
                              'WSTransRC', 'WStransNY']

    for data_key in data["column_names"]:
        if data_key not in water_factor_type_list:
            raise IOError("Error in water factor file!\nBad column found: {}\nThis indicates an incorrect water factor file.".format(data_key))

    # return data
    return data