def process_yoy_summary(filename, output_schema=cfg.YOY_SUMMARY_OUTPUT_SCHEMA):
    """Build year-over-year summary output rows from the given CSV file.

    Each input row is unpacked as (month number, value, type label). A label
    containing "number" fills the "yoy_num" column and one containing
    "volume" fills the "yoy_vol" column; labels containing "inquiry" or
    "tightness" are accepted but contribute nothing to the output.

    Returns:
        (True, data, json): data is output_schema followed by one
        [month, date, yoy_num, yoy_vol] row per month seen, sorted by month
        number; json is json_for_bar_chart() applied to those rows.
        (True, [], []) when the input produced no rows.

    Raises:
        TypeError: a row's type label matches none of the known keywords.
    """
    # Output columns: "month", "date", "yoy_num", "yoy_vol"
    by_month = {}
    for record in util.load_csv(filename):
        monthstr, value, type_str = record
        monthnum = int(monthstr)

        # Every month seen gets an entry, even when only ignored rows
        # (inquiry / tightness) mention it
        entry = by_month.setdefault(monthnum, {"num": None, "vol": None})

        # Input column "group" is "Dollar Volume" or "Number of Loans"
        kind = type_str.lower()
        if "number" in kind:
            entry["num"] = value
        elif "volume" in kind:
            entry["vol"] = value
        elif "inquiry" in kind or "tightness" in kind:
            # 'Inquiry Index' and 'Credit Tightness Index' entries are
            # ignored in the current output
            continue
        else:
            msg = (
                "YOY Summary Data row (below) improperly "
                "formatted in {}\n{}".format(filename, record)
            )
            logger.error(msg)
            raise TypeError(msg)

    # Column order here MUST match the provided schema order.
    # Month numbers are unique, so sorting the rows orders them by month.
    rows = sorted(
        [monthnum, actual_date(monthnum), entry["num"], entry["vol"]]
        for monthnum, entry in by_month.items()
    )

    # Nothing to report: hand back empty data and empty JSON
    if not rows:
        return True, [], []

    # Header row goes first; the chart JSON is built from the data rows only
    return True, [output_schema] + rows, json_for_bar_chart(rows)
def process_file_summary(filename, output_schema):
    """Process a summary file into rows of adjusted/unadjusted values.

    Each input row is unpacked as (month number, value, adjustment label).
    A label containing "unadjust" fills the unadjusted column and one
    containing "seasonal" fills the adjusted column.

    Args:
        filename: path handed to util.load_csv().
        output_schema: header row placed at the front of the returned data.

    Returns:
        (True, data, json): data is output_schema followed by one
        [month, date, adjusted, unadjusted] row per month, sorted by month
        number; json is json_for_line_chart() applied to those rows.
        (True, [], []) when the input produced no rows.

    Raises:
        TypeError: a row's label names neither adjustment kind.
        Exception: whatever util.load_csv() raised, re-raised after logging.
    """
    # Load specified file as input data
    try:
        inputdata = util.load_csv(filename)
    except Exception:
        # The original message called .format(filename) on a string with no
        # placeholder, so the failing file was never reported.
        logger.error(
            "Unable to load {}. "
            "Make sure you are running Python 2.x!".format(filename)
        )
        # Bare raise keeps the original traceback intact
        raise

    # Process data
    proc = {}
    for row in inputdata:
        monthstr, value, is_adj_str = row
        monthnum = int(monthstr)
        if monthnum not in proc:
            proc[monthnum] = {"adj": None, "unadj": None}

        adjustment = is_adj_str.lower()
        # "unadjust" is tested first, so a label matching both keywords is
        # treated as unadjusted
        if "unadjust" in adjustment:
            proc[monthnum]["unadj"] = value
        elif "seasonal" in adjustment:
            proc[monthnum]["adj"] = value
        else:
            msg = "Data row (below) does not specify seasonal adjustment " + \
                  "in {}\n{}".format(filename, ",".join(row))
            logger.error(msg)
            raise TypeError(msg)

    # Turn dictionaries into a data list for output
    # This order MUST match the provided schema order
    data = [
        [monthnum, actual_date(monthnum), value["adj"], value["unadj"]]
        for monthnum, value in proc.items()
    ]

    # Prep for output by sorting (by month number) and inserting a header
    data.sort()
    data.insert(0, output_schema)

    # Check if data exists and JSON-format
    if len(data) > 1:
        json = json_for_line_chart(data[1:])
        return True, data, json

    return True, [], []
def process_group_yoy_groups(filename, group_names, output_schema):
    """Process a group year-over-year file into one row per month.

    Each input row is unpacked as (month number, value, group name). The
    value is stored under its group for that month; groups with no row for
    a month stay None.

    Args:
        filename: path handed to util.load_csv().
        group_names: the allowed group names; their order also fixes the
            order of the value columns in each output row.
        output_schema: header row placed at the front of the returned data.

    Returns:
        (True, data): data is output_schema followed by one
        [month, date, <value per group>...] row per month, sorted by month
        number. (True, []) when the input produced no rows. Unlike the
        other processors, no JSON is built here; the individual group
        calls handle it.

    Raises:
        TypeError: a row names a group that is not in group_names.
    """
    # Load specified file as input data
    inputdata = util.load_csv(filename)

    # Initialize output data
    data = []
    proc = {}

    # Process data
    for row in inputdata:
        monthstr, value, group = row
        monthnum = int(monthstr)

        if monthnum not in proc:
            proc[monthnum] = {name: None for name in group_names}

        if group in group_names:
            proc[monthnum][group] = value
        else:
            # Report the offending group name (the original substituted the
            # filename here) and still say which file the row came from
            msg = "Data row (below) contains illegal group " + \
                  "name '{}' in {}\n{}".format(group, filename, ",".join(row))
            logger.error(msg)
            raise TypeError(msg)

    # Turn dictionaries into a data list for output
    for monthnum, values in proc.items():
        data.append([monthnum, actual_date(monthnum)] +
                    [values[gname] for gname in group_names])

    # Prep for output by sorting (by month number) and inserting a header
    data.sort()
    data.insert(0, output_schema)

    # Check if data exists
    # Unlike other methods, the individual group calls handle the JSON
    if len(data) > 1:
        return True, data

    return True, []
def process_map(filename, output_schema=cfg.MAP_OUTPUT_SCHEMA):
    """Convert a map CSV file into tile-map output rows.

    The first field of each input row is converted to an int and looked up
    in cfg.FIPS_CODES; the second field is passed through as the value.

    Returns:
        (True, data, json): data is output_schema followed by one
        [fips_code, state_abbr, value] row per input row, in input order;
        json is json_for_tile_map() applied to those rows.
        (True, [], []) when the input held no rows.
    """
    # Input columns: "state", "value"
    # Output columns: "fips_code", "state_abbr", "value"

    # TODO: Add error handling for unsupported FIPS codes
    # TODO: Make sure all 50 states (or other expected data) is represented
    rows = [
        [record[0], cfg.FIPS_CODES[int(record[0])], record[1]]
        for record in util.load_csv(filename)
    ]

    # No input rows means there is nothing to chart
    if not rows:
        return True, [], []

    # Header row goes first; the tile-map JSON is built from data rows only
    return True, [output_schema] + rows, json_for_tile_map(rows)
def _format_yoy_change(value_yoy):
    """Format a year-over-year value as '<magnitude>% <descriptor>'."""
    yoy = float(value_yoy)
    magnitude = "{:.1f}".format(abs(yoy))
    # The descriptor table is indexed by the boolean "change is positive",
    # so a change of exactly zero uses the same entry as a negative one
    descriptor = cfg.PERCENT_CHANGE_DESCRIPTORS[yoy > 0]
    return "{}% {}".format(magnitude, descriptor)


def process_data_snapshot(filepath, date_schema=cfg.SNAPSHOT_DATE_SCHEMA):
    """Process a file at filepath that contains data snapshot information
    for all markets and prepare human-readable text for output.

    Each input row is unpacked as
    (month number, market, variable name, value, year-over-year value).
    The lowercased variable name decides which fields of the market's
    dictionary are filled:

    - contains "originations": "data_month", "num_originations",
      "year_over_year_change"
    - contains "volume": "value_originations"
    - contains "inquiry": "inquiry_yoy_change", "inquiry_month"
    - contains "tightness": "tightness_yoy_change", "tightness_month"

    Args:
        filepath: path handed to util.load_csv().
        date_schema: passed to actual_date() as its schema argument.

    Returns:
        A list of market-data dictionaries, one per market seen, each also
        carrying the market under "market_key".

    Raises:
        ValueError: a row's variable name matches none of the keywords
            above, or one of its numeric fields cannot be parsed as a
            float.
    """
    # Load specified file as input data
    inputdata = util.load_csv(filepath)
    logger.info("Loaded data snapshot file from {}".format(filepath))

    # Initialize output data
    market_info = {}

    for row in inputdata:
        # Unpack the row values
        monthnum, market, var_name, value, value_yoy = row
        monthnum = int(monthnum)
        var_name = var_name.lower()

        # Determine month string from month number
        month = actual_date(monthnum, schema=date_schema)

        # If first time seeing market, create sub-dict
        if market not in market_info:
            market_info[market] = {"market_key": market}
        info = market_info[market]

        # Handle the variable type
        # Each variable has value and value_yoy
        if "originations" in var_name:
            # Originations count first, then its year-over-year change
            orig_fmt = util.human_numbers(float(value), whole_units_only=1)
            yoy_fmt = _format_yoy_change(value_yoy)

            info["data_month"] = month
            info["num_originations"] = orig_fmt
            info["year_over_year_change"] = yoy_fmt

        elif "volume" in var_name:
            # Volume month is the same as origination month, so no month
            # is stored here
            info["value_originations"] = "${}".format(
                util.human_numbers(float(value))
            )

        elif "inquiry" in var_name:
            info["inquiry_yoy_change"] = _format_yoy_change(value_yoy)
            info["inquiry_month"] = month

        elif "tightness" in var_name:
            info["tightness_yoy_change"] = _format_yoy_change(value_yoy)
            info["tightness_month"] = month

        else:
            msg = "Data snapshot row (below) contains unknown " + \
                  "var_name name '{}'\n{}".format(var_name, ",".join(row))
            logger.error(msg)
            raise ValueError(msg)

    return list(market_info.values())
def process_group_file(filename, output_schema):
    """Convert a group volume CSV file into per-month, per-group rows.

    Each input row is unpacked as
    (month number, value, group name, adjustment label). A label containing
    "unadjust" fills the unadjusted value for that month and group; one
    containing "seasonal" fills the adjusted value.

    Returns:
        (True, data, json): data is output_schema followed by one
        [month, date, adjusted, unadjusted, group] row per month/group
        pair, sorted with the default list ordering; json is
        json_for_group_line_chart() applied to those rows. Group names
        found in cfg.TEXT_FIXES are replaced by their mapped text.
        (True, [], []) when the input produced no rows.

    Raises:
        TypeError: a row's label names neither adjustment kind.
    """
    by_month = {}

    for record in util.load_csv(filename):
        monthstr, value, group, is_adj_str = record
        monthnum = int(monthstr)

        # Create the month and group slots on first sight
        month_groups = by_month.setdefault(monthnum, {})
        series = month_groups.setdefault(group, {"adj": None, "unadj": None})

        adjustment = is_adj_str.lower()
        if "unadjust" in adjustment:
            series["unadj"] = value
        elif "seasonal" in adjustment:
            series["adj"] = value
        else:
            msg = (
                "Data row (below) does not specify seasonal adjustment "
                "in {}\n{}".format(filename, ",".join(record))
            )
            logger.error(msg)
            raise TypeError(msg)

    # Flatten into rows; column order MUST match the provided schema order
    rows = []
    for monthnum, month_groups in by_month.items():
        for groupname, series in month_groups.items():
            # Apply any text fix configured for this group name
            if groupname in cfg.TEXT_FIXES:
                label = cfg.TEXT_FIXES[groupname]
            else:
                label = groupname
            rows.append([
                monthnum,
                actual_date(monthnum),
                series["adj"],
                series["unadj"],
                label,
            ])

    # Month number is the leading sort key
    rows = sorted(rows)

    # Nothing to report: hand back empty data and empty JSON
    if not rows:
        return True, [], []

    # Header row goes first; the chart JSON is built from the data rows only
    return True, [output_schema] + rows, json_for_group_line_chart(rows)