def make_report(title, ordered_cities):
    """Build a Report with one row of price and trade statistics per city.

    Args:
        title: first line of the report.
        ordered_cities: iterable of city names; rows are emitted in this order.

    Returns:
        A Report holding the title, a blank-ish separator line (' '), the
        table rows and the table legend.

    NOTE(review): median_prices, median_prices_indices, n_trades and
    n_trades_indices are not parameters; they are looked up in an enclosing
    or module scope that is not visible here. Each is indexed by city name.
    """
    column_specs = (
        ('city', 30, '%30s',
         ('', '', '', '', '', 'City'),
         'city name'),
        ('median_price', 7, '%7.0f',
         ('', '', '', '', 'median', 'price'),
         'median price in city'),
        ('median_price_index', 7, '%7.2f',
         ('median', 'price', '/', 'overall', 'median', 'price'),
         'median price as fraction of overall median price'),
        ('n_trades', 7, '%7.0f',
         ('', '', '', '', 'number', 'trades'),
         'number of trades across all months'),
        ('n_trades_index', 7, '%7.2f',
         ('number', 'trades', '/ ', 'overall', 'median', 'trades'),
         'median number trades as fraction of overall median number of trades'),
    )
    table = ColumnsTable(column_specs)

    # One detail row per city, in the caller-supplied order.
    for city in ordered_cities:
        table.append_detail(
            city=city,
            median_price=median_prices[city],
            median_price_index=median_prices_indices[city],
            n_trades=n_trades[city],
            n_trades_index=n_trades_indices[city],
        )
    table.append_legend(40)

    report = Report()
    report.append(title)
    report.append(' ')
    for table_line in table.iterlines():
        report.append(table_line)
    return report
class ChartCDReport(object):
    """Report of median absolute error by month for the best models.

    The report consists of a fixed header, a ColumnsTable of detail rows
    (model identity plus hyperparameters) and the table's legend.
    """

    def __init__(self, column_definitions, test):
        """Create the report, its table and its header.

        Args:
            column_definitions: object providing defs_for_columns(*names) and
                replace_by_spaces(name, value).
            test: if truthy, write() appends a '** TESTING: DISCARD' marker.
        """
        self._column_definitions = column_definitions
        self._test = test
        self._report = Report()
        cd = self._column_definitions.defs_for_columns(
            'validation_month',
            'rank',
            'median_absolute_error',
            'median_price',
            'model',
            'n_months_back',
            'max_depth',
            'n_estimators',
            'max_features',
            'learning_rate',
            'alpha',
            'l1_ratio',
            'units_X',
            'units_y',
        )
        self._ct = ColumnsTable(columns=cd, verbose=True)
        self._header()

    def append(self, line):
        """Append a free-form line directly to the report (not to the table)."""
        self._report.append(line)

    def write(self, path):
        """Append the legend and table lines to the report, then write it to path.

        Every call appends to the underlying report again, so this is meant
        to be called once, after all detail rows have been added.
        """
        self._ct.append_legend()
        for line in self._ct.iterlines():
            self._report.append(line)
        if self._test:
            self._report.append('** TESTING: DISCARD')
        self._report.write(path)

    def _header(self):
        """Append the fixed title lines to the report."""
        self._report.append(
            'Median Absolute Error (MAE) by month for best-performing models and their hyperparameters'
        )
        self._report.append(' ')

    def append_detail(self, **kwds):
        """Append one detail row to the table.

        Each keyword is a column name. A value for which
        column_definitions.replace_by_spaces(name, value) is truthy is passed
        to the table as None (presumably rendered as blanks by ColumnsTable).
        """
        # .items() rather than the Python 2-only .iteritems(): same result,
        # and it also runs on Python 3.
        with_spaces = {
            k: (None if self._column_definitions.replace_by_spaces(k, v) else v)
            for k, v in kwds.items()
        }
        self._ct.append_detail(**with_spaces)
def make_table_stats(data, control, in_report_p):
    """Return a Report of monthly price statistics for the selected months.

    Args:
        data: table-like object with a `month` column comparable to
            Month(year, month) and a `price` column offering mean() and
            median() (presumably a pandas DataFrame; confirm with caller).
        control: unused here; kept for the caller-facing signature.
        in_report_p: predicate in_report_p(year, month); only months for
            which it is truthy get a row.

    Returns:
        A Report with a title, one row per selected month from 2003-01
        through 2009-03, and the table legend.
    """
    report = Report()
    report.append('Prices by Month')
    report.append('')

    table = ColumnsTable((
        ('year', 4, '%4d', (' ', ' ', 'year'), 'year of transaction'),
        ('month', 5, '%5d', (' ', ' ', 'month'), 'month of transaction'),
        ('mean_price', 6, '%6.0f', (' ', ' mean', 'price'), 'mean price in dollars'),
        ('median_price', 6, '%6.0f', (' ', 'median', 'price'), 'median price in dollars'),
        ('mean_price_ratio', 6, '%6.3f', (' mean', ' price', ' ratio'),
         'ratio of price in current month to prior month'),
        ('median_price_ratio', 6, '%6.3f', ('median', ' price', ' ratio'),
         'ratio of price in current month to prior month'),
        ('number_trades', 6, '%6d', ('number', 'of', 'trades'), 'number of trades in the month'),
    ))

    full_year = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
    first_quarter = (1, 2, 3)

    # Statistics of the most recent month that was reported; None until the
    # first selected month has been seen.
    last_mean = None
    last_median = None

    for year in (2003, 2004, 2005, 2006, 2007, 2008, 2009):
        # Only the first three months of 2009 are considered.
        months = first_quarter if year == 2009 else full_year
        for month in months:
            if not in_report_p(year, month):
                continue

            in_month = data.month == Month(year, month)
            month_prices = data[in_month].price
            mean_now = month_prices.mean()
            median_now = month_prices.median()
            trade_count = len(month_prices)

            # Ratios are relative to the previously reported month, which is
            # not necessarily the previous calendar month.
            if last_mean is None:
                mean_ratio = None
            else:
                mean_ratio = mean_now / last_mean
            if last_median is None:
                median_ratio = None
            else:
                median_ratio = median_now / last_median

            table.append_detail(
                year=year,
                month=month,
                mean_price=mean_now,
                median_price=median_now,
                mean_price_ratio=mean_ratio,
                median_price_ratio=median_ratio,
                number_trades=trade_count,
            )
            last_mean = mean_now
            last_median = median_now

    table.append_legend()
    for table_line in table.iterlines():
        report.append(table_line)
    return report
class ChartEReport(object):
    """Report on the best models, individually and combined as an ensemble.

    The report consists of a header naming K and the ensemble weighting, a
    ColumnsTable of detail rows and the table's legend.
    """

    def __init__(self, k, ensemble_weighting, column_definitions, test):
        """Create the report, write its header and set up its table.

        Args:
            k: number of best models considered (formatted with %d).
            ensemble_weighting: description of the weighting (formatted with %s).
            column_definitions: object providing defs_for_columns(*names) and
                replace_by_spaces(name, value).
            test: if truthy, write() appends a '** TESTING: DISCARD' marker.
        """
        self._column_definitions = column_definitions
        self._test = test
        self._report = Report()
        self._header(k, ensemble_weighting)
        cd = self._column_definitions.defs_for_columns(
            'validation_month',
            'model',
            'n_months_back',
            'n_estimators',
            'max_features',
            'max_depth',
            'learning_rate',
            'rank',
            'weight',
            'mae_validation',
            'mae_query',
            'mae_ensemble',
        )
        self._ct = ColumnsTable(columns=cd, verbose=True)

    def write(self, path):
        """Append the legend and table lines to the report, then write it to path.

        Every call appends to the underlying report again, so this is meant
        to be called once, after all detail rows have been added.
        """
        self._ct.append_legend()
        for line in self._ct.iterlines():
            self._report.append(line)
        if self._test:
            self._report.append('** TESTING: DISCARD')
        self._report.write(path)

    def detail_line(self, **kwds):
        """Append one detail row to the table.

        Each keyword is a column name. A value for which
        column_definitions.replace_by_spaces(name, value) is truthy is passed
        to the table as None (presumably rendered as blanks by ColumnsTable).
        """
        # .items() rather than the Python 2-only .iteritems(): same result,
        # and it also runs on Python 3.
        with_spaces = {
            k: (None if self._column_definitions.replace_by_spaces(k, v) else v)
            for k, v in kwds.items()
        }
        self._ct.append_detail(**with_spaces)

    def _header(self, k, ensemble_weighting):
        """Append the title and the run parameters to the report."""
        self._report.append(
            'Performance of Best Models Separately and as an Ensemble')
        self._report.append(' ')
        self._report.append('Considering Best K = %d models' % k)
        self._report.append('Ensemble weighting: %s' % ensemble_weighting)
class ChartHReport(object):
    """Report on the best models and their ensemble for one validation month.

    The report consists of a header naming K, the validation month and the
    ensemble weighting, a ColumnsTable of rows and the table's legend.
    """

    def __init__(self, k, validation_month, ensemble_weighting, column_definitions, test):
        """Create the report, write its header and set up its table.

        Args:
            k: number of best models considered (formatted with %d).
            validation_month: validation month (formatted with %s).
            ensemble_weighting: description of the weighting (formatted with %s).
            column_definitions: object providing defs_for_columns(*names) and
                replace_by_spaces(name, value).
            test: if truthy, write() appends a '** TESTING: DISCARD' marker.
        """
        self._column_definitions = column_definitions
        self._report = Report()
        self._test = test
        self._header(k, validation_month, ensemble_weighting)
        cd = self._column_definitions.defs_for_columns(
            'description',
            'mae_validation',
            'mae_query',
            'mare_validation',
            'mare_query',
        )
        self._ct = ColumnsTable(columns=cd, verbose=True)

    def write(self, path):
        """Append the legend and table lines to the report, then write it to path.

        Every call appends to the underlying report again, so this is meant
        to be called once, after all rows have been added.
        """
        self._ct.append_legend()
        for line in self._ct.iterlines():
            self._report.append(line)
        if self._test:
            self._report.append('** TESTING: DISCARD')
        self._report.write(path)

    def detail_line(self, **kwds):
        """Append one detail row to the table.

        Each keyword is a column name. A value for which
        column_definitions.replace_by_spaces(name, value) is truthy is passed
        to the table as None (presumably rendered as blanks by ColumnsTable).
        """
        # .items() rather than the Python 2-only .iteritems(): same result,
        # and it also runs on Python 3.
        with_spaces = {
            k: (None if self._column_definitions.replace_by_spaces(k, v) else v)
            for k, v in kwds.items()
        }
        self._ct.append_detail(**with_spaces)

    def preformatted_line(self, line):
        """Echo line to stdout and append it, as-is, to the table."""
        # Parenthesized form prints the same text on Python 2 (single
        # argument) and is valid syntax on Python 3.
        print(line)
        self._ct.append_line(line)

    def _header(self, k, validation_month, ensemble_weighting):
        """Append the title and the run parameters to the report."""
        self._report.append(
            'Performance of Best Models Separately and as an Ensemble')
        self._report.append(' ')
        self._report.append('Considering Best K = %d models' % k)
        self._report.append('For validation month %s' % validation_month)
        self._report.append('Ensemble weighting: %s' % ensemble_weighting)
class ChartFReport(object):
    """Report comparing ensemble errors with those of the best model.

    The report consists of a header naming K and the ensemble weighting, a
    ColumnsTable of detail rows and the table's legend.
    """

    def __init__(self, k, ensemble_weighting, column_definitions, test):
        """Create the report, write its header and set up its table.

        Args:
            k: number of best models considered (formatted with %d).
            ensemble_weighting: description of the weighting (formatted with %s).
            column_definitions: object providing defs_for_columns(*names) and
                replace_by_spaces(name, value).
            test: if truthy, write() appends a '** TESTING: DISCARD' marker.
        """
        self._column_definitions = column_definitions
        self._test = test
        self._report = Report()
        self._header(k, ensemble_weighting)
        cd = self._column_definitions.defs_for_columns(
            'validation_month',
            'mae_index0',
            'mae_ensemble',
            'mae_best_next_month',
            'median_price',
            'fraction_median_price_next_month_index0',
            'fraction_median_price_next_month_ensemble',
            'fraction_median_price_next_month_best',
        )
        self._ct = ColumnsTable(columns=cd, verbose=True)

    def write(self, path):
        """Append the legend and table lines to the report, then write it to path.

        Every call appends to the underlying report again, so this is meant
        to be called once, after all detail rows have been added.
        """
        self._ct.append_legend()
        for line in self._ct.iterlines():
            self._report.append(line)
        if self._test:
            self._report.append('** TESTING: DISCARD')
        self._report.write(path)

    def detail_line(self, **kwds):
        """Append one detail row to the table.

        Each keyword is a column name. A value for which
        column_definitions.replace_by_spaces(name, value) is truthy is passed
        to the table as None (presumably rendered as blanks by ColumnsTable).
        """
        # .items() rather than the Python 2-only .iteritems(): same result,
        # and it also runs on Python 3.
        with_spaces = {
            k: (None if self._column_definitions.replace_by_spaces(k, v) else v)
            for k, v in kwds.items()
        }
        self._ct.append_detail(**with_spaces)

    def _header(self, k, ensemble_weighting):
        """Append the title and the run parameters to the report."""
        self._report.append(
            'Comparison of Errors of Ensemble and Best Model That Know the Future'
        )
        self._report.append(' ')
        self._report.append('Considering Best K = %d models' % k)
        self._report.append('Ensemble weighting: %s' % ensemble_weighting)
class ChartBReport(object):
    """Report of the K best models and their hyperparameters for one month.

    The report consists of a header naming K and the validation month, a
    ColumnsTable of detail rows and the table's legend.
    """

    def __init__(self, validation_month, k, column_definitions, test):
        """Create the report, write its header and set up its table.

        Args:
            validation_month: validation month (formatted with %s).
            k: number of best models reported (formatted with %d).
            column_definitions: object providing defs_for_columns(*names) and
                replace_by_spaces(name, value).
            test: if truthy, write() appends a '**TESTING: DISCARD' marker.
        """
        self._report = Report()
        # The header only needs self._report, so it can run before the
        # remaining attributes are set.
        self._header(validation_month, k)
        self._column_definitions = column_definitions
        self._test = test
        cd = self._column_definitions.defs_for_columns(
            'median_absolute_error',
            'model',
            'n_months_back',
            'max_depth',
            'n_estimators',
            'max_features',
            'learning_rate',
        )
        self._ct = ColumnsTable(columns=cd, verbose=True)

    def _header(self, validation_month, k):
        """Append the title, the validation month and a separator line."""
        def a(line):
            self._report.append(line)

        a('MAE for %d best-performing models and their hyperparameters' % k)
        a('Validation month: %s' % validation_month)
        a(' ')

    def append_detail(self, **kwds):
        """Append one detail row to the table.

        Each keyword is a column name. A value for which
        column_definitions.replace_by_spaces(name, value) is truthy is passed
        to the table as None (per the original comment, this is meant to
        blank out NaN values).
        """
        # .items() rather than the Python 2-only .iteritems(): same result,
        # and it also runs on Python 3.
        with_spaces = {
            k: (None if self._column_definitions.replace_by_spaces(k, v) else v)
            for k, v in kwds.items()
        }
        self._ct.append_detail(**with_spaces)

    def write(self, path):
        """Append the legend and table lines to the report, then write it to path.

        Every call appends to the underlying report again, so this is meant
        to be called once, after all detail rows have been added.
        """
        self._ct.append_legend()
        for line in self._ct.iterlines():
            self._report.append(line)
        if self._test:
            self._report.append('**TESTING: DISCARD')
        self._report.write(path)
def make_chart_stats(data, control, filter_f):
    """Return a Report of monthly price statistics for the selected months.

    Args:
        data: mapping indexed by make_reduction_key(year, month); each value
            is itself indexable by 'mean', 'median' and 'count'.
        control: unused here; kept for the caller-facing signature.
        filter_f: predicate filter_f(year, month); only months for which it
            is truthy get a row (and are looked up in data).

    Returns:
        A Report with a title, one row per selected month in the years
        2003 through 2009, and the table legend.
    """
    r = Report()
    r.append('Prices by Month')
    r.append('')
    ct = ColumnsTable((
        ('year', 4, '%4d', (' ', ' ', 'year'), 'year of transaction'),
        ('month', 5, '%5d', (' ', ' ', 'month'), 'month of transaction'),
        ('mean_price', 6, '%6.0f', (' ', ' mean', 'price'), 'mean price in dollars'),
        ('median_price', 6, '%6.0f', (' ', 'median', 'price'), 'median price in dollars'),
        ('mean_price_ratio', 6, '%6.3f', (' mean', ' price', ' ratio'),
         'ratio of price in current month to prior month'),
        ('median_price_ratio', 6, '%6.3f', ('median', ' price', ' ratio'),
         'ratio of price in current month to prior month'),
        ('number_trades', 6, '%6d', ('number', 'of', 'trades'), 'number of trades in the month'),
    ))

    # Statistics of the most recently reported month; None until the first
    # selected month has been seen, in which case the ratios are None.
    prior_mean_price = None
    prior_median_price = None

    # range() instead of the Python 2-only xrange(): identical iteration,
    # and the function also runs on Python 3.
    for year in range(2003, 2010):
        for month in range(1, 13):
            if not filter_f(year, month):
                continue
            value = data[make_reduction_key(year, month)]
            mean_price = value['mean']
            median_price = value['median']
            number_trades = value['count']
            ct.append_detail(
                year=year,
                month=month,
                mean_price=mean_price,
                median_price=median_price,
                # Ratios are relative to the previously reported month,
                # which need not be the previous calendar month.
                mean_price_ratio=(
                    None if prior_mean_price is None
                    else mean_price / prior_mean_price),
                median_price_ratio=(
                    None if prior_median_price is None
                    else median_price / prior_median_price),
                number_trades=number_trades,
            )
            prior_mean_price = mean_price
            prior_median_price = median_price

    ct.append_legend()
    for line in ct.iterlines():
        r.append(line)
    return r