def __calculate_class_metrics__(self, table_name: str, predictor: str, label: str, pattern_type: str):
    """Compute per-class metrics for every model and store them.

    The training data for the given table/predictor/label/pattern type is
    fetched once.  For each entry of ``self._model_dict`` a
    ``ModelPerformance`` is built, and one metric row is created per value
    in its ``y_sorted_value_list``.  A metric that has no entry for a value
    is stored as 0.

    :param table_name: table the training data is read from.
    :param predictor: predictor identifier passed to the access layer.
    :param label: label column the models are evaluated for.
    :param pattern_type: pattern type passed to the access layer.
    """
    x_train, y_train = self._access_layer_prediction.get_x_data_y_train_data_for_predictor(
        table_name, predictor, label, pattern_type)
    insert_dict_list = []
    valid_date = MyDate.get_date_as_string_from_date_time()
    for model_name, model in self._model_dict.items():
        self.__print_class_metrics_details__(model_name, table_name, predictor, label, pattern_type)
        model_performance = ModelPerformance(model, x_train, y_train, label)
        f1_score_dict = model_performance.f1_score_dict
        recall_dict = model_performance.recall_dict
        precision_dict = model_performance.precision_dict
        roc_auc_dict = model_performance.roc_auc_dict
        for y_value in model_performance.y_sorted_value_list:
            insert_dict_list.append({
                MDC.VALID_DT: valid_date,
                MDC.MODEL: model_name,
                MDC.TABLE: table_name,
                MDC.PREDICTOR: predictor,
                MDC.LABEL: label,
                MDC.PATTERN_TYPE: pattern_type,
                MDC.VALUE: y_value,
                MDC.F1_SCORE: f1_score_dict.get(y_value, 0),
                MDC.PRECISION: precision_dict.get(y_value, 0),
                MDC.RECALL: recall_dict.get(y_value, 0),
                # Each metric is looked up in its own dict; the ROC-AUC value
                # was previously guarded by a membership test on recall_dict.
                MDC.ROC_AUC: roc_auc_dict.get(y_value, 0),
            })
    # NOTE(review): the nesting of the two calls below was reconstructed from
    # a whitespace-collapsed source - confirm that the refresh is meant to
    # run only after rows were inserted.
    if insert_dict_list:
        self._access_layer_metric.insert_data(insert_dict_list)
        self.__calculate_df_metrics__()
def __get_log_entry_numbers_for_log_type__(self, log_type: str, actual_day=True):
    """Return the number of log entries available for ``log_type``.

    :param log_type: key into ``self._log_data_frame_dict``.
    :param actual_day: when true, the entries are first restricted by the
        first matching column: an epoch-seconds column must be at or after
        ``MyDate.get_epoch_seconds_for_date()`` minus one day, a date column
        must equal today's date string.
    :return: 0 for an unknown log type; for ``LOGT.TRADES`` the string
        ``'<add>/<buy>'`` with the number of 'Add' and 'Buy' process steps;
        otherwise the row count.
    """
    today_str = MyDate.get_date_as_string_from_date_time()
    if log_type not in self._log_data_frame_dict:
        return 0
    entries = self._log_data_frame_dict[log_type]

    if actual_day:
        # (column, holds epoch seconds) - checked in this priority order,
        # only the first column that is present is applied.
        candidate_columns = (
            (DC.WAVE_END_TS, True),
            (DC.TS_PATTERN_TICK_LAST, True),
            (PRDC.START_DT, False),
            (LOGDC.DATE, False),
        )
        for column, is_epoch_column in candidate_columns:
            if column not in entries.columns:
                continue
            if is_epoch_column:
                # minus one day
                cut_off_ts = MyDate.get_epoch_seconds_for_date() - MyDate.get_seconds_for_period(days=1)
                entries = entries[entries[column] >= cut_off_ts]
            else:
                entries = entries[entries[column] == today_str]
            break

    if log_type != LOGT.TRADES:
        return entries.shape[0]

    add_number = entries[entries[LOGDC.PROCESS_STEP] == 'Add'].shape[0]
    buy_number = entries[entries[LOGDC.PROCESS_STEP] == 'Buy'].shape[0]
    return '{}/{}'.format(add_number, buy_number)
def get_sale_by_file_row(self, row):
    """Create a sale from one row of a sales file.

    All values of ``row`` (keyed by its index labels) are taken over; the
    sale is then marked as an own sale, located 'virtual', and its start
    date is set to the current date string.

    :param row: row object exposing ``index`` and item access by label.
    :return: whatever ``__get_sale_by_data_dict__`` builds from the values.
    """
    sale_values = {label: row[label] for label in row.index}
    sale_values.update({
        SLDC.IS_MY_SALE: True,
        SLDC.LOCATION: 'virtual',
        SLDC.START_DATE: MyDate.get_date_as_string_from_date_time(),
    })
    return self.__get_sale_by_data_dict__(sale_values)
def is_process_available_for_today(self, process: str) -> bool:
    """Tell whether at least one row exists for ``process`` with today's start date.

    :param process: process name matched against ``PRDC.PROCESS``.
    :return: True if the selection returns one or more rows, else False.
    """
    dt_today = MyDate.get_date_as_string_from_date_time()  # e.g. dt_today = '2018-12-22'
    df = self.select_data_by_data_dict({
        PRDC.PROCESS: process,
        PRDC.START_DT: dt_today,
    })
    # The result is a row-count comparison, i.e. a bool - not the data frame.
    return df.shape[0] > 0
def __get_best_trained_model_name_for_label__(self, label: str, model_category: str):
    """Look up the model name stored today for the Fibonacci predictor.

    :param label: label matched against ``MDC.LABEL``.
    :param model_category: value matched against ``MDC.PATTERN_TYPE``.
    :return: the ``MDC.MODEL`` value of the first matching row, or ``''``
        when no row matches.
    """
    matches = self.select_data_by_data_dict({
        MDC.VALID_DT: MyDate.get_date_as_string_from_date_time(),
        MDC.PREDICTOR: PRED.FIBONACCI,
        MDC.LABEL: label,
        MDC.PATTERN_TYPE: model_category,
    })
    has_match = matches.shape[0] > 0
    return matches.iloc[0][MDC.MODEL] if has_match else ''
def __adjust_log_df_to_selected_items__(self):
    """Narrow ``self._log_df`` to the currently selected process, step and date range.

    A process / process-step filter is applied only when both the column
    name and the selected value are non-empty.  For the date range
    ``DTRG.TODAY`` an epoch-seconds column is filtered with a two day
    offset, any other date column must equal today's date string.
    """
    if self._process_column != '' and self._selected_log_process != '':
        self._log_df = self._log_df[self._log_df[self._process_column] == self._selected_log_process]
    if self._process_step_column != '' and self._selected_log_process_step != '':
        self._log_df = self._log_df[self._log_df[self._process_step_column] == self._selected_log_process_step]
    if self._selected_date_range == DTRG.TODAY:
        # Both columns hold epoch seconds; comparing them with a date string
        # (the fallback branch) would never match a row.
        if self._date_column in (DC.WAVE_END_TS, DC.TS_PATTERN_TICK_LAST):
            two_days_in_seconds = 60 * 60 * 24 * 2
            offset_ts = MyDate.get_epoch_seconds_for_date() - two_days_in_seconds  # minus 2 days
            self._log_df = self._log_df[self._log_df[self._date_column] >= offset_ts]
        else:
            today_str = MyDate.get_date_as_string_from_date_time()
            self._log_df = self._log_df[self._log_df[self._date_column] == today_str]
def __get_sales_data_dict_for_online_search_api__(self, api):
    """Build the sale data dictionary describing an online search request.

    The region is always derived from ``api.region_value``.  Product
    category and sub category are derived only when ``api.category_value``
    is non-empty; otherwise both stay empty strings.

    :param api: search api object providing ``region_value``,
        ``category_value``, ``sub_category_value`` and ``search_string``.
    :return: dict with sale id (current time stamp as string), location
        'online', current start date, title, region and product categories.
    """
    region = self.sys_config.region_categorizer.get_category_for_value(api.region_value)

    if api.category_value == '':
        product_category = ''
        product_sub_category = ''
    else:
        categorizer = self.sys_config.product_categorizer
        product_category = categorizer.get_category_for_value(api.category_value)
        product_sub_category = categorizer.get_sub_category_for_value(
            product_category, api.sub_category_value)

    return {
        SLDC.SALE_ID: str(MyDate.time_stamp_now()),
        SLDC.LOCATION: 'online',
        SLDC.START_DATE: MyDate.get_date_as_string_from_date_time(),
        SLDC.TITLE: api.search_string,
        SLDC.REGION: region,
        SLDC.PRODUCT_CATEGORY: product_category,
        SLDC.PRODUCT_SUB_CATEGORY: product_sub_category,
    }
def __adjust_log_df_to_selected_items__(self):
    """Restrict ``self._log_df`` to the selected process, process step and date range.

    Equality filters are skipped when either the column name or the
    selected value is an empty string.  With ``DTRG.TODAY`` selected, the
    epoch-seconds columns (``DC.WAVE_END_TS``, ``DC.TS_PATTERN_TICK_LAST``)
    keep rows not older than the reference epoch minus two days; every
    other date column must equal today's date string.
    """

    def keep_rows_matching(column, selected_value):
        # An empty column name or selection means "no filter".
        if column != '' and selected_value != '':
            self._log_df = self._log_df[self._log_df[column] == selected_value]

    keep_rows_matching(self._process_column, self._selected_log_process)
    keep_rows_matching(self._process_step_column, self._selected_log_process_step)

    if self._selected_date_range != DTRG.TODAY:
        return

    if self._date_column in (DC.WAVE_END_TS, DC.TS_PATTERN_TICK_LAST):
        # minus 2 days
        threshold_ts = MyDate.get_epoch_seconds_for_date() - MyDate.get_seconds_for_period(days=2)
        row_mask = self._log_df[self._date_column] >= threshold_ts
    else:
        current_date = MyDate.get_date_as_string_from_date_time()
        row_mask = self._log_df[self._date_column] == current_date
    self._log_df = self._log_df[row_mask]
def __set_best_trained_model_name_for_label__(self, label: str, model_category: str, model_name: str, value: float):
    """Store ``model_name`` as today's best model for ``label`` unless one is already stored.

    :param label: label the model was trained for.
    :param model_category: stored as ``MDC.PATTERN_TYPE``.
    :param model_name: name of the model to remember.
    :param value: score of the model; stored rounded to two decimals.
    """
    already_stored = self.__get_best_trained_model_name_for_label__(label, model_category)
    if already_stored != '':
        return  # nothing to do - we already have one entry for today

    new_row = {
        MDC.VALID_DT: MyDate.get_date_as_string_from_date_time(),
        MDC.MODEL: model_name,
        MDC.TABLE: self.table_name,
        MDC.PREDICTOR: PRED.FIBONACCI,
        MDC.LABEL: label,
        MDC.PATTERN_TYPE: model_category,
        MDC.VALUE: round(value, 2),
    }
    # The per-class metrics are not available here and are stored as 0.
    for metric_column in (MDC.PRECISION, MDC.RECALL, MDC.F1_SCORE, MDC.ROC_AUC):
        new_row[metric_column] = 0
    self.insert_data([new_row])
def get_actual_metric_data_frame(self) -> pd.DataFrame:
    """Return the metric rows whose ``MDC.VALID_DT`` equals today's date string."""
    # The date string has the form '2018-12-22'.
    selection = {MDC.VALID_DT: MyDate.get_date_as_string_from_date_time()}
    return self.select_data_by_data_dict(selection)
# Export the grouped wave peak data to one Excel sheet per grouping.
# `writer`, `access_layer` and `df` are created earlier in the script.
offset_date = '2018-05-01'
df_grouped_direct_daily = access_layer.get_grouped_by_for_wave_peak_plotting(WPDT.DAILY_DATE, 1, offset_date)
df_grouped_direct_daily.to_excel(writer, sheet_name='Daily')
print(MyDate.time_stamp_now())
df_grouped_direct_intraday = access_layer.get_grouped_by_for_wave_peak_plotting(WPDT.INTRADAY_DATE, 1, offset_date)
df_grouped_direct_intraday.to_excel(writer, sheet_name='Intraday Date')
df_grouped_direct_intraday = access_layer.get_grouped_by_for_wave_peak_plotting(WPDT.INTRADAY_15_TS, 15)
df_grouped_direct_intraday.to_excel(writer, sheet_name='INTRADAY_15_TS')
df_grouped_direct_intraday = access_layer.get_grouped_by_for_wave_peak_plotting(WPDT.INTRADAY_30_TS, 30)
df_grouped_direct_intraday.to_excel(writer, sheet_name='INTRADAY_30_TS')
# close() writes the workbook and releases the file handle; the former
# save() is no longer available on newer pandas ExcelWriter versions.
writer.close()
print(MyDate.time_stamp_now())

# Manual switch: set to True to additionally export the waves grouped by
# index, period, wave type and date.
for_grouping = False
if for_grouping:
    df['Date'] = df[DC.WAVE_END_DT].apply(MyDate.get_date_str_from_datetime)
    df_for_grouping = df[[DC.EQUITY_INDEX, DC.PERIOD, DC.WAVE_TYPE, 'Date', DC.TICKER_ID]]
    df_grouped = df_for_grouping.groupby([DC.EQUITY_INDEX, DC.PERIOD, DC.WAVE_TYPE, 'Date']).count()
    df_grouped_direct = access_layer.get_grouped_by_for_wave_plotting()
    df_grouped_direct.to_excel('Wave_Grouped.xlsx')
    print('test')
else:
    today_str = MyDate.get_date_as_string_from_date_time()