def log_published_preds(self, stmt_inf_outputs: List, tweet_inf_outputs: List) -> None:
    """Persist published statement and tweet inference records, update counters and log totals."""
    stmt_tups, tweet_tups = DCInfSvc.prep_logging(stmt_inf_outputs, tweet_inf_outputs)
    # batch-insert both record sets, capturing row counts for the metric counters below
    stmt_cnt, _stmt_errs = batch_execute_many(
        self.cnxp.get_connection(), self.config.experiment.infsvc.sql.stmts_pub_sql, stmt_tups)
    tweet_cnt, _tweet_errs = batch_execute_many(
        self.cnxp.get_connection(), self.config.experiment.infsvc.sql.tweets_pub_sql, tweet_tups)
    # bump the published-record counters before emitting the summary log line
    tweets_published.inc(tweet_cnt)
    stmts_published.inc(stmt_cnt)
    logger.info(
        f"published {stmt_cnt} statement and {tweet_cnt} tweet inference records"
    )
def exec_sql_invariant(self, recs: List[Tuple], sql_invariant: str) -> \
        Union[Tuple[int, int], Tuple[int, int, List]]:
    """Batch-insert `recs` using `sql_invariant`, trading throughput for per-row
    error visibility when `self.debug_mode` is set."""
    if self.debug_mode:
        # debug mode: sacrifice setting commit frequency inefficiently to 1 to
        # identify specific row errors while avoiding failure of remainder of batch insert
        return batch_execute_many(
            self.cnxp.get_connection(), f"INSERT {sql_invariant}", recs, 1, self.debug_mode)
    # IGNORE performs efficient batch update but loses access to specific row failures w/o
    # writing a stored procedure which is overkill in this use case
    return batch_execute_many(
        self.cnxp.get_connection(), f"INSERT IGNORE {sql_invariant}", recs,
        self.config.db.db_commit_freq)
def dist_to_db(self, converged_ds_iter: Sampler) -> int:
    """Persist converged truth samples to the DB.

    Args:
        converged_ds_iter: iterable yielding samples whose first three fields
            form one DB row. (Assumes each sample is indexable — TODO confirm.)

    Returns:
        Number of rows inserted.
    """
    # comprehension replaces the manual for-and-append loop (ruff PERF401)
    stmts = [(sample[0], sample[1], sample[2]) for sample in converged_ds_iter]
    inserted_rowcnt, _ = db_utils.batch_execute_many(
        self.cnxp.get_connection(), self.config.data_source.sql.converge_truths, stmts,
        self.config.data_source.db_commit_freq)
    return inserted_rowcnt
def save_model_analysis(self, report_tups: List[Tuple], base_mode: bool = True) -> Tuple[int, List]:
    """Persist model-analysis candidate rows, returning (inserted row count, insert errors)."""
    # pick the candidate-save SQL matching the model flavor being analyzed
    if base_mode:
        cand_save_sql = self.config.data_source.sql.base_model_based_cands_sql
    else:
        cand_save_sql = self.config.data_source.sql.dc_model_based_cands_sql
    # save analysis of candidate "false" truths to be deleted/deduped from truths statements source
    return db_utils.batch_execute_many(
        self.cnxp.get_connection(), cand_save_sql, report_tups,
        self.config.data_source.db_commit_freq)
def persist_rpt_data(self, rpt_tups: List[Tuple]) -> None:
    """Persist inference report data: per-record analysis rows, a global model
    performance summary, and local performance summary rows.

    Args:
        rpt_tups: record tuples for the per-record model report insert.
    """
    inserted_rpt_rowcnt, _ = batch_execute_many(
        self.cnxp.get_connection(), self.config.inference.sql.save_model_rpt_sql, rpt_tups)
    logger.info(
        f"Generated {inserted_rpt_rowcnt} inference records for analysis of "
        f"model version {constants.APP_INSTANCE}")
    # fix: this count previously reused `inserted_model_rowcnt`, conflating the
    # per-record insert count with the global summary insert count
    inserted_summary_rowcnt, _ = single_execute(
        self.cnxp.get_connection(), self.config.inference.sql.save_model_sql)
    logger.info(
        f"Generated {inserted_summary_rowcnt} global model performance summary for "
        f"model version {constants.APP_INSTANCE}")
    inserted_perf_rowcnt, _ = single_execute(
        self.cnxp.get_connection(), self.config.inference.sql.save_perf_sql)
    logger.info(
        f"Generated {inserted_perf_rowcnt} local performance summary records for "
        f"model version {constants.APP_INSTANCE}")
def log_report(self, reports: List[Tuple], target_type: str, pubtype: str) -> Tuple[int, int]:
    """Route reports through the publish or analyze flow, then batch-insert the
    resulting rows, returning batch_execute_many's (count, errors) result."""
    if pubtype == 'pub':
        report_sql, rpts = self.pub_flow(target_type, reports)
    else:
        report_sql, rpts = self.analyze_flow(target_type, reports)
    return batch_execute_many(self.cnxp.get_connection(), report_sql, rpts)