def init_logging():
    """
    Initialize logging config for the synch logger, reading options from Settings.
    """
    base_logger = logging.getLogger("synch")
    debug = Settings.debug()
    if debug:
        base_logger.setLevel(logging.DEBUG)
    else:
        base_logger.setLevel(logging.INFO)
    fmt = logging.Formatter(
        fmt="%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # console handler
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    sh.setFormatter(fmt)
    base_logger.addHandler(sh)
    # optional mail handler for errors, rate-limited to one mail per minute
    mail = Settings.get("mail")
    if mail:
        rate_limit = RateLimitingFilter(per=60)
        mh = logging.handlers.SMTPHandler(
            mailhost=mail.get("mailhost"),
            fromaddr=mail.get("fromaddr"),
            toaddrs=mail.get("toaddrs"),
            subject=mail.get("subject"),
            credentials=(mail.get("user"), mail.get("password")),
        )
        mh.setLevel(logging.ERROR)
        mh.setFormatter(fmt)
        mh.addFilter(rate_limit)
        base_logger.addHandler(mh)
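# Illustrative only: a mail settings mapping with the keys init_logging() reads
# above (mailhost, fromaddr, toaddrs, subject, user, password). The concrete
# values and the way Settings loads them are assumptions for this sketch;
# synch normally gets them from its config file rather than a literal dict.
example_mail_settings = {
    "mailhost": "smtp.example.com",
    "fromaddr": "synch@example.com",
    "toaddrs": ["ops@example.com"],
    "subject": "synch error",
    "user": "synch@example.com",
    "password": "changeme",
}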
def continuous_etl(
    alias: str,
    schema: str,
    tables_pk: Dict,
    tables_dict: Dict,
    last_msg_id,
    skip_error: bool,
):
    """
    Continuously consume events from the broker and insert them into ClickHouse.
    """
    insert_interval = Settings.insert_interval()
    insert_num = Settings.insert_num()
    if not Settings.debug():
        if insert_interval < 60 or insert_num < 20000:
            logger.warning(
                "It is recommended to set insert_interval=60 and insert_num=20000 in production."
            )
    logger.info(
        f"consumer for {schema} started at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, "
        f"last_msg_id={last_msg_id}, insert_interval={insert_interval}, insert_num={insert_num}"
    )
    global len_event
    global event_list
    global is_insert
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    broker = get_broker(alias)
    for msg_id, msg in broker.msgs(
        schema, last_msg_id=last_msg_id, block=insert_interval * 1000
    ):
        if not msg_id:
            # broker poll timed out with no new message
            if len_event > 0:
                # flush the buffered events before waiting again
                is_insert = True
                alter_table = False
                query = None
            else:
                if is_stop:
                    finish_continuous_etl(broker)
                continue
        else:
            alter_table = False
            query = None
            logger.debug(f"msg_id:{msg_id}, msg:{msg}")
            len_event += 1
            event = msg
            table = event["table"]
            schema = event["schema"]
            action = event["action"]
            values = event["values"]
            if action == "query":
                # DDL statement, executed directly against ClickHouse below
                alter_table = True
                query = values["query"]
            else:
                engine = tables_dict.get(table).get("clickhouse_engine")
                writer = get_writer(engine)
                event_list = writer.handle_event(
                    tables_dict,
                    tables_pk.get(table),
                    schema,
                    table,
                    action,
                    event_list,
                    event,
                )
            if len_event == insert_num:
                is_insert = True
        if is_insert or alter_table:
            for table, v in event_list.items():
                table_event_num = 0
                pk = tables_pk.get(table)
                if isinstance(v, dict):
                    # MergeTree tables buffer separate insert/delete maps
                    writer = get_writer(ClickHouseEngine.merge_tree)
                    insert = v.get("insert")
                    delete = v.get("delete")
                    if delete:
                        delete_pks = list(delete.keys())
                    else:
                        delete_pks = []
                    if insert:
                        insert_events = sorted(
                            insert.values(), key=lambda x: x.get("event_unixtime")
                        )
                    else:
                        insert_events = []
                    if skip_error:
                        try:
                            if delete_pks:
                                writer.delete_events(schema, table, pk, delete_pks)
                            if insert_events:
                                writer.insert_events(schema, table, insert_events)
                        except Exception as e:
                            logger.error(
                                f"insert event error: {e}", exc_info=True, stack_info=True
                            )
                    else:
                        if delete_pks:
                            writer.delete_events(schema, table, pk, delete_pks)
                        if insert_events:
                            writer.insert_events(schema, table, insert_events)
                    table_event_num += len(delete_pks)
                    table_event_num += len(insert_events)
                elif isinstance(v, list):
                    # CollapsingMergeTree tables buffer a flat list of events
                    table_event_num += len(v)
                    writer = get_writer(ClickHouseEngine.collapsing_merge_tree)
                    if v:
                        if skip_error:
                            try:
                                writer.insert_events(schema, table, v)
                            except Exception as e:
                                logger.error(
                                    f"insert event error: {e}", exc_info=True, stack_info=True
                                )
                        else:
                            writer.insert_events(schema, table, v)
                insert_log(alias, schema, table, table_event_num, 2)
            if alter_table:
                try:
                    get_writer().execute(query)
                except Exception as e:
                    logger.error(f"alter table error: {e}", exc_info=True, stack_info=True)
            broker.commit(schema)
            logger.info(f"successfully committed {len_event} events")
            event_list = {}
            is_insert = False
            len_event = 0
        if is_stop:
            finish_continuous_etl(broker)
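# Illustrative only: one way continuous_etl() above might be wired up. The
# alias, schema, table names, primary keys, and engine values are hypothetical;
# in synch they come from the settings file and the CLI rather than being
# hard-coded, so treat this as a sketch of the expected argument shapes.
def run_consumer_example():
    tables_pk = {"user": "id", "order": "id"}
    tables_dict = {
        "user": {"clickhouse_engine": ClickHouseEngine.merge_tree},
        "order": {"clickhouse_engine": ClickHouseEngine.collapsing_merge_tree},
    }
    continuous_etl(
        alias="mysql_db",        # source database alias from settings
        schema="test",           # source schema to consume
        tables_pk=tables_pk,
        tables_dict=tables_dict,
        last_msg_id=None,        # resume point in the broker stream
        skip_error=True,         # log and continue on insert errors
    )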
def test_settings():
    assert isinstance(Settings.debug(), bool)
    assert isinstance(Settings.insert_num(), int)
    assert isinstance(Settings.insert_interval(), int)