def _process_list_data(ctx):
    """Run the ETL for every dataset listed in the config file, then optionally poll for changes.

    The config file named by ``ctx.obj['conf']`` is (re)loaded with
    ``_load_config`` on every pass. Its ``'rules'`` entry names the rules file
    and its ``'data'`` entry lists the datasets to process. A pass triggers
    ``_execute`` when any of the following is detected:

    * the config points at a different rules file,
    * the modification time of the current rules file has changed,
    * the ``'data'`` list differs from what has already been processed.

    Args:
        ctx: Context object whose ``obj`` mapping provides the keys
            ``'clean'``, ``'rules'``, ``'bclink_helpers'``, ``'conf'`` and
            ``'listen_for_changes'``. It is also forwarded to ``_execute``.
            (Presumably a ``click.Context`` -- confirm against the caller.)

    Raises:
        Exception: If a data item's ``'input'`` is neither a list nor an
            existing directory.

    Errors from the initial ``_load_config`` / ``os.path.getmtime`` calls are
    not caught here. Errors from re-loading the config inside the loop are
    logged and retried after a 5 second pause.
    """
    logger = Logger("_process_list_data")
    logger.info("ETL process has begun")

    # Datasets that have already been handed to _execute.
    data = []
    clean = ctx.obj['clean']
    rules = ctx.obj['rules']
    bclink_helpers = ctx.obj['bclink_helpers']
    config_file = ctx.obj['conf']
    conf = _load_config(config_file)
    rules_file = conf['rules']
    rules_file_last_modified = os.path.getmtime(rules_file)

    bclink_helpers.print_summary()

    # display_msg doubles as a "log once" latch for both the
    # "Finished!... Listening" message and the misconfiguration error.
    display_msg = True
    # Only the very first _execute call receives the caller's clean flag.
    _clean = clean

    while True:
        re_execute = False
        # Reset each pass so a stale value from an earlier iteration is never read.
        change_in_rules = False

        try:
            conf = _load_config(config_file)
        except Exception as e:
            # Report a broken config once, then keep retrying quietly until it loads.
            if not display_msg:
                logger.critical(e)
                logger.error(
                    f"You've misconfigured your file '{config_file}'!! Please fix!"
                )
            time.sleep(5)
            display_msg = True
            continue

        current_rules_file = conf['rules']
        new_rules_file = rules_file != current_rules_file
        if new_rules_file:
            # The config now points at a different rules file.
            logger.info(
                f"Detected a new rules file.. old was '{rules_file}' and new is '{current_rules_file}'"
            )
            rules_file = current_rules_file
            rules = coconnect.tools.load_json_delta(rules_file, rules)
            rules_file_last_modified = os.path.getmtime(rules_file)
            re_execute = True
        else:
            # Same rules file: check whether it has been modified on disk.
            new_rules_file_last_modified = os.path.getmtime(current_rules_file)
            change_in_rules = rules_file_last_modified != new_rules_file_last_modified
            if change_in_rules:
                logger.info(
                    f"Detected a change/update in the rules file '{rules_file}'"
                )
                rules = coconnect.tools.load_json_delta(
                    current_rules_file, rules)
                # Remember the new timestamp; otherwise every later pass would
                # see the same "change" again and re-run the ETL indefinitely.
                rules_file_last_modified = new_rules_file_last_modified
                re_execute = True

        current_data = conf['data']
        if data != current_data:
            logger.debug(f"old {data}")
            logger.debug(f"new {current_data}")
            # NOTE(review): `data` only ever grows, so if entries are removed
            # from (or reordered in) the config this branch is taken on every
            # pass with an empty `new_data` -- confirm whether that is intended.
            new_data = [obj for obj in current_data if obj not in data]
            logger.info(f"New data found! \n{new_data}")
            re_execute = True
        else:
            # No new datasets: a rules change re-runs everything seen so far.
            new_data = data

        logger.debug(f"re-execute {re_execute}")
        if re_execute:
            # Snapshot taken before _execute runs; used below to extend `data`.
            current_data = copy.deepcopy(new_data)
            for item in new_data:
                if isinstance(item['input'], list):
                    inputs = item['input']
                else:
                    input_folder = item['input']
                    if not os.path.isdir(input_folder):
                        raise Exception(
                            f"{input_folder} is not a directory containing files!"
                        )
                    inputs = coconnect.tools.get_files(input_folder, type='csv')

                filtered_rules = coconnect.tools.remove_missing_sources_from_rules(
                    rules, inputs)
                _execute(ctx, data=item, rules=filtered_rules, clean=_clean)
                _clean = False

            data += [x for x in current_data if x not in data]
            display_msg = True

        if new_rules_file or change_in_rules:
            # After applying a delta, reload the complete rules so that any
            # data added later is processed with the full rule set rather
            # than with just the delta.
            rules = coconnect.tools.load_json(current_rules_file)

        # Kept as an explicit comparison with False so that values such as
        # None are treated exactly as before.
        if ctx.obj['listen_for_changes'] == False:  # noqa: E712
            break

        if display_msg:
            logger.info(
                f"Finished!... Listening for changes to data in {config_file}")
            display_msg = False

        time.sleep(5)