def fill_data_gaps(self, **context) -> None:
    """Fill missing values in the last field of each record and push the rows to Redis.

    Records are read with ``self.get_list_redis(context['redis_key'])``. Every
    ``None`` found in the last column is replaced by the placeholder
    ``'01-01-1900'``; each row is then pushed, as ``str(tuple)``, onto the
    Redis list named ``context['current_dag_name']``.

    Example:
        input:  ('ef57132e-0d25-4e75-bee6-158c01c5b360', 50000, None)
        output: ('ef57132e-0d25-4e75-bee6-158c01c5b360', 50000, '01-01-1900')

    Args:
        **context: must contain ``redis_key`` (key handed to
            ``self.get_list_redis``) and ``current_dag_name`` (name of the
            destination Redis list).
    """
    list_records = self.get_list_redis(context['redis_key'])
    df = pd.DataFrame(data=list_records)

    # Without rows there is no "last column" to fill; indexing it would raise
    # IndexError, so stop here instead of failing.
    if df.empty:
        self.log.warning(
            "No records found for redis key %s; nothing to fill.",
            context['redis_key'],
        )
        return

    col_date_ref = df.columns[-1]
    # Assign the result back rather than calling replace(inplace=True) on the
    # column selection: that chained in-place form does not update ``df``
    # when pandas Copy-on-Write is active.
    df[col_date_ref] = df[col_date_ref].replace(
        to_replace=[None], value='01-01-1900'
    )

    target_key = context['current_dag_name']
    pipe = RedisHook(self.redis_conn_id).get_conn().pipeline()
    for row in df.to_numpy():
        pipe.lpush(target_key, str(tuple(row)))
    # All pushes are buffered in the pipeline and sent together.
    pipe.execute()

    self.log.info(f"\nSample rows:\n{df.head(5)}")
def split_id_by_date(self, **context) -> None:
    """Split the records of one Redis list into one Redis list per date.

    Records are read with ``self.get_list_redis(context['redis_key'])``. For
    every ``date`` in ``context['list_current_dates']`` the records whose last
    field contains ``date`` (``date in record[-1]``) are pushed, as
    ``str(record)``, onto the list ``context['current_dag_name'] + '_' + date``.

    Example (redis keys):
        input:  file_name_000000000000000
        output: file_name_000000000000000_01-01-1900,
                file_name_000000000000000_24-12-2020

    Args:
        **context: must contain ``redis_key``, ``current_dag_name`` and
            ``list_current_dates``.

    Raises:
        TypeError: if a record's last field does not support ``in``
            (for example when it is ``None``).
    """
    # Use the instance logger throughout, matching the per-date log below.
    self.log.info('Spliting IDs by date ...')

    pipe = RedisHook(self.redis_conn_id).get_conn().pipeline()
    list_records = self.get_list_redis(context['redis_key'])

    for date in context['list_current_dates']:
        list_item = [item for item in list_records if date in item[-1]]
        name_redis_key = context['current_dag_name'] + '_' + date

        for row in list_item:
            pipe.lpush(name_redis_key, str(row))
        # Flush the buffered pushes for this date before moving to the next.
        pipe.execute()

        self.log.info(f'{date} - Storaged at redis {name_redis_key} = {len(list_item)}')