def generate_table_sql(self, source_id, targets, extract_date):
    """Generate SQL for the given target tables and upload it to S3.

    :param source_id: data source id
    :param targets: table names to keep from the generated SQL
    :param extract_date: extraction date used in the WHERE clause
    :return: name of the JSON file uploaded to S3
    """
    tables = (db.session.query(ExtTableInfo).filter(
        ExtTableInfo.source_id == source_id,
        ExtTableInfo.weight == 1,
        ExtTableInfo.inventory_table != 1).options(
            joinedload(ExtTableInfo.datasource)).all())
    tables_sqls = {
        "type": "tables",
        "query_date": extract_date,
        "source_id": source_id,
    }
    # Key by the bare table name (strip any schema prefix).
    total_sqls = {
        table_name.split(".")[-1]: sqls
        for table_name, sqls in self._generate_by_correct_mould(
            tables, extract_date).items()
    }
    # Assumes every requested target was generated; a missing
    # target raises KeyError here.
    target_sqls = {target: total_sqls[target] for target in targets}
    tables_sqls["sqls"] = target_sqls
    file_name = str(now_timestamp()) + ".json"
    key = SQL_PREFIX.format(source_id=source_id,
                            date=extract_date) + file_name
    upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
    return file_name
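# The helpers called above (now_timestamp, upload_body_to_s3, SQL_PREFIX,
# S3_BUCKET) live elsewhere in the project. The sketch below only
# illustrates the contracts inferred from the call sites; the names, key
# layout, and bucket are assumptions, not the real implementations.
import json
import time

import boto3

S3_BUCKET = "example-bucket"             # assumed bucket name
SQL_PREFIX = "sqls/{source_id}/{date}/"  # assumed key layout

def now_timestamp():
    # Millisecond timestamp, used to build unique file names.
    return int(time.time() * 1000)

def upload_body_to_s3(bucket, key, body):
    # Upload a string body to s3://bucket/key.
    boto3.client("s3").put_object(Bucket=bucket, Key=key, Body=body)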
def generate_full_sql(self, source_id, extract_date):
    """Generate SQL for the first full extraction and upload it to S3.

    Date fields in the WHERE clause need special handling.

    :param source_id: data source id
    :param extract_date: extraction date used in the WHERE clause
    :return: name of the JSON file uploaded to S3
    """
    tables = (db.session.query(ExtTableInfo).filter(
        ExtTableInfo.source_id == source_id,
        ExtTableInfo.weight == 1,
        ExtTableInfo.inventory_table != 1).options(
            joinedload(ExtTableInfo.datasource)).all())
    tables_sqls = {
        "type": "full",
        "query_date": extract_date,
        "source_id": source_id,
        "sqls": self._generate_by_correct_mould(tables, extract_date),
    }
    file_name = str(now_timestamp()) + ".json"
    key = SQL_PREFIX.format(source_id=source_id,
                            date=extract_date) + file_name
    upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
    return file_name
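# Illustrative shape of the JSON document that generate_full_sql uploads
# (values are made up; the real "sqls" mapping comes from
# _generate_by_correct_mould):
_example_full_payload = {
    "type": "full",
    "query_date": "2024-01-01",
    "source_id": 42,
    "sqls": {
        "erp.orders": [
            "SELECT * FROM erp.orders WHERE order_date <= '2024-01-01'"
        ],
    },
}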
def store_target_table_info(self):
    """Dump metadata for all target tables to S3 and return it."""
    table_info = ExtTargetInfo.query.all()
    data = {
        info.target_table: {
            "target_table": info.target_table,
            "remark": info.remark,
            # sync_column is stored as a comma-separated string;
            # expose it as a list (or None when unset).
            "sync_column": info.sync_column.split(",")
            if info.sync_column else None,
            "date_column": info.date_column,
        }
        for info in table_info
    }
    upload_body_to_s3(S3_BUCKET, TARGET_TABLE_KEY, json.dumps(data))
    return data
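# Illustrative entry written by store_target_table_info (table and column
# names are hypothetical); note that the comma-separated sync_column string
# is exposed as a list:
_example_target_info = {
    "goodsflow": {
        "target_table": "goodsflow",
        "remark": "example remark",
        "sync_column": ["store_id", "updated_at"],  # from "store_id,updated_at"
        "date_column": "updated_at",
    },
}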
def generate_inventory_sql(self, source_id, extract_date):
    """Generate SQL for inventory tables and upload it to S3.

    :param source_id: data source id
    :param extract_date: extraction date used in the WHERE clause
    :return: name of the JSON file uploaded to S3
    """
    tables = (db.session.query(ExtTableInfo).filter(
        ExtTableInfo.source_id == source_id,
        ExtTableInfo.weight == 1,
        ExtTableInfo.inventory_table == 1,
    ).options(joinedload(ExtTableInfo.datasource)).all())
    tables_sqls = {
        "type": "inventory",
        "source_id": source_id,
        "query_date": extract_date,
        "inv_sqls": self._generate_by_correct_mould(tables, extract_date),
    }
    file_name = str(now_timestamp()) + ".json"
    key = SQL_INV_PREFIX.format(source_id=source_id,
                                date=extract_date) + file_name
    upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
    return file_name
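# A downstream consumer might fetch and parse one of these files like this
# (hypothetical sketch; it reuses the same key layout as the writer above):
def load_inventory_sqls(source_id, extract_date, file_name):
    key = SQL_INV_PREFIX.format(source_id=source_id,
                                date=extract_date) + file_name
    obj = boto3.client("s3").get_object(Bucket=S3_BUCKET, Key=key)
    return json.loads(obj["Body"].read())["inv_sqls"]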
def generate_target_sql(self, source_id, extract_date, target_table):
    """Generate SQL restricted to specific target tables.

    Used when only the data feeding the given target tables should
    be extracted.

    :param source_id: data source id
    :param extract_date: extraction date used in the WHERE clause
    :param target_table: iterable of target table names
    :return: name of the JSON file uploaded to S3
    """
    ext_clean_info_models = (db.session.query(ExtCleanInfo).filter(
        ExtCleanInfo.source_id == source_id,
        ExtCleanInfo.deleted == False,  # SQLAlchemy needs ==, not "is"
        ExtCleanInfo.target_table.in_(target_table),
    ).all())
    # Collect every origin table that feeds the requested targets.
    origin_table = set()
    for model in ext_clean_info_models:
        if model.origin_table:
            origin_table.update(model.origin_table.keys())
    total_table = []
    for table_name in origin_table:
        # Match by schema-qualified name, bare name, or alias.
        tables = (db.session.query(ExtTableInfo).filter(
            ExtTableInfo.source_id == source_id,
            ExtTableInfo.weight == 1).filter(
                or_(
                    ExtTableInfo.table_name.ilike(f"%.{table_name}"),
                    ExtTableInfo.table_name.ilike(table_name),
                    ExtTableInfo.alias_table_name == table_name,
                )).options(joinedload(ExtTableInfo.datasource)).all())
        total_table.extend(tables)
    tables_sqls = {
        "type": "tables",
        "query_date": extract_date,
        "source_id": source_id,
        "sqls": self._generate_by_correct_mould(total_table, extract_date),
    }
    file_name = str(now_timestamp()) + ".json"
    key = SQL_PREFIX.format(source_id=source_id,
                            date=extract_date) + file_name
    upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
    return file_name
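# _generate_by_correct_mould is not shown in this section. Its call sites
# imply it maps ExtTableInfo rows to {table_name: [sql, ...]} for a given
# extract date. A minimal sketch under that assumption (filter_column is a
# hypothetical attribute; the real method presumably renders per-datasource
# SQL templates):
def _generate_by_correct_mould(self, tables, extract_date):
    sqls = {}
    for table in tables:
        sqls[table.table_name] = [
            f"SELECT * FROM {table.table_name} "
            f"WHERE {table.filter_column} >= '{extract_date}'"
        ]
    return sqls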