Example #1
    def generate_table_sql(self, source_id, targets, extract_date):
        """
        Generate SQL only for the requested target tables and upload the
        result to S3 as a JSON document; returns the uploaded file name.
        """
        tables = (db.session.query(ExtTableInfo).filter(
            ExtTableInfo.source_id == source_id, ExtTableInfo.weight == 1,
            ExtTableInfo.inventory_table != 1).options(
                joinedload(ExtTableInfo.datasource)).all())
        tables_sqls = {
            "type": "tables",
            "query_date": extract_date,
            "source_id": source_id,
        }

        total_sqls = {
            table_name.split(".")[-1]: sqls
            for table_name, sqls in self._generate_by_correct_mould(
                tables, extract_date).items()
        }

        target_sqls = {target: total_sqls[target] for target in targets}

        tables_sqls["sqls"] = target_sqls

        file_name = str(now_timestamp()) + ".json"
        key = SQL_PREFIX.format(source_id=source_id,
                                date=extract_date) + file_name
        upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
        return file_name
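The method above indexes total_sqls directly with each requested target, so an unknown target raises a KeyError. A rough sketch of the JSON document that generate_table_sql uploads to S3; all table names, dates, and SQL strings below are made up, and the exact value type produced by _generate_by_correct_mould is not shown in these examples:

    # Hedged sketch of the payload shape; values are illustrative only.
    example_payload = {
        "type": "tables",
        "query_date": "2021-06-01",   # the extract_date argument
        "source_id": 42,              # the source_id argument
        "sqls": {
            # keyed by bare table name (the part after the last ".")
            "goods": "...sql produced by _generate_by_correct_mould...",
            "sales": "...sql produced by _generate_by_correct_mould...",
        },
    }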
Example #2
    def generate_full_sql(self, source_id, extract_date):
        """
        生成sql,全量的首次抓取sql
        where条件中日期字段需要处理
        :param source_id:
        :param extract_date:
        :return:
        """
        tables = (db.session.query(ExtTableInfo).filter(
            ExtTableInfo.source_id == source_id, ExtTableInfo.weight == 1,
            ExtTableInfo.inventory_table != 1).options(
                joinedload(ExtTableInfo.datasource)).all())

        tables_sqls = {
            "type": "full",
            "query_date": extract_date,
            "source_id": source_id,
            "sqls": self._generate_by_correct_mould(tables, extract_date),
        }

        file_name = str(now_timestamp()) + ".json"
        key = SQL_PREFIX.format(source_id=source_id,
                                date=extract_date) + file_name
        upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
        return file_name
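SQL_PREFIX, S3_BUCKET, now_timestamp and upload_body_to_s3 are project helpers that are not part of these examples. A minimal sketch of how the S3 key could be assembled, assuming SQL_PREFIX is a format string with source_id and date fields and now_timestamp returns an epoch integer:

    import time

    # Assumed definitions, only for illustration; the real ones are not shown here.
    SQL_PREFIX = "sql/{source_id}/{date}/"

    def now_timestamp():
        return int(time.time())

    file_name = str(now_timestamp()) + ".json"
    key = SQL_PREFIX.format(source_id=42, date="2021-06-01") + file_name
    # e.g. "sql/42/2021-06-01/1622505600.json"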
Example #3
    def store_target_table_info(self):
        """
        Serialize every ExtTargetInfo row into a dict keyed by target table
        and upload it to S3 as the target-table metadata document.
        """
        table_info = ExtTargetInfo.query.all()
        data = {
            info.target_table: {
                "target_table": info.target_table,
                "remark": info.remark,
                "sync_column": info.sync_column
                if not info.sync_column
                else info.sync_column.split(","),
                "date_column": info.date_column,
            }
            for info in table_info
        }
        upload_body_to_s3(S3_BUCKET, TARGET_TABLE_KEY, json.dumps(data))
        return data
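The conditional in the sync_column entry leaves falsy values (None or an empty string) untouched and splits a non-empty comma-separated string into a list. A small sketch of that behaviour:

    # Same expression as in store_target_table_info, extracted for illustration.
    def normalize_sync_column(sync_column):
        return sync_column if not sync_column else sync_column.split(",")

    assert normalize_sync_column(None) is None
    assert normalize_sync_column("") == ""
    assert normalize_sync_column("id,updated_at") == ["id", "updated_at"]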
Example #4
    def generate_inventory_sql(self, source_id, extract_date):
        """
        Generate SQL for the source's inventory tables and upload it to S3.
        """
        tables = (db.session.query(ExtTableInfo).filter(
            ExtTableInfo.source_id == source_id,
            ExtTableInfo.weight == 1,
            ExtTableInfo.inventory_table == 1,
        ).options(joinedload(ExtTableInfo.datasource)).all())
        tables_sqls = {
            "type": "inventory",
            "source_id": source_id,
            "query_date": extract_date,
            "inv_sqls": self._generate_by_correct_mould(tables, extract_date),
        }
        file_name = str(now_timestamp()) + ".json"
        key = SQL_INV_PREFIX.format(source_id=source_id,
                                    date=extract_date) + file_name
        upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
        return file_name
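Inventory documents are written under SQL_INV_PREFIX and keep their SQL under "inv_sqls", while the "full" and "tables" documents use "sqls". A hedged sketch of a consumer reading one of these files back with boto3 and dispatching on the "type" field; bucket and key values are hypothetical:

    import json
    import boto3

    s3 = boto3.client("s3")
    obj = s3.get_object(Bucket="my-bucket",
                        Key="sql_inv/42/2021-06-01/1622505600.json")
    payload = json.loads(obj["Body"].read())

    if payload["type"] == "inventory":
        sqls = payload["inv_sqls"]   # inventory documents
    else:
        sqls = payload["sqls"]       # "full" and "tables" documents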
Example #5
    def generate_target_sql(self, source_id, extract_date, target_table):
        """
        根据目标表生产sql,用于只抓取特定的目标表的数据
        """
        ext_clean_info_models = (db.session.query(ExtCleanInfo).filter(
            ExtCleanInfo.source_id == source_id,
            ExtCleanInfo.deleted == False,
            ExtCleanInfo.target_table.in_(target_table),
        ).all())
        origin_table = set()
        for model in ext_clean_info_models:
            if model.origin_table:
                origin_table.update(model.origin_table.keys())
        total_table = []
        for table_name in origin_table:
            tables = (db.session.query(ExtTableInfo).filter(
                ExtTableInfo.source_id == source_id,
                ExtTableInfo.weight == 1).filter(
                    or_(
                        ExtTableInfo.table_name.ilike(f"%.{table_name}"),
                        ExtTableInfo.table_name.ilike(table_name),
                        ExtTableInfo.alias_table_name == table_name,
                    )).options(joinedload(ExtTableInfo.datasource)).all())
            total_table.extend(tables)

        tables_sqls = {
            "type": "tables",
            "query_date": extract_date,
            "source_id": source_id,
            "sqls": self._generate_by_correct_mould(total_table, extract_date),
        }
        file_name = str(now_timestamp()) + ".json"
        key = SQL_PREFIX.format(source_id=source_id,
                                date=extract_date) + file_name
        upload_body_to_s3(S3_BUCKET, key, json.dumps(tables_sqls))
        return file_name
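The or_() filter resolves a bare origin table name against schema-qualified names ("%.name"), a case-insensitive exact name (ilike with no wildcard), or the alias column. A rough plain-Python approximation of that matching, ignoring ilike's treatment of "%" and "_" inside the name itself:

    def matches(table_name, alias_table_name, origin_name):
        tn, name = table_name.lower(), origin_name.lower()
        return tn.endswith("." + name) or tn == name or alias_table_name == origin_name

    assert matches("dbo.Goods", None, "goods")        # schema-qualified match
    assert matches("goods", None, "GOODS")            # case-insensitive exact match
    assert matches("t_goods_raw", "goods", "goods")   # alias match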