Example #1
0
 def sync_to_db(cls, name):
     """Create or refresh the datasource called ``name`` and its columns.

     Adds a ``cls`` row for ``name`` to ``db.session`` when the lookup finds
     none, then creates or updates one ``Column`` row for every entry of
     ``cls.latest_metadata(name)`` and commits the session.

     Returns early, without committing, when ``latest_metadata`` yields
     nothing.
     """
     datasource = db.session.query(cls).filter_by(datasource_name=name).first()
     if not datasource:
         db.session.add(cls(datasource_name=name))
     cols = cls.latest_metadata(name)
     if not cols:
         return
     for col in cols:
         col_obj = (
             db.session
             .query(Column)
             .filter_by(datasource_name=name, column_name=col)
             .first()
         )
         # Read the type once; it drives both the flags and the stored type.
         datatype = cols[col]['type']
         if not col_obj:
             col_obj = Column(datasource_name=name, column_name=col)
             db.session.add(col_obj)
         if datatype == "STRING":
             col_obj.groupby = True
             col_obj.filterable = True
         # col_obj always exists at this point, so no further guard is needed.
         col_obj.type = datatype
         col_obj.generate_metrics()
     db.session.commit()
Example #2
0
    def sync_to_db(cls, name, cluster):
        """Create or refresh the datasource ``name`` and attach it to ``cluster``.

        Fetches (or creates and adds to the session) the ``cls`` row whose
        ``datasource_name`` is ``name``, assigns ``cluster`` to it, then
        creates or updates one ``Column`` row for every entry returned by
        ``datasource.latest_metadata()``.

        Returns early when ``latest_metadata`` yields nothing.

        NOTE(review): nothing here commits the session; presumably the
        caller is expected to do so -- confirm.
        """
        session = get_session()
        datasource = session.query(cls).filter_by(datasource_name=name).first()
        if not datasource:
            datasource = cls(datasource_name=name)
            session.add(datasource)
        datasource.cluster = cluster

        cols = datasource.latest_metadata()
        if not cols:
            return
        for col in cols:
            col_obj = (
                session
                .query(Column)
                .filter_by(datasource_name=name, column_name=col)
                .first()
            )
            # Read the type once; it drives both the flags and the stored type.
            datatype = cols[col]['type']
            if not col_obj:
                col_obj = Column(datasource_name=name, column_name=col)
                session.add(col_obj)
            if datatype == "STRING":
                col_obj.groupby = True
                col_obj.filterable = True
            # col_obj always exists at this point, so no further guard is
            # needed before updating it.
            col_obj.type = datatype
            col_obj.datasource = datasource
            col_obj.generate_metrics()
Example #3
0
    def sync_to_db(cls, name, cluster):
        """Synchronise the datasource ``name`` and its columns into the session.

        The ``cls`` row with ``datasource_name == name`` is looked up, or
        created and added to the session when missing, and ``cluster`` is
        assigned to it. Each entry of ``datasource.latest_metadata()`` is then
        mirrored into a ``Column`` row linked to that datasource.

        Returns early when ``latest_metadata`` yields nothing.

        NOTE(review): the session is not committed in this method; presumably
        the caller owns the commit -- confirm.
        """
        session = get_session()
        datasource = session.query(cls).filter_by(datasource_name=name).first()
        if not datasource:
            datasource = cls(datasource_name=name)
            session.add(datasource)
        datasource.cluster = cluster

        cols = datasource.latest_metadata()
        if not cols:
            return
        for col in cols:
            col_obj = (
                session
                .query(Column)
                .filter_by(datasource_name=name, column_name=col)
                .first()
            )
            # Single lookup of the reported type, reused below.
            datatype = cols[col]['type']
            if not col_obj:
                col_obj = Column(datasource_name=name, column_name=col)
                session.add(col_obj)
            if datatype == "STRING":
                col_obj.groupby = True
                col_obj.filterable = True
            # A column object is guaranteed here (fetched or just created),
            # so the type is assigned unconditionally.
            col_obj.type = datatype
            col_obj.datasource = datasource
            col_obj.generate_metrics()
Example #4
0
def ge_suite_to_sqla_columns(suite: dict) -> list:
    """Build one ``Column`` per column named in an expectation suite.

    Args:
        suite: Expectation suite mapping. ``suite["expectations"]`` supplies
            the expectations and the text before the first ``"."`` of
            ``suite["expectation_suite_name"]`` is used as the table name.

    Returns:
        A list of ``Column`` objects, in the order produced by
        ``get_column_names``.

    Raises:
        IndexError: If a column has no
            ``expect_column_values_to_be_of_type`` expectation.
    """
    expectations = suite["expectations"]

    table_name = suite["expectation_suite_name"].split(".")[0]
    column_names = get_column_names(expectations)

    # These selections do not depend on the column being processed, so they
    # are computed once instead of re-filtering every expectation per column.
    type_expectations = [
        x for x in expectations
        if x["expectation_type"] == "expect_column_values_to_be_of_type"
    ]
    length_expectations = [
        x for x in expectations
        if x["expectation_type"] == "expect_column_value_lengths_to_be_between"
    ]

    sqla_columns = []
    for column_name in column_names:
        column = Column(name=column_name)

        # The first type expectation found for the column decides its type.
        column_type_expectations = [
            x for x in type_expectations
            if x["kwargs"]["column"] == column_name
        ]
        ge_type = column_type_expectations[0]["kwargs"]["type_"]
        kwargs = {}

        if ge_type == "str":
            column_length_expectations = [
                x for x in length_expectations
                if x["kwargs"]["column"] == column_name
            ]

            # Use the declared maximum only when exactly one length
            # expectation exists; otherwise fall back to 100.
            if len(column_length_expectations) == 1:
                length = column_length_expectations[0]["kwargs"]["max_value"]
            else:
                length = 100

            kwargs = {"length": length}

        column.type = ge_to_sqla_types(ge_type, **kwargs)

        # NOTE(review): every "*_id" column is flagged as primary key, even
        # those that also receive a foreign key below -- confirm intended.
        if column_name == "id" or column_name.endswith("_id"):
            column.primary_key = True

        # "<other>_id" columns reference the "id" column of table "<other>".
        prefix = column_name.split("_")[0]
        if column_name.endswith("_id") and prefix != table_name:
            column.foreign_keys = [ForeignKey(f"{prefix}.id")]

        sqla_columns.append(column)

    return sqla_columns
Example #5
0
 def sync_to_db(cls, name):
     """Synchronise the datasource ``name`` and its columns, then commit.

     A ``cls`` row for ``name`` is added to ``db.session`` when the lookup
     finds none. Every entry of ``cls.latest_metadata(name)`` is mirrored
     into a ``Column`` row (created when missing), after which the session
     is committed.

     Returns early, without committing, when ``latest_metadata`` yields
     nothing.
     """
     datasource = db.session.query(cls).filter_by(
         datasource_name=name).first()
     if not datasource:
         db.session.add(cls(datasource_name=name))
     cols = cls.latest_metadata(name)
     if not cols:
         return
     for col in cols:
         col_obj = (db.session.query(Column).filter_by(
             datasource_name=name, column_name=col).first())
         # Single lookup of the reported type, reused below.
         datatype = cols[col]['type']
         if not col_obj:
             col_obj = Column(datasource_name=name, column_name=col)
             db.session.add(col_obj)
         if datatype == "STRING":
             col_obj.groupby = True
             col_obj.filterable = True
         # A column object is guaranteed here (fetched or just created), so
         # the type is assigned unconditionally.
         col_obj.type = datatype
         col_obj.generate_metrics()
     db.session.commit()