def get_columns(self, connection, table_name, schema=None, **kw):
        schema = schema or self.default_schema_name
        c = connection.execute(
            """SELECT colname, coltype, collength, t3.default, t1.colno FROM
                "informix".syscolumns AS t1 , "informix".systables AS t2 , OUTER "informix".sysdefaults AS t3
                WHERE t1.tabid = t2.tabid AND t2.tabname=? AND t2.owner=?
                  AND t3.tabid = t2.tabid AND t3.colno = t1.colno
                ORDER BY t1.colno""", table_name, schema)

        pk_constraint = self.get_pk_constraint(connection, table_name, schema,
                                               **kw)
        primary_cols = pk_constraint['constrained_columns']

        columns = []
        rows = c.fetchall()
        for name, colattr, collength, default, colno in rows:
            name = name.lower()

            autoincrement = False
            primary_key = False

            if name in primary_cols:
                primary_key = True

            # in 7.31, coltype = 0x000
            #                       ^^-- column type
            #                      ^-- 1 not null, 0 null
            not_nullable, coltype = divmod(colattr, 256)
            if coltype not in (0, 13) and default:
                default = default.split()[-1]

            if coltype == 6:  # Serial, mark as autoincrement
                autoincrement = True

            if coltype == 0 or coltype == 13:  # char, varchar
                coltype = ischema_names[coltype](collength)
                if default:
                    default = "'%s'" % default
            elif coltype == 5:  # decimal
                precision, scale = (collength & 0xFF00) >> 8, collength & 0xFF
                if scale == 255:
                    scale = 0
                coltype = sqltypes.Numeric(precision, scale)
            else:
                try:
                    coltype = ischema_names[coltype]
                except KeyError:
                    util.warn("Did not recognize type '%s' of column '%s'" %
                              (coltype, name))
                    coltype = sqltypes.NULLTYPE

            column_info = dict(name=name,
                               type=coltype,
                               nullable=not not_nullable,
                               default=default,
                               autoincrement=autoincrement,
                               primary_key=primary_key)
            columns.append(column_info)
        return columns
Example #2
0
    def test_success(self):
        column = Column("fees", sqltypes.Numeric(precision=10, scale=5))

        _, django_field = fields.to_django_field(TestTable, column)

        assert isinstance(django_field, models.DecimalField)
        assert django_field.max_digits == 10
        assert django_field.decimal_places == 5
Example #3
0
    def test_fail_is_precision_or_scale_argument_is_missing(self):
        column = Column("fees", sqltypes.Numeric())

        with pytest.raises(errors.MissingDecimalFieldArgument) as err:
            fields.to_django_field(TestTable, column)

        assert err.value.args[0] == (
            "Table `test_table` column `fees`: \n"
            "To define a Decimal column, the argument `precision` and `scale` must be set.\n"
            "Example: Column('fees', Numeric(precision=10, scale=5))\n")
Example #4
0
    def test_fail_if_asdecimal_is_false(self):
        column = Column(
            "fees", sqltypes.Numeric(precision=10, scale=5, asdecimal=False))

        with pytest.raises(errors.InvalidDecimalFieldArgument) as err:
            fields.to_django_field(TestTable, column)

        assert err.value.args[0] == (
            "Table `test_table` column `fees`: \n"
            "To define a Decimal column, the argument `asdecimal` must be kept unset or set to `True` "
            "to ensure that values are returned as python Decimal objects.\n"
            "Example: Column('fees', Numeric(precision=10, scale=5))\n")
Example #5
0
def sql_round(col, n):
    return sql.func.round(sql.cast(col, sa_types.Numeric()), n)
class CorrectionsOutputViewTest(BaseViewTest):
    """Tests the Corrections output view."""

    INPUT_SCHEMA = MockTableSchema({
        **METRIC_CALCULATOR_SCHEMA.data_types,
        "compare_date_partition":
        sqltypes.Date(),
        "compare_value":
        sqltypes.Numeric(),
        "state_code":
        sqltypes.String(255),
    })

    def test_recent_population(self) -> None:
        """Tests the basic use case of calculating population"""
        # Arrange
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="source_materialized",
            mock_schema=MockTableSchema.from_sqlalchemy_table(
                schema.Source.__table__),
            mock_data=pd.DataFrame([[1, "XX"], [2, "YY"], [3, "ZZ"]],
                                   columns=["id", "name"]),
        )
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="report_materialized",
            mock_schema=MockTableSchema.from_sqlalchemy_table(
                schema.Report.__table__),
            mock_data=pd.DataFrame(
                [
                    [
                        1,
                        1,
                        "_",
                        "All",
                        "2021-01-01",
                        "xx.gov",
                        "MANUALLY_ENTERED",
                        "John",
                    ],
                    [
                        2,
                        2,
                        "_",
                        "All",
                        "2021-01-02",
                        "yy.gov",
                        "MANUALLY_ENTERED",
                        "Jane",
                    ],
                    [
                        3,
                        3,
                        "_",
                        "All",
                        "2021-01-02",
                        "zz.gov",
                        "MANUALLY_ENTERED",
                        "Jude",
                    ],
                ],
                columns=[
                    "id",
                    "source_id",
                    "type",
                    "instance",
                    "publish_date",
                    "url",
                    "acquisition_method",
                    "acquired_by",
                ],
            ),
        )
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="metric_calculator",
            mock_schema=self.INPUT_SCHEMA,
            mock_data=pd.DataFrame(
                [
                    row(
                        1,
                        "2021-01-01",
                        "2020-11-30",
                        (FakeState("US_XX"), ),
                        ["A", "B", "A"],
                        3000,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_XX"),
                    row(
                        1,
                        "2021-01-01",
                        "2020-12-31",
                        (FakeState("US_XX"), ),
                        ["B", "B", "C"],
                        4000,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_XX"),
                    row(
                        2,
                        "2021-01-01",
                        "2020-11-30",
                        (FakeState("US_YY"), ),
                        ["A", "B", "A"],
                        1000,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_YY"),
                    row(
                        2,
                        "2021-01-01",
                        "2020-12-31",
                        (FakeState("US_YY"), ),
                        ["B", "B", "C"],
                        1020,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_YY"),
                    row(
                        3,
                        "2021-01-01",
                        "2020-11-30",
                        (FakeState("US_ZZ"), ),
                        ["A", "B", "A"],
                        400,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_ZZ"),
                    row(
                        3,
                        "2021-01-01",
                        "2020-12-31",
                        (FakeState("US_ZZ"), ),
                        ["C", "C", "B"],
                        500,
                        measurement_type="INSTANT",
                    ) + (None, None, "US_ZZ"),
                ],
                columns=self.INPUT_SCHEMA.data_types.keys(),
            ),
        )

        # Act
        dimensions = ["state_code", "metric", "year", "month"]
        prison_population_metric = metric_calculator.CalculatedMetric(
            system=schema.System.CORRECTIONS,
            metric=schema.MetricType.POPULATION,
            filtered_dimensions=[manual_upload.PopulationType.PRISON],
            aggregated_dimensions={
                "state_code":
                metric_calculator.Aggregation(dimension=manual_upload.State,
                                              comprehensive=False)
            },
            output_name="POP",
        )
        results = self.query_view_for_builder(
            corrections_metrics.CorrectionsOutputViewBuilder(
                dataset_id="fake-dataset",
                metric_to_calculate=prison_population_metric,
                input_view=SimpleBigQueryViewBuilder(
                    dataset_id="justice_counts",
                    view_id="metric_calculator",
                    description="metric_calculator view",
                    view_query_template="",
                ),
            ),
            data_types={
                "year": int,
                "month": int,
                "value": int
            },
            dimensions=dimensions,
        )

        # Assert
        expected = pd.DataFrame(
            [
                [
                    "US_XX",
                    "POP",
                    2020,
                    11,
                    datetime.date.fromisoformat("2020-11-30"),
                    "XX",
                    "xx.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-01"),
                    "INSTANT",
                    ["A", "B"],
                    3000,
                ] + [None] * 4,
                [
                    "US_XX",
                    "POP",
                    2020,
                    12,
                    datetime.date.fromisoformat("2020-12-31"),
                    "XX",
                    "xx.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-01"),
                    "INSTANT",
                    ["B", "C"],
                    4000,
                ] + [None] * 4,
                [
                    "US_YY",
                    "POP",
                    2020,
                    11,
                    datetime.date.fromisoformat("2020-11-30"),
                    "YY",
                    "yy.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-02"),
                    "INSTANT",
                    ["A", "B"],
                    1000,
                ] + [None] * 4,
                [
                    "US_YY",
                    "POP",
                    2020,
                    12,
                    datetime.date.fromisoformat("2020-12-31"),
                    "YY",
                    "yy.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-02"),
                    "INSTANT",
                    ["B", "C"],
                    1020,
                ] + [None] * 4,
                [
                    "US_ZZ",
                    "POP",
                    2020,
                    11,
                    datetime.date.fromisoformat("2020-11-30"),
                    "ZZ",
                    "zz.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-02"),
                    "INSTANT",
                    ["A", "B"],
                    400,
                ] + [None] * 4,
                [
                    "US_ZZ",
                    "POP",
                    2020,
                    12,
                    datetime.date.fromisoformat("2020-12-31"),
                    "ZZ",
                    "zz.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-02"),
                    "INSTANT",
                    ["B", "C"],
                    500,
                ] + [None] * 4,
            ],
            columns=[
                "state_code",
                "metric",
                "year",
                "month",
                "date_reported",
                "source_name",
                "source_url",
                "report_name",
                "date_published",
                "measurement_type",
                "raw_source_categories",
                "value",
                "compared_to_year",
                "compared_to_month",
                "value_change",
                "percentage_change",
            ],
        )
        expected = expected.set_index(dimensions)
        assert_frame_equal(expected, results)

    def test_comparisons(self) -> None:
        """Tests that percentage change is correct, or null when the prior value was zero"""
        # Arrange
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="source_materialized",
            mock_schema=MockTableSchema.from_sqlalchemy_table(
                schema.Source.__table__),
            mock_data=pd.DataFrame([[1, "XX"]], columns=["id", "name"]),
        )
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="report_materialized",
            mock_schema=MockTableSchema.from_sqlalchemy_table(
                schema.Report.__table__),
            mock_data=pd.DataFrame(
                [[
                    1,
                    1,
                    "_",
                    "All",
                    "2021-01-01",
                    "xx.gov",
                    "MANUALLY_ENTERED",
                    "John",
                ]],
                columns=[
                    "id",
                    "source_id",
                    "type",
                    "instance",
                    "publish_date",
                    "url",
                    "acquisition_method",
                    "acquired_by",
                ],
            ),
        )
        self.create_mock_bq_table(
            dataset_id="justice_counts",
            table_id="metric_calculator",
            mock_schema=self.INPUT_SCHEMA,
            mock_data=pd.DataFrame(
                [
                    row(1, "2021-01-01", "2022-01-01",
                        (FakeState("US_XX"), ), [], 3) +
                    (datetime.date.fromisoformat("2021-02-01"), 0, "US_XX"),
                    row(1, "2021-01-01", "2021-01-01",
                        (FakeState("US_XX"), ), [], 0) +
                    (datetime.date.fromisoformat("2020-02-01"), 2, "US_XX"),
                    row(1, "2021-01-01", "2020-01-01",
                        (FakeState("US_XX"), ), [], 2) + (None, None, "US_XX"),
                ],
                columns=self.INPUT_SCHEMA.data_types.keys(),
            ),
        )

        # Act
        dimensions = ["state_code", "metric", "year", "month"]
        parole_population = metric_calculator.CalculatedMetric(
            system=schema.System.CORRECTIONS,
            metric=schema.MetricType.ADMISSIONS,
            filtered_dimensions=[],
            aggregated_dimensions={
                "state_code":
                metric_calculator.Aggregation(dimension=manual_upload.State,
                                              comprehensive=False)
            },
            output_name="ADMISSIONS",
        )
        results = self.query_view_for_builder(
            corrections_metrics.CorrectionsOutputViewBuilder(
                dataset_id="fake-dataset",
                metric_to_calculate=parole_population,
                input_view=SimpleBigQueryViewBuilder(
                    dataset_id="justice_counts",
                    view_id="metric_calculator",
                    description="metric_calculator view",
                    view_query_template="",
                ),
            ),
            data_types={
                "year": int,
                "month": int,
                "value": int
            },
            dimensions=dimensions,
        )

        # Assert
        expected = pd.DataFrame(
            [
                [
                    "US_XX",
                    "ADMISSIONS",
                    2020,
                    1,
                    datetime.date.fromisoformat("2020-01-31"),
                    "XX",
                    "xx.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-01"),
                    "INSTANT",
                    [],
                    2,
                    None,
                    None,
                    None,
                    None,
                ],
                [
                    "US_XX",
                    "ADMISSIONS",
                    2021,
                    1,
                    datetime.date.fromisoformat("2021-01-31"),
                    "XX",
                    "xx.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-01"),
                    "INSTANT",
                    [],
                    0,
                    2020,
                    1,
                    -2,
                    -1.00,
                ],
                # Percentage change is None as prior value was 0
                [
                    "US_XX",
                    "ADMISSIONS",
                    2022,
                    1,
                    datetime.date.fromisoformat("2022-01-31"),
                    "XX",
                    "xx.gov",
                    "_",
                    datetime.date.fromisoformat("2021-01-01"),
                    "INSTANT",
                    [],
                    3,
                    2021,
                    1,
                    3,
                    None,
                ],
            ],
            columns=[
                "state_code",
                "metric",
                "year",
                "month",
                "date_reported",
                "source_name",
                "source_url",
                "report_name",
                "date_published",
                "measurement_type",
                "raw_source_categories",
                "value",
                "compared_to_year",
                "compared_to_month",
                "value_change",
                "percentage_change",
            ],
        )
        expected = expected.set_index(dimensions)
        assert_frame_equal(expected, results)