Ejemplo n.º 1
0
class TestBQIntegration():
    def setup_class(self):
        self.dataset_name = "gfluent_bq_dataset"
        self.project_id = os.environ.get("PROJECT_ID")

        self.sql = """
            SELECT
                exchange,
                symbol,
                enabled
            FROM
                market_data.required_products
            LIMIT 5
        """

        self.table_name = f"{self.dataset_name}.testing_bq_table"

        self.bq = BQ(project=self.project_id)

        self.bq.delete_dataset(self.dataset_name)
        self.bq.create_dataset(self.dataset_name, location="EU")

    def teardown_class(self):
        self.bq.delete_dataset(self.dataset_name)

    def test_query(self):
        rows = self.bq.sql(self.sql).query()
        assert rows.total_rows == 5

    def test_query_load(self):

        # load the query result to table
        row_count = (self.bq.table(self.table_name).sql(self.sql).query())

        # should only 5 rows
        assert row_count == 5

        # load again with append
        row_count = (self.bq.table(self.table_name).sql(
            self.sql).mode("WRITE_APPEND").query())

        rows = BQ(
            self.project_id).sql(f"select * from {self.table_name}").query()

        assert rows.total_rows == 10

    def test_truncate(self):
        self.bq.table(self.table_name).truncate()

        rows = BQ(
            self.project_id).sql(f"select * from {self.table_name}").query()

        assert rows.total_rows == 0

    def test_delete(self):
        self.bq.delete()
        assert self.bq.is_exist() is False

    def test_drop(self):
        row_count = (self.bq.table(self.table_name).sql(self.sql).query())
        assert self.bq.is_exist() is True

        self.bq.drop()
        assert self.bq.is_exist() is False

    def test_is_exists(self):
        assert self.bq.is_exist() is False
        row_count = (self.bq.table(self.table_name).sql(self.sql).query())
        assert self.bq.is_exist() is True
        assert row_count == 5

    def test_create_table(self):

        table = f"{self.dataset_name}.students"
        schema = [
            bigquery.SchemaField(name="name",
                                 field_type="STRING",
                                 mode="REQUIRED",
                                 description="student name"),
            bigquery.SchemaField(name="age",
                                 field_type="INTEGER",
                                 mode="REQUIRED",
                                 description="student age"),
        ]
        bq = BQ(project=self.project_id).table(table).schema(schema)

        assert bq.is_exist() is False

        bq.create()

        assert bq.is_exist() is True
Ejemplo n.º 2
0
class TestSheetIntegration():
    def setup_class(self):
        self.dataset_name = "gfluent_sheet_dataset"
        self.project_id = os.environ.get("PROJECT_ID")

        self.bq = BQ(project=self.project_id)

        self.bq.delete_dataset(self.dataset_name)
        self.bq.create_dataset(self.dataset_name, location="EU")

    def teardown_class(self):
        self.bq.delete_dataset(self.dataset_name)

    def test_sheet_load(self):
        sheet = Sheet(os.getenv("GOOGLE_APPLICATION_CREDENTIALS")).sheet_id(
            TEST_UID).worksheet("data!A1:B4")

        self.bq.table(f"{self.dataset_name}.target_table")

        sheet.bq(self.bq).load(location="EU")

        rows = BQ(project=self.project_id).sql(
            f"select * from {self.dataset_name}.target_table").query()

        assert rows.total_rows == 3

    def test_sheet_load_with_schema(self):
        _sheet = "no-header"
        _range = "A:B"
        _tb = "no_header"

        sheet = Sheet(os.getenv("GOOGLE_APPLICATION_CREDENTIALS")).sheet_id(
            TEST_UID).worksheet(_sheet).range(_range).schema([
                bigquery.SchemaField(name="name",
                                     field_type="STRING",
                                     mode="REQUIRED",
                                     description="student name"),
                bigquery.SchemaField(name="age",
                                     field_type="STRING",
                                     mode="REQUIRED",
                                     description="student age as string"),
            ])

        self.bq.table(f"{self.dataset_name}.{_tb}")

        sheet.bq(self.bq).load(location="EU")

        rows = BQ(project=self.project_id).sql(
            f"select * from {self.dataset_name}.{_tb}").query()

        assert rows.total_rows == 3

    def test_sheet_load_exist_exception(self):
        _tb = "duplicated_table_name"

        sheet = Sheet(os.getenv("GOOGLE_APPLICATION_CREDENTIALS")).sheet_id(
            TEST_UID).worksheet("data!A1:B4")

        self.bq.table(f"{self.dataset_name}.{_tb}")

        # create the table
        table = f"{self.dataset_name}.{_tb}"
        schema = [
            bigquery.SchemaField(name="name",
                                 field_type="STRING",
                                 mode="REQUIRED",
                                 description="student name"),
            bigquery.SchemaField(name="age",
                                 field_type="INTEGER",
                                 mode="REQUIRED",
                                 description="student age"),
        ]
        bq = BQ(project=self.project_id).table(table).schema(schema).create()

        with pytest.raises(Conflict):
            sheet.bq(self.bq).load(location="EU")
Ejemplo n.º 3
0
    def test_bq_load_multiple(self):
        bq = BQ(self.project_id)
        bq.table("temp.batch-json").gcs(self.uris)

        bq.load(location="EU")
Ejemplo n.º 4
0
class TestBQ():
    def setup_class(self):
        project = "here-is-project-id"

        self.schema = [
            bigquery.SchemaField("exec_id", "INTEGER", "desc"),
            bigquery.SchemaField("name", "STRING", "desc"),
        ]

        self.bq = BQ(project,
                     table="dataset.table",
                     gcs="gs://abc",
                     sql="select *",
                     schema=self.schema,
                     mode="WRITE_APPEND",
                     create_mode="CREATE_NEVER",
                     ignore="ignored",
                     format="CSV")

    def test_init_with_project_id(self):
        project = "here-is-project-id"
        bq = BQ(project)

        assert project == bq._project
        assert bq._mode == "WRITE_APPEND"
        assert bq._create_mode == "CREATE_IF_NEEDED"
        assert bq._format == "NEWLINE_DELIMITED_JSON"

    def test_init_with_kwargs(self):
        assert self.bq._table == "dataset.table"
        assert self.bq._gcs == "gs://abc"
        assert self.bq._sql == "select *"
        assert self.bq._schema == self.schema
        assert self.bq._mode == "WRITE_APPEND"
        assert self.bq._create_mode == "CREATE_NEVER"
        assert self.bq._format == "CSV"

        with pytest.raises(AttributeError):
            self.bq.ignore

    def test_change_attribute_after_init(self):
        self.bq.table("new.table")
        assert self.bq._table == "new.table"

        self.bq.gcs("gs://new")
        assert self.bq._gcs == "gs://new"

        self.bq.sql("with new")
        assert self.bq._sql == "with new"

        self.bq.mode("WRITE_TRUNCATE")
        assert self.bq._mode == "WRITE_TRUNCATE"

        self.bq.format("NEWLINE_DELIMITED_JSON")
        assert self.bq._format == "NEWLINE_DELIMITED_JSON"

    def test_invalid_gcs(self):
        with pytest.raises(ValueError):
            self.bq.gcs("not_a_gcs_prefix")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", gcs="not_a_gcs_prefix")

    def test_invalid_sql(self):
        with pytest.raises(ValueError):
            self.bq.sql("insert into")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", sql="delete from ...")

    def test_invalid_schema(self):
        with pytest.raises(TypeError):
            self.bq.schema("not a list")

        with pytest.raises(TypeError):
            _ = BQ(project="proj_id", schema="not a list")

    def test_invalid_mode(self):
        with pytest.raises(ValueError):
            self.bq.mode("wrong mode")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", mode="wrong mode")

    def test_invalid_create_mode(self):
        with pytest.raises(ValueError):
            self.bq.create_mode("wrong mode")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", create_mode="wrong mode")

    def test_invalid_format(self):
        with pytest.raises(ValueError):
            self.bq.format("wrong format")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", format="wrong mode")

    def test_invalid_table_name(self):
        with pytest.raises(ValueError):
            self.bq.table("no_dot_in_table")

        with pytest.raises(ValueError):
            _ = BQ(project="proj_id", table="wrong table name")
Ejemplo n.º 5
0
    def test_bq_load(self):
        bq = BQ(self.project_id)
        bq.table("temp.temp_table").gcs(self.uri)

        bq.load(location="EU")