Code example #1
0
 def setUp(self):
     """Resolve environment configuration and construct the BigQuery client."""
     env = os.getenv
     self.project_id = env('PROJECT_ID')
     self.dataset_id = env('DATASET_ID', 'test_dataset')
     # PROJECT_ID has no sensible default, so a missing value aborts the run.
     if self.project_id is None:
         print('PROJECT_ID is not defined.')
         sys.exit(1)
     self.bq = BigQuery(self.project_id)
Code example #2
0
 def setUp(self):
     """Recreate the test dataset from scratch and make it the client default."""
     self.project_id = os.getenv('PROJECT_ID')
     self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
     if self.project_id is None:
         print('PROJECT_ID is not defined.')
         sys.exit(1)
     bq = self.bq = BigQuery(self.project_id)
     # Ensure a pristine dataset: remove leftovers from earlier runs first.
     if bq.exists_dataset(self.dataset_id):
         bq.drop_dataset(self.dataset_id, delete_contents=True)
     bq.create_dataset(self.dataset_id)
     bq.dataset_id = self.dataset_id    # Set default datasetId
Code example #3
0
    def setUp(self):
        """Create a dataset holding a pre-loaded nested table and a flat table.

        Requires PROJECT_ID and BUCKET in the environment; aborts the whole
        run otherwise because no meaningful test can execute without them.
        """
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        # Time-stamped table ids avoid collisions between consecutive runs.
        self.nested_table_id = os.getenv('TABLE_ID', 'test_table') + '_nested_' + str(int(time.time()))
        self.flat_table_id = os.getenv('TABLE_ID', 'test_table') + '_flat_' + str(int(time.time()))
        self.bucket = os.getenv('BUCKET')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        if self.bucket is None:
            # Fixed typo in the diagnostic message (previously read 'BCUKET').
            print('BUCKET is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId

        # create & load nested table
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            ]},
            { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' },
        ]
        self.bq.create_table(self.nested_table_id, schema=schema)
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar', 'birth': { 'year': 2015, 'month': 10, 'day': 28 } },
            { 'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]}
        ]
        self.bq.load(self.nested_table_id, rows)

        # create & load flat table
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
        ]
        self.bq.create_table(self.flat_table_id, schema=schema)
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar' },
            { 'id': 3, 'name': 'baz' },
        ]
        self.bq.load(self.flat_table_id, rows)
 def setUp(self):
     """Provision a pristine dataset plus unique table/view names."""
     self.project_id = os.getenv('PROJECT_ID')
     self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
     # Time-stamped ids keep repeated runs from colliding on names.
     self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
     self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
     if self.project_id is None:
         print('PROJECT_ID is not defined.')
         sys.exit(1)
     bq = self.bq = BigQuery(self.project_id)
     if bq.exists_dataset(self.dataset_id):
         bq.drop_dataset(self.dataset_id, delete_contents=True)
     bq.create_dataset(self.dataset_id)
     bq.dataset_id = self.dataset_id    # Set default datasetId
Code example #5
0
 def setUp(self):
     """Start each test with a freshly created dataset as the client default."""
     env = os.getenv
     self.project_id = env("PROJECT_ID")
     self.dataset_id = env("DATASET_ID", "test_dataset")
     self.table_id = env("TABLE_ID", "test_table")
     self.view_id = env("VIEW_ID", "test_view")
     if self.project_id is None:
         print("PROJECT_ID is not defined.")
         sys.exit(1)
     bq = self.bq = BigQuery(self.project_id)
     # Drop any dataset left behind by a previous run before recreating it.
     if bq.exists_dataset(self.dataset_id):
         bq.drop_dataset(self.dataset_id, delete_contents=True)
     bq.create_dataset(self.dataset_id)
     bq.dataset_id = self.dataset_id  # Set default datasetId
Code example #6
0
 def setUp(self):
     """Create an empty dataset and a nested-schema table for the test."""
     self.project_id = os.getenv('PROJECT_ID')
     self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
     self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
     self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
     if self.project_id is None:
         print('PROJECT_ID is not defined.')
         sys.exit(1)
     bq = self.bq = BigQuery(self.project_id)
     if bq.exists_dataset(self.dataset_id):
         bq.drop_dataset(self.dataset_id, delete_contents=True)
     bq.create_dataset(self.dataset_id)
     bq.dataset_id = self.dataset_id    # Set default datasetId
     # Nested schema: REQUIRED scalars, a NULLABLE record and a REPEATED field.
     birth_fields = [
         { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' },
         { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' },
         { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' },
     ]
     schema = [
         { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
         { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
         { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': birth_fields },
         { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' },
     ]
     bq.create_table(self.table_id, schema=schema)
Code example #7
0
class BigQueryTest(unittest.TestCase):
    """Tests for BigQuery.extract(): export nested/flat tables to GCS in several formats."""

    def setUp(self):
        """Create a fresh dataset holding a pre-loaded nested table and flat table.

        Requires PROJECT_ID and BUCKET in the environment; aborts the run
        otherwise since extraction needs a real project and a GCS bucket.
        """
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        # Time-stamped table ids avoid collisions between consecutive runs.
        self.nested_table_id = os.getenv('TABLE_ID', 'test_table') + '_nested_' + str(int(time.time()))
        self.flat_table_id = os.getenv('TABLE_ID', 'test_table') + '_flat_' + str(int(time.time()))
        self.bucket = os.getenv('BUCKET')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        if self.bucket is None:
            # Fixed typo in the diagnostic message (previously read 'BCUKET').
            print('BUCKET is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId

        # create & load nested table
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            ]},
            { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' },
        ]
        self.bq.create_table(self.nested_table_id, schema=schema)
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar', 'birth': { 'year': 2015, 'month': 10, 'day': 28 } },
            { 'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]}
        ]
        self.bq.load(self.nested_table_id, rows)

        # create & load flat table
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
        ]
        self.bq.create_table(self.flat_table_id, schema=schema)
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar' },
            { 'id': 3, 'name': 'baz' },
        ]
        self.bq.load(self.flat_table_id, rows)

    def tearDown(self):
        """Drop per-test resources.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_table(self.nested_table_id)
        # delete_contents also removes the flat table.
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def _extract(self, table_id, suffix, label):
        """Extract *table_id* to <bucket>/test-*<suffix>, printing progress.

        The destination suffix is what selects the export format (and gzip
        compression for ``.gz`` suffixes) — TODO confirm against the
        BigQuery wrapper's extract() implementation.
        """
        destination_uris = [
            self.bucket + '/test-*' + suffix
        ]
        print("extract start (" + label + ")")
        res = self.bq.extract(table_id, destination_uris)
        pprint(res)
        print("extract end (" + label + ")")

    def test_normal(self):
        """Extract the nested table (json/avro) and flat table (csv/tsv), plain and gzipped."""
        self._extract(self.nested_table_id, '.json', 'json')
        self._extract(self.nested_table_id, '.avro', 'avro')
        self._extract(self.flat_table_id, '.csv', 'csv')
        self._extract(self.flat_table_id, '.tsv', 'tsv')
        self._extract(self.nested_table_id, '.json.gz', 'json+gz')
        self._extract(self.flat_table_id, '.csv.gz', 'csv+gz')
        self._extract(self.flat_table_id, '.tsv.gz', 'tsv+gz')
class BigQueryTest(unittest.TestCase):
    """Tests for BigQuery.insert_from_select(): INSERT ... SELECT variants."""

    def setUp(self):
        """Create a clean dataset and time-stamped table/view ids for each test."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
        self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId

    def tearDown(self):
        """Drop the per-test table and dataset.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_table(self.table_id)
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

#    def test_error_allow_large_results(self):
#        schema = [
#            { 'name': 'word', 'type': 'STRING', 'mode': 'REQUIRED' },
#            { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'REQUIRED' },
#        ]
#        self.bq.create_table(self.table_id, schema=schema)
#
#        query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10'
#
#        # Cannnot append to required fields when allowLargeResults is True
#        with self.assertRaises(BigQueryError):
#            res = self.bq.insert_from_select(self.table_id, query, allow_large_results=True)

    def test_error_no_required_field(self):
        """The query result lacks a REQUIRED destination column -> BigQueryError."""
        schema = [
            { 'name': 'title', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'unique_words', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'required_field', 'type': 'STRING', 'mode': 'REQUIRED' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare]'

        # No Required Field
        with self.assertRaises(BigQueryError):
            self.bq.insert_from_select(self.table_id, query)

    def test_error_schema_mismatch(self):
        """Source columns are NULLABLE but the destination requires them -> error."""
        schema = [
            { 'name': 'title', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

        query = 'SELECT title,id FROM [publicdata:samples.wikipedia] LIMIT 10'

        # Can't change type form NULLABLE to REQUIRED
        with self.assertRaises(BigQueryError):
            self.bq.insert_from_select(self.table_id, query)

    def test_normal_allow_large_results(self):
        """allow_large_results works when destination columns are NULLABLE."""
        schema = [
            { 'name': 'word', 'type': 'STRING', 'mode': 'NULLABLE' },
            { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'NULLABLE' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

        query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10'
        res = self.bq.insert_from_select(self.table_id, query, allow_large_results=True)
        self.assertTrue(bool(res))
        pprint(res)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(10, len(res))
        pprint(res)

    def test_normal_insert_into_exists_table(self):
        """Appending query results into a pre-existing table succeeds."""
        schema = [
            { 'name': 'word', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'REQUIRED' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

        query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10'
        res = self.bq.insert_from_select(self.table_id, query)
        self.assertTrue(bool(res))
        pprint(res)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(10, len(res))
        pprint(res)

    def test_normal_with_args(self):
        """Fully qualified source/destination ids passed as keyword arguments."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words FROM shakespeare'
        res = self.bq.insert_from_select(
            dest_project_id=self.project_id, dest_dataset_id=self.dataset_id, dest_table_id=self.table_id, query=query,
            src_project_id='publicdata', src_dataset_id='samples'
        )
        self.assertTrue(bool(res))
        pprint(res)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(10, len(res))
        pprint(res)

    def test_normal_async(self):
        """Asynchronous mode returns a job id immediately; wait, then verify rows."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare]'
        # 'async' became a reserved word in Python 3.7, so the keyword must be
        # supplied via dict expansion instead of a plain keyword argument.
        res = self.bq.insert_from_select(self.table_id, query, **{'async': True})
        self.assertTrue(re.match(r'job_', res))
        print(res)

        self.bq.wait_job(res)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(10, len(res))
        pprint(res)

    def test_normal(self):
        """Synchronous INSERT ... SELECT into the default dataset."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare]'
        res = self.bq.insert_from_select(self.table_id, query)
        self.assertTrue(bool(res))
        pprint(res)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(10, len(res))
        pprint(res)
Code example #9
0
class BigQueryTest(unittest.TestCase):
    """Tests for BigQuery.select(): synchronous, paged, empty and async queries."""

    def setUp(self):
        """Create a clean dataset and make it the client default."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId

    def tearDown(self):
        """Drop the test dataset.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def test_normal_page_token(self):
        """max_results=1 forces paging; all 10 rows must still come back."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare] '
        res = self.bq.select(query, max_results=1)
        self.assertEqual(10, len(res))
        pprint(res)

    def test_normal_empty(self):
        """A query matching nothing returns an empty result set."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare] ' \
            + 'WHERE corpus = "hoge" '
        res = self.bq.select(query)
        self.assertEqual(0, len(res))
        pprint(res)

    def test_normal_async(self):
        """Asynchronous select returns a job id instead of rows."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare]'
        # 'async' became a reserved word in Python 3.7, so the keyword must be
        # supplied via dict expansion instead of a plain keyword argument.
        res = self.bq.select(query, **{'async': True})
        self.assertTrue(re.match(r'job_', res))
        pprint(res)

    def test_normal(self):
        """Plain synchronous select returns the 10 requested rows."""
        query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \
            + 'FROM [publicdata:samples.shakespeare]'
        res = self.bq.select(query)
        self.assertEqual(10, len(res))
        pprint(res)
Code example #10
0
class BigQueryTest(unittest.TestCase):
    """Tests for table and view management: create, info, list and drop."""

    # Nested schema shared by every test in this class (read-only).
    _SCHEMA = [
        {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
        {"name": "name", "type": "STRING", "mode": "REQUIRED"},
        {
            "name": "birth",
            "type": "RECORD",
            "mode": "NULLABLE",
            "fields": [
                {"name": "year", "type": "INTEGER", "mode": "REQUIRED"},
                {"name": "month", "type": "INTEGER", "mode": "REQUIRED"},
                {"name": "day", "type": "INTEGER", "mode": "REQUIRED"},
            ],
        },
        {"name": "url", "type": "STRING", "mode": "REPEATED"},
    ]

    def setUp(self):
        """Create a clean dataset and default table/view ids from the environment."""
        self.project_id = os.getenv("PROJECT_ID")
        self.dataset_id = os.getenv("DATASET_ID", "test_dataset")
        self.table_id = os.getenv("TABLE_ID", "test_table")
        self.view_id = os.getenv("VIEW_ID", "test_view")
        if self.project_id is None:
            print("PROJECT_ID is not defined.")
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id  # Set default datasetId

    def tearDown(self):
        """Drop the test dataset.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def test_error(self):
        """Error paths: view over a missing table, unknown dataset, schema with a view."""
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        with self.assertRaises(NotFoundError):
            self.bq.create_view(self.view_id, query)

        schema = self._SCHEMA

        with self.assertRaises(NotFoundError):
            self.bq.create_table(self.table_id, schema=schema, dataset_id="not_found_dataset")

        self.bq.create_table(self.table_id, schema=schema)

        # "Schema field shouldn't be used as input with a view"
        with self.assertRaises(Http4xxError):
            self.bq.create_view(self.view_id, query, schema=schema)

    def test_normal_with_args(self):
        """Same lifecycle as test_normal, but with fully qualified keyword ids."""
        print("exists table?")
        if self.bq.exists_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id):
            print("exists")

            print("drop table")
            res = self.bq.drop_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id)
            self.assertTrue(bool(res))
        else:
            print("no exists")

        print("create table")
        schema = self._SCHEMA
        # expirationTime is epoch milliseconds, one day from now.
        expiration_time = str(int(time.time()) + 86400) + "000"
        res = self.bq.create_table(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=self.table_id,
            schema=schema,
            description="Description",
            expiration_time=expiration_time,
            friendly_name="Friendly Name",
        )
        self.assertTrue(bool(res))

        print("info table")
        res = self.bq.info_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id)
        self.assertEqual("Description", res["description"])
        self.assertEqual(expiration_time, res["expirationTime"])
        self.assertEqual("Friendly Name", res["friendlyName"])
        pprint(res)

        print("create view")
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        res = self.bq.create_view(
            project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.view_id, query=query
        )
        self.assertTrue(bool(res))

        print("show tables")
        res = self.bq.show_tables(project_id=self.project_id, dataset_id=self.dataset_id, max_results=1)
        self.assertIn(self.table_id, res)
        self.assertIn(self.view_id, res)
        print("\n".join(res))

        print("drop view")
        res = self.bq.drop_table(self.view_id)
        self.assertFalse(bool(res))

        print("drop table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))

    def test_normal(self):
        """Full lifecycle with default ids: create, duplicate-create, info, view, list, drop."""
        print("exists table?")
        if self.bq.exists_table(self.table_id):
            print("exists")

            print("drop table")
            res = self.bq.drop_table(self.table_id)
            self.assertTrue(bool(res))
        else:
            print("no exists")

        print("create table")
        schema = self._SCHEMA
        res = self.bq.create_table(self.table_id, schema=schema)
        self.assertTrue(bool(res))

        print("create exists table")
        res = self.bq.create_table(self.table_id, schema=schema)
        self.assertFalse(bool(res))

        print("info table")
        res = self.bq.info_table(self.table_id)
        self.assertTrue(bool(res))
        pprint(res)

        print("create view")
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        # Bug fix: the return value was previously discarded, so the assertion
        # below silently re-checked the stale result from info_table.
        res = self.bq.create_view(self.view_id, query)
        self.assertTrue(bool(res))

        print("info view")
        res = self.bq.info_table(self.view_id)
        self.assertTrue(bool(res))
        pprint(res)

        print("show tables")
        res = self.bq.show_tables()
        self.assertIn(self.table_id, res)
        self.assertIn(self.view_id, res)
        print("\n".join(res))

        print("drop view")
        res = self.bq.drop_table(self.view_id)
        self.assertFalse(bool(res))

        print("drop table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))

        print("drop no exists table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))
Code example #11
0
class BigQueryTest(unittest.TestCase):
    """Tests for dataset management: create, info, list and drop."""

    def setUp(self):
        """Resolve env configuration and build the client; datasets are made per test."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)

    def tearDown(self):
        # Renamed from ``TearDown`` so unittest recognizes the hook; each test
        # cleans up its own dataset, so there is nothing to do here.
        pass

    def test_normal(self):
        """Create, duplicate-create, list and drop a dataset with default options."""
        print('exists dataset')
        if self.bq.exists_dataset(self.dataset_id):
            print('delete dataset')
            res = self.bq.drop_dataset(self.dataset_id, delete_contents=True)
            self.assertTrue(bool(res))

        print('create dataset')
        res = self.bq.create_dataset(self.dataset_id)
        self.assertTrue(bool(res))

        print('create exists dataset')
        res = self.bq.create_dataset(self.dataset_id)
        self.assertFalse(bool(res))

        print('show datasets')
        res = self.bq.show_datasets()
        self.assertIn(self.dataset_id, res)
        print('\n'.join(res))

        print('delete dataset')
        res = self.bq.drop_dataset(self.dataset_id)
        self.assertFalse(bool(res))

        print('delete no exists dataset')
        res = self.bq.drop_dataset(self.dataset_id)
        self.assertFalse(bool(res))

    def test_normal_with_args(self):
        """Create a dataset with explicit options and verify them via info_dataset."""
        print('exists dataset: ' + self.dataset_id)
        if self.bq.exists_dataset(self.dataset_id, project_id=self.project_id):
            print('exists')

            print('delete dataset: ' + self.dataset_id)
            res = self.bq.drop_dataset(self.dataset_id, project_id=self.project_id, delete_contents=True)
            self.assertTrue(bool(res))
        else:
            print('no exists')

        print('create dataset')
        access = [
            { 'role': 'OWNER', 'specialGroup': 'projectOwners' },
        ]
        res = self.bq.create_dataset(self.dataset_id, project_id=self.project_id, access=access,
            default_table_expiration_ms=3600000, description='Description', friendly_name='Friendly Name',
            location='EU')
        self.assertTrue(bool(res))

        print('info dataset')
        res = self.bq.info_dataset(self.dataset_id, project_id=self.project_id)
        self.assertEqual(1, len(res['access']))
        self.assertEqual('OWNER', res['access'][0]['role'])
        self.assertEqual('projectOwners', res['access'][0]['specialGroup'])
        self.assertEqual(3600000, int(res['defaultTableExpirationMs']))
        self.assertEqual('Description', res['description'])
        self.assertEqual('Friendly Name', res['friendlyName'])
        self.assertEqual('EU', res['location'])
        pprint(res)

        print('show datasets')
        res = self.bq.show_datasets(project_id=self.project_id, all=True, max_results=10)
        self.assertIn(self.dataset_id, res)
        print('\n'.join(res))

        print('delete dataset: ' + self.dataset_id)
        res = self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.assertFalse(bool(res))

    def test_error(self):
        """Missing or None dataset_id must raise before any API call is made."""
        with self.assertRaises(TypeError):
            self.bq.create_dataset()

        with self.assertRaises(ParameterError):
            self.bq.create_dataset(dataset_id=None)
Code example #12
0
class BigQueryTest(unittest.TestCase):
    """Tests for BigQuery.load(): loading rows from objects, JSON and CSV files."""

    def setUp(self):
        """Create a clean dataset and a nested-schema table for each test."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
        self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            ]},
            { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

    def tearDown(self):
        """Drop the per-test table and dataset.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_table(self.table_id)
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def test_error_invalid_rows(self):
        """Invalid rows abort the load; the table must stay empty afterwards."""
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2 },
            { 'id': 'three', 'name': 'baz' },
        ]
        with self.assertRaises(BigQueryError):
            self.bq.load(self.table_id, rows)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(0, len(res))
        pprint(res)

    def test_error_unknown_values(self):
        """Fields absent from the schema are rejected by default."""
        rows = [
            { 'id': 1, 'name': 'foo', 'unknown_field': 'unknown_value' },
        ]
        with self.assertRaises(BigQueryError):
            self.bq.load(self.table_id, rows)

    def test_normal_unknown_values(self):
        """ignore_unknown_values=True drops unexpected fields instead of failing."""
        rows = [
            { 'id': 1, 'name': 'foo', 'unknown_field': 'unknown_value' },
        ]
        self.bq.load(self.table_id, rows, ignore_unknown_values=True)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(1, len(res))
        pprint(res)

    def test_normal_async(self):
        """Async load returns a job id; poll until the job reports DONE."""
        filepath = os.path.dirname(os.path.abspath(__file__)) + '/data.json'
        # 'async' became a reserved word in Python 3.7, so the keyword must be
        # supplied via dict expansion instead of a plain keyword argument.
        job_id = self.bq.load(self.table_id, filepath, **{'async': True})
        self.assertTrue(re.match(r'job_', job_id))
        print(job_id)

        while True:
            res = self.bq.info_job(job_id)
            state = res['status']['state']
            print(state)
            if state == 'DONE': break
            time.sleep(2)

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(3, len(res))
        pprint(res)

    def test_normal_from_csv(self):
        """Load a CSV file, supplying the schema and skipping the header row."""
        filepath = os.path.dirname(os.path.abspath(__file__)) + '/data.csv'
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
        ]
        res = self.bq.load(self.table_id, filepath, schema=schema, skip_leading_rows=1)
        self.assertTrue(bool(res))

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(3, len(res))
        pprint(res)

    def test_normal_from_json(self):
        """Load a newline-delimited JSON file into the pre-created table."""
        filepath = os.path.dirname(os.path.abspath(__file__)) + '/data.json'
        res = self.bq.load(self.table_id, filepath)
        self.assertTrue(bool(res))

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(3, len(res))
        pprint(res)

    def test_normal_from_obj(self):
        """Load in-memory row dicts, including nested and repeated fields."""
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar', 'birth': { 'year': 2015, 'month': 10, 'day': 28 } },
            { 'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]}
        ]
        res = self.bq.load(self.table_id, rows)
        self.assertTrue(bool(res))

        res = self.bq.dump_table(self.table_id)
        self.assertEqual(3, len(res))
        pprint(res)
Code example #13
0
class BigQueryTest(unittest.TestCase):
    """Tests for BigQuery.insert(): streaming inserts with error-handling flags."""

    def setUp(self):
        """Create a clean dataset and a nested-schema table for each test."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
        self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id    # Set default datasetId
        schema = [
            { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' },
            { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' },
                { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' },
            ]},
            { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' },
        ]
        self.bq.create_table(self.table_id, schema=schema)

    def tearDown(self):
        """Drop the per-test table and dataset.

        Renamed from ``TearDown``: unittest only calls the lowercase
        ``tearDown`` hook, so the original cleanup never ran.
        """
        self.bq.drop_table(self.table_id)
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    @staticmethod
    def sigalrm_handler(x, y):
        """SIGALRM handler: turn the alarm into an exception to abort the wait."""
        raise Exception("Timeout")

    def wait_insert(self):
        """Poll the table until streamed rows become visible (2 min timeout).

        Streaming inserts are eventually consistent, so dump_table may return
        nothing for a while right after insert().
        """
        signal.signal(signal.SIGALRM, BigQueryTest.sigalrm_handler)
        signal.alarm(120)
        while True:
            res = self.bq.dump_table(self.table_id)
            if bool(res):
                signal.alarm(0)  # cancel the pending alarm before returning
                return res
            print('sleep...')
            time.sleep(2)

    def test_error(self):
        """Bad rows still fail when only the *other* leniency flag is set."""
        rows = [
            { 'id': 1, 'name': 'foo' },         # normal
            { 'id': 2 },                        # missing required field
            { 'id': 'three', 'name': 'baz' },   # invalid data type
        ]
        with self.assertRaises(BigQueryError):
            self.bq.insert(self.table_id, rows, ignore_unknown_values=True)

        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar', 'unknown_fields': 'unknown_value' },
        ]
        with self.assertRaises(BigQueryError):
            self.bq.insert(self.table_id, rows, skip_invalid_rows=True)

    def test_normal_skip_invalid_rows(self):
        """skip_invalid_rows=True inserts only the valid row."""
        rows = [
            { 'id': 1, 'name': 'foo' },         # normal
            { 'id': 2 },                        # missing required field
            { 'id': 'three', 'name': 'baz' },   # invalid data type
        ]
        res = self.bq.insert(self.table_id, rows, skip_invalid_rows=True)
        self.assertTrue(bool(res))

        res = self.wait_insert()
        self.assertEqual(1, len(res))
        pprint(res)

    def test_normal_ignore_unknown_values(self):
        """ignore_unknown_values=True drops unexpected fields but keeps the rows."""
        rows = [
            { 'id': 1, 'name': 'foo' },         # normal
            { 'id': 2, 'name': 'bar', 'unknown_field': 'unknown_value' },
        ]
        res = self.bq.insert(self.table_id, rows, ignore_unknown_values=True)
        self.assertTrue(bool(res))

        res = self.wait_insert()
        self.assertEqual(2, len(res))
        pprint(res)

    def test_normal(self):
        """Insert well-formed rows, including nested and repeated fields."""
        rows = [
            { 'id': 1, 'name': 'foo' },
            { 'id': 2, 'name': 'bar', 'birth': { 'year': 2015, 'month': 10, 'day': 28 } },
            { 'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]}
        ]
        res = self.bq.insert(self.table_id, rows)
        self.assertTrue(bool(res))

        res = self.wait_insert()
        self.assertEqual(3, len(res))
        pprint(res)