コード例 #1
0
class BigqueryRunQuery(bigquery.BigQueryRunQueryTask):
    """Luigi task that executes a BigQuery query and stores the result in a table.

    The destination table is identified by ``project``, ``dataset`` and
    ``table``; ``output()`` wraps those values in a ``BigQueryTarget``.

    Parameters
    ----------
    client: `luigi.contrib.bigquery.BigQueryClient()` instance, optional
        Client passed to the output target (default is an instance created
        when this class is defined)
    project: str, optional
        Project to write to e.g. "my-project-id" (default is PROJECT_ID)
    dataset: str
        Destination dataset e.g. "my_dataset_id"
    table: str
        Destination table e.g. "my_table_id"
    query: str, optional
        SQL to execute e.g. "SELECT 'foo' AS bar" (which is also the default)

    References
    ----------
    https://luigi.readthedocs.io/en/stable/api/luigi.contrib.bigquery.html
    """

    # NOTE(review): the default client is constructed once, while this class
    # body executes, and that single instance is the default for every task.
    # Confirm that building a client at class-definition time is intended.
    client = luigi.Parameter(default=bigquery.BigQueryClient())
    project = luigi.Parameter(default=PROJECT_ID)
    dataset = luigi.Parameter()
    table = luigi.Parameter()
    query = luigi.Parameter(default="""SELECT 'foo' AS bar""")

    def output(self):
        """Return the ``BigQueryTarget`` for the configured destination table."""
        destination = bigquery.BigQueryTarget(
            self.project,
            self.dataset,
            self.table,
            client=self.client,
        )
        return destination
コード例 #2
0
    def setUp(self):
        """Create the BigQuery client and a per-test table reference.

        The table ID is the last dot-separated component of ``self.id()``.
        Deleting that table is registered as a cleanup.
        """
        super(BigQueryGcloudTest, self).setUp()
        self.bq_client = bigquery.BigQueryClient(gcs_test.CREDENTIALS)

        # Derive the table ID from this test's own identifier.
        table_id = self.id().split('.')[-1]
        self.table = bigquery.BQTable(
            project_id=PROJECT_ID,
            dataset_id=DATASET_ID,
            table_id=table_id,
        )
        self.addCleanup(self.bq_client.delete_table, self.table)
コード例 #3
0
    def setUp(self):
        """Create GCS/BigQuery clients, register cleanups and produce test input.

        Cleanups are registered for the GCS directory URL and for the
        BigQuery dataset before ``_produce_test_input()`` is called.
        """
        self.gcs_client = gcs.GCSClient(CREDENTIALS)
        self.bq_client = bigquery.BigQueryClient(CREDENTIALS)

        self.table_id = "avro_bq_table"
        self.gcs_dir_url = 'gs://' + BUCKET_NAME + "/foo"

        # Register both cleanups first, then generate the input data.
        self.addCleanup(self.gcs_client.remove, self.gcs_dir_url)
        dataset = bigquery.BQDataset(PROJECT_ID, DATASET_ID, EU_LOCATION)
        self.addCleanup(self.bq_client.delete_dataset, dataset)

        self._produce_test_input()
コード例 #4
0
    def setUp(self):
        """Prepare GCS input data and freshly created BigQuery datasets.

        Steps, in order:

        1. Create the BigQuery and GCS clients.
        2. Insert the test bucket; an HTTP 409 response is tolerated, any
           other ``HttpError`` is re-raised.
        3. Clear and recreate the bucket root, then upload two JSON records
           (one per line) to an object named after ``self.id()``.
        4. Build the default and EU table references, register cleanups,
           and delete and recreate both datasets.
        """
        self.bq_client = bigquery.BigQueryClient(CREDENTIALS)
        self.gcs_client = gcs.GCSClient(CREDENTIALS)

        # Setup GCS input data
        bucket_body = {'name': BUCKET_NAME, 'location': EU_LOCATION}
        try:
            insert_request = self.gcs_client.client.buckets().insert(
                project=PROJECT_ID, body=bucket_body)
            insert_request.execute()
        except googleapiclient.errors.HttpError as ex:
            # todo verify that existing dataset is not US
            bucket_already_exists = ex.resp.status == 409
            if not bucket_already_exists:
                raise

        # Start from an empty bucket root.
        self.gcs_client.remove(bucket_url(''), recursive=True)
        self.gcs_client.mkdir(bucket_url(''))

        # Newline-delimited JSON: one serialized record per line.
        records = [
            {'field1': 'hi', 'field2': 1},
            {'field1': 'bye', 'field2': 2},
        ]
        payload = '\n'.join(json.dumps(record) for record in records)
        self.gcs_file = bucket_url(self.id())
        self.gcs_client.put_string(payload, self.gcs_file)

        # Setup BigQuery datasets
        test_name = self.id().split('.')[-1]
        self.table = bigquery.BQTable(
            project_id=PROJECT_ID,
            dataset_id=DATASET_ID,
            table_id=test_name,
            location=None,
        )
        self.table_eu = bigquery.BQTable(
            project_id=PROJECT_ID,
            dataset_id=EU_DATASET_ID,
            table_id=test_name + '_eu',
            location=EU_LOCATION,
        )

        self.addCleanup(self.gcs_client.remove, bucket_url(''), recursive=True)
        self.addCleanup(self.bq_client.delete_dataset, self.table.dataset)
        self.addCleanup(self.bq_client.delete_dataset, self.table_eu.dataset)

        # Delete both datasets first, then create both again with an empty body.
        datasets = (self.table.dataset, self.table_eu.dataset)
        for dataset in datasets:
            self.bq_client.delete_dataset(dataset)
        for dataset in datasets:
            self.bq_client.make_dataset(dataset, body={})