Exemple #1
0
def update_person(client, project_id, dataset_id):
    """
    Populates person table with two additional columns from the ext table

    :param client: bigquery client
    :param project_id: identifies the project
    :param dataset_id: identifies the dataset
    :return:
    """
    person_table_id = f'{project_id}.{dataset_id}.person'
    person_ext_table_id = f'{project_id}.{dataset_id}.person_ext'
    person = client.get_table(person_table_id)
    person_ext = client.get_table(person_ext_table_id)

    # update person schema
    person_ext_schema = person_ext.schema
    person_schema = person.schema
    new_person_schema = person_schema[:]  # Creates a copy of the schema.
    for schema_field in person_ext_schema:
        if schema_field.name == 'state_of_residence_concept_id' or schema_field.name == 'state_of_residence_source_value':
            new_person_schema.append(schema_field)

    person.schema = new_person_schema
    person = client.update_table(person, ["schema"])
    LOGGER.info(f'Updated person table schema to {person.schema}')

    query_job = client.query(query=bq.get_create_or_replace_table_ddl(
        project_id=project_id,
        dataset_id=dataset_id,
        table_id='person',
        schema=person.schema,
        as_query=NEW_PERSON_QUERY_TMPL.render(person=person,
                                              person_ext=person_ext)), )
    query_job.result()
Exemple #2
0
    def test_get_table_ddl(self):
        # Schema is determined by table name
        ddl = bq.get_create_or_replace_table_ddl(self.project_id,
                                                 self.dataset_id,
                                                 'observation').strip()
        self.assertTrue(
            ddl.startswith(
                f'CREATE OR REPLACE TABLE `{self.project_id}.{self.dataset_id}.observation`'
            ))
        self.assertTrue(ddl.endswith(')'))

        # Explicitly provided table name and schema are rendered
        observation_schema = bq.get_table_schema('observation')
        ddl = bq.get_create_or_replace_table_ddl(
            self.project_id,
            self.dataset_id,
            table_id='custom_observation',
            schema=observation_schema).strip()
        self.assertTrue(
            ddl.startswith(
                f'CREATE OR REPLACE TABLE `{self.project_id}.{self.dataset_id}.custom_observation`'
            ))
        # Sanity check that observation schema is rendered
        self.assertTrue(
            all(field.description in ddl for field in observation_schema))
        self.assertTrue(ddl.endswith(')'))

        # Parameter as_query is rendered
        fake_as_query = "SELECT 1 FROM fake"
        ddl = bq.get_create_or_replace_table_ddl(
            self.project_id,
            self.dataset_id,
            'observation',
            as_query=fake_as_query).strip()
        self.assertTrue(
            ddl.startswith(
                f'CREATE OR REPLACE TABLE `{self.project_id}.{self.dataset_id}.observation`'
            ))
        self.assertTrue(ddl.endswith(fake_as_query))
Exemple #3
0
    def backup_rows_to_drop_ddl(self) -> str:
        """
        Get a DDL statement which loads a backup table with rows to be dropped

        :return: the DDL statement
        """
        observation_schema = bq.get_table_schema(OBSERVATION)
        query = BACKUP_ROWS_QUERY.render(lookup_table=self.lookup_table,
                                         src_table=self.observation_table)
        return bq.get_create_or_replace_table_ddl(
            project_id=self.backup_table.project,
            dataset_id=self.backup_table.dataset_id,
            table_id=self.backup_table.table_id,
            schema=observation_schema,
            as_query=query)
Exemple #4
0
    def stage_to_target_ddl(self) -> str:
        """
        Get a DDL statement which drops and creates the observation
        table with rows from stage

        :return: the DDL statement
        """
        observation_schema = bq.get_table_schema(OBSERVATION)
        stage = self.stage_table
        query = f'''SELECT * FROM `{stage.project}.{stage.dataset_id}.{stage.table_id}`'''
        return bq.get_create_or_replace_table_ddl(
            project_id=self.observation_table.project,
            dataset_id=self.observation_table.dataset_id,
            schema=observation_schema,
            table_id=self.observation_table.table_id,
            as_query=query)
Exemple #5
0
    def stage_cleaned_table_ddl(self) -> str:
        """
        Get a DDL statement which stages cleaned table

        Note: This avoids potential partitioning mismatch error
              when directly overwriting observation table

        :return: the DDL statement
        """
        observation_schema = bq.get_table_schema(OBSERVATION)
        query = CLEANED_ROWS_QUERY.render(src=self.observation_table,
                                          backup=self.backup_table)
        return bq.get_create_or_replace_table_ddl(
            project_id=self.stage_table.project,
            dataset_id=self.stage_table.dataset_id,
            table_id=self.stage_table.table_id,
            schema=observation_schema,
            as_query=query)