def load_parquet(
    client: bigquery.Client,
    dataframe: pandas.DataFrame,
    destination_table_ref: bigquery.TableReference,
    location: Optional[str],
    schema: Optional[Dict[str, Any]],
    billing_project: Optional[str] = None,
):
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = "WRITE_APPEND"
    job_config.create_disposition = "CREATE_NEVER"
    job_config.source_format = "PARQUET"

    if schema is not None:
        schema = pandas_gbq.schema.remove_policy_tags(schema)
        job_config.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
        dataframe = cast_dataframe_for_parquet(dataframe, schema)

    try:
        client.load_table_from_dataframe(
            dataframe,
            destination_table_ref,
            job_config=job_config,
            location=location,
            project=billing_project,
        ).result()
    except pyarrow.lib.ArrowInvalid as exc:
        raise exceptions.ConversionError(
            "Could not convert DataFrame to Parquet."
        ) from exc
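
# A minimal usage sketch for load_parquet above, assuming the function and its
# imports are already in scope. The project, dataset, and table names are
# placeholders; because the job config uses CREATE_NEVER, the destination table
# is assumed to already exist with a schema compatible with the DataFrame.
import pandas
from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # hypothetical project id
destination = bigquery.TableReference.from_string("my-project.my_dataset.my_table")
df = pandas.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# Passing schema=None skips the policy-tag stripping and Parquet casting branch.
load_parquet(
    client,
    df,
    destination,
    location="US",
    schema=None,
)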
def _upload_entity_df_into_bigquery(
    client: Client,
    project: str,
    dataset_name: str,
    entity_df: Union[pandas.DataFrame, str],
) -> Table:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table"""
    table_id = _get_table_id_for_new_entity(client, project, dataset_name)

    if type(entity_df) is str:
        job = client.query(f"CREATE TABLE {table_id} AS ({entity_df})")
        job.result()
    elif isinstance(entity_df, pandas.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)

        # Upload the dataframe into BigQuery, creating a temporary table
        job_config = bigquery.LoadJobConfig()
        job = client.load_table_from_dataframe(
            entity_df, table_id, job_config=job_config
        )
        job.result()
    else:
        raise ValueError(
            f"The entity dataframe you have provided must be a Pandas DataFrame or a BigQuery SQL query, "
            f"but we found: {type(entity_df)}"
        )

    # Ensure that the table expires after some time
    table = client.get_table(table=table_id)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return table
def _upload_entity_df_and_get_entity_schema(
    client: Client,
    table_name: str,
    entity_df: Union[pd.DataFrame, str],
) -> Dict[str, np.dtype]:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the schema of the resulting table"""

    if type(entity_df) is str:
        job = client.query(f"CREATE TABLE {table_name} AS ({entity_df})")
        block_until_done(client, job)

        limited_entity_df = (
            client.query(f"SELECT * FROM {table_name} LIMIT 1").result().to_dataframe()
        )
        entity_schema = dict(zip(limited_entity_df.columns, limited_entity_df.dtypes))
    elif isinstance(entity_df, pd.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)

        job = client.load_table_from_dataframe(entity_df, table_name)
        block_until_done(client, job)

        entity_schema = dict(zip(entity_df.columns, entity_df.dtypes))
    else:
        raise InvalidEntityType(type(entity_df))

    # Ensure that the table expires after some time
    table = client.get_table(table=table_name)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return entity_schema
def _upload_entity_df(
    client: Client,
    table_name: str,
    entity_df: Union[pd.DataFrame, str],
) -> Table:
    """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table"""

    if isinstance(entity_df, str):
        job = client.query(f"CREATE TABLE {table_name} AS ({entity_df})")
    elif isinstance(entity_df, pd.DataFrame):
        # Drop the index so that we don't have unnecessary columns
        entity_df.reset_index(drop=True, inplace=True)

        job = client.load_table_from_dataframe(entity_df, table_name)
    else:
        raise InvalidEntityType(type(entity_df))

    block_until_done(client, job)

    # Ensure that the table expires after some time
    table = client.get_table(table=table_name)
    table.expires = datetime.utcnow() + timedelta(minutes=30)
    client.update_table(table, ["expires"])

    return table
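
# A hedged usage sketch for _upload_entity_df above. The table ids, dataset,
# and driver data are placeholders; Client, block_until_done, and
# InvalidEntityType are assumed to be imported/defined by the surrounding
# module, as in the snippet itself.
import pandas as pd
from google.cloud.bigquery import Client

client = Client(project="my-project")  # hypothetical project id

# Option 1: upload a DataFrame directly via load_table_from_dataframe.
entity_df = pd.DataFrame(
    {
        "driver_id": [1001, 1002],
        "event_timestamp": pd.to_datetime(["2021-04-12", "2021-04-13"]),
    }
)
table = _upload_entity_df(client, "my-project.my_dataset.entity_df", entity_df)

# Option 2: pass a SQL string, which is materialized with CREATE TABLE ... AS.
table = _upload_entity_df(
    client,
    "my-project.my_dataset.entity_df_sql",
    "SELECT * FROM `my-project.my_dataset.drivers`",
)
print(table.expires)  # the helper sets a 30-minute expiry on the created table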
def load_rules_lookup(self, client: bigquery.Client) -> bigquery.job.LoadJob:
    """
    Load rule CSV files into a BigQuery table

    :param client: active BigQuery Client object
    :return: the completed LoadJob object
    """
    job_config = bigquery.LoadJobConfig()
    rules_dataframe = self.create_rules_dataframe()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job = client.load_table_from_dataframe(
        rules_dataframe, destination=self.lookup_table, job_config=job_config
    )
    return job.result()
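
# A hedged calling sketch for load_rules_lookup above. The enclosing class is
# not shown, so a SimpleNamespace stands in for `self`; lookup_table and
# create_rules_dataframe are hypothetical stand-ins for whatever that class
# really provides.
from types import SimpleNamespace

import pandas as pd
from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # hypothetical project id
loader = SimpleNamespace(
    lookup_table="my-project.my_dataset.rules_lookup",
    create_rules_dataframe=lambda: pd.DataFrame(
        {"rule_id": [1, 2], "pattern": ["foo", "bar"]}
    ),
)

# Call the method as a plain function, passing the stand-in object as `self`.
job = load_rules_lookup(loader, client)
print(job.output_rows)  # with WRITE_TRUNCATE, this count is the full table size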