Example 1
File: core.py Project: askerry/bqpy
def stream_df_to_remote(con,
                        df,
                        overwrite_method='fail',
                        projectId=None,
                        datasetId=None,
                        tableId=None):
    '''Write a pandas DataFrame to a BigQuery table via streaming inserts
    (tabledata.insertAll); overwrite_method is accepted but unused here.'''
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    dataset_ref = {'datasetId': datasetId, 'projectId': projectId}
    table_ref = {
        'tableId': tableId,
        'datasetId': datasetId,
        'projectId': projectId
    }
    table = {
        "kind": "bigquery#table",
        'tableReference': table_ref,
        'schema': schema
    }
    try:
        con.client._apiclient.tables().insert(body=table,
                                              **dataset_ref).execute()
    except Exception:
        # Table creation fails if the table already exists; in that case
        # stream into the existing table instead of aborting.
        pass
    datarows = []
    for _, row in df.iterrows():
        # Wrap each row as {"json": {column: value, ...}}, the format
        # expected by tabledata.insertAll.
        jsondata = {col: row[col] for col in df.columns}
        datarows.append({"json": jsondata})

    body = {'kind': 'bigquery#tableDataInsertAllRequest', 'rows': datarows}
    response = con.client._apiclient.tabledata().insertAll(
        body=body, **table_ref).execute()
    # insertAll returns 200 even when individual rows fail, so check
    # the per-row error list rather than discarding the response.
    if response.get('insertErrors'):
        raise RuntimeError('insertAll reported errors: %s'
                           % response['insertErrors'])
    return con, bqutil.stringify(table_ref)
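The streaming path hinges on the {"json": {...}} row envelope. The payload construction can be reproduced standalone; this sketch touches neither bqpy nor the network, and the DataFrame contents are made up for illustration:

import pandas as pd

# Hypothetical frame standing in for the caller's data.
df = pd.DataFrame({'user': ['a', 'b'], 'score': [1.5, 2.0]})

# Each row becomes {"json": {column: value, ...}}, matching the
# tableDataInsertAllRequest body assembled in stream_df_to_remote.
rows = [{'json': {col: row[col] for col in df.columns}}
        for _, row in df.iterrows()]
body = {'kind': 'bigquery#tableDataInsertAllRequest', 'rows': rows}
print(body['rows'][0])  # {'json': {'user': 'a', 'score': 1.5}}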
Example 2
File: core.py Project: askerry/bqpy
import os

# bqutil, bucket_to_bq, and delete_from_bucket are bqpy helpers assumed to be
# available at module scope in core.py. The default delete=True replaces the
# original string default 'True', which was always truthy, and bucket is an
# explicit parameter since a module-level function has no self.bucket.
def batch_df_to_remote(con,
                       df,
                       overwrite_method='fail',
                       delete=True,
                       name=None,
                       bucket=None,
                       projectId=None,
                       datasetId=None,
                       tableId=None):
    '''Write a pandas DataFrame to a BigQuery table by staging a CSV in a
    Google Cloud Storage bucket and running a batch load job.'''
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    table_ref = {
        'tableId': tableId,
        'datasetId': datasetId,
        'projectId': projectId
    }
    # Map overwrite_method onto BigQuery write dispositions; the default
    # 'fail' becomes WRITE_EMPTY, which errors if the table already has data.
    if overwrite_method == 'append':
        write_disposition = 'WRITE_APPEND'
    elif overwrite_method == 'overwrite':
        write_disposition = 'WRITE_TRUNCATE'
    else:
        write_disposition = 'WRITE_EMPTY'
    # Dump the frame to a local CSV and stage it in the GCS bucket.
    df.to_csv(tableId + '.csv', index=False)
    filename = os.path.join(os.getcwd(), tableId + '.csv')
    if name is None:
        name = datasetId + tableId
    bqutil.file_to_bucket(con, projectId, bucket, filename, name=name)
    # Run a load job from the staged object and block until it completes.
    jobref = bucket_to_bq(con,
                          table_ref,
                          projectId,
                          bucket,
                          name,
                          schema=schema,
                          write_disposition=write_disposition,
                          wait=True)
    if delete:
        # Remove the staged object once the load has finished.
        delete_from_bucket(con, projectId, bucket, name)
    return con, bqutil.stringify(table_ref)
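For comparison, the same batch semantics (schema inference, write disposition, blocking wait) can be expressed with the official google-cloud-bigquery client, which loads a DataFrame directly without the manual CSV/bucket staging step. This is a sketch, not part of bqpy; the project, dataset, and table names are assumptions for illustration:

import pandas as pd
from google.cloud import bigquery

client = bigquery.Client(project='my-project')  # assumed project id
df = pd.DataFrame({'user': ['a', 'b'], 'score': [1.5, 2.0]})

job_config = bigquery.LoadJobConfig(
    # WRITE_TRUNCATE ~ overwrite_method='overwrite' above;
    # WRITE_APPEND ~ 'append'; WRITE_EMPTY ~ the default 'fail'.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(
    df, 'my-project.my_dataset.scores', job_config=job_config)
job.result()  # block until the load job finishes, like wait=True above

Staging through a bucket, as batch_df_to_remote does, still has its uses for very large frames or when the CSV already exists; the client-library path trades that control for fewer moving parts.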