    def query(self, querystr, fetch=cfg.FETCH_BY_DEFAULT, dest=None,
              fill=True, overwrite_method='fail'):
        '''execute an arbitrary query on the associated table'''
        self.fetched = fetch
        with bqutil.Mask_Printing():
            output, source, exceeds_max = raw_query(
                self.con, querystr, self.last_modified, dest=dest,
                fetch=fetch, overwrite_method=overwrite_method)
        new_bqdf = BQDF(self.con, '%s' % bqutil.stringify(source), fill=fill)
        new_bqdf.local = output
        new_bqdf.fetched = fetch
        if exceeds_max:
            # TODO: figure out why exceeds_max isn't behaving as expected
            # print "Number of rows in remote table exceeds bqdf object's
            # max_rows. Only max_rows have been fetched locally"
            pass
        return new_bqdf
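    # Hedged usage sketch for query(): `con` and the table coordinates are
    # placeholders, and the BQDF construction details are assumed from the
    # surrounding module rather than confirmed here.
    #
    #   bqdf = BQDF(con, '[myproject:mydataset.mytable]')
    #   top = bqdf.query(
    #       'SELECT word, COUNT(*) AS n FROM [myproject:mydataset.mytable] '
    #       'GROUP BY word ORDER BY n DESC LIMIT 10',
    #       fetch=True)
    #   top.local.head()  # fetched rows live on the new BQDF as a DataFrame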
def stream_df_to_remote(con, df, overwrite_method='fail', projectId=None,
                        datasetId=None, tableId=None):
    '''write a pandas dataframe to a bigquery table via the streaming API'''
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    dataset_ref = {'datasetId': datasetId, 'projectId': projectId}
    table_ref = {'tableId': tableId, 'datasetId': datasetId,
                 'projectId': projectId}
    table = {"kind": "bigquery#table",
             'tableReference': table_ref,
             'schema': schema}
    try:
        con.client._apiclient.tables().insert(
            body=table, **dataset_ref).execute()
    except Exception:
        # the table most likely already exists; let insertAll surface any
        # genuine errors below
        pass
    datarows = []
    for _, row in df.iterrows():
        jsondata = {col: row[col] for col in df.columns}
        datarows.append({"json": jsondata})
    body = {'kind': 'bigquery#tableDataInsertAllRequest', 'rows': datarows}
    con.client._apiclient.tabledata().insertAll(
        body=body, **table_ref).execute()
    return con, bqutil.stringify(table_ref)
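# Hedged usage sketch for stream_df_to_remote(): the streaming path suits
# small dataframes, since every row is sent through tabledata().insertAll().
# `con` and the project/dataset/table ids below are placeholders.
#
#   import pandas as pd
#   df = pd.DataFrame({'word': ['a', 'b'], 'n': [1, 2]})
#   con, remote = stream_df_to_remote(
#       con, df, projectId='myproject', datasetId='mydataset',
#       tableId='mytable')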
def batch_df_to_remote(con, df, overwrite_method='fail', delete=True,
                       name=None, bucket=None, projectId=None,
                       datasetId=None, tableId=None):
    '''write a pandas dataframe to a bigquery table via a batch load job
    staged through a google cloud storage bucket'''
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    table_ref = {'tableId': tableId, 'datasetId': datasetId,
                 'projectId': projectId}
    if overwrite_method == 'append':
        write_disposition = 'WRITE_APPEND'
    elif overwrite_method == 'overwrite':
        write_disposition = 'WRITE_TRUNCATE'
    else:
        write_disposition = 'WRITE_EMPTY'
    # dump the dataframe to a local csv, then stage it in the bucket
    df.to_csv(tableId + '.csv', index=False)
    filename = os.path.join(os.getcwd(), tableId + '.csv')
    if name is None:
        name = datasetId + tableId
    bqutil.file_to_bucket(con, projectId, bucket, filename, name=name)
    bucket_to_bq(con, table_ref, projectId, bucket, name, schema=schema,
                 write_disposition=write_disposition, wait=True)
    if delete:
        # clean up the staged csv from the bucket once the load completes
        delete_from_bucket(con, projectId, bucket, name)
    return con, bqutil.stringify(table_ref)
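# Hedged usage sketch for batch_df_to_remote(): the batch path stages a csv
# in a google cloud storage bucket and runs a load job, which scales better
# than streaming for large dataframes. The bucket and ids are placeholders,
# and delete=True removes the staged csv once the load finishes.
#
#   con, remote = batch_df_to_remote(
#       con, big_df, overwrite_method='overwrite',
#       bucket='my-staging-bucket', projectId='myproject',
#       datasetId='mydataset', tableId='mytable')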