Example #1
def _make_bqstorage_client(use_bqstorage_api, credentials, client_options):
    if not use_bqstorage_api:
        return None

    try:
        from google.cloud import bigquery_storage
    except ImportError as err:
        customized_error = ImportError(
            "The default BigQuery Storage API client cannot be used, install "
            "the missing google-cloud-bigquery-storage and pyarrow packages "
            "to use it. Alternatively, use the classic REST API by specifying "
            "the --use_rest_api magic option.")
        six.raise_from(customized_error, err)

    try:
        from google.api_core.gapic_v1 import client_info as gapic_client_info
    except ImportError as err:
        customized_error = ImportError(
            "Install the grpcio package to use the BigQuery Storage API.")
        six.raise_from(customized_error, err)

    return bigquery_storage.BigQueryReadClient(
        credentials=credentials,
        client_info=gapic_client_info.ClientInfo(
            user_agent=IPYTHON_USER_AGENT),
        client_options=client_options,
    )
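For context, a minimal usage sketch (not part of the original module): it assumes application-default credentials and that google-cloud-bigquery-storage and pyarrow are installed; IPYTHON_USER_AGENT and the magic-option plumbing are defined elsewhere in the module this helper comes from.

import google.auth

# Hypothetical invocation using application-default credentials; passing
# use_bqstorage_api=False would simply return None.
credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"])
bqstorage_client = _make_bqstorage_client(True, credentials, None)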
Example #2
def compute_bal_for_gas(start_block_timestamp,
                        end_block_timestamp,
                        gas_whitelist,
                        plot=True,
                        verbose=True):
    sql = ''
    with open('src/bal4gas_V1.sql', 'r') as file:
        sql = (file.read().format(start_block_timestamp, end_block_timestamp,
                                  '\',\''.join(gas_whitelist)))
    if verbose:
        print(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) +
            ' - Querying Bigquery for eligible V1 swaps and reimbursement values ...'
        )
    client = bigquery.Client()
    bqstorageclient = bigquery_storage.BigQueryReadClient()
    reimbursements = (client.query(sql).result().to_dataframe(
        bqstorage_client=bqstorageclient))
    if verbose:
        print(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' - Done!')
    if plot:
        reimbursements.groupby('datetime').mean(
        )['block_median_gas_price'].plot(title='Median gas price')
        plt.show()

    if verbose:
        print(
            f'ETH reimbursements for the week (V1): {sum(reimbursements.eth_reimbursement)}'
        )

    # get BAL:ETH price feed from Coingecko
    bal_eth_coingecko = 'https://api.coingecko.com/api/v3/coins/ethereum/contract/0xba100000625a3754423978a60c9317c58a424e3d/market_chart/range?vs_currency=eth&from={0}&to={1}'.format(
        start_block_timestamp - 7200, end_block_timestamp + 7200)

    baleth_feed = pd.read_json(bal_eth_coingecko)['prices']
    baleth_feed = pd.DataFrame(baleth_feed.tolist(),
                               index=baleth_feed.index,
                               columns=['timestamp', 'price'])
    baleth_feed['datetime'] = pd.to_datetime(baleth_feed['timestamp'] / 1000,
                                             unit='s',
                                             utc=True)
    if plot:
        baleth_feed.plot(x='datetime', y='price', title='BAL:ETH')
        plt.show()

    merge = pd.merge_asof(reimbursements.sort_values(by='datetime'),
                          baleth_feed.sort_values(by='datetime'),
                          on='datetime',
                          direction='nearest')

    merge['bal_reimbursement'] = merge['eth_reimbursement'] / merge['price']
    if verbose:
        print(
            f'BAL reimbursements for the week (V1): {sum(merge.bal_reimbursement)}'
        )
    merge['address'] = merge['address'].apply(Web3.toChecksumAddress)
    return merge
Example #3
    def fetch_BQ(self):
        credentials = service_account.Credentials.from_service_account_file('../API/BQ_api.json')
        bqclient = bigquery.Client(credentials=credentials, project=credentials.project_id)
        bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)

        query_string = f"SELECT Date, Traffic_source, Data_Source_type, Cost, Clicks, Impressions \
                       FROM funnel-248216.marketing_spend.all_funnel_data_view \
                       WHERE Date >= DATE ({self.start_date.year}, {self.start_date.month}, {self.start_date.day}) \
                       AND Date <= DATE ({self.end_date.year}, {self.end_date.month}, {self.end_date.day}) \
                       AND (Campaign_name__TikTok NOT LIKE '%no%' OR Campaign_name__TikTok IS NULL)"

        self.funnel_df = bqclient.query(query_string).result().to_dataframe(bqstorage_client=bqstorageclient)
        print('Read ', len(self.funnel_df), ' datapoints from BigQuery Funnel')
Example #4
def get_bq_storage_client():
    """
    Build a BigQueryStorage Python client.

    Returns
    -------
    bigquery_storage.BigQueryReadClient instance
    """
    credentials, project_id = google.auth.default(
        scopes=SCOPES
    )

    bqstorageclient = bigquery_storage.BigQueryReadClient(
        credentials=credentials
    )

    return bqstorageclient
Example #5
def get_reddit_comments_table():
    """Get data from the BigQuery Reddit comments dataset."""
    # https://www.reddit.com/r/bigquery/comments/3cej2b/17_billion_reddit_comments_loaded_on_bigquery/
    client = bigquery.Client()
    storage_client = bigquery_storage.BigQueryReadClient()
    # 2005 & 2006 have no samples
    table_names = [
        *list(map(str, range(2007, 2015))), *[
            f"{year}_{month:02d}" for year in range(2015, 2020)
            for month in range(1, 13)
        ]
    ]
    for table_name in tqdm(table_names):
        # tqdm.write(f"{table_name}")
        df = client.query(
            QUERY_STRING.format(table_name=table_name)).result().to_dataframe(
                bqstorage_client=storage_client)
        df.to_parquet(DATA_PATH / f"{table_name}.parquet")
Example #6
    def __init__(self):

        try:
            self.bigquery = bigquery
            self.params = get_db_config(section='gcp')
            credentials = service_account.Credentials.from_service_account_file(
                self.params['credentials_file_path'],
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
            self.client = bigquery.Client(
                credentials=credentials,
                project=credentials.project_id,
            )
            self.storage = bigquery_storage.BigQueryReadClient(
                credentials=credentials)

        except Exception as error:
            raise error
Example #7
def get_survey_responses(surveyid, client=None):
  """Get data from survey"""
  google.cloud.bigquery.magics.context.use_bqstorage_api = True
  project_id = os.environ.get('PROJECT_ID')
  table_id = os.environ.get('TABLE_ID')
  if client is None:
    client = bigquery.Client(project=project_id)
  bqstorageclient = bigquery_storage.BigQueryReadClient()
  query = f"""
        SELECT CreatedAt, Segmentation, Response
        FROM `{table_id}`
        WHERE ID = @survey_id
    """
  job_config = bigquery.QueryJobConfig(query_parameters=[
      bigquery.ScalarQueryParameter('survey_id', 'STRING', surveyid),
  ])
  query_job = client.query(query, job_config=job_config)
  df = query_job.result().to_dataframe(bqstorage_client=bqstorageclient)
  return df
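A hedged usage sketch for the function above; it assumes the PROJECT_ID and TABLE_ID environment variables are set, and 'abc123' is a made-up survey id used only for illustration.

# Assumes the environment provides, for example:
#   PROJECT_ID=my-project
#   TABLE_ID=my-project.surveys.responses
df = get_survey_responses('abc123')  # hypothetical survey id
print(df.head())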
Example #8
def main():
    args = parse_args()

    bq_client = bigquery.Client()
    bqs_client = bigquery_storage.BigQueryReadClient()

    TableInfo.client = bq_client

    dataset = get_dataset(args.project_id, args.dataset_id)

    table_refs = get_table_refs(bq_client, bqs_client, dataset)
    tables_info = get_tables_info(table_refs)
    tables_info = filter_latest_tables_info(tables_info)

    for info in tables_info:
        info.create_dir()
        with info.path.open('w') as f:
            print(f'write {info.clear_name}.view.lkml')
            write_look_ml(f, info)
Example #9
def test_constructor_w_client_info():
    from google.cloud import bigquery_storage

    class MyTransport:
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

    transport_class_patcher = mock.patch.object(
        bigquery_storage.BigQueryReadClient,
        "get_transport_class",
        return_value=MyTransport,
    )

    with transport_class_patcher:
        client_under_test = bigquery_storage.BigQueryReadClient(
            client_info=client_info.ClientInfo(
                client_library_version="test-client-version"), )

    transport_client_info = client_under_test._transport.kwargs["client_info"]
    user_agent = transport_client_info.to_user_agent()
    assert "test-client-version" in user_agent
Example #10
def update_blocks(db_engine, config: Config) -> None:
    latest_timestamp = get_latest_timestamp(db_engine, config)
    print(f'Latest Timestamp: {latest_timestamp}')

    print('Connecting to Google Big Query...')
    # This needs the env variable GOOGLE_APPLICATION_CREDENTIALS filled
    # with the path to your credentials file
    credentials, your_project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"])
    bqclient = bigquery.Client(
        credentials=credentials,
        project=your_project_id,
    )
    bqstorageclient = bigquery_storage.BigQueryReadClient(
        credentials=credentials)
    print('Successful')

    query_string = f"""
    SELECT
      timestamp,
      number
    FROM 
      `bigquery-public-data.crypto_ethereum.blocks` 
    WHERE 
      timestamp > '{latest_timestamp}'
    ;
    """

    print("Querying Block Information")
    query_result = (bqclient.query(query_string).result().to_dataframe(
        bqstorage_client=bqstorageclient))
    print(f"Succesful, {len(query_result)} new Blocks found!")

    print("Writing Result to Database")
    query_result.to_sql(config.BLOCKS_TABLE,
                        db_engine,
                        if_exists='append',
                        index=False)
    print("Update completed successfully!")
Example #11
def load_data_from_bq(bq_uri: str) -> pd.DataFrame:
    '''
    Loads data from a BigQuery (BQ) table into a DataFrame.

            Parameters:
                    bq_uri (str): BQ table uri, e.g. bq://example_project.example_dataset.example_table
            Returns:
                    pandas.DataFrame: a DataFrame with the data loaded from BigQuery
    '''
    if not bq_uri.startswith('bq://'):
        raise Exception(
            "uri is not a BQ uri. It should be bq://project_id.dataset.table")
    logging.info("reading bq data: {}".format(bq_uri))
    project, dataset, table = bq_uri.split(".")
    bqclient = bigquery.Client(project=project[5:])
    bqstorageclient = bigquery_storage.BigQueryReadClient()
    query_string = """
    SELECT * from {ds}.{tbl}
    """.format(ds=dataset, tbl=table)

    return (bqclient.query(query_string).result().to_dataframe(
        bqstorage_client=bqstorageclient))
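A usage sketch with a made-up table URI; note that the bq:// prefix is required and is stripped from the project component inside the function.

# Hypothetical table reference, for illustration only.
df = load_data_from_bq("bq://my-project.my_dataset.my_table")
print(df.shape)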
Example #12
def clients():
    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_create_client]
    import google.auth
    from google.cloud import bigquery
    from google.cloud import bigquery_storage

    # Explicitly create a credentials object. This allows you to use the same
    # credentials for both the BigQuery and BigQuery Storage clients, avoiding
    # unnecessary API calls to fetch duplicate authentication tokens.
    credentials, your_project_id = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"])

    # Make clients.
    bqclient = bigquery.Client(
        credentials=credentials,
        project=your_project_id,
    )
    bqstorageclient = bigquery_storage.BigQueryReadClient(
        credentials=credentials)
    # [END bigquerystorage_pandas_tutorial_create_client]
    # [END bigquerystorage_pandas_tutorial_all]
    return bqclient, bqstorageclient
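The tutorial's download step is outside the snippet above; as a sketch, the two clients are typically combined like this to pull a query result into pandas over the Storage API (the public usa_names table is used only as an illustration):

bqclient, bqstorageclient = clients()

query_string = """
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_current`
GROUP BY name
ORDER BY total DESC
LIMIT 10
"""
# Download the query result with the Storage API client for faster transfer.
dataframe = (bqclient.query(query_string)
             .result()
             .to_dataframe(bqstorage_client=bqstorageclient))
print(dataframe.head())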
Example #13
def query_gbq(_network, _week_number, _pool_list, _excluded_lps_list=[]):
    LOGGER.debug('query_gbq')

    _excluded_lps_list = list(set(_excluded_lps_list + BASE_LP_EXCLUSION_LIST))

    with open(SQL_FILE_PATH, 'r') as file:
        sql = file.read()

    _days_in_week = '3'

    sql = sql.format(
        week_number=_week_number,
        pool_addresses="','".join(_pool_list),
        blocks_table=TABLES_CONFIGS[_network]['blocks'],
        lm_transfers_table=TABLES_CONFIGS[_network]['lm_transfers'],
        lm_state_table=TABLES_CONFIGS[_network]['lm_state'],
        excluded_lps="','".join(_excluded_lps_list),
        days_in_week=_days_in_week)
    client = bigquery.Client()
    bqstorageclient = bigquery_storage.BigQueryReadClient()
    df = (client.query(sql).result().to_dataframe(
        bqstorage_client=bqstorageclient))
    df = df.groupby(['pool_address', 'lp_address', 'block_timestamp']).sum()
    return df
Example #14
def client_under_test(mock_transport):
    from google.cloud import bigquery_storage

    return bigquery_storage.BigQueryReadClient(transport=mock_transport)
Example #15
def bqstorage_client(bigquery_client):
    from google.cloud import bigquery_storage

    return bigquery_storage.BigQueryReadClient(
        credentials=bigquery_client._credentials)
Example #16
import os

import google.auth
from google.cloud import bigquery, bigquery_storage

# Point GOOGLE_APPLICATION_CREDENTIALS at the service account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r'D:\medium\example-apis\key\key_bqsa.json'

# Create credentials object for both the BigQuery and BigQuery Storage clients
credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"])

# Init clients.
bqclient = bigquery.Client(
    credentials=credentials,
    project=project_id,
)
bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)

# Write a query.
query_string = """
SELECT title, SUM(views) AS views 
FROM `bigquery-public-data.wikipedia.pageviews_2020` 
WHERE DATE(datehour) = '2020-12-25' 
AND WIKI = 'es'
AND TITLE NOT IN 
('Wikipedia:Portada', 'Especial:Buscar')
GROUP BY title
ORDER BY views DESC LIMIT 20
"""

#Get Dataframe
dataframe = (bqclient.query(query_string).result().to_dataframe(
    bqstorage_client=bqstorageclient))
Example #17
def download(limit: int = 1000,
             lead=0,
             within: Polygon = None,
             min_samples: int = 4,
             mmsi: list = None,
             min_knots: int = None,
             project_id: str = "master-thesis-305112",
             credentials=None,
             shuffle: bool = False,
             crs="epsg:3857") -> gpd.GeoDataFrame:
    """Creates a query job in Bigquery and downloades the result into a GeoPandas Dataframe
    

    Keyword Arguments:
    limit -- number of results to include. None returns all results.
    within -- coordinate filter, only points within the this polygon is included. None returns all results.
    mmsi -- list-like containing mmsi values to include in the result. None returns all.
    credentials -- google cloud credentials object. None use the google.auth.default.
    project_id -- google cloud project id to use for billing.
    """

    if credentials is None:
        credentials, _ = google.auth.default(
            scopes=["https://www.googleapis.com/auth/cloud-platform"], )

    # Make clients.
    bq = bigquery.Client(
        credentials=credentials,
        project=project_id,
    )
    bqstorage = bigquery_storage.BigQueryReadClient(credentials=credentials)
    lead += 1

    leads = [f"LEAD(sample, {l}) OVER w AS sample_{l}" for l in range(lead)]
    query = f"""
        WITH with_lead AS (
            SELECT mmsi, {", ".join(leads)} FROM `master-thesis-305112.ais.samples` WINDOW w AS (PARTITION BY mmsi ORDER BY sample.timestamp) 
        ) 
    """
    samples = ", ".join([f"sample_{l}.*" for l in range(lead)])
    # Select samples
    query += f"SELECT mmsi, {samples} FROM with_lead "

    # Filter out bad samples
    query += "WHERE TRUE"
    for l in range(lead):
        query += f" AND sample_{l}.timestamp IS NOT NULL "
        if min_knots is not None:
            query += f" AND sample_{l}.sog >= {min_knots} "
        if 0 < l < min_samples:
            query += f"AND TIMESTAMP_DIFF(sample_{l}.timestamp, sample_{l-1}.timestamp, MINUTE) < 15 "

    within = f"AND ST_WITHIN(sample_0.position, ST_GEOGFROMTEXT('{str(within)}'))" if within is not None else ""
    mmsi = "'" + "','".join(mmsi) + "'" if mmsi is not None else None
    mmsi = f"AND CAST(mmsi AS STRING) IN ({mmsi})" if mmsi is not None else ""

    # Additional filters
    query += f"""
                {within}
                {mmsi}
            """
    # Window
    query += "WINDOW w AS (PARTITION BY mmsi ORDER BY sample.timestamp) "
    if shuffle:
        query += "ORDER BY RAND()"
    else:
        query += "ORDER BY mmsi, sample_0.timestamp "
    if limit is not None: query += "LIMIT " + str(limit)
    df = bq.query(query).result().to_dataframe(bqstorage_client=bqstorage)

    # Convert timestamps and positions to correct dtypes
    df.position = gpd.GeoSeries.from_wkt(df.position, crs="wgs84").to_crs(crs)
    df.timestamp = pd.to_datetime(df.timestamp)
    for l in range(1, lead):
        df[f"position_{l}"] = gpd.GeoSeries.from_wkt(df[f"position_{l}"],
                                                     crs="wgs84").to_crs(crs)
        df[f"timestamp_{l}"] = pd.to_datetime(df[f"timestamp_{l}"])
    df = gpd.GeoDataFrame(df, geometry="position")
    return df
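A hedged example call for the function above; the bounding polygon and parameter values are placeholders, and billing falls back to the project id hard-coded in the signature.

from shapely.geometry import Polygon

# Purely illustrative bounding box in lon/lat (WGS84) order.
area = Polygon([(10.0, 59.0), (11.0, 59.0), (11.0, 60.0), (10.0, 60.0)])

trajectories = download(limit=500, lead=2, within=area, min_knots=2)
print(trajectories.head())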
Example #18
import os

import numpy as np
import pandas as pd
from google.cloud import bigquery, bigquery_storage
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

_PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT')
_DATASET_ID_EQUITY = 'daily_market_data_equity'
_TABLE_ID_DAILY = 'daily'
_FULL_TABLE_ID = '{p}.{d}.{t}'.format(p=_PROJECT_ID,
                                      d=_DATASET_ID_EQUITY,
                                      t=_TABLE_ID_DAILY)
_WRITE_QUEUE_SIZE_THRESHOLD = 4000
_POLYGON_API_KEY = os.environ['API_KEY_POLYGON']
_FINNHUB_API_KEY = os.environ['API_KEY_FINNHUB']

_bigquery_client = bigquery.Client(project=os.getenv('GOOGLE_CLOUD_PROJECT'))
_bqstorage_client = bigquery_storage.BigQueryReadClient()

from polygon import RESTClient
_polygon_client = RESTClient(_POLYGON_API_KEY)

_QUERY = """
    SELECT *
    FROM `trading-290017.daily_market_data_equity.daily_snp500` 
    WHERE TRUE
    AND date >= DATE_SUB(CURRENT_DATE(), INTERVAL 100 DAY)
    ORDER BY date ASC, symbol
"""

_QUERY_SIMFIN = """
    SELECT date, ticker as symbol, close
    FROM `trading-290017.daily_market_data_equity.daily_simfin`
"""
Example #19
def client(credentials):
    return bigquery_storage.BigQueryReadClient(credentials=credentials)