Example #1
 def get_connection(self):
     config = conf('config')
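     # Relational sources (PostgreSQL, Redshift, MySQL) share one set of connection settings from the config.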
     if self.data_source in ['postgresql', 'awsredshift', 'mysql']:
         server, db, user, pw, port = str(config['db_connection']['server']), str(config['db_connection']['db']), \
                                      str(config['db_connection']['user']), str(config['db_connection']['password']),\
                                      int(config['db_connection']['port'])
     if self.data_source == 'mysql':
         from mysql import connector
         self.conn = connector.connect(host=server,
                                       database=db,
                                       user=user,
                                       password=pw)
     if self.data_source in ['postgresql', 'awsredshift']:
         import psycopg2
         self.conn = psycopg2.connect(user=user,
                                      password=pw,
                                      host=server,
                                      port=port,
                                      database=db)
     if self.data_source == 'googlebigquery':
         from google.cloud.bigquery.client import Client
         os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = join(
             conf('data_main_path'), "", config['db_connection']['db'])
         self.conn = Client()
     print("db connection is done!")
Example #2
def _diff_tables(work_item: DbDiffWorkItem) -> None:
    bigquery_client = Client(_http=authorized_session())

    table_name = work_item.table_name
    primary_key = f"{work_item.table_name}_id"
    except_clause = ",".join(work_item.except_fields)
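    # Full outer join the Argo and Dagster copies of the table on the primary key, keeping rows whose JSON serializations differ.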
    query = f"""
            WITH argo AS (
              SELECT * EXCEPT ({except_clause}) FROM `broad-jade-dev-data.datarepo_{work_item.argo_dataset_name}.{table_name}`
            ),
            dagster AS (
             SELECT * EXCEPT ({except_clause}) FROM `broad-jade-dev-data.datarepo_{work_item.dagster_dataset_name}.{table_name}`
             )
            SELECT * FROM argo FULL OUTER JOIN dagster
            ON argo.{primary_key} = dagster.{primary_key}
            WHERE to_json_string(argo) != to_json_string(dagster)
            """

    result = bigquery_client.query(query).result()
    rows = list(result)
    if rows:
        print(f"❌ diff in {table_name}")
        for row in rows:
            print(row)
    else:
        print(f"✅ no diff in {table_name}")
Example #3
def _make_client(project="test-project", connection=None):
    from google.cloud.bigquery.client import Client

    if connection is None:
        connection = _make_connection()

    client = Client(project=project,
                    credentials=_make_credentials(),
                    _http=object())
    client._connection = connection
    return client
Example #4
    def _configure_gcp_client(self, query_job_config):
        """Configure GCP client."""
        logger.info('Storing BigQuery Auth Credentials')
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SETTINGS.bigquery_credentials_filepath
        logger.info('Creating new query job configuration')
        if query_job_config:
            self.query_job_config = query_job_config
        else:
            self.query_job_config = QueryJobConfig(use_legacy_sql=False,
                                                   use_query_cache=True)

        self.client = Client(default_query_job_config=self.query_job_config)
Example #5
def get_google_bigquery_api_client():
    """
    Return the Google BigQuery API client.

    Returns:
        google_bigquery_client: google.cloud.bigquery.client.Client instance.
    """
    service_account_credentials = getattr(
        settings, 'OPR_GOOGLE_SERVICE_ACCOUNT_CREDENTIALS', {})
    google_project_id = getattr(settings, 'OPR_GOOGLE_CLOUD_PROJECT_ID', '')

    if not (service_account_credentials and google_project_id):
        logger.error(
            'Google Service Account credentials or project ID were not provided.'
        )
        raise GoogleBigQueryInformationError(
            'Google Service Account credentials or project ID were not provided.'
        )

    credentials = service_account.Credentials.from_service_account_info(
        service_account_credentials,
        scopes=BIGQUERY_API_SCOPES,
    )

    return Client(
        project=google_project_id,
        credentials=credentials,
    )
Example #6
def bqtk_client() -> Client:
    """BigQuery client as a fixture

    Returns:
        Client: BigQuery client
    """
    return Client(location="EU")
Example #7
class Bigquery():
    """Base big query class."""

    def __init__(self, query_job_config=None):
        """Initialize big query object."""
        self.client = None
        self.job_query_obj = None

        self._configure_gcp_client(query_job_config)

    def _configure_gcp_client(self, query_job_config):
        """Configure GCP client."""
        logger.info('Storing BigQuery Auth Credentials')
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SETTINGS.bigquery_credentials_filepath
        logger.info('Creating new query job configuration')
        if query_job_config:
            self.query_job_config = query_job_config
        else:
            self.query_job_config = QueryJobConfig(use_legacy_sql=False,
                                                   use_query_cache=True)

        self.client = Client(default_query_job_config=self.query_job_config)

    def run(self, query):
        """Run the bigquery synchronously."""
        if self.client and query:
            self.job_query_obj = self.client.query(query, job_config=self.query_job_config)
            return self.job_query_obj.job_id
        else:
            raise Exception('Client or query missing')

    def get_result(self):
        """Get the result of the job."""
        assert self.job_query_obj is not None, 'Job is not initialized'
        yield from self.job_query_obj.result()
Example #8
def get_gcp_daily_total_cost(client: Client) -> dict:
    """Gets cost data for the past day for all projects.

    Returns:
       Cost data, including currency and date (ISO 8601).

    """
    query = (f"""
        SELECT
          SUM(cost) as cost_sum,
          currency as currency,
          DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY) as date,
        FROM {SETTINGS.PROJECT_ID}.{SETTINGS.DATA_SET}.{SETTINGS.TABLE_NAME}
        WHERE
          CAST(DATE(
            _PARTITIONTIME) AS DATE) = DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
        GROUP BY currency
        LIMIT 1;
        """)
    query_job = client.query(query)
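    # At most one row comes back (grouped by currency, LIMIT 1); fall back to zeros when yesterday's partition is empty.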
    rows_iter = query_job.result(max_results=1)
    rows = list(rows_iter)

    if rows:
        return {
            'cost_sum': round_cost_value(rows[0].cost_sum),
            'currency': rows[0].currency,
            'date': rows[0].date.isoformat()
        }
    one_month_ago = date.today().replace(day=1) - timedelta(days=1)
    return {  # Default, when there's no data.
        'cost_sum': 0.0,
        'currency': '',
        'date': f'{one_month_ago:%Y-%m}'
    }
Example #9
def get_gcp_monthly_total_cost(client: Client) -> dict:
    """Gets cost data for the past month for all projects.

    Returns:
       Cost data, including currency and (month) date (ISO 8601).

    """
    query = (f"""
        SELECT
          SUM(cost) as cost_sum,
          currency as currency,
          TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), MONTH, 'UTC') as month
        FROM {SETTINGS.PROJECT_ID}.{SETTINGS.DATA_SET}.{SETTINGS.TABLE_NAME}
        WHERE
          _PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(
            CURRENT_TIMESTAMP(), MONTH, 'UTC')
          AND TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY, 'UTC')
        GROUP BY currency
        LIMIT 1;
        """)
    query_job = client.query(query)
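    # Takes the single aggregated row; unlike the daily query, there is no fallback when the month has no data.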
    rows_iter = query_job.result(max_results=1)
    rows = list(rows_iter)
    return {
        'cost_sum': round_cost_value(rows[0].cost_sum),
        'currency': rows[0].currency,
        'date': rows[0].month.isoformat()
    }
Example #10
def main():
    client = Client()
    for example in _find_examples():
        to_delete = []
        print('%-30s: %s' % (example.__name__, example.__doc__))
        try:
            example(client, to_delete)
        except AssertionError as e:
            print('   FAIL: %s' % (e, ))
        except Exception as e:  # pylint: disable=broad-except
            print('  ERROR: %r' % (e, ))
        for item in to_delete:
            item.delete()
Example #11
def bigquery_client(
    db: typing.Union[str, mara_db.dbs.BigQueryDB]
) -> 'google.cloud.bigquery.client.Client':
    """Get a BigQuery client for a bq database alias"""
    from google.cloud.bigquery.client import Client

    if isinstance(db, str):
        db = mara_db.dbs.db(db)

    credentials = bigquery_credentials(db)

    return Client(project=credentials.project_id,
                  credentials=credentials,
                  location=db.location)
Example #12
def text_analytic():
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = (r"C:\Users\MAIH004\My Project-4f1de5feb39a.json")
    bq_client = Client()
    #result = GetSentimentScore()
    text_list = SearchResult.query.all()
    a = []
    score_sum = 0
    density = [0] * 10  # score histogram: ten buckets of 10% each
    print(density)
    for i in text_list:
        record = {}
        score_sum += i.score
        scaled = i.score * 100
        # Tally which 10%-wide bucket the score falls into.
        if scaled <= 10:
            density[0] += 1
        elif scaled <= 20:
            density[1] += 1
        elif scaled <= 30:
            density[2] += 1
        elif scaled <= 40:
            density[3] += 1
        elif scaled <= 50:
            density[4] += 1
        elif scaled <= 60:
            density[5] += 1
        elif scaled <= 70:
            density[6] += 1
        elif scaled <= 80:
            density[7] += 1
        elif scaled <= 90:
            density[8] += 1
        elif scaled <= 100:
            density[9] += 1
        record['id'] = i.id
        record['source'] = i.sourceName
        record['title'] = i.title
        record['score'] = i.score
        record['content'] = i.content
        record['url'] = i.url
        record['description'] = i.description
        a.append(record)
    print(density)
    averageScore = round(float(score_sum / 20), 2)
    JSONResult = {}
    JSONResult['average'] = averageScore
    JSONResult['list'] = a
    JSONResult['chartData'] = density
    return json.dumps(JSONResult)
Example #13
def get_cost_filter_project_daily_interval(client: Client, project_id: str,
                                           days_ago: int) -> dict:
    """Gets cost data for a specified number of days ago for a specified project.

    Args:
        project_id: The project ID of which to retrieve cost data
        days_ago: The number of days ago from today to retrieve cost data

    Returns:
       Cost data, including currency and date.

    """
    query = (f"""
        SELECT
          SUM(cost) as cost,
          currency as currency,
          TIMESTAMP_TRUNC(TIMESTAMP_SUB(
            CURRENT_TIMESTAMP(), INTERVAL {days_ago} DAY),DAY
          ) as date
        FROM {SETTINGS.PROJECT_ID}.{SETTINGS.DATA_SET}.{SETTINGS.TABLE_NAME}
        WHERE
          _PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(TIMESTAMP_SUB(
            CURRENT_TIMESTAMP(), INTERVAL {days_ago} DAY),DAY
          )
          AND TIMESTAMP_TRUNC(TIMESTAMP_SUB(
            CURRENT_TIMESTAMP(), INTERVAL {days_ago} DAY),DAY
          )
          AND project.id='{project_id}'
        GROUP BY currency
        LIMIT 1;
        """)
    query_job = client.query(query)
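    # At most one row (grouped by currency, LIMIT 1); fall back to a zero-cost record when no data exists for that day.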
    rows_iter = query_job.result(max_results=1)
    rows = list(rows_iter)

    if rows:
        return {
            'cost': round_cost_value(rows[0].cost),
            'currency': rows[0].currency,
            'date': rows[0].date.isoformat()
        }
    return {  # Default, when there's no data.
        'cost': 0.0,
        'currency': '',
        'date': (date.today() - timedelta(days=days_ago)).isoformat()
    }
Example #14
def get_project_ids_with_monthly_cost(client: Client) -> list:
    """Gets a list of all project IDs within the billing data from BigQuery.
    Does not include projects for which cost was 0 in the past month.

    Returns:
        Project ID(s)

    """
    query = (f"""
        SELECT DISTINCT project.id AS pid
        FROM {SETTINGS.PROJECT_ID}.{SETTINGS.DATA_SET}.{SETTINGS.TABLE_NAME}
        WHERE
          _PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(
            CURRENT_TIMESTAMP(), MONTH, 'UTC')
          AND TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY, 'UTC')
          AND project.id IS NOT NULL
          AND cost > 0;
        """)
    query_job = client.query(query)
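    # Collect the distinct, non-null project IDs from the result rows.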
    return [row.pid for row in query_job.result() if row.pid is not None]
Example #15
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    from google.cloud import storage
    from google.cloud import bigquery
    from google.cloud.bigquery.client import Client
    creds = '/home/pi/developemnt/vive-gpio/googlecreds.json'
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds
    bq_client = Client()

    #client = bigquery.Client.from_service_account_json(os.environ['HOME'] +
    #"/.google/googlecreds.json"
    print("Connecting to Google Cloud Storage")
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    print("Uploading file")
    blob.upload_from_filename(source_file_name)
    blob.make_public()

    print('File {} uploaded to {}.'.format(source_file_name,
                                           destination_blob_name))
Example #16
def get_gcp_project_daily_top_services(
        client: Client,
        project_id: str,
        number: int = SETTINGS.NUMBER_OF_TOP_SERVICES_TO_INVESTIGATE) -> list:
    """Gets data on daily top `number` of the highest costing services on the
    given `project_id`.

    Returns:
       Data on top services and cost

    """
    query = (f"""
        SELECT
          SUM(cost) as cost,
          service.description as service_desc,
          currency as currency,
        FROM {SETTINGS.PROJECT_ID}.{SETTINGS.DATA_SET}.{SETTINGS.TABLE_NAME}
        WHERE
          _PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(
            TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR),DAY)
          AND  TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY)
          AND project.id='{project_id}'
        GROUP BY service_desc, currency
        ORDER BY(cost) DESC
        LIMIT {number};
        """)
    query_job = client.query(query)
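    # Build a list of the top services with their rounded cost and currency.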
    rows_iter = query_job.result()
    top_services = []
    for row in rows_iter:
        top_services.append({
            'service_name': row['service_desc'],
            'cost': round_cost_value(row['cost']),
            'currency': row['currency']
        })
    return top_services
Example #17
from __future__ import division

import re
import sys
import os
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import pyaudio
from six.moves import queue
from google.cloud.bigquery.client import Client
import time

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'../auth-goog.json'
bq_client = Client()
# Audio recording parameters
RATE = 24000
CHUNK = int(RATE / 10)  # 100ms


class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding the audio chunks."""
    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
Example #18
class GetData:
    def __init__(self,
                 data_source=None,
                 date=None,
                 data_query_path=None,
                 time_indicator=None,
                 feature=None,
                 test=None):
        self.data_source = data_source if data_source is not None else 'csv'
        self.data_query_path = data_query_path
        self.time_indicator = time_indicator
        self.feature = feature
        self.conn = None
        self.data = pd.DataFrame()
        self.nrows = test
        self.date = date
        self.query = data_query_path

    def get_connection(self):
        config = conf('config')
        if self.data_source in ['postgresql', 'awsredshift', 'mysql']:
            server, db, user, pw, port = str(config['db_connection']['server']), str(config['db_connection']['db']), \
                                         str(config['db_connection']['user']), str(config['db_connection']['password']),\
                                         int(config['db_connection']['port'])
        if self.data_source == 'mysql':
            from mysql import connector
            self.conn = connector.connect(host=server,
                                          database=db,
                                          user=user,
                                          password=pw)
        if self.data_source in ['postgresql', 'awsredshift']:
            import psycopg2
            self.conn = psycopg2.connect(user=user,
                                         password=pw,
                                         host=server,
                                         port=port,
                                         database=db)
        if self.data_source == 'googlebigquery':
            from google.cloud.bigquery.client import Client
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = join(
                conf('data_main_path'), "", config['db_connection']['db'])
            self.conn = Client()
        print("db connection is done!")

    def convert_date(self):
        try:
            self.data[self.time_indicator] = self.data[
                self.time_indicator].apply(lambda x: parse(str(x)))
        except Exception as e:
            print(e)

    def convert_feature(self):
        try:
            self.data[self.feature] = self.data[self.feature].apply(
                lambda x: convert_feature(x))
        except Exception as e:
            print(e)

    def check_data_with_filtering(self):
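        # File sources get a filtering callable applied after load; SQL sources get the query wrapped in a date predicate.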
        if self.data_source in ('json', 'yaml', 'csv'):
            self.query = lambda x: x.query(self.time_indicator + " > '" + str(
                self.date) + "'") if self.date is not None else x
        if self.data_source in [
                'mysql', 'postgresql', 'awsredshift', 'googlebigquery'
        ]:
            if self.date:
                self.query = "SELECT * FROM (" + self.data_query_path + ") AS T WHERE T." + self.time_indicator + " >= '" + str(
                    self.date) + "'   "

    def query_data_source(self):
        self.check_data_with_filtering()

        # import data via pandas
        if self.data_source in ['mysql', 'postgresql', 'awsredshift']:
            self.get_connection()

            self.data = pd.read_sql(
                self.query + " LIMIT " +
                str(self.nrows) if self.nrows else self.query, self.conn)

        # import data via google
        if self.data_source == 'googlebigquery':
            self.get_connection()
            self.data = self.conn.query(
                self.query + " LIMIT " +
                str(self.nrows) if self.nrows else self.query).to_dataframe()

        # import via pandas
        if self.data_source == 'csv':
            try:
                for sep in [',', ';', ':']:

                    self.data = pd.read_csv(filepath_or_buffer=join(
                        conf('data_main_path'), self.data_query_path),
                                            error_bad_lines=False,
                                            encoding="ISO-8859-1",
                                            sep=sep,
                                            nrows=self.nrows)
                    if len(self.data.columns) > 1:
                        break
            except Exception as e:
                print(e)

        if self.data_source == 'json':
            self.data = read_write_to_json(conf('directory'),
                                           self.data_query_path,
                                           None,
                                           is_writing=False)

        if self.data_source == 'yaml':
            self.data = read_yaml(conf('data_main_path'), self.data_query_path)

        if self.data_source in ('json', 'yaml', 'csv'):
            self.data = self.query(self.data)

    def data_execute(self):
        self.query_data_source()
        self.convert_date()
        self.convert_feature()
Example #19
import os
from google.cloud.bigquery.client import Client
import google.auth
from google.cloud.bigquery.schema import SchemaField
from google.cloud import bigquery
import datetime, time
import json
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'connect_bigquery.json'
credentials, project = google.auth.default()
credentials = credentials.with_scopes(
    ['https://www.googleapis.com/auth/bigquery'])
client = Client(credentials=credentials)
dataset = client.dataset('ahamove_archive')
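# Completed orders for 2019-03-01, joined to the supplier table to derive each supplier's experience in days.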
sql = """
select 
accept_time,
board_time,
accept_distance as distance,
json_extract_scalar(_extra_props,'$.accept_lat') as accept_lat,
json_extract_scalar(_extra_props,'$.accept_lng')  as accept_lng,
json_extract_scalar(_extra_props,'$.pickup_lat')  as pickup_lat,
json_extract_scalar(_extra_props,'$.pickup_lng')  as pickup_lng,
date_diff (date (o.order_time),date(s.first_complete_time),day) as experience
from ahamove_archive.order_archive o left join  ahamove_archive.supplier s on o.supplier_id = s.id 
where order_time >= '2019-03-01'
and order_time<='2019-03-02'
and o.status = 'COMPLETED'
Example #20
from google.cloud.bigquery.client import Client

from pointz import settings

client = Client.from_service_account_json(settings.BIGQUERY_SECRET_JSON)

dataset_pointz = client.dataset('pointz')
Example #21
import os
import csv

from google.cloud import bigquery
from google.cloud.bigquery.client import Client

###########  Libraries ###############

# import common functions
from configuration_functions import *

####################################################################

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = get_google_credentials()
client = Client()

hive_conn = hive_connect()


def get_name_database_bigquery():
    return "hive"


def get_name_table_bigquery():
    return "email_hadoop"


def get_schema_email_bigquery():
    return [
        bigquery.SchemaField('id_email', 'STRING', mode='required'),
Example #22
# #print(start_object)


# # current date and time
# now = datetime.now()
# timestamp = int(datetime.timestamp(now))
# with open("time.txt", "w") as f:
#     f.write(str(timestamp))
# c = datetime.fromtimestamp(timestamp)
# current_object = c.strftime('%Y-%m-%d %H:%M:%S')
# #print(c)
# print(current_object)


os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'api_key.json'
bq_client = Client()
query = "select full_content,url, publishedAt from `jadgs-262219.newsapi.news_articles`"
# query = "select url, full_content from `jadgs-262219.newsapi.news_articles` where publishedAt >= '" + start_object + "' and publishedAt < '" + current_object + "'"
query_job = bq_client.query(query)
results = query_job.result()

line_counter = 0
embedding_counter = 0

for row in results:
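    # Process only articles with full content: parse the text and collect coreference clusters.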
    if row[0] is not None:
        text = row[0]
        url = row[1]
        publishedAt = datetime.timestamp(row[2])
        doc = nlp(text)
        clusters = doc._.coref_clusters
Example #23
from gensim.sklearn_api import hdp
from gensim.test.utils import datapath
import os
from google.cloud.bigquery.client import Client
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel
import re
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from gensim.models.wrappers import LdaMallet
import json
from collections import OrderedDict

######################## Fetch Data #########################
os.environ[
    'GOOGLE_APPLICATION_CREDENTIALS'] = 'School of CIS Publication-c642ecab1ca7.json'
bq_client = Client()

query = (
    "select eid, title, abstract, area from cis_publications.pubs_metadata_by_scopus"
)
pubs_table = bq_client.query(
    query,
    # Location must match that of the dataset(s) referenced in the query.
    location="US",
)  # API request - starts the query

################################# Data Input ######################################
# with open("20191021-pubs.json","r",encoding='utf8') as file:
#     pubs_table = json.load(file)
# Get needed data columns from table into dict:
pubs_data = OrderedDict()
Example #24
from google.cloud.bigquery.client import Client
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'projectig.json'
bq_client = Client()
query = """SELECT DISTINCT *  FROM `projectig-247222.projectig.historical`"""
df = bq_client.query(query).to_dataframe()
Example #25
             features_speechiness= features[0]['speechiness']
             features_tempo= features[0]['tempo']
             features_time_signature = features[0]['time_signature']
             features_valence = features[0]['valence']
             # Row to insert into the table
             rows_to_insert = [(artista,artist_followers,str(artist_genres),artist_id,artist_popularity,
                                album_type,album_genres,album_id,album_label,album_name,album_popularity,
                                album_release_date,album_realease_date_precision,track_name,str(track_avalible_markets),
                                str(track_explicit),track_id,track_popularity,track_number,features_acousticness,features_analysis_url,
                                features_danceability,features_duration_ms,features_energy,features_instrumentalness,features_key,features_liveness,
                                features_loudness,features_mode,features_speechiness,features_tempo,features_time_signature,features_valence)]
             # Authentication
             os.environ[
                 'GOOGLE_APPLICATION_CREDENTIALS'] = '../datos/credenciales/MusicProjectTest-042a5c317e41.json'
             # Connect to the BigQuery API
             big_query_client = Client()
             # Select the dataset
             dataset_ref = big_query_client.dataset('musicData')
             # Select the table
             table_ref = dataset_ref.table('datos_spotify_{}'.format(letra_fichero))
             #table_ref = dataset_ref.table('Test')  # for testing inserts
             table = big_query_client.get_table(table_ref)
             # Insert the rows into BigQuery
             errors = big_query_client.insert_rows(table, rows_to_insert)  # API request
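             # insert_rows returns a list of per-row insert errors; an empty list means every row was written.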
             for error in errors:
                 logging.error('Big Query: ' + error)
         except Exception as e:
             logging.error(e)
             logging.error('-- Artist -> %s, album -> %s and track -> %s' % (artista, album_id, track_id))
 except Exception as e:
     logging.error(e)
Example #26
def tdr_bigquery_client():
    return Client()
Example #27
def twitter(keyword):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = (r"C:\Users\MAIH004\My Project-4f1de5feb39a.json")
    bq_client = Client()
    #search_result = SearchResult.query.all()
    printResearch(keyword)
    return json.dumps('abc')
Example #28
def run():
    """Executes the analysis and posts the results to Slack."""
    client = Client()
    SETTINGS.load_from_environment()
    slack_notify(client)
Example #29
def query_bigquery(query_file_location: str, bq_client: Client) -> List[Row]:
    query = read_sql_query(query_file_location)
    query_job = bq_client.query(query)

    return retrieve_query_results(query_job)
Example #30
#
# To update data in an already existing BQ table, use the code below.
#

import os
from datetime import timedelta
from datetime import date
from google.cloud import bigquery

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/.../Credentials.json"

from google.cloud.bigquery.client import Client

client = Client()

sql = """
    DELETE FROM `dataset.table_id`
    WHERE date >= '""" + str(start_date) + """'
"""

# Start the query, passing in the extra configuration.
query_job = client.query(sql)  # Make an API request.
query_job.result()  # Wait for the job to complete.

table_id = "project.dataset.table_id"

job_config = bigquery.LoadJobConfig(
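    # Schema of the target table for the load job that writes the updated rows.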
    schema=[
        bigquery.SchemaField("name", "STRING"),