Example #1
def live_df_to_gbq():
    traverse_live_df = live_json_to_df()

    # TODO: uncomment the lines below and define where to store your table.

    # project_id = ""
    # dataset_id = ""
    # table_id = ""
    dataset_table_id = dataset_id + "." + table_id

    if not doesTableExist(project_id, dataset_id, table_id):
        try:
            pandas_gbq.to_gbq(traverse_live_df, dataset_table_id, project_id=project_id, if_exists='fail')
        except Exception:
            pass
    else:
        old_traverse_live_df = pandas_gbq.read_gbq("SELECT * from {}".format(dataset_table_id), project_id=project_id)

        # Drop rows whose traverse has not been updated since the last upload
        for index, row in traverse_live_df.iterrows():
            traverse_live_last_update = row["traverse_end_time"]
            traverse_name = row["traverse_name"]
            old_traverse_live_last_update = old_traverse_live_df.loc[old_traverse_live_df["traverse_name"] == traverse_name,
                                                                     "traverse_end_time"].max().tz_localize(None)
            if traverse_live_last_update <= old_traverse_live_last_update:
                traverse_live_df.drop(index, axis=0, inplace=True)

        pandas_gbq.to_gbq(traverse_live_df, dataset_table_id, project_id=project_id, if_exists='append')
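Example #1 (and Example #10 below) assumes a table-existence helper that is not shown on this page. A minimal sketch using the google-cloud-bigquery client could look like the following; the function name matches the call above, but the implementation is an assumption, not part of the original example.

from google.cloud import bigquery
from google.cloud.exceptions import NotFound

def doesTableExist(project_id, dataset_id, table_id):
    """Return True if project_id.dataset_id.table_id already exists in BigQuery."""
    client = bigquery.Client(project=project_id)
    try:
        client.get_table("{}.{}.{}".format(project_id, dataset_id, table_id))
        return True
    except NotFound:
        return False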
Example #2
def CB_ATS_Data_import():
    storage_client = storage.Client()

    for blob in storage_client.list_blobs("hc_crackerbarrel_ats", prefix="File_Temp/"):
        match = re.search(r"/(.+?)\.csv", blob.name)
        if match:
            dataframe = pd.read_csv('gs://hc_crackerbarrel_ats/File_Upload/' + match.group(1) + ".csv")
            pandas_gbq.to_gbq(dataframe, 'crackerbarrel.crackerbarrel_ats_TEST', project_id='hireclix',
                              if_exists='replace',
                              table_schema=[{'name': 'Person_FullName_FirstLast', 'type': 'STRING'},
                                            {'name': 'ApplicationDate', 'type': 'DATE'},
                                            {'name': 'Job_PositionTitle', 'type': 'STRING'},
                                            {'name': 'SourceChannel', 'type': 'STRING'},
                                            {'name': 'Source', 'type': 'STRING'},
                                            {'name': 'SourceName', 'type': 'STRING'},
                                            {'name': 'Location_Store', 'type': 'INTEGER'},
                                            {'name': 'Location_City', 'type': 'STRING'},
                                            {'name': 'Location_StateProvince', 'type': 'STRING'},
                                            {'name': 'FirstNewSubmissions', 'type': 'DATE'},
                                            {'name': 'FirstInterview', 'type': 'DATE'},
                                            {'name': 'FirstHired', 'type': 'DATE'},
                                            {'name': 'CurrentApplicationStep', 'type': 'STRING'},
                                            {'name': 'Job_RequisitionID', 'type': 'STRING'},
                                            {'name': 'Region', 'type': 'STRING'},
                                            {'name': 'minifiedReqID', 'type': 'STRING'},
                                            {'name': 'SkillPosition', 'type': 'STRING'},
                                            {'name': 'CityState', 'type': 'STRING'}])
Example #3
def CreateTable(client, data, dataset_name, table_name, project_id, table_desc, table_annotation=None):
    '''
     Description: This function creates a table named table_name in the dataset dataset_name of the given
     project_id, with the table_desc provided; it overwrites the table if one with the same name already exists.
     Inputs:
         client:BigQueryClient, the BigQuery client that will create the table
         data:dataframe, the data that will be saved in the BigQuery table
         dataset_name:string, the dataset where the table will be saved
         table_name:string, the name of the table that will be created
         project_id:string, the project where the dataset is saved
         table_desc:string, the description of the table
         table_annotation:dictionary, the dictionary of table column names and their annotations.
    '''

    dataset_id = client.dataset(dataset_name, project=project_id)
    try:
        dataset = client.get_dataset(dataset_id)
        if table_annotation is None:
            gbq.to_gbq(data, dataset.dataset_id + '.' + table_name, project_id=project_id, if_exists='replace')
        else:
            gbq.to_gbq(data, dataset.dataset_id + '.' + table_name, project_id=project_id,
                       table_schema=table_annotation, if_exists='replace')
        print("Table created successfully")

    except Exception as error:
        print('Table could not be created: {}'.format(error))
        return

    try:
        table = client.get_table(dataset.dataset_id + '.' + table_name)
        table.description = table_desc
        table = client.update_table(table, ["description"])
        # print("Table description added successfully")
    except Exception as error:
        print('Table description could not be updated: {}'.format(error))
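A hypothetical call to CreateTable could look like this; the client, project, dataset, and column names are placeholders, and table_annotation is assumed to follow the pandas_gbq table_schema format (a list of {'name', 'type'} dicts), since it is forwarded as table_schema above.

from google.cloud import bigquery
import pandas as pd

client = bigquery.Client()
data = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
annotations = [{"name": "id", "type": "INTEGER"}, {"name": "name", "type": "STRING"}]
CreateTable(client, data, "my_dataset", "my_table", "my-project",
            table_desc="Example table", table_annotation=annotations)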
Example #4
    def collect(self, base_url, keywords, limit=100, delay_time=1):
        """Collect URLs until there's no more to send to GBQ.

        Args:
            base_url (string): Domain URL for search mechanism.
            keywords (`list` of `str`): List of keywords to search for.
            limit (int, optional): Max number of URLs to collect to GBQ.
            delay_time (int, optional): Number of seconds of delay between
                search page requests.
        Returns:
            Number of URLs collected and sent to GBQ.
        """
        total = 0
        urls_dataframe = pd.DataFrame([])
        for new_urls in self.dataframe_generator(base_url, keywords, limit):
            # DataFrame.append returns a new frame, so accumulate with pd.concat
            urls_dataframe = pd.concat([urls_dataframe, new_urls], ignore_index=True)
            time.sleep(delay_time)
            n = len(new_urls)
            if n:
                print(f"Adding {n} URLs to in-memory storage")
                total += n
        pandas_gbq.to_gbq(urls_dataframe,
                          self.table_id,
                          self.project_id,
                          if_exists='append',
                          table_schema=self.schema)
        print(f"Sending {total} URLs to {self.table_id} on Google Bigquery")
        return total
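Usage under assumptions: the collector class, its constructor arguments, and dataframe_generator are not shown on this page, so the names below are hypothetical placeholders.

collector = UrlCollector(project_id="my-project",
                         table_id="my_dataset.collected_urls",
                         schema=[{"name": "url", "type": "STRING"}])
total = collector.collect("https://example.com/search", ["pandas", "bigquery"],
                          limit=50, delay_time=2)
print(f"Collected and uploaded {total} URLs")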
Example #5
def write_metadata_to_gcp(df,
                          table_id,
                          project_id,
                          credentials=None,
                          append=True):
    if append:
        if_exists = "append"
    else:
        if_exists = "fail"

    if credentials:
        pandas_gbq.to_gbq(
            df,
            table_id,
            project_id=project_id,
            credentials=credentials,
            if_exists=if_exists,
        )
    else:
        pandas_gbq.to_gbq(
            df,
            table_id,
            project_id=project_id,
            if_exists=if_exists,
        )

    print(f"{len(df)} rows written to BQ {table_id}, append={append}")
Example #6
 def ohlc_to_gbq(self, df, if_exists):
     print(self.pair)
     table_id = 'prices.' + self.pair
     pd_gbq.to_gbq(df,
                   table_id,
                   project_id=self.project_id,
                   if_exists=if_exists)
Example #7
def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
           verbose=True, reauth=False, if_exists='fail', private_key=None):
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(dataframe, destination_table, project_id,
                      chunksize=chunksize,
                      verbose=verbose, reauth=reauth,
                      if_exists=if_exists, private_key=private_key)
Example #8
def dk_predictions(request):
    from sklearn.impute import SimpleImputer
    project = os.environ["PROJECT_ID"]
    dataset_base = os.environ["DATASET_BASE"]
    dataset_dfs = os.environ["DATASET_DFS"]
    bucket = os.environ["BUCKET"]
    model_name = os.environ["MODEL"]
    today = (datetime.now() - timedelta(hours=4)).strftime('%Y-%m-%d')
    yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    model = load_pipeline(project_id=project,
                          bucket=bucket,
                          destination_path=model_name,
                          filename=model_name)
    input_run = inputData(project=project,
                          dataset=dataset_base,
                          yesterday=yesterday,
                          today=today)
    df = input_run.run()
    my_imputer = SimpleImputer()
    df["prediction"] = model.predict(
        my_imputer.fit_transform(df.drop(["name", "tm"], axis=1)))
    prediction_df = df[["name", "tm", "prediction"]]
    pandas_gbq.to_gbq(
        prediction_df,
        project_id=project,
        destination_table="{dataset}.mlb_dk_predictions_{dt}".format(
            dataset=dataset_dfs, dt=today.replace("-", "")),
        if_exists="replace")
def upload_to_gbq(results_df_transformed, project_id, dataset_name,
                  table_name):
    """Uploads data into bigquery and appends if data already exists.

    Args:
        results_df_transformed: pandas dataframe with converted schema
        project_id: name of project where you want to upload data
        dataset_name: name of target dataset
        table_name: name of target table

    """
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset(dataset_name)
    table_ref = dataset_ref.table(table_name)
    # job_config = bigquery.job.LoadJobConfig() #configure how the data loads into bigquery
    # job_config.write_disposition = 'WRITE_TRUNCATE' #if table exists, append to it
    # job_config.ignoreUnknownValues = 'T' #ignore columns that don't match destination schema
    # job_config.schema_update_options ='ALLOW_FIELD_ADDITION'
    # TODO: bad request due to schema mismatch with an index field
    # https://github.com/googleapis/google-cloud-python/issues/5572
    # bigquery_client.load_table_from_dataframe(results_df_transformed,
    # table_ref, num_retries = 5, job_config = job_config).result()
    gbq.to_gbq(
        results_df_transformed,
        dataset_name + "." + table_name,
        project_id,
        if_exists="append",
        location="US",
        progress_bar=True,
    )
    logger.info(f"Data uploaded into: {table_ref.path}")
Example #10
def device_df_to_gbq():
    """
    This function is responsible for returning a Google BigQuery object of all the data related to devices tracking
    Brussels' traffic. The function first verifies that the table doesn't already exist. If it doesn't, we shall create
    it. If it does, then it shall check if the dimension of the data has changed. If the dimension changed, we replace
    the existing entries by the newest retrieved value.

    :return: a gbq object of all the traffic tracking devices in Brussels
    """
    traverse_devices_df = device_json_to_df(get_device_data())
    
    # TO DO: uncomment the below lines and define where to store your table.
	
	# project_id = ""
	# dataset_id = ""
	# table_id = ""
	dataset_table_id = dataset_id + "." + table_id

	if not does_table_exist(project_id, dataset_id, table_id):
		try:
			pandas_gbq.to_gbq(traverse_devices_df, dataset_table_id, project_id=project_id, if_exists='fail')
		except:
			pass
	else:
		old_traverse_devices_df = pandas_gbq.read_gbq("SELECT traverse_name from {}".format(dataset_table_id), project_id = project_id)
		if traverse_devices_df.shape[0] != old_traverse_devices_df.shape[0]:
			pandas_gbq.to_gbq(traverse_devices_df, dataset_table_id, project_id=project_id, if_exists='replace')
Example #11
def _update_weather(project_id, table_id, schema):
    """Update weather table in database

    Queries a Box file maintained by Bill Petti for gameday weather,
        then inserts the data in the `weather` table. Data can be found
        here: https://app.box.com/v/gamedayboxscoredata

    Args:
        project_id (str): A BigQuery project to write to.
        table_id (str): A BigQuery table to write to. Should be formatted as
            `dataset_id.table_id`.
        schema (dict): A schema dictionary for the target table.
    """

    uri = API_CONFIG['weather']
    dtypes = schema_to_dtypes(schema)
    dtypes['attendance'] = 'str'  # `attendance` uses commas as a thousands separator

    data = _get_data(uri, dtypes=dtypes)

    if not data.empty:
        data = prep_weather(data, schema)

        gbq_schema = schema_subset(schema, ['name', 'type'])
        to_gbq(dataframe=data,
               destination_table=table_id,
               project_id=project_id,
               if_exists='replace',
               table_schema=gbq_schema,
               chunksize=5000,
               location='US',
               private_key=SERVICE_ACCOUNT_PATH)
Example #12
    def df_to_sql(cls, df: pd.DataFrame, **kwargs):
        """
        Upload data from a Pandas DataFrame to BigQuery. Calls
        `DataFrame.to_gbq()` which requires `pandas_gbq` to be installed.

        :param df: Dataframe with data to be uploaded
        :param kwargs: kwargs to be passed to the to_gbq() method. Requires both `schema`
        and `name` to be present in kwargs, which are combined and passed to
        `to_gbq()` as `destination_table`.
        """
        try:
            import pandas_gbq
        except ImportError:
            raise Exception(
                "Could not import the library `pandas_gbq`, which is "
                "required to be installed in your environment in order "
                "to upload data to BigQuery"
            )

        if not ("name" in kwargs and "schema" in kwargs):
            raise Exception("name and schema need to be defined in kwargs")
        gbq_kwargs = {}
        gbq_kwargs["project_id"] = kwargs["con"].engine.url.host
        gbq_kwargs["destination_table"] = f"{kwargs.pop('schema')}.{kwargs.pop('name')}"

        # Only pass through supported kwargs
        supported_kwarg_keys = {"if_exists"}
        for key in supported_kwarg_keys:
            if key in kwargs:
                gbq_kwargs[key] = kwargs[key]
        pandas_gbq.to_gbq(df, **gbq_kwargs)
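The enclosing class is not shown here; assuming df_to_sql is a classmethod on an engine-spec-style class (called SomeEngineSpec below purely for illustration) and that a SQLAlchemy BigQuery engine is available, the expected kwargs look roughly like this. The engine URL, dataset, and table names are placeholders.

from sqlalchemy import create_engine
import pandas as pd

engine = create_engine("bigquery://my-project")  # needs the sqlalchemy-bigquery dialect installed
df = pd.DataFrame({"x": [1, 2, 3]})

# `schema` and `name` are combined into destination_table "my_dataset.my_table";
# project_id is taken from the connection URL host ("my-project").
SomeEngineSpec.df_to_sql(df, con=engine, schema="my_dataset", name="my_table", if_exists="replace")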
Example #13
def _update_players_historical(project_id, table_id, schema):
    """Update players table in database

    Queries the Baseball Prospectus CSV API for every player in MLB history,
        then inserts the data in the `players` table. 

    Args:
        project_id (str): A BigQuery project to write to.
        table_id (str): A BigQuery table to write to. Should be formatted as
            `dataset_id.table_id`.
        schema (dict): A schema dictionary for the target table.
    """

    uri = API_CONFIG['players_historical']
    dtypes = schema_to_dtypes(schema)
    data = _get_data(uri, dtypes=dtypes, encoding='ISO-8859-1')

    if not data.empty:
        data = prep_players_historical(data, schema)

        gbq_schema = schema_subset(schema, ['name', 'type'])
        to_gbq(dataframe=data,
               destination_table=table_id,
               project_id=project_id,
               if_exists='replace',
               table_schema=gbq_schema,
               chunksize=5000,
               location='US',
               private_key=SERVICE_ACCOUNT_PATH)
Example #14
def write_gbq_performance_table():
    df = get_quarterly_vintage_data()
    pandas_gbq.to_gbq(df,
                      'Group_CapMarkets.{}_vintage_data'.format(
                          one_month_back_str()),
                      project_id=project,
                      if_exists='replace')
Example #15
def upload_outdata(df, name):
    print(df)
    pandas_gbq.to_gbq(df,
                      name,
                      project_id=projectid,
                      table_schema=bq_schema,
                      if_exists='replace')
Example #16
def insertScoreResultData(InsertDataFrame, ProjectId, DatasetId, TableId):
    """Function for inserting data to BigQuery database

    Args:
        InsertDataFrame - pandas dataframe object, with score result data by statuses
        ProjectId - string, name of project in google cloud platform 
        DatasetId - string, name of dataset in bigquery for raw data
        TableId - string, name of table for raw data

    Example:
        InsertDataFrame = assignee_id    status  count_last_period   count_mean_calc_period  count_sem_calc_period   score_value
                          11527290367    closed  163                 140.38                  12.4                    2
                          11527290367    solved  0                   0.00                    0.0                     0
        >>> insertScoreResultData(InsertDataFrame, 'test-gcp-project', 'test_dataset', 'test_table')
    """
    destination_table = f"{DatasetId}.{TableId}"

    types_mapper = {
        'int64': 'int',
        'float64': 'float',
    }

    res_df = pd.DataFrame()
    for column in InsertDataFrame.columns:
        column_type = str(InsertDataFrame[column].dtype)
        db_column_type = types_mapper.get(column_type, 'str')
        res_df[column] = InsertDataFrame[column].astype(db_column_type)

    res_df['developer'] = 'kirill.bayandin'
    res_df['developer'] = res_df['developer'].astype('str')

    pandas_gbq.to_gbq(res_df,
                      destination_table=destination_table,
                      project_id=ProjectId,
                      if_exists='append')
Example #17
def to_gbq(dataframe,
           destination_table,
           project_id=None,
           chunksize=None,
           reauth=False,
           if_exists='fail',
           auth_local_webserver=False,
           table_schema=None,
           location=None,
           progress_bar=True,
           credentials=None,
           verbose=None,
           private_key=None):
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(dataframe,
                      destination_table,
                      project_id=project_id,
                      chunksize=chunksize,
                      reauth=reauth,
                      if_exists=if_exists,
                      auth_local_webserver=auth_local_webserver,
                      table_schema=table_schema,
                      location=location,
                      progress_bar=progress_bar,
                      credentials=credentials,
                      verbose=verbose,
                      private_key=private_key)
Example #18
def upload_data(data, destination_table, if_exists='replace'):
    """Uploads the data to the BigQuery."""
    print(f'Uploading {destination_table} data to BigQuery...')
    pandas_gbq.to_gbq(data,
                      project_id='dotted-marking-256715',
                      destination_table=destination_table,
                      if_exists=if_exists)
Example #19
def export_forecast(df):

    filename = f"forecast_{datetime.now().strftime('%Y%m%d')}"
    gbq.to_gbq(df,
               f"forecast.{filename}",
               project_id="buienradar",
               if_exists="replace")
Example #20
def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
           verbose=True, reauth=False, if_exists='fail', private_key=None):
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(dataframe, destination_table, project_id,
                      chunksize=chunksize,
                      verbose=verbose, reauth=reauth,
                      if_exists=if_exists, private_key=private_key)
Example #21
def operation_refine_city_data_appendbq(project_id: str,
                                        destination_tableid: str,
                                        newly_arrived: pd.DataFrame, *args,
                                        **kwargs):
    district = pandas_gbq.read_gbq(f"""
            SELECT district
            FROM `{project_id}.{config['dataset_id']}.dim_district`
            """,
                                   project_id=project_id)

    details_list = newly_arrived['details'] + ' ' + newly_arrived[
        'summary'] + ' ' + newly_arrived['name']
    newly_arrived['location_details'] = [
        extract_location_details(detail, district=district)
        for detail in details_list
    ]
    newly_arrived['location_details'] = newly_arrived[
        'location_details'] + ' ' + newly_arrived[
            'location_name'] + ' ' + 'Sweden'
    pandas_gbq.to_gbq(newly_arrived,
                      f"{config['dataset_id']}.{destination_tableid}",
                      project_id=project_id,
                      if_exists='append')
    print(
        f"{newly_arrived.shape[0]} rows added to table: {config['dataset_id']}.{destination_tableid}"
    )
Example #22
    def save(self, payload: DataFrame):
        """
        Save payload to database table.

        Arguments:
            payload {DataFrame} -- the payload to be stored in db

        Raises:
            DaoError: if cannot save payload to db
        """
        try:
            # Always append date column
            # Todays date
            date = datetime.today().strftime("%Y-%m-%d")
            payload.insert(0, "date", date)
            self.LOGGER.info(
                f"Saving {len(payload)} records to: {self.tablename}")
            pandas_gbq.to_gbq(
                payload,
                self._tablename,
                if_exists="append",
                table_schema=self._table,
                progress_bar=False,
            )
            self.LOGGER.info(
                f"{len(payload)} records saved successfully to: {self.tablename}"
            )
        except Exception as sql_exp:
            self.LOGGER.exception(str(sql_exp))
            raise DaoError(sql_exp)
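The enclosing DAO class is not shown on this page; assuming it stores a "dataset.table" string in self._tablename and a pandas-gbq table_schema list in self._table, a caller only hands over a DataFrame. The concrete class name below is hypothetical.

import pandas as pd

dao = MetricsDao()  # hypothetical DAO holding table name, schema, and logger
payload = pd.DataFrame({"metric": ["visits"], "value": [42]})
dao.save(payload)  # a "date" column with today's date is prepended before upload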
Example #23
def main(data, context):
    """
    Extract related queries/topics for each input keyword, filtered to the last month and a specific geography.
    """
    tops = pd.DataFrame([])
    risings = pd.DataFrame([])
    df = preprocess()
    for k, k_norm, country in zip(df.keyword, df.keyword_norm, df.country_code):
        pytrend.build_payload(kw_list=[k_norm], cat=0, timeframe='today 1-m', geo=country, gprop='')
        related_queries = pytrend.related_queries()  # or related_queries = pytrend.related_topics()

        keyword_queries = related_queries[k_norm]

        top = keyword_queries['top']
        if top is not None:
            top['keyword'] = k
            top['country_code'] = country
            tops = tops.append(top, ignore_index=True)

        rising = keyword_queries['rising']
        if rising is not None:
            rising['keyword'] = k
            rising['country_code'] = country
            risings = risings.append(rising, ignore_index=True)

    credentials, your_project_id = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
    client = bigquery.Client(credentials=credentials, project=your_project_id, location='[specify location, default=US]')
    pandas_gbq.to_gbq(tops, destination_table='[dataset_id.table_id]', project_id='[project_id]', if_exists='replace')
    pandas_gbq.to_gbq(risings, destination_table='[dataset_id.table_id]', project_id='[project_id]', if_exists='replace')
    return print('channel google trends to GBQ')
Example #24
def places_name_intent():
    for places in powai_places:
        # return(places['result']['name'])
        return places['result']

rows_to_insert = pd.DataFrame(places_name_intent())

pandas_gbq.to_gbq(rows_to_insert, 'New.Powai_Places', project_id='api-new-242611', if_exists='replace')
Example #25
def upload_initial(df: pd.DataFrame,
                   project_id: str,
                   dataset_id: str = config['dataset_id'],
                   table_id='raw'):
    pandas_gbq.to_gbq(df,
                      f'{dataset_id}.{table_id}',
                      project_id=project_id,
                      if_exists='replace')
Example #26
    def stream(self, args):
        manufacturer, data = args
        data = data.dropna(how='all', axis=1).reset_index(drop=True)

        to_gbq(data,
               destination_table="Turbines.{}".format(manufacturer),
               if_exists='append',
               project_id="casestudy")
Example #27
 def delete_all(self):
     """Delete all records in repository."""
     pandas_gbq.to_gbq(
         pd.DataFrame(),
         self.tablename,
         if_exists="replace",
         table_schema=self._table,
     )
Example #28
 def write_df(self, df_to_write, dataset, table, if_exists='replace'):
     pandas_gbq.to_gbq(
         df_to_write
         , '{}.{}'.format(dataset, table)
         , project_id=self.project_id
         , if_exists=if_exists
         , credentials=self._credentials_gbq
     )
Example #29
def upload_to_bq(hist_bq:pd.DataFrame, project_id, if_exists:str='replace'):
    project_id = 'ds-smartsupply'
    dataset_id = 'currency_exchange'
    table_id = 'euro_rates_daily'
    pandas_gbq.to_gbq(hist_bq,  f'{dataset_id}.{table_id}', project_id=project_id, if_exists=if_exists, location='europe-west2')
    message_string = f'{len(hist_bq)} rows uploaded to table `{dataset_id}.{table_id}`'
    print(message_string)
    return({'status':message_string})
Example #30
def write_gbq_performance_table():
    sql = open(scripts_file_path+'sql/historical_platform_performance_gbq.sql', 'r')
    sql = sql.read()
    df = pd.read_gbq(sql.format(one_month_back_str(), two_months_back_str()), project, dialect='standard')
    df['OQ'] = df['OriginationQuarter'].str[3:].map(str) + df['OriginationQuarter'].str[:2].map(str)
    pandas_gbq.to_gbq(df, 'Group_CapMarkets.{}_vintage_data'.format(
        one_month_back_str())
        , project_id=project
        , if_exists='replace')
Example #31
def write_df(df, table_id, dataset=DEFAULT_DATASET):
    """
    Write a dataframe to a table in a dataset.
    :param df:  dataframe to write
    :param table_id:  big query table id
    :param dataset: bigquery dataset.
    :return:
    """
    pd_gbq.to_gbq(df, dataset + '.' + table_id, PROJECT_ID, if_exists='append')
Example #32
def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
           reauth=False, if_exists='fail', auth_local_webserver=False,
           table_schema=None, location=None, progress_bar=True,
           credentials=None, verbose=None, private_key=None):
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(dataframe, destination_table, project_id=project_id,
                      chunksize=chunksize, reauth=reauth, if_exists=if_exists,
                      auth_local_webserver=auth_local_webserver,
                      table_schema=table_schema, location=location,
                      progress_bar=progress_bar, credentials=credentials,
                      verbose=verbose, private_key=private_key)
Example #33
def to_gbq(dataframe, destination_table, project_id, chunksize=None,
           verbose=None, reauth=False, if_exists='fail', private_key=None,
           auth_local_webserver=False, table_schema=None):
    pandas_gbq = _try_import()
    return pandas_gbq.to_gbq(
        dataframe, destination_table, project_id, chunksize=chunksize,
        verbose=verbose, reauth=reauth, if_exists=if_exists,
        private_key=private_key, auth_local_webserver=auth_local_webserver,
        table_schema=table_schema)