def __init__(self, user, spread, sheet=None, config=None,
             create_spread=False, create_sheet=False):
    """
    :param str user: string indicating the key to a user's credentials,
        which will be stored in a file (by default they will be stored in
        ``~/.config/gspread_pandas/creds/<user>`` but this can be modified
        with the ``creds_dir`` property in the config)
    :param str spread: name, url, or id of the spreadsheet; must have read
        access by the authenticated user,
        see :meth:`open_spread <gspread_pandas.client.Spread.open_spread>`
    :param str,int sheet: optional, name or index of Worksheet,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)
    :param dict config: optional, if you want to provide an alternate
        configuration, see :meth:`get_config <gspread_pandas.conf.get_config>`
    :param bool create_spread: whether to create the spreadsheet if it
        doesn't exist; it will use the ``spread`` value as the spreadsheet
        title
    :param bool create_sheet: whether to create the worksheet if it
        doesn't exist; it will use the ``sheet`` value as the worksheet
        title
    """
    self._config = config or get_config()
    self._creds_file = path.join(self._config['creds_dir'], user)
    self._login()
    self.email = self._get_email()
    self.open(spread, sheet, create_sheet, create_spread)
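# A minimal usage sketch for the constructor documented above (the user
# key 'default' and the spreadsheet name 'My Spreadsheet' are
# illustrative, not from the library):
from gspread_pandas import Spread

# credentials are looked up in ~/.config/gspread_pandas/creds/default
# unless a config with a custom ``creds_dir`` is passed
spread = Spread('default', 'My Spreadsheet', sheet=0, create_spread=True)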
import json

import pandas as pd
import requests
from gspread_pandas import Spread, conf

# ZD_USERNAME, ZD_PASSWORD, and SPREADSHEET_ID are module-level globals
# defined elsewhere


def zd_api(request):
    config = conf.get_config("./gspread_pandas/")
    articles = []
    url = "https://collegetrack.zendesk.com//api/v2/help_center/categories/360000085163/articles.json"
    # also not used, but if we wanted to add in users the api URL is: /api/v2/users.json

    # Zendesk paginates results; follow `next_page` links until exhausted
    while url:
        response = requests.get(url, auth=(ZD_USERNAME, ZD_PASSWORD))
        if response.status_code != 200:
            print("Status:", response.status_code,
                  "Problem with the request. Exiting.")
            exit()
        data = json.loads(response.text)
        for article in data["articles"]:
            articles.append(
                [
                    article["id"],
                    article["title"],
                    article["html_url"],
                    article["updated_at"],
                ]
            )
        url = data["next_page"]

    # converting the date column into a usable form
    # to_write_updated = [datetime.date(parse(i[3])) for i in to_write]
    df = pd.DataFrame(articles)
    df.columns = ["ID", "Title", "URL", "Date"]
    df.Date = pd.to_datetime(df.Date).dt.date
    df = df.sort_values(by="Date", ascending=True)
    df.loc[:, "ID"] = df["ID"].astype("int").astype("str")
    # df = df.set_index("ID")
    # articles.sort(key = lambda item: item[3], reverse=False)
    # articles.sort(key = lambda item: item[3], reverse=True)

    spread = Spread(SPREADSHEET_ID, config=config)
    spread.open_sheet(0)
    existing_sheet = spread.sheet_to_df(sheet="Sheet1")
    existing_sheet_subset = existing_sheet[
        ["Internal / Staff Facing", "Person Responsible"]
    ].reset_index()
    merged_df = df.merge(existing_sheet_subset, on="ID", how="left")
    spread.df_to_sheet(
        merged_df, index=False, sheet="Sheet1", start="A1", replace=False
    )
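# Aside on the .reset_index() call above: gspread_pandas' sheet_to_df
# uses the sheet's first column as the DataFrame index by default, so
# "ID" comes back as the index rather than a column, and must be reset
# before merging on it. A minimal sketch, assuming the same
# SPREADSHEET_ID and config as above:
df_indexed = Spread(SPREADSHEET_ID, config=config).sheet_to_df(sheet="Sheet1")
df_flat = df_indexed.reset_index()  # "ID" becomes a regular column again
# passing index=0 to sheet_to_df instead keeps all columns as data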
from gspread_pandas import Spread, conf


def setGoogleSheetsData(CREDSPATH, SNDATA, SHEETID, SHEETTAB):
    # Connect to the Google Sheets file
    spread = Spread(
        'sn_datapusher',
        SHEETID,
        0,
        conf.get_config(conf_dir=CREDSPATH, file_name='google_secret.json'))
    print('Spreadsheet loaded...')

    # Copy the dataframe to the google sheet, replacing existing contents
    spread.df_to_sheet(
        SNDATA,
        index=False,
        sheet=SHEETTAB,
        start='A1',
        replace=True,
    )
    print('Spreadsheet updated!')
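# A minimal usage sketch for the helper above (the path, spreadsheet id,
# and tab name are placeholders, not from the original source):
import pandas as pd

sn_data = pd.DataFrame({'number': ['INC0001', 'INC0002'],
                        'state': ['open', 'closed']})
setGoogleSheetsData(
    CREDSPATH='/path/to/creds',
    SNDATA=sn_data,
    SHEETID='your-spreadsheet-id',
    SHEETTAB='Sheet1',
)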
import sys

from gspread_pandas import Spread, conf

# lab_spreadsheet_id, logger, and metadata_column_names are module-level
# globals defined elsewhere


def import_library_sheet_from_google(year):
    global library_tracking_spreadsheet_df
    c = conf.get_config(conf_dir=sys.argv[2])
    spread = Spread(lab_spreadsheet_id, config=c)
    # note: the worksheet name is hardcoded to '2019'; the `year`
    # parameter is unused here
    library_tracking_spreadsheet_df = spread.sheet_to_df(sheet='2019',
                                                         index=0,
                                                         header_rows=1,
                                                         start_row=1)
    hit = library_tracking_spreadsheet_df.iloc[0]
    logger.debug(f"First record: {hit}")

    for column_name in metadata_column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)

    logger.info(
        f"Loaded {len(library_tracking_spreadsheet_df.index)} records from library tracking sheet."
    )
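# A hypothetical invocation sketch: the loader reads its config directory
# from sys.argv[2], so the script (name assumed here) would be run as
#
#   python import_tracking.py <arg1> /path/to/creds_dir
#
# after which the populated global can be inspected:
import_library_sheet_from_google(2019)
print(library_tracking_spreadsheet_df.head())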
from __future__ import print_function

import pprint

import pandas as pd
from gspread_pandas import Spread, Client, conf

# get_config returns a dict that must be captured and handed to Spread
# via the `config` kwarg (the original discarded the return value and
# passed the whole file path as `conf_dir`)
config = conf.get_config("auth_secret", "google_secret.json")

# Print nicely
pp = pprint.PrettyPrinter()

# Find a workbook by name and open the first sheet
# Make sure you use the right name here.
id_sheetname = "20SH07 Food ID Map"
spread = Spread(id_sheetname, config=config)
id_df = spread.sheet_to_df()

column_map = {
    "ID": 1,
    "Exact_Name": 2,
    "Recess": 3,
    "Lunch": 4,
    "Price": 5,
    "Name": 6,
    "MON/TUE/WED/TURS/FRI": 7,
    "Stall": 8
}

stall_map = {
    "Stall 1 Fried Hokkien Mee": 1,
    "Stall 2 Chicken Rice": 2,
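# (stall_map is truncated above.) Once it is fully defined, one plausible
# way these lookup maps could be applied to the frame read earlier — a
# sketch, since the original code for this step is not shown:
id_df["Stall_No"] = id_df["Stall"].map(stall_map)
pp.pprint(id_df.head().to_dict())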
def main():
    url = 'https://www.flashscore.com/football/england/premier-league/results/'
    lib = 'lxml'
    attr = 'sportName soccer'
    path = '/var/www/html/diag/pypp.png'

    # gel/pypp are project helpers defined elsewhere that render the
    # JavaScript-heavy page and return the parsed source
    view_source = gel().run_until_complete(pypp(url, lib, attr, path, True))
    rows = view_source.select('.event__match')
    results = []

    # debug: dump the children of the first matched row
    for row in rows[0]:
        print(row)

    for row in rows:
        try:
            unique_id = str(row['id']).replace('g_1_', '')
            datetime = row.select_one('.event__time').get_text()
            timestamp = isodate(str(datetime).split('.'))
            home_team = row.select_one('.event__participant--home').get_text()
            away_team = row.select_one('.event__participant--away').get_text()
            home_goal = strip_html(row.select('span')[0])
            away_goal = strip_html(row.select('span')[1])
        except Exception:
            print("Exception")
        else:
            results.append([
                unique_id, timestamp, home_team, away_team, home_goal,
                away_goal
            ])
    print(results[0])

    file_dir = ''
    file_json = ''
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    config = conf.get_config(conf_dir=file_dir, file_name=file_json)
    worksheet = Spread('Profit & Loss', sheet='Football', config=config)
    df_worksheet = worksheet.sheet_to_df(index=0, header_rows=2)
    df_worksheet['Home'] = df_worksheet['Home'].apply(team_transform)
    df_worksheet['Away'] = df_worksheet['Away'].apply(team_transform)

    for result in results:
        df_worksheet.loc[(df_worksheet.Home == result[2])
                         & (df_worksheet.Away == result[3])
                         & (df_worksheet.Date_Time == result[1]),
                         ['HG', 'AG']] = result[4], result[5]
    print(df_worksheet)
    worksheet.df_to_sheet(df_worksheet,
                          index=False,
                          headers=False,
                          sheet='Football',
                          start='A3',
                          replace=False,
                          raw_column_names=['Date_Time'])
    return
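# Aside on the .loc update pattern above: a boolean mask across several
# columns selects the matching row(s), and a column list on the same .loc
# call assigns both goal columns at once. A self-contained sketch with
# made-up data:
import pandas as pd

fixtures = pd.DataFrame({'Home': ['Arsenal'], 'Away': ['Chelsea'],
                         'HG': [''], 'AG': ['']})
fixtures.loc[(fixtures.Home == 'Arsenal') & (fixtures.Away == 'Chelsea'),
             ['HG', 'AG']] = '2', '1'
print(fixtures)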
import gspread
from gspread_pandas import conf
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
import requests
from bs4 import BeautifulSoup

# use creds to create a client to interact with the Google Drive API
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
client = gspread.authorize(creds)
c = conf.get_config(r'C:\Users\SYDNEY\AppData\Local\Programs\Python\Python38\Scripts',
                    'client_secret.json')  # For gspread_pandas


def scrap_job_data(page_url):
    # input: page url
    # output: DataFrame containing the data scraped from the page
    df = pd.DataFrame(columns=('url', 'job title', 'company', 'description'))
    row = [0, 0, 0, 0]
    page = requests.get(page_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    all_paid_jobs = soup.find_all(class_='PaidJob')
    all_other_jobs = soup.find_all(class_='jix_robotjob')
    for i in range(len(all_paid_jobs)):
        # Get job title and company name
        row[1], row[2] = (all_paid_jobs[i].select('a b')[0].text,
                          all_paid_jobs[i].select('a b')[1].text)
        # Some ads have a picture, some don't, but the url is always in the
        # parent tag of the job title, that's how we get it
        row[0] = all_paid_jobs[i].select('a b')[0].parent['href']
        # assumption: fixed structure, so the 1st paragraph of the
        # description is in the second <p>
        row[3] = all_paid_jobs[i].find_all('p')[1].text
import os

from dotenv import load_dotenv
from gspread_pandas import Spread, conf
from simple_salesforce import Salesforce

from ct_snippets.load_sf_class import SF_SOQL, SF_Report
import soql_queries as soql

# load_dotenv must run before reading the variables it populates
# (the original called it after the os.environ.get lookups)
load_dotenv()

SF_PASS = os.environ.get("SF_PASS")
SF_TOKEN = os.environ.get("SF_TOKEN")
SF_USERNAME = os.environ.get("SF_USERNAME")

sf = Salesforce(username=SF_USERNAME, password=SF_PASS, security_token=SF_TOKEN)

config = conf.get_config("./gspread_pandas/")
hs_spread = Spread("1u7fLc0Dlg1zhWfn8xVgedmYyedr0Z1D1NfZ6uSov5tc", config=config)
hs_spread_spanish = Spread(
    "1jvY7a9yncWMI44uFf8HMypHuKhoHNkOEQ6Fr9Rqza94", config=config
)

student_List = SF_SOQL("fy21_hs_survey_student_list", soql.student_list_query)
student_List.load_from_sf_soql(sf)
student_List.shorten_site_names("A_SITE__c")

english_df = hs_spread.sheet_to_df().reset_index()
english_df = english_df[
    [
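# The snippet above assumes a .env file next to the script; a plausible
# shape (values are placeholders, not real credentials):
#
#   SF_USERNAME=user@example.org
#   SF_PASS=hunter2
#   SF_TOKEN=abc123securitytoken
#
# python-dotenv loads these into os.environ, which is what makes the
# os.environ.get lookups above resolve.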
import logging
            dicc[j] = literal_eval(i[j])
        else:
            dicc[j] = i[j]
    resto.append(dicc)


def asignado(i):
    if i['City'] in cities_asignadas:
        val = 'Si'
    else:
        val = 'No'
    return val


# Credentials
cred = conf.get_config('C:\\Users\\micaela.fuchs\\Anaconda',
                       'PedidosYa-6e661fd93faf.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Roster for assigned cities
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
        dicc[j] = i[j]
    caba.append(dicc)


def a_listas_resto(i):
    dicc = {}
    for j in cols_resto:
        if j == 'City':
            dicc[j] = literal_eval(i[j])
        else:
            dicc[j] = i[j]
    resto.append(dicc)


# Credentials
cred = conf.get_config('C:\\Users\\santiago.curat\\Pandas\\PEYA',
                       'PedidosYa-8b8c4d19f61c.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Build the column lists
cols_caba = reporte_caba.columns
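# Aside on the literal_eval calls above: cells read from a sheet arrive
# as strings, so a serialized Python list in the 'City' column has to be
# parsed back into a real list. A self-contained sketch with a made-up
# cell value:
from ast import literal_eval

cell = "['CABA', 'La Plata']"   # what the sheet hands back
cities = literal_eval(cell)     # -> ['CABA', 'La Plata']
print(type(cities), cities)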
def test_with_sa(self, sa_config):
    c = conf.get_config(*sa_config)
    assert isinstance(c, dict)
    assert len(c) > 1

def test_with_oauth(self, oauth_config):
    c = conf.get_config(*oauth_config)
    assert isinstance(c, dict)
    assert len(c) == 1
    assert len(c[list(c.keys())[0]]) > 1

def test_no_file(self):
    with pytest.raises(IOError):
        conf.get_config(file_name="this_file_doesnt_exist")
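# The sa_config and oauth_config fixtures are not shown above; this is a
# hypothetical sketch of what they might look like, inferred from the
# assertions: get_config(*fixture) implies (conf_dir, file_name) tuples,
# a service-account file parses to a flat dict with several keys, and an
# OAuth client secret has a single top-level "installed"/"web" key.
import json

import pytest


@pytest.fixture
def sa_config(tmp_path):
    # minimal stand-in for a service-account credentials file; real ones
    # also carry private_key_id, token_uri, etc.
    creds = {
        "type": "service_account",
        "project_id": "demo-project",
        "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
        "client_email": "demo@demo-project.iam.gserviceaccount.com",
    }
    (tmp_path / "google_secret.json").write_text(json.dumps(creds))
    return str(tmp_path), "google_secret.json"


@pytest.fixture
def oauth_config(tmp_path):
    # minimal stand-in for an OAuth client-secret file
    creds = {"installed": {"client_id": "demo", "client_secret": "demo"}}
    (tmp_path / "client_secret.json").write_text(json.dumps(creds))
    return str(tmp_path), "client_secret.json"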