def sendMessagetoHost(self):
    source = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=763524256")
    self.driver.get("https://www.airbnb.com.au")
    input("Please sign in, then press Enter to continue!")
    data = source.sheet_to_df()
    hosts = data["url"].values
    count = 0
    for url in hosts:
        self.driver.get(url)
        time.sleep(25)  # give the listing page time to render
        soup = BeautifulSoup(self.driver.page_source, 'lxml')
        # Extract the numeric host id from the profile link (not used further below)
        host_id = soup.findAll("a", {"class": "_16lonkd"})[0]["href"]
        host_id = re.findall(r'\d+', host_id)[0]
        test_message = input("Please enter the message you would like to send: ")
        button = self.driver.find_element_by_link_text("Contact host")
        button.click()
        time.sleep(10)
        text_area = self.driver.find_elements_by_id("homes-contact-host--message-textarea")
        text_area[0].send_keys(test_message)
        button = self.driver.find_element_by_class_name("_72kmbi0")
        button.click()
        count += 1
        if count > 10:
            break

def df_to_drive(df, foldername, filename, worksheet_index=0, **spread_kwargs):
    """Write a DataFrame to Google Drive."""
    sh = get_sheet(foldername, filename)
    spread = Spread(sh.id)
    skwargs = {'replace': True}
    skwargs.update(spread_kwargs)
    spread.df_to_sheet(df, **skwargs)

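# `get_sheet` above is not defined in this excerpt; the sketch below is an
# assumption about its behaviour: look a spreadsheet up by Drive folder and
# file name, returning an object with an `.id` attribute (a gspread
# Spreadsheet qualifies). The "name"/"title" metadata keys are assumptions.
from gspread_pandas import Client

def get_sheet(foldername, filename):
    client = Client()
    folders = client.find_spreadsheet_files_in_folders(foldername)
    for f in folders.get(foldername, []):
        if f.get("name", f.get("title")) == filename:
            # open_by_key is inherited from the underlying gspread client
            return client.open_by_key(f["id"])
    raise FileNotFoundError(f"{filename!r} not found in folder {foldername!r}")
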
def workflow(sheetid, tablename):
    """Process a spreadsheet into a database table."""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(sheetid, config=config["td"]["service_account"])
    df = spread.sheet_to_df(index=None)
    cols = [cleaner(c) for c in df.columns]
    cursor.execute(
        "CREATE TABLE %s (ss_order int, %s)"
        % (tablename, ",".join('"%s" varchar' % (s,) for s in cols))
    )
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for i, row in enumerate(df.itertuples()):
        vals = list(row[1:])  # skip the index element of the namedtuple
        sql = "INSERT into %s (ss_order, %s) VALUES (%s, %s)" % (
            tablename,
            ",".join('"%s"' % (s,) for s in cols),
            i,
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, vals)
    cursor.close()
    pgconn.commit()

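# `cleaner` above (and `cleankey` in the later `do()` snippet) is not defined
# in these excerpts. A plausible stand-in, an assumption rather than the
# original, that turns arbitrary sheet headers into safe Postgres column names:
import re

def cleaner(colname):
    """Lowercase, trim, and collapse non-alphanumerics to underscores."""
    return re.sub(r"[^a-z0-9]+", "_", colname.strip().lower()).strip("_")
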
def uploadtosheet():
    file_name = "xyz.csv"
    df = pd.read_csv(file_name)

    # 'Example Spreadsheet' needs to already exist and your user must have access to it
    spread = Spread('Example Spreadsheet')  # will ask to authenticate if you haven't done so before

    # Display available worksheets
    spread.sheets

    # Save DataFrame to worksheet 'SDSheet', creating it first if it doesn't exist
    spread.df_to_sheet(df, index=False, sheet='SDSheet', start='A2', replace=True)
    print(spread)
    # <gspread_pandas.client.Spread - User: '******', Spread: 'Example Spreadsheet', Sheet: 'SDSheet'>

    # You can also instantiate a Client separately, query folders, and
    # instantiate other Spread objects by passing in the Client
    client = Client()

    # Assuming you have a dir called 'example dir' with sheets in it
    available_sheets = client.find_spreadsheet_files_in_folders('example dir')
    spreads = []
    for sheet in available_sheets.get('example dir', []):
        spreads.append(Spread(sheet['id'], client=client))

def __init__(self):
    options = Options()
    options.add_argument("--headless")  # run Firefox without a visible window
    self.driver = webdriver.Firefox(
        options=options,
        executable_path=r'C:/Users/Abdul Rehman/Downloads/geckodriver-v0.26.0-win64/geckodriver.exe',
    )
    # self.driver.get("https://www.lookfantastic.com/")
    self.spreadsheet_key = Spread(
        "https://docs.google.com/spreadsheets/d/1aradT8_30SUbEj13ZyDju78V80eJB-rJyX9UmJGBxHA/edit?usp=sharing"
    )
    now = datetime.datetime.now()
    self.date = now.strftime("%Y.%m.%d")
    self.data = pd.DataFrame(
        columns=['Brand', 'Title', 'Price(EUR)', 'Review', 'Url'])
    # self.f = open("cometic_test" + self.date + ".csv", "w", encoding='utf8')
    # button = self.driver.find_element_by_xpath('//*[@class="emailReengagement_close_button"]')
    # button.click()
    self.urls = {
        "https://www.lookfantastic.com/health-beauty/fragrance/eau-de-toilette.list": "Eau de toilette",
        "https://www.lookfantastic.com/health-beauty/fragrance/eau-de-parfum.list": "Eau de Parfum",
        "https://www.lookfantastic.com/brands/mac/view-all.list": "MAC",
    }

def do(spreadkey, tablename):
    """Process a spreadsheet into a database table."""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(spreadkey, config=config["cscap"]["service_account"])
    df = spread.sheet_to_df(index=None)
    sql = "CREATE TABLE %s (%s)" % (
        tablename,
        ",".join("%s varchar" % (cleankey(col),) for col in df.columns),
    )
    cursor.execute(sql)
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for _, row in df.iterrows():
        cols = []
        values = []
        for key, val in row.items():
            cols.append(cleankey(key))
            values.append((val or "").strip())
        sql = "INSERT into %s (%s) VALUES (%s)" % (
            tablename,
            ",".join(cols),
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, values)
    cursor.close()
    pgconn.commit()

def keepCrawling(self):
    # self.collectResults()
    counter = 0
    page = 0
    while True:
        try:
            page += 1
            print("Pages searched " + str(page))
            self.collectResults()
            counter += 20
            # if self.stop_crawling == False:
            self.driver.get(
                "https://www.airbnb.com.au/s/" + self.location
                + "--Australia/homes?refinement_paths%5B%5D=%2Fhomes&query=" + self.location
                + "&search_type=unknown&map_toggle=false&checkin=" + self.check_in
                + "&checkout=" + self.check_out
                + "&section_offset=3&items_offset=" + str(counter))
            if counter > 20:
                break
        except Exception as e:
            print(str(e))
            break
    # self.data.to_excel("Results.xlsx")
    spreadsheet_key = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=0")
    spreadsheet_key.df_to_sheet(self.data, index=False, sheet=(self.location + " " + self.date))
    price = self.data["price"].values.astype(float)
    print("The number of properties is: " + str(len(price)))
    print("The mean price is: " + str(np.mean(price)))
    print("The max price is: " + str(np.max(price)))
    print("The min price is: " + str(np.min(price)))
    print("The standard deviation is: " + str(np.std(price)))

def share_permission(sheet_id):
    try:
        c = Spread(spread=sheet_id)
        # Share read-only with the service account
        c.add_permission('[email protected]|reader|no')
        print(f'Shared {sheet_id} to service account.')
    except Exception as e:
        print(f'Failed to share {sheet_id} to service account.')
        print(e)

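# Simple usage sketch: share a batch of spreadsheets with the service
# account. The IDs are placeholders.
for sid in ["<sheet-id-1>", "<sheet-id-2>"]:
    share_permission(sid)
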
def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_check):
    spread = Spread(spread=spreadsheet)

    # Get metadata:
    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')

    # Locate the RawData sheet: title matches "rawdata" once spaces and dots
    # are stripped, excluding pivot sheets; prefer the shortest matching title
    def norm(ws):
        return ws.__dict__['_properties']['title'].replace(' ', '').replace('.', '').lower()

    raw = [x for x in spread.sheets if 'rawdata' in norm(x) and 'pivot' not in norm(x)]
    raw.sort(key=lambda x: len(norm(x)))

    df = spread.sheet_records_to_df(empty2zero=False, header_rows=1,
                                    sheet=raw[0], default_blank=np.nan)

    # Check for expected column names (case-insensitive):
    df_cols = [c.lower() for c in df.columns]
    cols_to_check = [c.lower() for c in cols_to_check]
    missing_cols = [col for col in cols_to_check if col not in df_cols]
    if missing_cols:
        print(f"Project {project_name} (id: {project_id}) is missing " + ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    # Drop duplicated columns and normalize the remaining names
    df = df.loc[:, ~df.T.duplicated(keep='first')]
    df.columns = [c.replace(' ', '_').replace('.', '').lower() for c in df.columns]

    # Add additional fields for future lookup and update
    df['_meta_sheetID'] = project_id
    df['_meta_projectName'] = project_name
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        df[col] = pd.to_numeric(df[col].astype('str').str.replace(',', ''),
                                errors='coerce').fillna(0)
    df = df.infer_objects()
    return df

class Sheet:
    def __init__(self):
        self.spread = Spread(SPREADSHEET_NAME)
        self.ccs = self.spread.sheet_to_df(sheet=CCS_SHEET, index=0)
        self.zu = self.spread.sheet_to_df(sheet=ZOOM_USERS, index=0)
        # self.cs = self.spread.sheet_to_df(sheet=CALENDAR_SCHEDULE)
        self.sr = self.spread.sheet_to_df(sheet=STUDENT_ROSTER, index=0)
        # set index in ccs sheet with Unique ID column to get updated.
        # self.ccs = self.ccs.set_index(self.ccs['Unique ID'])
        # set index in sr sheet with ID_NUMBER
        # self.sr = self.sr.set_index(self.sr['COURSE', 'ADDRESS'])

    def update_calendar_schedule(self):
        for cn, cs, desc, loc, teacher, index in zip(
                self.ccs['Course Number'],
                self.ccs['Course Section'],
                self.ccs['Description'],
                self.ccs['Site'],
                self.ccs['Instructor 1'],
                self.ccs['Unique ID']):
            class_name = f"Q4-{cn}-{cs}-{desc}"
            location = loc[4:]
            quarter = '2020.Q4'
            slack_channel_name = f"Q4"
            host = ''  # from zoom users sheet
            notes = ''

    def update_sheet_from_df(self, df, sheet_name):
        logger.info(f'--- Update {sheet_name} Sheet from df')
        self.spread.df_to_sheet(df, index=False, sheet=sheet_name)

    def update_ccs_sheet_from_df(self, df):
        self.update_sheet_from_df(df, CCS_SHEET)

    # deprecated
    def init_sheet_with_service_key(self):
        self.credentials = service_account.Credentials.from_service_account_file(
            './creds/google_secret.json',
            scopes=SPREADSHEETS_SCOPES,
            subject=GSUITE_USER_EMAIL)
        self.sheet_service = build('sheets', 'v4',
                                   credentials=self.credentials,
                                   cache_discovery=False)
        self.sheet = self.sheet_service.spreadsheets()

    # deprecated
    def read_sheets(self, range):
        result = self.sheet.values().get(
            spreadsheetId=SPREADSHEET_ID, range=range).execute()
        values = result.get('values', [])
        return values

def job():
    print('Updating the clan DataFrame...')
    updatedDF = originalDF.append(other=pipeline())
    # updatedDF.to_csv('clan ' + orario)
    spread = Spread('*****@*****.**', 'clan')
    spread.df_to_sheet(updatedDF, index=False, sheet='clan', start='A1', replace=True)
    return updatedDF

def sync(self):
    logger.info(f"Synchronizing Workbook: {self.workbook_id}")
    client = Client(creds=self.credentials)
    spread = Spread(self.workbook_id, create_sheet=True, client=client)
    for sheet_name, data in self.survey.worksheets:
        logger.info(f"Updating Google Sheet: {sheet_name}")
        spread.df_to_sheet(data, index=False, sheet=sheet_name, replace=True)
        logger.success(f"Google Sheet updated: {sheet_name}")
    logger.success(f"Workbook synchronized: {self.workbook_id}")

def pull_data_sheet(self, config_auth):
    spread_dict = self.__conf__["sheet_url"]
    spread_obj = {}
    sheet_dict = {}
    for name in spread_dict:
        spread = Spread(spread=spread_dict[name], sheet="Sheet1", config=config_auth)
        data_frame = spread.sheet_to_df()
        sheet_dict[name] = data_frame.reset_index()
        spread_obj[name] = spread
    return sheet_dict, spread_obj

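# The method above expects `self.__conf__["sheet_url"]` to map logical names
# to spreadsheet URLs or keys. A hypothetical shape (placeholder values):
conf_example = {
    "sheet_url": {
        "sales": "https://docs.google.com/spreadsheets/d/<key>/edit",
        "inventory": "<spreadsheet-key>",
    }
}
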
class PandasGoogleSpreadsheetWrapper(PandasWrapperBase):
    def __init__(self, credentialsManager, spreadsheetId, dataFrame=None):
        super().__init__(dataFrame)
        self.__credentialsManager = credentialsManager
        self.__id = spreadsheetId
        self.__spreadsheet = Spread(
            spread=self.__id,
            creds=self.__credentialsManager.credentials)

    @property
    def credentialsManager(self):
        return self.__credentialsManager

    @property
    def spreadsheetId(self):
        return self.__id

    @property
    def spreadsheet(self):
        return self.__spreadsheet

    @property
    def active_sheet(self):
        return self.__spreadsheet.sheet

    def load(self, filePath):  # filePath = SheetName
        print("Load GoogleSpreadsheet: " + str(self.__id) + " [" + str(filePath) + "]")
        self.onLoadPreprocessing(self.df)
        df = self.__spreadsheet.sheet_to_df(index=False, sheet=filePath)
        self.setDataFrame(df)
        print("  Loaded Length: " + str(len(self.df.index)))
        self.onLoadPostprocessing(self.df)
        return self

    def save(self, filePath):  # filePath = SheetName
        print("Save GoogleSpreadsheet: " + str(self.__id) + " [" + str(filePath) + "]")
        self.onSavePreprocessing(self.df)
        self.__spreadsheet.open_sheet(filePath, create=True)
        self.__spreadsheet.df_to_sheet(
            df=self.df, index=False, headers=True, start='A1', replace=True)
        print("  Saved Length : " + str(len(self.df.index)))
        self.onSavePostprocessing(self.df)
        return self

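# Hedged round-trip sketch for PandasGoogleSpreadsheetWrapper above.
# `creds_manager` is assumed to expose a `.credentials` attribute holding
# google-auth credentials; the spreadsheet id is a placeholder.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
wrapper = PandasGoogleSpreadsheetWrapper(creds_manager, "<spreadsheet-id>", df)
wrapper.save("Sheet1")   # writes wrapper.df to worksheet "Sheet1"
wrapper.load("Sheet1")   # reads it back into wrapper.df
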
def __init__(self, spread, sheet=0, creds=None, create_sheet=False, conf_file=None):
    if creds:
        self.creds = creds
    else:
        credentials = ParseConfiguration(conf_file).get_google_creds()
        self.creds = get_creds(config=credentials)
    self.spread = Spread(spread, sheet=sheet, creds=self.creds, create_sheet=create_sheet)

def get_events_sheet(only_top=True):
    spreadsheet_user = get_config_field('GSHEETS', 'user')
    s = Spread(user=spreadsheet_user, spread='Release & PR Events', sheet='Events',
               create_spread=False, create_sheet=False)
    events = s.sheet_to_df()
    events.index = pd.to_datetime(events.index)
    events = events.reset_index().reset_index().set_index('date')
    events['top'] = events['top'] == 'TRUE'
    if only_top:
        events = events[events['top']]
    return events

def main():
    """Go Main Go."""
    config = util.get_config()
    drive = util.get_driveclient(config)
    res = drive.files().list(q="title contains 'Agronomic Data'").execute()
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        LOG.debug(item["title"])
        spread = Spread(item["id"], config=config["cscap"]["service_account"])
        for sheet in spread.sheets:
            df = spread.sheet_to_df(index=None, sheet=sheet)
            LOG.debug("%s %s", sheet.title, len(df.index))

def setGoogleSheetsData(CREDSPATH, SNDATA, SHEETID, SHEETTAB):
    # Connect to the Google Sheets file
    spread = Spread(
        'sn_datapusher', SHEETID, 0,
        conf.get_config(conf_dir=CREDSPATH, file_name='google_secret.json'))
    print('Spreadsheet loaded...')
    # Copy the dataframe to the Google sheet
    spread.df_to_sheet(
        SNDATA,
        index=False,
        sheet=SHEETTAB,
        start='A1',
        replace=True,
    )
    print('Spreadsheet updated!')

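# Hypothetical invocation of setGoogleSheetsData above; every value here is a
# placeholder, including the config directory layout.
import pandas as pd

sn_df = pd.DataFrame({"ticket": ["INC001"], "state": ["open"]})
setGoogleSheetsData(
    CREDSPATH="./creds/",          # directory holding google_secret.json
    SNDATA=sn_df,
    SHEETID="<spreadsheet-key>",
    SHEETTAB="Data",
)
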
def update_petrov():
    dfs = et.load_from_file(date_str='most_recent', coll_names=['users'])
    dfu = dfs['users']
    have_codes = pd.read_csv('/Users/rbloom/Downloads/petrov_day_list.csv')
    codes = pd.read_csv('/Users/rbloom/Downloads/codez_secret_no_use.csv', usecols=['codes'])

    projection = ['_id', 'username', 'displayName', 'karma', 'petrovPressedButtonDate',
                  'petrovCodesEntered', 'petrovCodesEnteredDate']
    users_raw = et.get_collection('users',
                                  query_filter={'petrovPressedButtonDate': {'$exists': True}},
                                  projection=projection,
                                  db=et.get_mongo_db_object())
    users = users_raw.merge(have_codes[['Username']], left_on='username', right_on='Username',
                            indicator=True, how='left')
    users = users.merge(
        dfu[['_id', 'num_days_present_last_30_days', 'createdAt', 'true_earliest']].fillna(0),
        on='_id', how='left')
    users['has_codes'] = (users['_merge'] == 'both')
    users['valid_code'] = users['petrovCodesEntered'].apply(lambda x: x in codes.values)
    # Shift timestamps back 7 hours (UTC to US Pacific, presumably)
    users.loc[:, ['petrovPressedButtonDate', 'petrovCodesEnteredDate']] = (
        users.loc[:, ['petrovPressedButtonDate', 'petrovCodesEnteredDate']]
        - pd.Timedelta('7 hours'))
    users['karma'] = users['karma'].fillna(0)
    users = users.sort_values('petrovPressedButtonDate', ascending=False)
    users = users[['displayName', 'karma', 'petrovPressedButtonDate', 'petrovCodesEntered',
                   'petrovCodesEnteredDate', 'has_codes', 'valid_code',
                   'num_days_present_last_30_days', 'true_earliest']]

    users_pressed = users.dropna(subset=['petrovPressedButtonDate'])
    users_pressed = users_pressed.sort_values('petrovPressedButtonDate', ascending=True)
    users_pressed = users_pressed.reset_index(drop=True).reset_index()
    users_pressed['index'] = users_pressed['index'] + 1
    users_pressed = users_pressed.sort_values('petrovPressedButtonDate', ascending=False)
    print('num users pressed button: {}'.format(users_pressed.shape[0]))

    users_pressed_and_entered = users.dropna(subset=['petrovPressedButtonDate', 'petrovCodesEntered'])
    print('num users pressed button and entered codes: {}'.format(users_pressed_and_entered.shape[0]))
    users_pressed_and_entered_has_codes = users_pressed_and_entered[users_pressed_and_entered['has_codes']]
    print('num users pressed button and entered codes who have codes: {}'.format(
        users_pressed_and_entered_has_codes.shape[0]))

    # plot_table(users_pressed, title='Users Who Pressed Button', online=True)
    # plot_table(users_pressed_and_entered, title='Users Who Pressed Button and Entered Codes', online=True)
    # plot_table(users_pressed_and_entered_has_codes, title='Users Who Pressed Button and Entered Some Codes Who Have True Codes', online=True)

    users_pressed['birth'] = pd.Timestamp.now()
    spreadsheet_name = get_config_field('GSHEETS', 'spreadsheet_name')
    s = Spread(get_config_field('GSHEETS', 'user'), spreadsheet_name,
               sheet='Trust & Doom', create_spread=True, create_sheet=True)
    s.df_to_sheet(users_pressed, replace=True, sheet='Trust & Doom', index=False)

def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_filter):
    spread = Spread(spread=spreadsheet)
    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')

    # Locate the RawData sheet, as in the variant above
    def norm(ws):
        return ws.__dict__['_properties']['title'].replace(' ', '').replace('.', '').lower()

    raw = [x for x in spread.sheets if 'rawdata' in norm(x) and 'pivot' not in norm(x)]
    raw.sort(key=lambda x: len(norm(x)))

    df = spread.sheet_to_df(sheet=raw[0], index=0)

    # Check for expected column names (case-insensitive):
    df_cols = [c.lower() for c in df.columns]
    cols_to_filter = [c.lower() for c in cols_to_filter]
    missing_cols = [col for col in cols_to_filter if col not in df_cols]
    if missing_cols:
        print(f"Project {project_name} (id: {project_id}) is missing " + ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    cols = list(set(cols_to_filter).intersection(df.columns))
    if len(cols) == 0:
        return pd.DataFrame(columns=cols_to_filter)
    _df = df.loc[:, cols].copy()
    _df = _df.loc[:, ~_df.T.duplicated(keep='first')]
    _df.columns = [c.replace(' ', '_').lower() for c in _df.columns]
    _df['date'] = pd.to_datetime(_df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        _df[col] = pd.to_numeric(_df[col].astype('str').str.replace(',', ''),
                                 errors='coerce').fillna(0)
    return _df

def import_library_sheet_validation_from_google():
    global validation_df
    spread = Spread(lab_spreadsheet_id)
    validation_df = spread.sheet_to_df(sheet='Validation', index=0, header_rows=1, start_row=1)
    hit = validation_df.iloc[0]
    logger.debug(f"First record of validation data: {hit}")
    for column_name in metadata_validation_column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info("Loaded library tracking sheet validation data.")

def on_epoch_end(self, epoch, logs=None):
    spread = Spread('*****@*****.**',
                    'https://docs.google.com/spreadsheets/d/1CksJQdyochF1M6RB4XfFgewjYGak38ixOhoAMySWTM8/edit')
    df_results = spread.sheet_to_df(sheet='Quantitative')
    df_results = df_results.reset_index()
    df_results = df_results.append(
        {'Dataset': self.dataset,
         'Machine': self.machine,
         'Training time': time() - self._start_time,
         'Epochs': epoch,
         'Loss': logs.get('loss'),
         'Accuracy': logs.get('acc'),
         'Validation loss': logs.get('val_loss'),
         'Validation accuracy': logs.get('val_acc'),
         'Weights file': self.file_weights.format(epoch=epoch, **logs),
         'Additional parameters': self.additional_parameters},
        ignore_index=True)
    spread.df_to_sheet(df_results, sheet='Quantitative', index=False, replace=True)

def get_data():
    """Query ads data from Postgres and publish category overlap counts to a Google Sheet."""
    conn = None
    scoped_credentials = ['https://spreadsheets.google.com/feeds',
                          'https://www.googleapis.com/auth/drive']
    credentials = service_account.Credentials.from_service_account_file(
        './service_account.json', scopes=scoped_credentials)
    gc = gspread.service_account(filename='./service_account.json')
    sh = gc.create("OLX_Ihor_Nikolenko_" + datetime.today().strftime('%Y-%m-%d'))
    sh.share('*****@*****.**', perm_type='user', role='writer')
    spread = Spread(spread="OLX_Ihor_Nikolenko_" + datetime.today().strftime('%Y-%m-%d'),
                    creds=credentials)
    df_overlap = pd.DataFrame(columns=('category_row', 'category_col', 'overlap'))
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute('WITH Counted_Ads AS ( '
                    'SELECT DISTINCT user_id, '
                    'category_name, '
                    'category_id, '
                    'COUNT(*) AS Ads '
                    'FROM ads '
                    'GROUP BY user_id, '
                    'category_id, category_name '
                    ') '
                    'SELECT *, RANK() OVER(PARTITION BY user_id ORDER BY Ads DESC) AS Rank_Category '
                    'FROM Counted_Ads')
        print("The number of parts: ", cur.rowcount)
        row = cur.fetchall()
        df = pd.DataFrame(row, columns=('user_id', 'category', 'category_id', 'Ads', 'Rank_Category'))
        # Count how many users each pair of categories shares
        attributs = set(df.category)
        for attribut1, attribut2 in product(attributs, attributs):
            df1 = df[df.category == attribut1]["user_id"]
            df2 = df[df.category == attribut2]["user_id"]
            intersection = len(set(df1).intersection(set(df2)))
            df_overlap.loc[len(df_overlap) + 1] = [attribut1, attribut2, intersection]
        pvt = df_overlap.pivot_table(index=['category_row'], columns=['category_col'],
                                     values='overlap', aggfunc='sum')
        spread.df_to_sheet(pvt, index=True, sheet='Test task', start='A1', replace=True)
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

def importTo_clean_sheet():
    from gspread_pandas import Spread, Client
    import gspread_pandas as gp
    import numpy as np
    import pandas as pd
    import re
    import importlib
    from datetime import datetime, timedelta
    # import cleaner
    # cleaner = importlib.reload(cleaner)
    # from cleaner import cleaning_and_to_sql

    pd.set_option('display.max_rows', 20)
    pd.set_option('display.max_columns', 50)

    s = Spread('work', 'Python Autoscrapers')
    df1 = s.sheet_to_df(index=0, start_row=1, header_rows=1, sheet="Macbooks-CPH").astype(str)

    # How many days has each listing (DBA ID) appeared in the scrape?
    dups_ids = df1.pivot_table(index=['DBA ID'], aggfunc='size')
    di = dups_ids.to_frame()
    di.columns = ['days active']
    di.reset_index(inplace=True)
    df1 = pd.merge(df1, di, on='DBA ID')
    df1 = df1.drop_duplicates(subset='DBA ID', keep='first')

    mask = df1["Date Scraped"] != " "
    df1 = df1[mask]
    df1['state'] = "Not sold yet"

    # A listing counts as sold once its last active day is in the past
    today = datetime.today()
    tod = datetime.strptime(today.strftime("%d/%m/%Y"), '%d/%m/%Y')
    for stdat, ndays, idx in zip(df1['Date Scraped'], df1['days active'], df1.index):
        do = datetime.strptime(stdat, '%d/%m/%Y') + timedelta(days=ndays - 1)
        if do < tod:
            df1.loc[idx, 'state'] = 'Sold'

    s = Spread('work', 'Python Autoscrapers')
    s.df_to_sheet(df1, index=False, sheet='Macbooks-CPH/Clean', start='A2', replace=True)

def import_library_sheet_from_google(year):
    global library_tracking_spreadsheet_df
    spread = Spread(lab_spreadsheet_id)
    # Worksheets are named by year (e.g. '2019')
    library_tracking_spreadsheet_df = spread.sheet_to_df(sheet=str(year), index=0,
                                                         header_rows=1, start_row=1)
    hit = library_tracking_spreadsheet_df.iloc[0]
    logger.debug(f"First record: {hit}")
    for column_name in column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(
        f"Loaded {len(library_tracking_spreadsheet_df.index)} records from library tracking sheet."
    )

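# Both library-sheet importers above depend on module-level globals defined
# elsewhere; a hypothetical setup (all names and values are assumptions, not
# the originals):
lab_spreadsheet_id = "<tracking-spreadsheet-key>"
column_names = ["LibraryID", "SampleID", "SampleName"]        # expected data columns
metadata_validation_column_names = ["Rule", "Status"]         # expected validation columns
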
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    topics = main_doc.sheet_to_df(sheet='topics', index=None)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)

    datapoint_docs = dict()  # we can reuse the spreadsheet objects to download multiple sheets
    downloaded = set()
    env = {'downloaded': downloaded, 'datapoint_docs': datapoint_docs}

    print('saving datapoints into etl/source/datapoints...')
    for _, row in datapoints.iterrows():
        process(row, env)

    datapoints.to_csv(osp.join(SOURCE_DIR, 'datapoints.csv'), index=False)
    topics.to_csv(osp.join(SOURCE_DIR, 'topics.csv'), index=False)
    concepts.to_csv(osp.join(SOURCE_DIR, 'concepts.csv'), index=False)

def pd_to_gsheet(self, df, spreadsheet_key, worksheet_name,
                 value_input_option='USER_ENTERED', clean=True, use_df2gsprad=True):
    """
    :param df: pandas DataFrame
    :param spreadsheet_key: id for Spreadsheet taken from URL
    :param worksheet_name: name as visible in the worksheet
    :param value_input_option: 'USER_ENTERED' to keep column types from the pandas DataFrame
    :param clean: whether to clear the worksheet before writing
    :param use_df2gsprad: write via df2gspread if True, via gspread_pandas otherwise
    :return:
    """
    if use_df2gsprad:
        if clean is True:
            self.delete_cells(spreadsheet_key, worksheet_name)
        return d2g.upload(df, spreadsheet_key, worksheet_name, credentials=self.creds,
                          row_names=False, value_input_option=value_input_option, clean=clean)
    else:
        if clean is True:
            self.delete_cells(spreadsheet_key, worksheet_name)
        x = Spread(spreadsheet_key, worksheet_name, creds=self.creds, create_sheet=True)
        return x.df_to_sheet(df, index=False, sheet=worksheet_name, replace=clean)

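# Hedged usage sketch for pd_to_gsheet above; `writer` stands in for an
# instance of the (unnamed) owning class, and the key/sheet are placeholders.
writer.pd_to_gsheet(
    df,
    spreadsheet_key="<spreadsheet-key>",
    worksheet_name="Results",
    clean=True,
    use_df2gsprad=False,   # write via gspread_pandas instead of df2gspread
)
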
def zd_api(request):
    config = conf.get_config("./gspread_pandas/")
    articles = []
    url = "https://collegetrack.zendesk.com//api/v2/help_center/categories/360000085163/articles.json"
    # not used here, but to add users the API URL is: /api/v2/users.json
    while url:
        response = requests.get(url, auth=(ZD_USERNAME, ZD_PASSWORD))
        if response.status_code != 200:
            print("Status:", response.status_code, "Problem with the request. Exiting.")
            exit()
        data = json.loads(response.text)
        for article in data["articles"]:
            articles.append([
                article["id"],
                article["title"],
                article["html_url"],
                article["updated_at"],
            ])
        url = data["next_page"]

    # Convert the date column into a usable form
    # to_write_updated = [datetime.date(parse(i[3])) for i in to_write]
    df = pd.DataFrame(articles)
    df.columns = ["ID", "Title", "URL", "Date"]
    df.Date = pd.to_datetime(df.Date).dt.date
    df = df.sort_values(by="Date", ascending=True)
    df.loc[:, "ID"] = df["ID"].astype("int").astype("str")
    # df = df.set_index("ID")
    # articles.sort(key=lambda item: item[3], reverse=False)
    # articles.sort(key=lambda item: item[3], reverse=True)

    spread = Spread(SPREADSHEET_ID, config=config)
    spread.open_sheet(0)
    existing_sheet = spread.sheet_to_df(sheet="Sheet1")
    existing_sheet_subset = existing_sheet[
        ["Internal / Staff Facing", "Person Responsible"]].reset_index()
    merged_df = df.merge(existing_sheet_subset, on="ID", how="left")
    spread.df_to_sheet(merged_df, index=False, sheet="Sheet1", start="A1", replace=False)

def move_dashboards():
    client = Client()
    # Get dashboard links:
    dashboard_links_sheet = Spread(
        spread='1geNkTULCutp7PgcqiuMKaNkH7Ynp1nGu3oX1NqoXHBA',
        client=client,
        sheet='Dashboard').sheet_to_df()
    sheet_ids = dashboard_links_sheet['Link dashboard'].str.extract(
        '/d/([^/]+)', expand=False).unique().tolist()
    for sheet_id in sheet_ids:
        try:
            client.move_file(sheet_id, '/New Dashboards')
        except Exception:
            print('Error with sheet id: ' + str(sheet_id))

def create_and_update_all_sheets(dfs, spreadsheet_name):
    dfu = dfs['users']
    dfp = dfs['posts']
    # dfv = dfs['votes']
    s = Spread(spread=spreadsheet_name, sheet=None, create_spread=True, create_sheet=True,
               user=get_config_field('GSHEETS', 'user'))
    _ = create_and_update_user_sheet(dfu, s)
    _ = create_and_update_posts_sheet(dfp, s)
    # _ = create_and_update_votes_sheet(dfv, s)  # never used
    return s