class Sheet:
    def __init__(self):
        self.spread = Spread(SPREADSHEET_NAME)
        self.ccs = self.spread.sheet_to_df(sheet=CCS_SHEET, index=0)
        self.zu = self.spread.sheet_to_df(sheet=ZOOM_USERS, index=0)
        # self.cs = self.spread.sheet_to_df(sheet=CALENDAR_SCHEDULE)
        self.sr = self.spread.sheet_to_df(sheet=STUDENT_ROSTER, index=0)
        # Set index in ccs sheet with Unique ID column to get updated.
        # self.ccs = self.ccs.set_index(self.ccs['Unique ID'])
        # Set index in sr sheet with ID_NUMBER.
        # self.sr = self.sr.set_index(self.sr['COURSE', 'ADDRESS'])

    def update_calendar_schedule(self):
        for cn, cs, desc, loc, teacher, index in zip(
                self.ccs['Course Number'],
                self.ccs['Course Section'],
                self.ccs['Description'],
                self.ccs['Site'],
                self.ccs['Instructor 1'],
                self.ccs['Unique ID']):
            class_name = f"Q4-{cn}-{cs}-{desc}"
            location = loc[4:]
            quarter = '2020.Q4'
            slack_channel_name = "Q4"
            host = ''  # from zoom users sheet
            notes = ''

    def update_sheet_from_df(self, df, sheet_name):
        logger.info(f'--- Update {sheet_name} Sheet from df')
        self.spread.df_to_sheet(df, index=False, sheet=sheet_name)

    def update_ccs_sheet_from_df(self, df):
        self.update_sheet_from_df(df, CCS_SHEET)

    # deprecated
    def init_sheet_with_service_key(self):
        self.credentials = service_account.Credentials.from_service_account_file(
            './creds/google_secret.json',
            scopes=SPREADSHEETS_SCOPES,
            subject=GSUITE_USER_EMAIL)
        self.sheet_service = build('sheets', 'v4',
                                   credentials=self.credentials,
                                   cache_discovery=False)
        self.sheet = self.sheet_service.spreadsheets()

    # deprecated
    def read_sheets(self, range):
        result = self.sheet.values().get(
            spreadsheetId=SPREADSHEET_ID, range=range).execute()
        values = result.get('values', [])
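# A minimal usage sketch for the Sheet wrapper above, assuming the module-level
# constants (SPREADSHEET_NAME, CCS_SHEET, ZOOM_USERS, STUDENT_ROSTER) and
# `logger` are configured as the class requires; the added column is purely
# illustrative.
def example_sheet_roundtrip():
    sheet = Sheet()
    ccs = sheet.ccs.copy()
    ccs['Notes'] = ''  # hypothetical edit before writing back
    sheet.update_ccs_sheet_from_df(ccs)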
def workflow(sheetid, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(sheetid, config=config["td"]["service_account"])
    df = spread.sheet_to_df(index=None)
    cols = [cleaner(c) for c in df.columns]
    coldefs = ",".join('"%s" varchar' % (s,) for s in cols)
    cursor.execute(f"CREATE TABLE {tablename} (ss_order int, {coldefs})")
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for i, row in enumerate(df.itertuples()):
        vals = list(row[1:])
        sql = "INSERT into %s (ss_order, %s) VALUES (%s, %s)" % (
            tablename,
            ",".join('"%s"' % (s,) for s in cols),
            i,
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, vals)
    cursor.close()
    pgconn.commit()
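# Hedged usage sketch for workflow() above, assuming `pgconn` is an open
# psycopg2 connection and `config`/`cleaner` are defined elsewhere in the
# module. Identifiers (table and column names) are interpolated into the SQL
# text, while row values are passed separately through cursor.execute(), which
# lets the driver parameterize them:
#
#   workflow("1AbcDef-hypothetical-sheet-id", "sheet_mirror")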
def sendMessagetoHost(self):
    source = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=763524256")
    self.driver.get("https://www.airbnb.com.au")
    sign_in = input("Please sign in!")
    data = source.sheet_to_df()
    hosts = data["url"].values
    count = 0
    for i in hosts:
        self.driver.get(i)
        time.sleep(25)
        soup = BeautifulSoup(self.driver.page_source, 'lxml')
        host_id = soup.findAll("a", {"class": "_16lonkd"})
        host_id = host_id[0]
        host_id = host_id["href"]
        host_id = re.findall(r'\d+', host_id)[0]
        test_message = input("Please enter message you would like to send: ")
        button = self.driver.find_element_by_link_text("Contact host")
        button.click()
        time.sleep(10)
        text_area = self.driver.find_elements_by_id("homes-contact-host--message-textarea")
        text_area[0].send_keys(test_message)
        button = self.driver.find_element_by_class_name("_72kmbi0")
        button.click()
        count += 1
        if count > 10:
            break
def do(spreadkey, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(spreadkey, config=config["cscap"]["service_account"])
    df = spread.sheet_to_df(index=None)
    sql = f"CREATE TABLE {tablename} ("
    for col in df.columns:
        sql += "%s varchar," % (cleankey(col),)
    sql = sql[:-1] + ")"
    cursor.execute(sql)
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for _, row in df.iterrows():
        cols = []
        values = []
        for key, val in row.items():
            cols.append(cleankey(key))
            values.append((val or "").strip())
        sql = "INSERT into %s (%s) VALUES (%s)" % (
            tablename,
            ",".join(cols),
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, values)
    cursor.close()
    pgconn.commit()
def importTo_clean_sheet():
    from datetime import datetime, timedelta

    import pandas as pd
    from gspread_pandas import Spread

    pd.set_option('display.max_rows', 20)
    pd.set_option('display.max_columns', 50)

    s = Spread('work', 'Python Autoscrapers')
    df1 = s.sheet_to_df(index=0, start_row=1, header_rows=1,
                        sheet="Macbooks-CPH").astype(str)

    # Count how many days each listing has been scraped.
    dups_ids = df1.pivot_table(index=['DBA ID'], aggfunc='size')
    di = dups_ids.to_frame()
    di.columns = ['days active']
    di.reset_index(inplace=True)
    df1 = pd.merge(df1, di, on='DBA ID')
    df1 = df1.drop_duplicates(subset='DBA ID', keep='first')
    df1 = df1[df1["Date Scraped"] != " "]
    df1['state'] = "Not sold yet"

    # A listing counts as sold once its last scrape date is in the past.
    today = datetime.today()  # reference date for the comparison below
    tod = datetime.strptime(today.strftime("%d/%m/%Y"), '%d/%m/%Y')
    for stdat, ndays, idx in zip(df1['Date Scraped'], df1['days active'], df1.index):
        do = datetime.strptime(stdat, '%d/%m/%Y')
        do = do + timedelta(days=ndays - 1)
        if do < tod:
            df1.loc[idx, 'state'] = 'Sold'  # .loc avoids chained assignment

    s = Spread('work', 'Python Autoscrapers')
    s.df_to_sheet(df1, index=False, sheet='Macbooks-CPH/Clean',
                  start='A2', replace=True)
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    # We can reuse the spreadsheet object to download multiple sheets.
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    topics = main_doc.sheet_to_df(sheet='topics', index=None)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)

    datapoint_docs = dict()
    downloaded = set()
    env = {
        'downloaded': downloaded,
        'datapoint_docs': datapoint_docs
    }

    print('saving datapoints into etl/source/datapoints...')
    for _, row in datapoints.iterrows():
        process(row, env)

    datapoints.to_csv(osp.join(SOURCE_DIR, 'datapoints.csv'), index=False)
    topics.to_csv(osp.join(SOURCE_DIR, 'topics.csv'), index=False)
    concepts.to_csv(osp.join(SOURCE_DIR, 'concepts.csv'), index=False)
def zd_api(request):
    config = conf.get_config("./gspread_pandas/")
    articles = []
    url = "https://collegetrack.zendesk.com/api/v2/help_center/categories/360000085163/articles.json"
    # Not used here, but if we wanted to add in users, the API URL is: /api/v2/users.json
    while url:
        response = requests.get(url, auth=(ZD_USERNAME, ZD_PASSWORD))
        if response.status_code != 200:
            print("Status:", response.status_code, "Problem with the request. Exiting.")
            exit()
        data = json.loads(response.text)
        for article in data["articles"]:
            articles.append([
                article["id"],
                article["title"],
                article["html_url"],
                article["updated_at"],
            ])
        url = data["next_page"]

    # Convert the date column into a usable form and sort oldest-first.
    df = pd.DataFrame(articles)
    df.columns = ["ID", "Title", "URL", "Date"]
    df.Date = pd.to_datetime(df.Date).dt.date
    df = df.sort_values(by="Date", ascending=True)
    df.loc[:, "ID"] = df["ID"].astype("int").astype("str")
    # df = df.set_index("ID")

    spread = Spread(SPREADSHEET_ID, config=config)
    spread.open_sheet(0)
    existing_sheet = spread.sheet_to_df(sheet="Sheet1")
    existing_sheet_subset = existing_sheet[
        ["Internal / Staff Facing", "Person Responsible"]
    ].reset_index()
    merged_df = df.merge(existing_sheet_subset, on="ID", how="left")
    spread.df_to_sheet(
        merged_df, index=False, sheet="Sheet1", start="A1", replace=False
    )
def pull_data_sheet(self, config_auth):
    spread_dict = self.__conf__["sheet_url"]
    spread_obj = {}
    sheet_dict = {}
    for j in spread_dict:
        spread = Spread(spread=spread_dict[j], sheet="Sheet1", config=config_auth)
        data_frame = spread.sheet_to_df()
        sheet_dict[j] = data_frame.reset_index()
        spread_obj[j] = spread
    return sheet_dict, spread_obj
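# Hedged usage sketch for pull_data_sheet() above, assuming `obj.__conf__["sheet_url"]`
# maps friendly names to spreadsheet URLs/IDs and that the service-account
# config lives where gspread_pandas.conf.get_config() can find it:
#
#   config_auth = conf.get_config('./creds', 'google_secret.json')
#   sheet_dict, spread_obj = obj.pull_data_sheet(config_auth)
#   print(sheet_dict['some_key'].head())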
class GoogleSheets(object):
    def __init__(self, file_name):
        """Authentication via the OAuth token from "My First Project", a
        project in the Google developer console owned by the
        [email protected] user."""
        self.file_name = file_name
        self.spread = Spread(('1096395197574-0lbfvsdbd5crh7hnlgmm4asll1u3f0g5'
                              '.apps.googleusercontent.com'),
                             self.file_name)

    def to_csv(self, sheet_name, output_file_path):
        df = self.spread.sheet_to_df(sheet=sheet_name, index=0)
        df.to_csv(output_file_path, index=False)

    def csv_to_sheet(self, csv_path, start_tuple, output_sheet_name):
        df = pd.read_csv(csv_path)
        self.spread.df_to_sheet(df, start=start_tuple,
                                sheet=output_sheet_name, index=False)

    def clear_sheet(self, sheet_name):
        # self.spread.open_sheet(sheet=sheet_name)
        self.spread.clear_sheet(sheet=sheet_name, rows=0, cols=0)

    def df_to_sheet(self, df, sheet_name):
        self.spread.df_to_sheet(df=df, sheet=sheet_name, index=False,
                                headers=True, replace=True,
                                freeze_headers=True)

    def sheet_to_df(self, sheet_name, index=1, header_rows=1, start_row=1):
        return self.spread.sheet_to_df(sheet=sheet_name, index=index,
                                       header_rows=header_rows,
                                       start_row=start_row)
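# Usage sketch for the GoogleSheets helper above; the workbook and sheet names
# here are hypothetical, and the OAuth client id baked into __init__ must
# resolve to valid credentials for the current user.
def example_google_sheets_usage():
    gs = GoogleSheets('My Workbook')
    df = gs.sheet_to_df('Sheet1', index=0)
    gs.df_to_sheet(df, 'Sheet1 Copy')  # mirror the data into a second sheet
    gs.to_csv('Sheet1 Copy', '/tmp/sheet1_copy.csv')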
class PandasGoogleSpreadsheetWrapper(PandasWrapperBase):
    def __init__(self, credentialsManager, spreadsheetId, dataFrame=None):
        super().__init__(dataFrame)
        self.__credentialsManager = credentialsManager
        self.__id = spreadsheetId
        self.__spreadsheet = Spread(
            spread=self.__id,
            creds=self.__credentialsManager.credentials)

    @property
    def credentialsManager(self):
        return self.__credentialsManager

    @property
    def spreadsheetId(self):
        return self.__id

    @property
    def spreadsheet(self):
        return self.__spreadsheet

    @property
    def active_sheet(self):
        return self.__spreadsheet.sheet

    def load(self, filePath):  # filePath = SheetName
        print("Load GoogleSpreadsheet: " + str(self.__id) + " [" + str(filePath) + "]")
        self.onLoadPreprocessing(self.df)
        df = self.__spreadsheet.sheet_to_df(index=False, sheet=filePath)
        self.setDataFrame(df)
        print("  Loaded Length: " + str(len(self.df.index)))
        self.onLoadPostprocessing(self.df)
        return self

    def save(self, filePath):  # filePath = SheetName
        print("Save GoogleSpreadsheet: " + str(self.__id) + " [" + str(filePath) + "]")
        self.onSavePreprocessing(self.df)
        self.__spreadsheet.open_sheet(filePath, create=True)
        self.__spreadsheet.df_to_sheet(df=self.df, index=False, headers=True,
                                       start='A1', replace=True)
        print("  Saved Length : " + str(len(self.df.index)))
        self.onSavePostprocessing(self.df)
        return self
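# Round-trip sketch for PandasGoogleSpreadsheetWrapper, assuming a credentials
# manager exposing a `credentials` attribute (as the constructor expects); the
# spreadsheet id and sheet names are hypothetical.
def example_wrapper_roundtrip(creds_manager):
    wrapper = PandasGoogleSpreadsheetWrapper(creds_manager, '1AbcHypotheticalId')
    wrapper.load('Input')    # filePath doubles as the sheet name
    wrapper.save('Output')   # writes the same frame back to another sheet
    return wrapper.df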
def get_events_sheet(only_top=True):
    spreadsheet_user = get_config_field('GSHEETS', 'user')
    s = Spread(user=spreadsheet_user, spread='Release & PR Events',
               sheet='Events', create_spread=False, create_sheet=False)
    events = s.sheet_to_df()
    events.index = pd.to_datetime(events.index)
    events = events.reset_index().reset_index().set_index('date')
    events['top'] = events['top'] == 'TRUE'
    if only_top:
        events = events[events['top']]
    return events
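# Usage sketch for get_events_sheet(), assuming the GSHEETS user is present in
# the config; the frame comes back indexed by date, so date slicing works
# directly.
def example_events_since(start='2019-01-01'):
    events = get_events_sheet(only_top=True)
    return events.loc[start:]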
def main(): """Go Main Go.""" config = util.get_config() drive = util.get_driveclient(config) res = drive.files().list(q="title contains 'Agronomic Data'").execute() for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue LOG.debug(item["title"]) spread = Spread(item["id"], config=config["cscap"]["service_account"]) for sheet in spread.sheets: df = spread.sheet_to_df(index=None, sheet=sheet) LOG.debug("%s %s", sheet.title, len(df.index))
def get_sheet(SHEET, CSV_SPREADSHEET='Asistensi PBO Jumat 2019'):
    spread = Spread(CSV_SPREADSHEET)
    spread.open_sheet(SHEET)
    df = spread.sheet_to_df()
    df['Nilai'] = pd.to_numeric(df['Nilai'], errors='coerce')
    links = []
    for link in tqdm(df.REPO, desc='issue'):
        try:
            links.append(get_issues(link))
        except Exception:
            links.append(None)
    print(links)
    df['Issues'] = links
    spread.update_cells('H2', 'H28', links)
    df.drop(['Catatan', 'REPO'], axis=1, inplace=True)
    return df
def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_filter):
    spread = Spread(spread=spreadsheet)
    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')
    sheets = spread.sheets

    # Find "raw data" sheets by normalized title, excluding pivot sheets,
    # then prefer the sheet with the shortest normalized title.
    def norm(ws):
        return ws.__dict__['_properties']['title'].replace(' ', '').replace('.', '').lower()

    raw = [x for x in sheets if 'rawdata' in norm(x) and 'pivot' not in norm(x)]
    raw.sort(key=lambda x: len(norm(x)), reverse=False)

    df = spread.sheet_to_df(sheet=raw[0], index=0)

    # Check for column names:
    df_cols = list(map(lambda x: x.lower(), list(df.columns)))
    cols_to_filter = list(map(lambda x: x.lower(), cols_to_filter))
    missing_cols = []
    for col in cols_to_filter:
        if col not in df_cols:
            missing_cols.append(col)
    if len(missing_cols) > 0:
        print(f"Project {project_name} (id: {project_id}) is missing " +
              ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    cols = list(set(cols_to_filter).intersection(df.columns))
    lower_cols = [x.replace(' ', '_').lower() for x in cols]
    if len(cols) == 0:
        return pd.DataFrame(columns=cols_to_filter)

    _df = df.loc[:, cols].copy()
    _df = _df.loc[:, ~_df.T.duplicated(keep='first')]
    _df.columns = map(lambda x: x.replace(' ', '_').lower(), _df.columns)
    _df['date'] = pd.to_datetime(_df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        _df[col] = pd.to_numeric(_df[col].astype('str').str.replace(',', ''),
                                 errors='coerce').fillna(0)
    return _df
def import_library_sheet_validation_from_google():
    global validation_df
    spread = Spread(lab_spreadsheet_id)
    validation_df = spread.sheet_to_df(sheet='Validation', index=0,
                                       header_rows=1, start_row=1)
    hit = validation_df.iloc[0]
    logger.debug(f"First record of validation data: {hit}")
    for column_name in metadata_validation_column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info("Loaded library tracking sheet validation data.")
def on_epoch_end(self, epoch, logs=None):
    spread = Spread('*****@*****.**',
                    'https://docs.google.com/spreadsheets/d/1CksJQdyochF1M6RB4XfFgewjYGak38ixOhoAMySWTM8/edit')
    df_results = spread.sheet_to_df(sheet='Quantitative')
    df_results = df_results.reset_index()
    df_results = df_results.append(
        {'Dataset': self.dataset,
         'Machine': self.machine,
         'Training time': time() - self._start_time,
         'Epochs': epoch,
         'Loss': logs.get('loss'),
         'Accuracy': logs.get('acc'),
         'Validation loss': logs.get('val_loss'),
         'Validation accuracy': logs.get('val_acc'),
         'Weights file': self.file_weights.format(epoch=epoch, **logs),
         'Additional parameters': self.additional_parameters},
        ignore_index=True)
    spread.df_to_sheet(df_results, sheet='Quantitative', index=False, replace=True)
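# The method above belongs to a Keras callback whose class definition is not
# shown; a hedged sketch of how such a callback would be attached, assuming a
# hypothetical ResultsLogger class wrapping it:
#
#   model.fit(x_train, y_train,
#             validation_data=(x_val, y_val),
#             epochs=10,
#             callbacks=[ResultsLogger()])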
def import_library_sheet_from_google(year):
    global library_tracking_spreadsheet_df
    spread = Spread(lab_spreadsheet_id)
    # Note: the sheet name is hardcoded; the `year` argument is not used here.
    library_tracking_spreadsheet_df = spread.sheet_to_df(sheet='2019', index=0,
                                                         header_rows=1, start_row=1)
    hit = library_tracking_spreadsheet_df.iloc[0]
    logger.debug(f"First record: {hit}")
    for column_name in column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(
        f"Loaded {len(library_tracking_spreadsheet_df.index)} records from library tracking sheet."
    )
def read_sheet(doc: Spread, sheet_name):
    df = doc.sheet_to_df(sheet=sheet_name, index=None)
    df = df.dropna(how='all')  # remove empty rows
    # Detect errors in the sheet.
    if df.empty:
        raise EmptySheet(f"{sheet_name} is empty")
    elif df.shape[0] == 1 and df.iloc[0, 0] in ['#N/A', '#VALUE!', 0]:
        raise EmptyColumn(f"{sheet_name} contains all NA values")
    elif len(df['geo'].unique()) == 1 and 'world' not in df['geo'].values:
        raise EmptyColumn(f"{sheet_name}, geo column contains NA values")
    else:
        for c in df.columns:
            if df[c].hasnans or '[Invalid]' in df[c].values:
                raise EmptyCell(f'{sheet_name}, column {c} has NA values')
    print(df.head())
    return df
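# Usage sketch for read_sheet() above, assuming EmptySheet, EmptyColumn and
# EmptyCell are the module's own exception types; the doc id is hypothetical.
def example_read_all(sheet_names):
    doc = Spread(spread='1AbcHypotheticalDocId')
    frames = {}
    for name in sheet_names:
        try:
            frames[name] = read_sheet(doc, name)
        except (EmptySheet, EmptyColumn, EmptyCell) as err:
            print(f'skipping {name}: {err}')
    return frames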
django.setup()

import posts.models
from posts.models import Post, Taxonomy

# Reverse lookups: map display values back to their model keys.
AERA = dict((v, k) for (k, v) in posts.models.AREA)
AERABUS = dict((v, k) for (k, v) in posts.models.SUBAREA)
TNENOPMOC = dict((v, k) for (k, v) in posts.models.COMPONENTS)

import gspread_pandas
from gspread_pandas import Spread

s = Spread(
    "https://docs.google.com/spreadsheets/d/1EfS2W11kRg2C1H6KD2vVjR_Htvy6j7C4SSGHPA8Bvpc/edit?ts=5d22a555",
    sheet=0)
df = s.sheet_to_df()


def process_row(pid, row):
    try:
        post = Post.objects.get(id=pid)
        a = AERA[row['ÁREA'].strip()]
        sub = AERABUS[row['SUB-ÁREA'].strip()]
        c = TNENOPMOC[row['COMPONENTE'].strip()]
        tax = Taxonomy(post=post, area=a, subarea=sub, component=c)
        tax.save()
        return tax
    except Exception as e:
        print("============")
        print(e)
        print("ERROR {} - {}".format(pid, row))
from __future__ import print_function

import pprint

import pandas as pd
from gspread_pandas import Spread, Client, conf

conf.get_config("auth_secret/google_secret.json")

# Print nicely
pp = pprint.PrettyPrinter()

# Find a workbook by name and open the first sheet.
# Make sure you use the right name here.
id_sheetname = "20SH07 Food ID Map"
spread = Spread(id_sheetname)
id_df = spread.sheet_to_df()

column_map = {
    "ID": 1,
    "Exact_Name": 2,
    "Recess": 3,
    "Lunch": 4,
    "Price": 5,
    "Name": 6,
    "MON/TUE/WED/TURS/FRI": 7,
    "Stall": 8
}

stall_map = {
    "Stall 1 Fried Hokkien Mee": 1,
    "Stall 2 Chicken Rice": 2,
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    tags = main_doc.sheet_to_df(sheet='topics', index=None)

    # Construct a dictionary: keys are docids, values are dictionaries whose
    # keys are sheet names and whose values are the csv links for the
    # docid/sheet name pair.
    csv_link_dict = get_csv_link_dict(datapoints.csv_link.values)

    # Create a dictionary with the same layout as `csv_link_dict`, but with
    # dataframes as values instead of links.
    csv_dict = csv_link_dict.copy()
    for docid, di in csv_link_dict.items():
        print(f"Downloading sheets from file: {docid}")
        csv_dict[docid] = dict()
        doc = Spread(spread=docid)
        for sheet_name, link in di.items():
            print(f"sheet: {sheet_name}")
            df = doc.sheet_to_df(sheet=sheet_name, index=None)
            if df.empty:
                print(f"WARNING: empty dataframe: doc: {docid}, sheet_name: {sheet_name}")
            csv_dict[docid][sheet_name] = df

    print('creating ddf datasets...')
    # entities
    entities_columns = set()  # record the columns; used to create the concept table later
    for e in ['country', 'global', 'world_4region', 'g77_and_oecd_countries',
              'income_groups', 'landlocked', 'main_religion_2008',
              'world_6region', 'unicef_region', 'income_3groups',
              'un_sdg_ldc', 'un_sdg_region']:
        edf = pd.read_csv(
            f'../source/ddf--open_numbers/ddf--entities--geo--{e}.csv',
            na_filter=False, dtype=str)
        edf.to_csv(f'../../ddf--entities--geo--{e}.csv', index=False, encoding='utf8')
        for c in edf.columns:
            entities_columns.add(c)

    # tags entities
    tags = tags.rename(columns={
        'topic_id': 'tag',
        'topic_name': 'name',
        'parent_topic': 'parent'
    })
    tags.to_csv('../../ddf--entities--tag.csv', index=False, encoding='utf8')
    for c in tags.columns:
        entities_columns.add(c)

    # concepts
    cdf = serve_concepts(concepts, entities_columns)

    # datapoints
    serve_datapoints(datapoints, cdf, csv_dict)
def main():
    url = 'https://www.flashscore.com/football/england/premier-league/results/'
    lib = 'lxml'
    attr = 'sportName soccer'
    path = '/var/www/html/diag/pypp.png'
    view_source = gel().run_until_complete(pypp(url, lib, attr, path, True))
    rows = view_source.select('.event__match')
    results = []
    # Debug: print the children of the first row element.
    for row in rows[0]:
        print(row)
    for row in rows:
        try:
            unique_id = str(row['id']).replace('g_1_', '')
            datetime = row.select_one('.event__time').get_text()
            timestamp = isodate(str(datetime).split('.'))
            home_team = row.select_one('.event__participant--home').get_text()
            away_team = row.select_one('.event__participant--away').get_text()
            home_goal = strip_html(row.select('span')[0])
            away_goal = strip_html(row.select('span')[1])
        except Exception:
            print("Exception")
        else:
            results.append([unique_id, timestamp, home_team, away_team,
                            home_goal, away_goal])
    print(results[0])

    file_dir = ''
    file_json = ''
    scope = ['https://spreadsheets.google.com/feeds',
             'https://www.googleapis.com/auth/drive']
    config = conf.get_config(conf_dir=file_dir, file_name=file_json)
    worksheet = Spread('Profit & Loss', sheet='Football', config=config)
    df_worksheet = worksheet.sheet_to_df(index=0, header_rows=2)
    df_worksheet['Home'] = df_worksheet['Home'].apply(team_transform)
    df_worksheet['Away'] = df_worksheet['Away'].apply(team_transform)
    for result in results:
        df_worksheet.loc[(df_worksheet.Home == result[2])
                         & (df_worksheet.Away == result[3])
                         & (df_worksheet.Date_Time == result[1]),
                         ['HG', 'AG']] = result[4], result[5]
    print(df_worksheet)
    worksheet.df_to_sheet(df_worksheet, index=False, headers=False,
                          sheet='Football', start='A3', replace=False,
                          raw_column_names=['Date_Time'])
    return
    if i['City'] in cities_asignadas:
        val = 'Si'
    else:
        val = 'No'
    return val

# Credentials
cred = conf.get_config('C:\\Users\\micaela.fuchs\\Anaconda',
                       'PedidosYa-6e661fd93faf.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Roster of assigned cities ("Ciudades Asignadas")
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Ciudades Asignadas'
sheet = Spread(sheet_id, wks_name, config=cred)
cities_asignadas = sheet.sheet_to_df(index=0, header_rows=1)

# Build the list of assigned cities
cities_asignadas = cities_asignadas[cities_asignadas.columns[0]].to_list()
load_dotenv()
config = conf.get_config("./gspread_pandas/")

hs_spread = Spread("1u7fLc0Dlg1zhWfn8xVgedmYyedr0Z1D1NfZ6uSov5tc", config=config)
hs_spread_spanish = Spread(
    "1jvY7a9yncWMI44uFf8HMypHuKhoHNkOEQ6Fr9Rqza94", config=config
)

student_List = SF_SOQL("fy21_hs_survey_student_list", soql.student_list_query)
student_List.load_from_sf_soql(sf)
student_List.shorten_site_names("A_SITE__c")

english_df = hs_spread.sheet_to_df().reset_index()
english_df = english_df[
    [
        "Student: Contact Id",
        "If/When College Track is able to have students physically return to the site, are you interested in doing so?",
        "I will feel comfortable returning to the site for in-person programming if/when (check all that apply): ",
        "Other",
    ]
]

merged_df = english_df.merge(
    student_List.df, left_on="Student: Contact Id", right_on="C_Id", how="left"
)
credentials = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
    with open('token.pickle', 'rb') as token:
        credentials = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not credentials or not credentials.valid:
    if credentials and credentials.expired and credentials.refresh_token:
        credentials.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        credentials = flow.run_local_server(port=0)
    # Save the credentials for the next run.
    with open('token.pickle', 'wb') as token:
        pickle.dump(credentials, token)

spread = Spread(spread=spread_name, sheet=sheet_name, scope=SCOPES, creds=credentials)

# Reading the spreadsheet into a pandas DataFrame
df = spread.sheet_to_df(index=0)
print(isinstance(df, pd.DataFrame))

# Writing the pandas DataFrame back into the Google spreadsheet
spread.df_to_sheet(df, index=False, sheet=sheet_name, start='A1', replace=True)
def ingest_student_data_google_sheet(sheet_id, pull_date):
    logger.info('Ingesting data from sheet with pull date {} and ID {}'.format(
        pull_date, sheet_id))
    spread = Spread(sheet_id)
    df = spread.sheet_to_df(index=None)
    df['school_id_tc'] = pd.to_numeric(df['school_id']).astype('Int64')
    df['child_raw_dict'] = df['child_raw'].apply(lambda x: json.loads(x))
    df['student_id_tc'] = pd.to_numeric(
        df['child_raw_dict'].apply(lambda x: int(x.get('id')))).astype('Int64')
    df['pull_datetime'] = pd.to_datetime(pull_date, utc=True)
    df['student_first_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('first_name')).astype('string')
    df['student_middle_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('middle_name')).astype('string')
    df['student_last_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('last_name')).astype('string')
    df['student_birth_date_tc'] = df['child_raw_dict'].apply(
        lambda x: to_date(x.get('birth_date')))
    df['student_gender_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('gender')).astype('string')
    df['student_ethnicity_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('ethnicity'))
    df['student_dominant_language_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('dominant_language')).astype('string')
    df['student_household_income_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('household_income')).astype('string')
    df['student_grade_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('grade')).astype('string')
    df['student_classroom_ids_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('classroom_ids'))
    df['student_program_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('program')).astype('string')
    df['student_hours_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('hours_string')).astype('string')
    df['student_id_alt_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('student_id')).astype('string')
    df['student_allergies_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('allergies')).astype('string')
    df['student_parent_ids_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('parent_ids'))
    df['student_approved_adults_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('approved_adults_string')).astype('string')
    df['student_emergency_contacts_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('emergency_contacts_string')).astype('string')
    df['student_notes_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('notes')).astype('string')
    df['student_last_day_tc'] = df['child_raw_dict'].apply(
        lambda x: to_date(x.get('last_day')))
    df['student_exit_reason_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('exit_reason')).astype('string')
    df['student_exit_survey_id_tc'] = pd.to_numeric(df['child_raw_dict'].apply(
        lambda x: x.get('exit_survey_id'))).astype('Int64')
    df['student_exit_notes_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('exit_notes')).astype('string')
    df = df.reindex(columns=[
        'school_id_tc',
        'student_id_tc',
        'pull_datetime',
        'student_first_name_tc',
        'student_middle_name_tc',
        'student_last_name_tc',
        'student_birth_date_tc',
        'student_gender_tc',
        'student_ethnicity_tc',
        'student_dominant_language_tc',
        'student_household_income_tc',
        'student_grade_tc',
        'student_classroom_ids_tc',
        'student_program_tc',
        'student_hours_string_tc',
        'student_id_alt_tc',
        'student_allergies_tc',
        'student_parent_ids_tc',
        'student_approved_adults_string_tc',
        'student_emergency_contacts_string_tc',
        'student_notes_tc',
        'student_last_day_tc',
        'student_exit_reason_tc',
        'student_exit_survey_id_tc',
        'student_exit_notes_tc'
    ])
    if df.duplicated(['school_id_tc', 'student_id_tc']).any():
        raise ValueError(
            'Ingested data contains duplicate Transparent Classroom school ID/student ID combinations'
        )
    return df
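# Usage sketch for ingest_student_data_google_sheet(), with a hypothetical
# sheet id; pull_date is parsed with pd.to_datetime inside the function, so
# any parseable date string works:
#
#   students = ingest_student_data_google_sheet(
#       '1AbcHypotheticalSheetId', '2020-09-01')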
def handler(incoming):
    # Get the list of projects.
    main_spread = Spread('1wZDpHfIqKBEhmS_F485kFKmrBUAbrfMqu2HW4NOY6BE')
    project_list = main_spread.sheet_to_df(index=0)

    # Create the SQLAlchemy engine and open the database connection.
    engine = create_engine("mysql+pymysql://{user}:{pw}@{host}/{db}".format(
        user=user, pw=password, host=host, db=dbname))
    dbConnection = engine.connect()

    # Loop through all the projects.
    for index, row in project_list.iterrows():
        project = row['dialogflow_project_id']
        spreadsheet_id = row['google_spreadsheet_id']

        # Get the project data we need.
        df = pd.read_sql(
            f"SELECT `identifier`, `item_key`, `item_value` FROM `data_pieces` WHERE `project` = '{project}'",
            dbConnection)
        columns_df = pd.read_sql(
            f"SELECT * FROM `column_tracker` WHERE `project` = '{project}'",
            dbConnection)
        text_df = pd.read_sql(
            f"SELECT * FROM `text_log` WHERE `project` = '{project}'",
            dbConnection)
        first_contact_df = pd.read_sql(
            f"SELECT * FROM `first_contact` WHERE `project` = '{project}'",
            dbConnection)

        # Pivot all the data pieces so columns run along the top and
        # identifiers form the rows, then flatten the column headers.
        pivoted = df.pivot(index='identifier', columns='item_key', values=['item_value'])
        pivoted.columns = pivoted.columns.get_level_values(1)
        pivoted.reset_index(inplace=True)

        # Sort the columns by the order they were created and reorder the
        # columns of the pivoted table.
        columns_df.sort_values(by=['created_at'], inplace=True)
        ordered_columns = columns_df['col'].tolist()
        ordered_columns.insert(0, "identifier")
        pivoted_ordered = pivoted[ordered_columns]

        # On the text_df (or log) table, group by identifier and concatenate
        # the items in each group, then add the "raw_text" column to the
        # table so far.
        text_concat = text_df.groupby(['identifier'])['raw_text'].apply(
            ' | '.join).reset_index()
        merge1 = pd.merge(pivoted_ordered, text_concat, on="identifier")

        # Sort the list of first contacts and merge into the final table.
        first_contact_df.sort_values(by=['created_at'], inplace=True)
        final_table = pd.merge(first_contact_df, merge1, on="identifier")

        # Drop the project columns from the final table for the spreadsheet.
        final_table.drop(columns=['proj_ident_hash', 'project'], inplace=True)

        # Open the project spreadsheet and update the first sheet.
        spread = Spread(spreadsheet_id)
        spread.df_to_sheet(final_table, index=False, sheet='Sheet1',
                           start='A1', replace=True)
        print(f"Spreadsheet for {row['friendly_name']} updated.")

    # Close the database.
    dbConnection.close()
    print("All updates complete.")
    return True
def the_work(creds_path, backup_folder, idph_csv_folder, geo_folder,
             chrome_options, chrome_path='chromedriver', **kwargs):
    # Dates
    the_date = datetime.now(timezone('US/Central')).strftime("%m-%d")
    the_date_year_yday = datetime.strftime(
        datetime.now(timezone('US/Central')) - timedelta(1), '%m-%d-%y')
    the_date_year = datetime.now(timezone('US/Central')).strftime("%m-%d-%y")
    the_date_YEAR = datetime.now(timezone('US/Central')).strftime("%m-%d-%Y")
    the_date_n_time = datetime.now(timezone('US/Central')).strftime("%m-%d-%H%M")
    the_time = datetime.now(timezone('US/Central')).strftime("%H:%M")
    print("Starting scraper...{} {}".format(the_date_year, the_time))

    credentials = ServiceAccountCredentials.from_json_keyfile_name(creds_path)
    # If you copy this, make sure the file you are opening is accessible to
    # your service account, i.e. give sharing/edit access to ExProc
    # ([email protected]).
    totals_spread, zip_spread, county_spread = None, None, None
    while not (zip_spread and county_spread and totals_spread):
        try:
            if not zip_spread:
                zip_spread = Spread(spread=gsheet_zip_link, creds=credentials)
            if not county_spread:
                county_spread = Spread(spread=gsheet_county_link, creds=credentials)
            if not totals_spread:
                totals_spread = Spread(spread=gsheet_totals_link, creds=credentials)
        except gspread.exceptions.APIError as e:
            print(e)
            print("gspread API Error occurred")
            time.sleep(5)
            continue
        except requests.exceptions.ReadTimeout:
            print("requests ReadTimeout occurred")
            time.sleep(5)
            continue

    # Chrome options are passed into the function.
    driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
    driver.implicitly_wait(300)

    # %% Webdriver work - first for zips
    driver.get(idph_link)
    time.sleep(10)

    # Collecting totals
    daily_totals = {
        "tests_pos": int(driver.find_element_by_id("covid19positive").text.replace(",", "")),
        "deaths": int(driver.find_element_by_id("covid19deaths").text.replace(",", "")),
        "total_tests": int(driver.find_element_by_id("covid19totaltest").text.replace(",", ""))
        # "recovered": int(driver.find_element_by_id("covid19recovered").text.replace(",", ""))
    }

    by_county_button = driver.find_element_by_link_text("By County")
    by_zip_button = driver.find_element_by_link_text("By Zip")
    by_zip_button.click()
    time.sleep(2)
    all_zip_data_button = driver.find_element_by_link_text("All")
    all_zip_data_button.click()
    time.sleep(5)  # Excessive, I know
    zip_soup = bs4.BeautifulSoup(driver.page_source, 'lxml')
    zip_table_soup = zip_soup.find("table", class_="padded-table", id="detailedData")

    by_county_button.click()
    time.sleep(2)
    all_county_data_button = driver.find_element_by_link_text("All")
    all_county_data_button.click()
    time.sleep(5)  # Excessive, I know
    county_soup = bs4.BeautifulSoup(driver.page_source, 'lxml')
    county_table_soup = county_soup.find("table", class_="padded-table", id="detailedData")

    # Todo: Make sure to back this up as well
    with open(backup_folder / ("backup_soup_zip_%s.txt" % the_date_n_time), "w") as backup_zip_soup:
        backup_zip_soup.writelines(str(zip_soup.encode("utf-8")))
    with open(backup_folder / ("backup_soup_county_%s.txt" % the_date_n_time), "w") as backup_county_soup:
        backup_county_soup.writelines(str(county_soup.encode("utf-8")))

    # %%
    zip_table_headers = zip_table_soup.find_all("th")
    zip_table_headers_arr = [x.get_text() for x in zip_table_headers]
    assert zip_table_headers_arr == ['Zip', 'Tested', 'Positive Cases', 'Deaths']
    county_table_headers = county_table_soup.find_all("th")
    county_table_headers_arr = [x.get_text() for x in county_table_headers]
    assert county_table_headers_arr == ['County', 'Tested', 'Positive Cases', 'Deaths']

    # print("Zip first:")
    # print(zip_table_soup.find_all("tr")[1:])
    # print()
    # print("County second:")
    # print(county_table_soup.find_all("tr")[1:])

    df_zip_today = pd.DataFrame(columns=zip_table_headers_arr)
    for tr in zip_table_soup.find_all('tr')[1:]:
        tds = tr.find_all('td')
        df_zip_today = df_zip_today.append(
            {'Zip': tds[0].text, 'Tested': tds[1].text,
             'Positive Cases': tds[2].text, 'Deaths': tds[3].text},
            ignore_index=True)

    df_county_today = pd.DataFrame(columns=county_table_headers_arr)
    for tr in county_table_soup.find_all('tr')[1:]:
        tds = tr.find_all('td')
        df_county_today = df_county_today.append(
            {'County': tds[0].text, 'Tested': tds[1].text,
             'Positive Cases': tds[2].text, 'Deaths': tds[3].text},
            ignore_index=True)

    df_zip_today.set_index("Zip", inplace=True)
    df_county_today.set_index("County", inplace=True)

    # %% Totals -- assertions, comparisons, and long-form production
    df_totals_oldlong = totals_spread.sheet_to_df(index=0)
    assert daily_totals['tests_pos'] >= int(df_totals_oldlong.iloc[0]['tests_pos'])  # Was the number back on 4/18/2020
    assert daily_totals['deaths'] >= int(df_totals_oldlong.iloc[0]['deaths'])
    assert daily_totals['total_tests'] >= int(df_totals_oldlong.iloc[0]['total_tests'])
    # assert daily_totals['recovered'] >= int(df_totals_oldlong.iloc[0]['recovered'])
    print("Daily Totals Assertions passed.")

    totals_changed = False
    if (daily_totals['tests_pos'] != int(df_totals_oldlong.iloc[0]['tests_pos'])) or \
            (daily_totals['deaths'] != int(df_totals_oldlong.iloc[0]['deaths'])) or \
            (daily_totals['total_tests'] != int(df_totals_oldlong.iloc[0]['total_tests'])):
        # or (daily_totals['recovered'] != int(df_totals_oldlong.iloc[0]['recovered'])):
        totals_changed = True
        print("Totals were detected as changed.")

    if totals_changed:
        daily_totals['update_date'] = the_date_YEAR
        daily_totals['update_time'] = the_time
        daily_totals['tests_neg'] = daily_totals['total_tests'] - daily_totals['tests_pos']
        daily_totals['new_tests'] = daily_totals['total_tests'] - int(df_totals_oldlong.iloc[0]['total_tests'])
        df_totals_today = pd.DataFrame(daily_totals, index=[0])
        df_totals_newlong = df_totals_today.append(df_totals_oldlong)[
            ["update_date", "tests_pos", "deaths", "total_tests",  # "recovered",
             "tests_neg", "new_tests"]].reset_index(drop=True)
        df_totals_newlong.to_csv(idph_csv_folder / ("Long Totals %s.csv" % the_date_n_time), index=False)
        totals_spread.df_to_sheet(df_totals_newlong, index=False, sheet="IL_long",
                                  start="A1", replace=True)

    # %% Assertions Zip - Does the table look as expected?
    assert df_zip_today.index[0] <= "60002"  # First zip code in Illinois
    assert int(df_zip_today.index[-1]) >= 62959  # This was the last zip code reported on 4/12/2020
    assert df_zip_today.shape[0] >= 353  # This was the size of the table on 4/12/2020
    assert df_zip_today.Deaths.str.contains("N/A").all()  # The Death column should be "N/A" for all zip codes.
    print("Zip Assertions passed.")

    # %% Assertions County - Does the table look as expected?
    assert df_county_today.index[0] == "Illinois"  # First "county" reported on their table
    assert df_county_today.index[-1] == "Woodford"  # This was the last county reported on their table on 4/13/2020
    assert df_county_today.shape[0] >= 90  # This was the size of the table on 4/12/2020
    print("County Assertions passed.")

    # %% Basic assertions passed -- drop and rename things
    # Zip table first
    df_zip_today.drop(columns="Deaths", inplace=True)
    df_zip_today.rename(columns={"Positive Cases": 'Positive_Cases'}, inplace=True)
    # County next
    df_county_today.rename(columns={"Positive Cases": "Positive_Cases"}, inplace=True)

    # %% Open the most recent Zip file for comparison
    zip_changed = False
    # Assume the sheet has already been updated for the day; try grabbing and
    # comparing with that first.
    zip_spread_yday = Spread(spread=gsheet_zip_link, creds=credentials)
    df_zip_yday = zip_spread_yday.sheet_to_df(index=0, sheet=zip_spread.find_sheet(the_date_year))
    if "update_time" in df_zip_yday.columns:
        # It didn't find today's sheet, so grab the one from yesterday.
        df_zip_yday = zip_spread_yday.sheet_to_df(index=0, sheet=zip_spread.find_sheet(the_date_year_yday))
    # Now explicitly compare tables to see if they're different.
    df_zip_yday.set_index("Zip", inplace=True)
    if df_zip_today.equals(df_zip_yday):
        print("Zip values have not yet changed from latest data.")
    else:
        print("Zip values have been detected as different.")
        zip_changed = True

    # %% Open the most recent County file for comparison
    county_changed = False
    # Assume the sheet has already been updated for the day; try grabbing and
    # comparing with that first.
    county_spread_yday = Spread(spread=gsheet_county_link, creds=credentials)
    df_county_yday = county_spread_yday.sheet_to_df(index=0, sheet=county_spread.find_sheet(the_date_year))
    if "update_time" in df_county_yday.columns:
        # It didn't find today's sheet, so grab the one from yesterday.
        df_county_yday = county_spread_yday.sheet_to_df(index=0, sheet=county_spread.find_sheet(the_date_year_yday))
    # Now explicitly compare tables to see if they're different.
    df_county_yday.set_index("County", inplace=True)
    if df_county_today.equals(df_county_yday):
        print("County values have not yet changed from latest data.")
    else:
        print("County values have been detected as different.")
        county_changed = True

    # %% If they were different, save them and upload them to Sheets.
    print("\nUploading new sheets if changed.")
    if zip_changed:
        df_zip_today.to_csv(idph_csv_folder / ("IDPH Stats Zip %s.csv" % the_date_n_time), index=True)
        zip_spread.df_to_sheet(df_zip_today, index=True, sheet=the_date_year,
                               start='A1', replace=True)
        print("\tNew Zip was uploaded to Sheets.")
    else:
        print("\tZip version was not uploaded.")
    if county_changed:
        df_county_today.to_csv(idph_csv_folder / ("IDPH Stats County %s.csv" % the_date_n_time), index=True)
        county_spread.df_to_sheet(df_county_today, index=True, sheet=the_date_year,
                                  start='A1', replace=True)
        print("\tNew County was uploaded to Sheets.")
    else:
        print("\tCounty version was not uploaded.")

    # %% Now deal with production of the long versions.
    print("\nProducing Zip and County long versions.")
    # Bring in mapping tools
    census_df = pd.read_csv(geo_folder / "Illinois_Census_200414_1816.csv")
    nofo_map_dict = dict(zip(census_df.CountyName, census_df.NOFO_Region))
    metro_map_dict = dict(zip(census_df.CountyName, census_df.Metro_area))

    if zip_changed:
        df_zip_oldlong = zip_spread.sheet_to_df(index=0, sheet=zip_spread.find_sheet("long"))
        df_zip_today['update_date'] = the_date_YEAR
        df_zip_today['update_time'] = the_time
        df_zip_today.reset_index(inplace=True)
        df_zip_newlong = df_zip_today.append(df_zip_oldlong)[
            ["update_date", "update_time", "Zip", "Positive_Cases", "Tested"]].reset_index(drop=True)
        df_zip_newlong.to_csv(idph_csv_folder / ("Long Zip %s.csv" % the_date_n_time), index=False)
        zip_spread.df_to_sheet(df_zip_newlong, index=False, sheet="long", start="A1", replace=True)
        print("\tZip Long version made and uploaded.")
    else:
        print("\tZip version was not uploaded.")

    if True:  # CHANGED THIS; was county_changed
        df_county_oldlong = county_spread.sheet_to_df(index=0, sheet=county_spread.find_sheet("long"))
        df_county_today['update_date'] = the_date_YEAR
        df_county_today['update_time'] = the_time
        # Add NOFO Region
        df_county_today['NOFO_Region'] = df_county_today.index.map(nofo_map_dict)
        df_county_today.loc[df_county_today['NOFO_Region'].isna(), "NOFO_Region"] = df_county_today.index[
            df_county_today['NOFO_Region'].isna()]
        # Add Metro Area
        df_county_today['Metro_Area'] = df_county_today.index.map(metro_map_dict)
        # Chicago and Illinois are not mapped on the Metro mapping file.
        df_county_today.loc[df_county_today.index == "Chicago", "Metro_Area"] = 'Chicago'
        df_county_today.loc[df_county_today.index == "Illinois", "Metro_Area"] = 'Illinois'
        # Leaving unlabeled metro areas as nulls.
        df_county_today.reset_index(inplace=True)
        df_county_newlong = df_county_today.append(df_county_oldlong)[
            ["update_date", "update_time", "County", "Positive_Cases", "Deaths",
             "Tested", "NOFO_Region", "Metro_Area"]].reset_index(drop=True)
        df_county_newlong.to_csv(idph_csv_folder / ("Long County %s.csv" % the_date_n_time), index=False)
        county_spread.df_to_sheet(df_county_newlong, index=False, sheet="long", start="A1", replace=True)
        print("\tLong editions made and uploaded.")
    else:
        print("\tCounty version was not uploaded.")

    # %% Now produce NOFO Region and Metro Area rollups directly from the
    # County_long sheet on gsheets.
    if zip_changed or county_changed:
        # cln = county_long_now
        cln = county_spread.sheet_to_df(index=None, sheet=county_spread.find_sheet("long"))
        # Not converting the hourly column because as datetimes it includes the wrong day.
        cln.update_date = pd.to_datetime(cln.update_date)
        cln.Tested = cln.Tested.apply(lambda x: "0" if x == "" else x)
        cln.Positive_Cases, cln.Deaths, cln.Tested = (
            cln.Positive_Cases.astype(int), cln.Deaths.astype(int), cln.Tested.astype(int))

        # County data is ready for roll-up and upload to NOFO_long
        cln_nofo = cln.groupby(by=["update_date", "update_time", "NOFO_Region"]).sum()
        cln_nofo = cln_nofo.sort_index(axis=0, level=[-1, -3], sort_remaining=False)
        totals_spread.df_to_sheet(cln_nofo, index=True, sheet="NOFO_long", start="A1", replace=True)

        # County data is ready for roll-up and upload to Metro_long
        cln_metro = cln.groupby(by=["update_date", "update_time", "Metro_Area"]).sum()
        cln_metro = cln_metro.sort_index(axis=0, level=[-1, -3], sort_remaining=False)
        cln_metro = cln_metro.drop(axis=0, labels="", level=-1)
        totals_spread.df_to_sheet(cln_metro, index=True, sheet="Metro_long", start="A1", replace=True)

    # %%
    driver.close()
    print("End of " + the_time + " run.")
    return zip_changed or county_changed
        if j == 'City':
            dicc[j] = literal_eval(i[j])
        else:
            dicc[j] = i[j]
    resto.append(dicc)

# Credentials
cred = conf.get_config('C:\\Users\\santiago.curat\\Pandas\\PEYA',
                       'PedidosYa-8b8c4d19f61c.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Build the column lists
cols_caba = reporte_caba.columns
cols_resto = reporte_resto.columns

# Build the lists
caba = []
resto = []
reporte_caba.apply(a_listas_caba, axis=1)
# TIER PRICING CALCULATOR
import pandas as pd
from gspread_pandas import Spread

pricing = Spread('price', 'Plexchat Pricing')
pricing.open_sheet(0)
price_input = pricing.sheet_to_df(header_rows=1, index=True).apply(
    pd.to_numeric, errors='ignore')

tier_1_MAU = int(price_input.loc["Tier 1", "MAU"])
tier_2_MAU = int(price_input.loc["Tier 2", "MAU"])
tier_3_MAU = int(price_input.loc["Tier 3", "MAU"])
tier_4_MAU = int(price_input.loc["Tier 4", "MAU"])

tier1_cloud_cost = price_input.loc["Tier 1", "COST_USER"]
tier2_cloud_cost = price_input.loc["Tier 2", "COST_USER"]
tier3_cloud_cost = price_input.loc["Tier 3", "COST_USER"]
tier4_cloud_cost = price_input.loc["Tier 4", "COST_USER"]

estimated_MAU = int(price_input.loc["ESTIMATED MAU", "MAU"])

tiers_MAU = {
    "Tier 1": tier_1_MAU,
    "Tier 2": tier_2_MAU,
    "Tier 3": tier_3_MAU,
    "Tier 4": tier_4_MAU
}

tiers_price_per_user = {
    "Tier 1": price_input.loc["Tier 1", "PRICE_USER"],