Example #1
class Sheet:

	def __init__(self):
		self.spread = Spread(SPREADSHEET_NAME)
		self.ccs= self.spread.sheet_to_df(sheet=CCS_SHEET, index=0)
		self.zu = self.spread.sheet_to_df(sheet=ZOOM_USERS, index=0)
		# self.cs = self.spread.sheet_to_df(sheet=CALENDAR_SCHEDULE)
		self.sr = self.spread.sheet_to_df(sheet=STUDENT_ROSTER, index=0)
		
		# set index in ccs sheet with Unique ID column to get updated.
		# self.ccs = self.ccs.set_index(self.ccs['Unique ID'])

		# set index in sr sheet with ID_NUMBER
		# self.sr = self.sr.set_index(self.sr['COURSE', 'ADDRESS'])

	def update_calendar_schedule(self):
		for cn, cs, desc, loc, teacher, index in zip(
			self.ccs['Course Number'],
			self.ccs['Course Section'],
			self.ccs['Description'],
			self.ccs['Site'],
			self.ccs['Instructor 1'],
			self.ccs['Unique ID']
		):
			class_name = f"Q4-{cn}-{cs}-{desc}"
			location = loc[4:]
			quarter = '2020.Q4'
			slack_channel_name = f"Q4"
			host = '' # from zoom users sheet
			notes = ''

	def update_sheet_from_df(self, df, sheet_name):
		logger.info(f'--- Update {sheet_name} Sheet from df')
		self.spread.df_to_sheet(df, index=False, sheet=sheet_name)

	def update_ccs_sheet_from_df(self, df):
		self.update_sheet_from_df(df, CCS_SHEET)

	# deprecated
	def init_sheet_with_service_key(self):
		self.credentials = service_account.Credentials.from_service_account_file(
			'./creds/google_secret.json',
			scopes=SPREADSHEETS_SCOPES,
			subject=GSUITE_USER_EMAIL)

		self.sheet_service = build('sheets', 'v4', credentials=self.credentials, cache_discovery=False)
		self.sheet = self.sheet_service.spreadsheets()

	# deprecated
	def read_sheets(self, range):
		result = self.sheet.values().get(
			spreadsheetId=SPREADSHEET_ID,
			range=range).execute()
		values = result.get('values', [])
		return values
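For comparison with the deprecated googleapiclient helpers above, a minimal gspread_pandas round trip looks roughly like this (the spreadsheet and sheet names below are placeholders, not taken from this project):

from gspread_pandas import Spread

# Open a spreadsheet by name (a key or URL also works) and pull one worksheet into a DataFrame
spread = Spread('My Spreadsheet')
df = spread.sheet_to_df(sheet='Sheet1', index=0)

# ...transform df as needed, then write it back, replacing the worksheet contents
spread.df_to_sheet(df, index=False, sheet='Sheet1', start='A1', replace=True)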
Example #2
def workflow(sheetid, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(sheetid, config=config["td"]["service_account"])
    df = spread.sheet_to_df(index=None)
    cols = [cleaner(c) for c in df.columns]
    cursor.execute(f"CREATE TABLE {tablename} (ss_order int, %s)" %
                   (",".join([' "%s" varchar' % (s, ) for s in cols]), ))
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for i, row in enumerate(df.itertuples()):
        vals = []
        for col in row[1:]:
            vals.append(col)
        sql = """
        INSERT into %s (ss_order, %s) VALUES (%s, %s)
        """ % (
            tablename,
            ",".join(['"%s"' % (s, ) for s in cols]),
            i,
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, vals)
    cursor.close()
    pgconn.commit()
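The function above splices table and column names directly into the SQL; the row values are parameterized but the identifiers are not. If the driver is psycopg2 (an assumption; only the pgconn handle is visible here), identifiers can be quoted as well. A sketch:

from psycopg2 import sql

def create_table(cursor, tablename, cols):
    # Build CREATE TABLE with properly quoted table/column identifiers
    cursor.execute(
        sql.SQL("CREATE TABLE {} (ss_order int, {})").format(
            sql.Identifier(tablename),
            sql.SQL(", ").join(
                [sql.SQL("{} varchar").format(sql.Identifier(c)) for c in cols]),
        ))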
Example #3
 def sendMessagetoHost(self):
     source = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=763524256")
     self.driver.get("https://www.airbnb.com.au")
     sign_in = input("Please sign in!")
     data = source.sheet_to_df()
     hosts = data["url"].values
     count = 0
     for i in hosts:
         self.driver.get(i)
         time.sleep(25)
         soup = BeautifulSoup(self.driver.page_source,'lxml')
         host_id = soup.findAll("a", {"class": "_16lonkd"})
         host_id = host_id[0]
         host_id = host_id["href"]
         host_id = re.findall(r'\d+', host_id)[0]
         test_message = input("Please enter message you would like to send: ")
         button = self.driver.find_element_by_link_text("Contact host")
         button.click()
         time.sleep(10)
         text_area = self.driver.find_elements_by_id("homes-contact-host--message-textarea")
         text_area[0].send_keys(test_message)
         button = self.driver.find_element_by_class_name("_72kmbi0")
         button.click()
         count+=1
         if count > 10:
             break
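The fixed time.sleep() pauses above can be replaced with explicit waits; a sketch using Selenium's WebDriverWait (the class-name selector is copied from the snippet and may change on Airbnb's side):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def wait_for(driver, by, value, timeout=25):
    # Block until the element is present, or raise TimeoutException after `timeout` seconds
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((by, value)))

# e.g. instead of time.sleep(25):
# host_link = wait_for(self.driver, By.CLASS_NAME, "_16lonkd")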
Example #4
def do(spreadkey, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(spreadkey, config=config["cscap"]["service_account"])
    df = spread.sheet_to_df(index=None)
    sql = f"CREATE TABLE {tablename} ("
    for col in df.columns:
        sql += "%s varchar," % (cleankey(col), )
    sql = sql[:-1] + ")"
    cursor.execute(sql)
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for _, row in df.iterrows():
        cols = []
        values = []
        for key, val in row.items():
            cols.append(cleankey(key))
            values.append((val or "").strip())
        sql = "INSERT into %s (%s) VALUES (%s)" % (
            tablename,
            ",".join(cols),
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, values)
    cursor.close()
    pgconn.commit()
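Examples #2 and #4 both recreate a table and insert rows one by one; if an SQLAlchemy engine is available (an assumption — only the raw pgconn connection appears here), DataFrame.to_sql can do the same load in a single call. The GRANT statement and the ss_order column from the originals would still need separate SQL.

from sqlalchemy import create_engine

engine = create_engine("postgresql+psycopg2://user:password@localhost/dbname")  # placeholder DSN

df.columns = [cleankey(c) for c in df.columns]  # reuse the snippet's column cleaner
df.to_sql(tablename, engine, if_exists="replace", index=False)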
Example #5
def importTo_clean_sheet():
    from gspread_pandas import Spread, Client
    import gspread_pandas as gp
    import datetime
    import numpy as np
    import pandas as pd
    import re
    import importlib
    #import cleaner
    #cleaner = importlib.reload(cleaner)
    #from cleaner import cleaning_and_to_sql

    pd.set_option('display.max_rows', 20)
    pd.set_option('display.max_columns', 50)
    s = Spread('work', 'Python Autoscrapers')
    df1 = s.sheet_to_df(index=0,
                        start_row=1,
                        header_rows=1,
                        sheet="Macbooks-CPH").astype(str)

    from datetime import datetime, timedelta

    dups_ids = df1.pivot_table(index=['DBA ID'], aggfunc='size')
    type(dups_ids)
    di = dups_ids.to_frame()
    di.head()

    di.columns = ['days active']
    di.reset_index(inplace=True)

    df1 = pd.merge(df1, di, on='DBA ID')
    df1 = df1.drop_duplicates(subset='DBA ID', keep='first')

    mask = df1["Date Scraped"] != " "
    df1 = df1[mask]

    df1['state'] = "Not sold yet"

    #datetime_object = datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')
    today = datetime.today()  # reference date for the "sold" check
    for stdat, ndays, idx in zip(df1['Date Scraped'], df1['days active'],
                                 df1.index):
        do = datetime.strptime(stdat, '%d/%m/%Y')
        ndays -= 1
        do = do + timedelta(days=ndays)
        tod = datetime.strptime(today.strftime("%d/%m/%Y"), '%d/%m/%Y')

        if do < tod:
            df1.loc[idx, 'state'] = 'Sold'
    # print(do.strftime("%d/%m/%Y"), "   ", stdat, " today", today.strftime("%d/%m/%Y"))

    s = Spread('work', 'Python Autoscrapers')
    s.df_to_sheet(df1,
                  index=False,
                  sheet='Macbooks-CPH/Clean',
                  start='A2',
                  replace=True)
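The per-row date loop above can also be collapsed into one vectorized comparison; a sketch, assuming 'days active' is numeric, 'Date Scraped' keeps the '%d/%m/%Y' format, and pandas/numpy are imported as pd/np as in the function:

scraped = pd.to_datetime(df1['Date Scraped'], format='%d/%m/%Y', errors='coerce')
last_seen = scraped + pd.to_timedelta(df1['days active'].astype(int) - 1, unit='D')
df1['state'] = np.where(last_seen < pd.Timestamp.today().normalize(),
                        'Sold', 'Not sold yet')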
Example #6
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    topics = main_doc.sheet_to_df(sheet='topics', index=None)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)
    datapoint_docs = dict()   # we can reuse the spreadsheet object to download multiple sheets
    downloaded = set()
    env = {
        'downloaded': downloaded,
        'datapoint_docs': datapoint_docs
    }
    print('saving datapoints into etl/source/datapoints...')
    for _, row in datapoints.iterrows():
        process(row, env)

    datapoints.to_csv(osp.join(SOURCE_DIR, 'datapoints.csv'), index=False)
    topics.to_csv(osp.join(SOURCE_DIR, 'topics.csv'), index=False)
    concepts.to_csv(osp.join(SOURCE_DIR, 'concepts.csv'), index=False)
Example #7
def zd_api(request):
    config = conf.get_config("./gspread_pandas/")

    articles = []
    url = "https://collegetrack.zendesk.com//api/v2/help_center/categories/360000085163/articles.json"

    # also not used, but if we wanted to add in users the api URL is: /api/v2/users.json

    while url:
        response = requests.get(url, auth=(ZD_USERNAME, ZD_PASSWORD))
        if response.status_code != 200:
            print("Status:", response.status_code, "Problem with the request. Exiting.")
            exit()
        data = json.loads(response.text)
        for article in data["articles"]:
            articles.append(
                [
                    article["id"],
                    article["title"],
                    article["html_url"],
                    article["updated_at"],
                ]
            )
        url = data["next_page"]

    # converting the date column into a usable form
    # to_write_updated = [datetime.date(parse(i[3])) for i in to_write]

    df = pd.DataFrame(articles)

    df.columns = ["ID", "Title", "URL", "Date"]
    df.Date = pd.to_datetime(df.Date).dt.date

    df = df.sort_values(by="Date", ascending=True)
    df.loc[:, "ID"] = df["ID"].astype("int").astype("str")
    # df = df.set_index("ID")

    # articles.sort(key = lambda item: item[3], reverse=False)

    # articles.sort(key = lambda item: item[3], reverse=True)

    spread = Spread(SPREADSHEET_ID, config=config)
    spread.open_sheet(0)
    existing_sheet = spread.sheet_to_df(sheet="Sheet1")
    existing_sheet_subset = existing_sheet[
        ["Internal / Staff Facing", "Person Responsible"]
    ].reset_index()

    merged_df = df.merge(existing_sheet_subset, on="ID", how="left")

    spread.df_to_sheet(
        merged_df, index=False, sheet="Sheet1", start="A1", replace=False
    )
Example #8
 def pull_data_sheet(self, config_auth):
     spread_dict = self.__conf__["sheet_url"]
     spread_obj = {}
     sheet_dict = {}
     for j in spread_dict:
         spread = Spread(spread=spread_dict[j],
                         sheet="Sheet1",
                         config=config_auth)
         data_frame = spread.sheet_to_df()
         sheet_dict[j] = data_frame.reset_index()
         spread_obj[j] = spread
     return sheet_dict, spread_obj
Example #9
class GoogleSheets(object):
    def __init__(self, file_name):
        self.file_name = file_name
        """Authenication from oauth token from "My First Project", project in google developer console - owned by 
           [email protected] user"""
        self.spread = Spread(('1096395197574-0lbfvsdbd5crh7hnlgmm4asll1u3f0g5'
                              '.apps.googleusercontent.com'), self.file_name)

    def to_csv(self, sheet_name, output_file_path):
        df = self.spread.sheet_to_df(sheet=sheet_name, index=0)

        df.to_csv(output_file_path, index=False)

    def csv_to_sheet(self, csv_path, start_tuple, output_sheet_name):
        df = pd.read_csv(csv_path)

        self.spread.df_to_sheet(df,
                                start=start_tuple,
                                sheet=output_sheet_name,
                                index=False)

    def clear_sheet(self, sheet_name):
        # self.spread.open_sheet(sheet=sheet_name)
        self.spread.clear_sheet(sheet=sheet_name, rows=0, cols=0)

    def df_to_sheet(self, df, sheet_name):
        self.spread.df_to_sheet(df=df,
                                sheet=sheet_name,
                                index=False,
                                headers=True,
                                replace=True,
                                freeze_headers=True)

    def sheet_to_df(self, sheet_name, index=1, header_rows=1, start_row=1):
        return self.spread.sheet_to_df(sheet=sheet_name,
                                       index=index,
                                       header_rows=header_rows,
                                       start_row=start_row)
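A brief usage sketch for this wrapper (the spreadsheet, sheet, and path names are placeholders):

gs = GoogleSheets('My Spreadsheet')

# Worksheet -> CSV, and CSV -> worksheet
gs.to_csv('Sheet1', '/tmp/sheet1.csv')
gs.csv_to_sheet('/tmp/sheet1.csv', (1, 1), 'Sheet1 copy')

# Or move DataFrames directly
df = gs.sheet_to_df('Sheet1', index=0)
gs.df_to_sheet(df, 'Backup')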
Example #10
class PandasGoogleSpreadsheetWrapper(PandasWrapperBase):
    def __init__(self, credentialsManager, spreadsheetId, dataFrame=None):
        super().__init__(dataFrame)
        self.__credentialsManager = credentialsManager
        self.__id = spreadsheetId
        self.__spreadsheet = Spread(
            spread=self.__id, creds=self.__credentialsManager.credentials)

    @property
    def credentialsManager(self):
        return self.__credentialsManager

    @property
    def spreadsheetId(self):
        return self.__id

    @property
    def spreadsheet(self):
        return self.__spreadsheet

    @property
    def active_sheet(self):
        return self.__spreadsheet.sheet

    def load(self, filePath):
        # filePath = SheetName
        print("Load GoogleSpreadsheet: " + str(self.__id) + " [" +
              str(filePath) + "]")
        self.onLoadPreprocessing(self.df)
        df = self.__spreadsheet.sheet_to_df(index=False, sheet=filePath)
        self.setDataFrame(df)
        print("  Loaded Length: " + str(len(self.df.index)))
        self.onLoadPostprocessing(self.df)
        return self

    def save(self, filePath):
        # filePath = SheetName
        print("Save GoogleSpreadsheet: " + str(self.__id) + " [" +
              str(filePath) + "]")
        self.onSavePreprocessing(self.df)
        self.__spreadsheet.open_sheet(filePath, create=True)
        self.__spreadsheet.df_to_sheet(df=self.df,
                                       index=False,
                                       headers=True,
                                       start='A1',
                                       replace=True)
        print("  Saved Length : " + str(len(self.df.index)))
        self.onSavePostprocessing(self.df)
        return self
Example #11
def get_events_sheet(only_top=True):
    spreadsheet_user = get_config_field('GSHEETS', 'user')
    s = Spread(user=spreadsheet_user,
               spread='Release & PR Events',
               sheet='Events',
               create_spread=False,
               create_sheet=False)
    events = s.sheet_to_df()
    events.index = pd.to_datetime(events.index)
    events = events.reset_index().reset_index().set_index('date')
    events['top'] = events['top'] == 'TRUE'
    if only_top:
        events = events[events['top']]

    return events
Example #12
def main():
    """Go Main Go."""
    config = util.get_config()

    drive = util.get_driveclient(config)

    res = drive.files().list(q="title contains 'Agronomic Data'").execute()

    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        LOG.debug(item["title"])
        spread = Spread(item["id"], config=config["cscap"]["service_account"])
        for sheet in spread.sheets:
            df = spread.sheet_to_df(index=None, sheet=sheet)
            LOG.debug("%s %s", sheet.title, len(df.index))
Example #13
def get_sheet(SHEET, CSV_SPREADSHEET='Asistensi PBO Jumat 2019'):
    spread = Spread(CSV_SPREADSHEET)
    spread.open_sheet(SHEET)
    df = spread.sheet_to_df()
    df['Nilai'] = pd.to_numeric(df['Nilai'], errors='coerce')
    links = []
    for link in tqdm(df.REPO, desc='issue'):
        try:
            links.append(get_issues(link))
        except Exception:
            links.append(None)
    print(links)
    df['Issues'] = links
    spread.update_cells('H2', 'H28', links)
    df.drop(['Catatan', 'REPO'], axis=1, inplace=True)
    return df
Example #14
def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_filter):
    spread = Spread(spread=spreadsheet)

    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')

    sheets = spread.sheets
    def _norm(title):
        # Normalize a worksheet title for matching: drop spaces and dots, lowercase
        return title.replace(' ', '').replace('.', '').lower()

    raw = [
        x for x in sheets
        if 'rawdata' in _norm(x.__dict__['_properties']['title'])
        and 'pivot' not in _norm(x.__dict__['_properties']['title'])
    ]

    raw.sort(key=lambda x: len(_norm(x.__dict__['_properties']['title'])))
    df = spread.sheet_to_df(sheet=raw[0], index=0)

    # Check for column names:
    df_cols = list(map(lambda x: x.lower(), list(df.columns)))
    cols_to_filter = list(map(lambda x: x.lower(), cols_to_filter))
    missing_cols = []
    for col in cols_to_filter:
        if col not in df_cols:
            missing_cols.append(col)
    if len(missing_cols) > 0:

        print(f"Project {project_name} is missing (id: {project_id}) " +
              ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    cols = list(set(cols_to_filter).intersection(df.columns))
    lower_cols = [x.replace(' ', '_').lower() for x in cols]
    if len(cols) == 0:
        return pd.DataFrame(columns=cols_to_filter)
    _df = df.loc[:, cols].copy()
    _df = _df.loc[:, ~_df.T.duplicated(keep='first')]
    _df.columns = map(lambda x: x.replace(' ', '_').lower(), _df.columns)
    _df['date'] = pd.to_datetime(_df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        _df[col] = pd.to_numeric(_df[col].astype('str').str.replace(',', ''),
                                 errors='coerce').fillna(0)
    return _df
Example #15
def import_library_sheet_validation_from_google():
    global validation_df
    spread = Spread(lab_spreadsheet_id)
    validation_df = spread.sheet_to_df(sheet='Validation',
                                       index=0,
                                       header_rows=1,
                                       start_row=1)
    hit = validation_df.iloc[0]
    logger.debug(f"First record of validation data: {hit}")
    for column_name in metadata_validation_column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(f"Loaded library tracking sheet validation data.")
Example #16
    def on_epoch_end(self, epoch, logs=None):
        spread = Spread('*****@*****.**',
                        'https://docs.google.com/spreadsheets/d/1CksJQdyochF1M6RB4XfFgewjYGak38ixOhoAMySWTM8/edit')
        df_results = spread.sheet_to_df(sheet='Quantitative')
        df_results = df_results.reset_index()

        df_results = df_results.append({'Dataset': self.dataset,
                                        'Machine': self.machine,
                                        'Training time': time() - self._start_time,
                                        'Epochs': epoch,
                                        'Loss': logs.get('loss'),
                                        'Accuracy': logs.get('acc'),
                                        'Validation loss': logs.get('val_loss'),
                                        'Validation accuracy': logs.get('val_acc'),
                                        'Weights file': self.file_weights.format(epoch=epoch, **logs),
                                        'Additional parameters': self.additional_parameters}, ignore_index=True)
        spread.df_to_sheet(df_results, sheet='Quantitative', index=False, replace=True)
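DataFrame.append was removed in pandas 2.0; under a newer pandas the same update could be written with pd.concat (a sketch, assuming pandas is imported as pd in this module):

new_row = pd.DataFrame([{
    'Dataset': self.dataset,
    'Machine': self.machine,
    'Training time': time() - self._start_time,
    'Epochs': epoch,
    'Loss': logs.get('loss'),
    'Accuracy': logs.get('acc'),
    'Validation loss': logs.get('val_loss'),
    'Validation accuracy': logs.get('val_acc'),
    'Weights file': self.file_weights.format(epoch=epoch, **logs),
    'Additional parameters': self.additional_parameters,
}])
df_results = pd.concat([df_results, new_row], ignore_index=True)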
Example #17
def import_library_sheet_from_google(year):
    global library_tracking_spreadsheet_df
    spread = Spread(lab_spreadsheet_id)
    library_tracking_spreadsheet_df = spread.sheet_to_df(sheet='2019',
                                                         index=0,
                                                         header_rows=1,
                                                         start_row=1)
    hit = library_tracking_spreadsheet_df.iloc[0]
    logger.debug(f"First record: {hit}")
    for column_name in column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(
        f"Loaded {len(library_tracking_spreadsheet_df.index)} records from library tracking sheet."
    )
Example #18
def read_sheet(doc: Spread, sheet_name):
    df = doc.sheet_to_df(sheet=sheet_name, index=None)
    df = df.dropna(how='all')  # remove empty rows
    # detect error in sheet
    if df.empty:
        raise EmptySheet(f"{sheet_name} is empty")
    elif df.shape[0] == 1 and df.iloc[0, 0] in ['#N/A', '#VALUE!', 0]:
        msg = f"{sheet_name} contains all NA values"
        raise EmptyColumn(msg)
    elif len(df['geo'].unique()) == 1 and 'world' not in df['geo'].values:
        msg = f"{sheet_name}, geo column contains NA values"
        raise EmptyColumn(msg)
    else:
        for c in df.columns:
            if df[c].hasnans or '[Invalid]' in df[c].values:
                msg = f'{sheet_name}, column {c} has NA values'
                raise EmptyCell(msg)
    print(df.head())
    return df
Example #19
django.setup()

import posts.models
from posts.models import Post, Taxonomy

AERA = dict((v, k) for (k, v) in posts.models.AREA)
AERABUS = dict((v, k) for (k, v) in posts.models.SUBAREA)
TNENOPMOC = dict((v, k) for (k, v) in posts.models.COMPONENTS)

import gspread_pandas
from gspread_pandas import Spread

s = Spread(
    "https://docs.google.com/spreadsheets/d/1EfS2W11kRg2C1H6KD2vVjR_Htvy6j7C4SSGHPA8Bvpc/edit?ts=5d22a555",
    sheet=0)
df = s.sheet_to_df()


def process_row(pid, row):
    try:
        post = Post.objects.get(id=pid)
        a = AERA[row['ÁREA'].strip()]
        sub = AERABUS[row['SUB-ÁREA'].strip()]
        c = TNENOPMOC[row['COMPONENTE'].strip()]
        tax = Taxonomy(post=post, area=a, subarea=sub, component=c)
        tax.save()
        return tax
    except Exception as e:
        print("============")
        print(e)
        print("ERROR {} - {}".format(pid, row))
Example #20
File: main.py, Project: 12458/SafeEating
from __future__ import print_function
import pprint
import pandas as pd
from gspread_pandas import Spread, Client, conf


config = conf.get_config("auth_secret", "google_secret.json")

# Print nicely
pp = pprint.PrettyPrinter()

# Find a workbook by name and open the first sheet
# Make sure you use the right name here.
id_sheetname = "20SH07 Food ID Map"
spread = Spread(id_sheetname, config=config)
id_df = spread.sheet_to_df()

column_map = {
    "ID": 1,
    "Exact_Name": 2,
    "Recess": 3,
    "Lunch": 4,
    "Price": 5,
    "Name": 6,
    "MON/TUE/WED/TURS/FRI": 7,
    "Stall": 8
}

stall_map = {
    "Stall 1  Fried Hokkien Mee": 1,
    "Stall 2 Chicken Rice": 2,
Example #21
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    tags = main_doc.sheet_to_df(sheet='topics', index=None)

    # construct a dictionary: keys are docids, values are dictionaries whose keys
    # are sheet names and whose values are the csv links for that docid/sheet pair
    csv_link_dict = get_csv_link_dict(datapoints.csv_link.values)

    # create a dictionary with the same layout as `csv_link_dict`, but with
    # dataframes as values instead of links
    csv_dict = csv_link_dict.copy()
    for docid, di in csv_link_dict.items():
        print(f"Downloading sheets from file: {docid}")
        csv_dict[docid] = dict()
        doc = Spread(spread=docid)
        for sheet_name, link in di.items():
            print(f"sheet: {sheet_name}")
            df = doc.sheet_to_df(sheet=sheet_name, index=None)
            if df.empty:
                print(
                    f"WARNING: empty dataframe: doc: {docid}, sheet_name: {sheet_name}"
                )
            csv_dict[docid][sheet_name] = df

    print('creating ddf datasets...')

    # entities
    entities_columns = set(
    )  # mark down the columns, use to create concept table later
    for e in [
            'country', 'global', 'world_4region', 'g77_and_oecd_countries',
            'income_groups', 'landlocked', 'main_religion_2008',
            'world_6region', 'unicef_region', 'income_3groups', 'un_sdg_ldc',
            'un_sdg_region'
    ]:
        edf = pd.read_csv(
            f'../source/ddf--open_numbers/ddf--entities--geo--{e}.csv',
            na_filter=False,
            dtype=str)
        edf.to_csv(f'../../ddf--entities--geo--{e}.csv',
                   index=False,
                   encoding='utf8')
        for c in edf.columns:
            entities_columns.add(c)

    # tags entities
    tags = tags.rename(columns={
        'topic_id': 'tag',
        'topic_name': 'name',
        'parent_topic': 'parent'
    })
    tags.to_csv('../../ddf--entities--tag.csv', index=False, encoding='utf8')
    for c in tags.columns:
        entities_columns.add(c)

    # concepts
    cdf = serve_concepts(concepts, entities_columns)

    # datapoints
    serve_datapoints(datapoints, cdf, csv_dict)
Example #22
def main():

    url = 'https://www.flashscore.com/football/england/premier-league/results/'
    lib = 'lxml'
    attr = 'sportName soccer'
    path = '/var/www/html/diag/pypp.png'

    view_source = gel().run_until_complete(pypp(url, lib, attr, path, True))
    rows = view_source.select('.event__match')

    results = []

    for row in rows[0]:
        print(row)

    for row in rows:
        try:
            unique_id = str(row['id']).replace('g_1_', '')
            datetime = row.select_one('.event__time').get_text()
            timestamp = isodate(str(datetime).split('.'))
            home_team = row.select_one('.event__participant--home').get_text()
            away_team = row.select_one('.event__participant--away').get_text()
            home_goal = strip_html(row.select('span')[0])
            away_goal = strip_html(row.select('span')[1])
        except Exception:
            print("Exception")
        else:
            results.append([
                unique_id, timestamp, home_team, away_team, home_goal,
                away_goal
            ])

    print(results[0])

    file_dir = ''
    file_json = ''
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    config = conf.get_config(conf_dir=file_dir, file_name=file_json)

    worksheet = Spread('Profit & Loss', sheet='Football', config=config)
    df_worksheet = worksheet.sheet_to_df(index=0, header_rows=2)
    df_worksheet['Home'] = df_worksheet['Home'].apply(team_transform)
    df_worksheet['Away'] = df_worksheet['Away'].apply(team_transform)

    for result in results:
        df_worksheet.loc[(df_worksheet.Home == result[2]) &
                         (df_worksheet.Away == result[3]) &
                         (df_worksheet.Date_Time == result[1]),
                         ['HG', 'AG']] = result[4], result[5]

    print(df_worksheet)

    worksheet.df_to_sheet(df_worksheet,
                          index=False,
                          headers=False,
                          sheet='Football',
                          start='A3',
                          replace=False,
                          raw_column_names=['Date_Time'])

    return
Example #23
    if i['City'] in cities_asignadas:
        val = 'Si'
    else:
        val = 'No'
    return val


# Credentials
cred = conf.get_config('C:\\Users\\micaela.fuchs\\Anaconda',
                       'PedidosYa-6e661fd93faf.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Ciudades Asignadas (assigned cities)
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Ciudades Asignadas'
sheet = Spread(sheet_id, wks_name, config=cred)
cities_asignadas = sheet.sheet_to_df(index=0, header_rows=1)

# Build the list of assigned cities
cities_asignadas = cities_asignadas[cities_asignadas.columns[0]].to_list()
Example #24
load_dotenv()


config = conf.get_config("./gspread_pandas/")
hs_spread = Spread("1u7fLc0Dlg1zhWfn8xVgedmYyedr0Z1D1NfZ6uSov5tc", config=config)

hs_spread_spanish = Spread(
    "1jvY7a9yncWMI44uFf8HMypHuKhoHNkOEQ6Fr9Rqza94", config=config
)

student_List = SF_SOQL("fy21_hs_survey_student_list", soql.student_list_query)
student_List.load_from_sf_soql(sf)
student_List.shorten_site_names("A_SITE__c")


english_df = hs_spread.sheet_to_df().reset_index()

english_df = english_df[
    [
        "Student: Contact Id",
        "If/When College Track is able to have students physically return to the site, are you interested in doing so?",
        "I will feel comfortable returning to the site for in-person programming if/when (check all that apply): ",
        "Other",
    ]
]

merged_df = english_df.merge(
    student_List.df, left_on="Student: Contact Id", right_on="C_Id", how="left"
)

Example #25
credentials = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
    with open('token.pickle', 'rb') as token:
        credentials = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not credentials or not credentials.valid:
    if credentials and credentials.expired and credentials.refresh_token:
        credentials.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        credentials = flow.run_local_server(port=0)
    # Save the credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(credentials, token)

spread = Spread(spread=spread_name,
                sheet=sheet_name,
                scope=SCOPES,
                creds=credentials)

# Reading Spreadsheet into Pandas DataFrame
df = spread.sheet_to_df(index=0)
print(isinstance(df, pd.DataFrame))

# Writing Pandas DataFrame into Google Spreadsheet
spread.df_to_sheet(df, index=False, sheet=sheet_name, start='A1', replace=True)
Example #26
def ingest_student_data_google_sheet(sheet_id, pull_date):
    logger.info('Ingesting data from sheet with pull date {} and ID {}'.format(
        pull_date, sheet_id))
    spread = Spread(sheet_id)
    df = spread.sheet_to_df(index=None)
    df['school_id_tc'] = pd.to_numeric(df['school_id']).astype('Int64')
    df['child_raw_dict'] = df['child_raw'].apply(lambda x: json.loads(x))
    df['student_id_tc'] = pd.to_numeric(
        df['child_raw_dict'].apply(lambda x: int(x.get('id')))).astype('Int64')
    df['pull_datetime'] = pd.to_datetime(pull_date, utc=True)
    df['student_first_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('first_name')).astype('string')
    df['student_middle_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('middle_name')).astype('string')
    df['student_last_name_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('last_name')).astype('string')
    df['student_birth_date_tc'] = df['child_raw_dict'].apply(
        lambda x: to_date(x.get('birth_date')))
    df['student_gender_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('gender')).astype('string')
    df['student_ethnicity_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('ethnicity'))
    df['student_dominant_language_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('dominant_language')).astype('string')
    df['student_household_income_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('household_income')).astype('string')
    df['student_grade_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('grade')).astype('string')
    df['student_classroom_ids_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('classroom_ids'))
    df['student_program_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('program')).astype('string')
    df['student_hours_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('hours_string')).astype('string')
    df['student_id_alt_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('student_id')).astype('string')
    df['student_allergies_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('allergies')).astype('string')
    df['student_parent_ids_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('parent_ids'))
    df['student_approved_adults_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('approved_adults_string')).astype('string')
    df['student_emergency_contacts_string_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('emergency_contacts_string')).astype('string')
    df['student_notes_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('notes')).astype('string')
    df['student_last_day_tc'] = df['child_raw_dict'].apply(
        lambda x: to_date(x.get('last_day')))
    df['student_exit_reason_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('exit_reason')).astype('string')
    df['student_exit_survey_id_tc'] = pd.to_numeric(df['child_raw_dict'].apply(
        lambda x: x.get('exit_survey_id'))).astype('Int64')
    df['student_exit_notes_tc'] = df['child_raw_dict'].apply(
        lambda x: x.get('exit_notes')).astype('string')
    df = df.reindex(columns=[
        'school_id_tc', 'student_id_tc', 'pull_datetime',
        'student_first_name_tc', 'student_middle_name_tc',
        'student_last_name_tc', 'student_birth_date_tc', 'student_gender_tc',
        'student_ethnicity_tc', 'student_dominant_language_tc',
        'student_household_income_tc', 'student_grade_tc',
        'student_classroom_ids_tc', 'student_program_tc',
        'student_hours_string_tc', 'student_id_alt_tc', 'student_allergies_tc',
        'student_parent_ids_tc', 'student_approved_adults_string_tc',
        'student_emergency_contacts_string_tc', 'student_notes_tc',
        'student_last_day_tc', 'student_exit_reason_tc',
        'student_exit_survey_id_tc', 'student_exit_notes_tc'
    ])
    if df.duplicated(['school_id_tc', 'student_id_tc']).any():
        raise ValueError(
            'Ingested data contains duplicate Transparent Classroom school ID/student id combinations'
        )
    return df
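Much of the per-field .apply extraction above could also be done in one pass with pd.json_normalize (a sketch; the renaming to the *_tc columns and the dtype handling would still be needed):

child = pd.json_normalize(df['child_raw_dict'].tolist())
child.index = df.index  # keep row alignment with the original frame
df['student_first_name_tc'] = child['first_name'].astype('string')
df['student_gender_tc'] = child['gender'].astype('string')
# ...and similarly for the remaining child_raw fields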
Example #27
def handler(incoming):

    ## Put code here

    # ## Get list of projects

    # In[15]:

    main_spread = Spread('1wZDpHfIqKBEhmS_F485kFKmrBUAbrfMqu2HW4NOY6BE')

    # In[16]:

    project_list = main_spread.sheet_to_df(index=0)

    # In[17]:

    project_list

    # In[18]:

    # create sqlalchemy engine
    engine = create_engine("mysql+pymysql://{user}:{pw}@{host}/{db}".format(
        user=user, pw=password, host=host, db=dbname))

    # In[19]:

    # open the database connection
    dbConnection = engine.connect()

    # In[20]:

    # loop through all the projects
    for index, row in project_list.iterrows():

        project = row['dialogflow_project_id']
        spreadsheet_id = row['google_spreadsheet_id']

        # Get the project data we need
        df = pd.read_sql(
            f"SELECT `identifier`, `item_key`, `item_value` FROM `data_pieces` WHERE `project` = '{project}'",
            dbConnection)
        columns_df = pd.read_sql(
            f"SELECT * FROM `column_tracker` WHERE `project` = '{project}'",
            dbConnection)
        text_df = pd.read_sql(
            f"SELECT * FROM `text_log` WHERE `project` = '{project}'",
            dbConnection)
        first_contact_df = pd.read_sql(
            f"SELECT * FROM `first_contact` WHERE `project` = '{project}'",
            dbConnection)

        # pivot all the data pieces so columns run along the top, identifiers as rows
        pivoted = df.pivot(index='identifier',
                           columns='item_key',
                           values=['item_value'])

        # flatten the column headers
        pivoted.columns = pivoted.columns.get_level_values(1)
        pivoted.reset_index(inplace=True)

        # sort the columns by order they were created
        columns_df.sort_values(by=['created_at'], inplace=True)

        # reorder the columns of the pivoted table
        ordered_columns = columns_df['col'].tolist()
        ordered_columns.insert(0, "identifier")
        pivoted_ordered = pivoted[ordered_columns]

        # on the text_df (or log) table, do a "group by" by identifier
        # and also concatenate the items in the group
        text_concat = text_df.groupby(['identifier'])['raw_text'].apply(
            ' | '.join).reset_index()

        # add the "raw_text" column to the table so far
        merge1 = pd.merge(pivoted_ordered, text_concat, on="identifier")

        # sort the list of first-contacts, and merge into the final table
        first_contact_df.sort_values(by=['created_at'], inplace=True)
        final_table = pd.merge(first_contact_df, merge1, on="identifier")

        # drop the project columns from the final_table for the spreadsheet
        final_table.drop(columns=['proj_ident_hash', 'project'], inplace=True)

        # establish the project spreadsheet and update the first sheet
        spread = Spread(spreadsheet_id)
        spread.df_to_sheet(final_table,
                           index=False,
                           sheet='Sheet1',
                           start='A1',
                           replace=True)

        print(f"Spreadsheet for {row['friendly_name']} updated.")

    # In[21]:

    # close the database
    dbConnection.close()

    # In[22]:

    print("All updates complete.")

    return True
Example #28
def the_work(creds_path, backup_folder, idph_csv_folder, geo_folder, chrome_options, chrome_path='chromedriver', **kwargs):
    # Dates
    the_date = datetime.now(timezone('US/Central')).strftime("%m-%d")
    the_date_year_yday = datetime.strftime(datetime.now(timezone('US/Central')) - timedelta(1), '%m-%d-%y')
    the_date_year = datetime.now(timezone('US/Central')).strftime("%m-%d-%y")
    the_date_YEAR = datetime.now(timezone('US/Central')).strftime("%m-%d-%Y")
    the_date_n_time = datetime.now(timezone('US/Central')).strftime("%m-%d-%H%M")
    the_time = datetime.now(timezone('US/Central')).strftime("%H:%M")

    print("Starting scraper...{} {}".format(the_date_year, the_time))

    credentials = ServiceAccountCredentials.from_json_keyfile_name(creds_path)
    # If you copy this, make sure the file you are opening is accessible to your service account
    # Ie. Give Sharing/Edit access to ExProc ([email protected])
    totals_spread, zip_spread, county_spread = None, None, None
    while not (zip_spread and county_spread and totals_spread):
        try:
            if not zip_spread:
                zip_spread = Spread(spread=gsheet_zip_link, creds=credentials)
            if not county_spread:
                county_spread = Spread(spread=gsheet_county_link, creds=credentials)
            if not totals_spread:
                totals_spread = Spread(spread=gsheet_totals_link, creds=credentials)
        except gspread.exceptions.APIError as e:
            print(e)
            print("gspread API Error occurred")
            time.sleep(5)
            continue
        except requests.exceptions.ReadTimeout:
            print("requests ReadTimeout occurred")
            time.sleep(5)
            continue

    driver = webdriver.Chrome(executable_path=chrome_path,
                              options=chrome_options)  # Chrome options are passed into function
    driver.implicitly_wait(300)

    # %%
    # Webdriver Work - First for Zips
    driver.get(idph_link)
    time.sleep(10)

    # Collecting totals
    daily_totals = {"tests_pos": int(driver.find_element_by_id("covid19positive").text.replace(",", "")),
                    "deaths": int(driver.find_element_by_id("covid19deaths").text.replace(",", "")),
                    "total_tests": int(driver.find_element_by_id("covid19totaltest").text.replace(",", ""))#,
                    #"recovered": int(driver.find_element_by_id("covid19recovered").text.replace(",", ""))
                    }

    by_county_button = driver.find_element_by_link_text("By County")
    by_zip_button = driver.find_element_by_link_text("By Zip")

    by_zip_button.click()
    time.sleep(2)
    all_zip_data_button = driver.find_element_by_link_text("All")
    all_zip_data_button.click()
    time.sleep(5)  # Excessive, I know

    zip_soup = bs4.BeautifulSoup(driver.page_source, 'lxml')
    zip_table_soup = zip_soup.find("table", class_="padded-table", id="detailedData")

    by_county_button.click()
    time.sleep(2)
    all_county_data_button = driver.find_element_by_link_text("All")
    all_county_data_button.click()
    time.sleep(5)  # Excessive, I know

    county_soup = bs4.BeautifulSoup(driver.page_source, 'lxml')
    county_table_soup = county_soup.find("table", class_="padded-table", id="detailedData")

    # Todo: Make sure to back this up as well
    with open(backup_folder / ("backup_soup_zip_%s.txt" % the_date_n_time), "w") as backup_zip_soup:
        backup_zip_soup.writelines(str(zip_soup.encode("utf-8")))
    with open(backup_folder / ("backup_soup_county_%s.txt" % the_date_n_time), "w") as backup_county_soup:
        backup_county_soup.writelines(str(county_soup.encode("utf-8")))

    # %%
    zip_table_headers = zip_table_soup.find_all("th")
    zip_table_headers_arr = [x.get_text() for x in zip_table_headers]
    assert zip_table_headers_arr == ['Zip', 'Tested', 'Positive Cases', 'Deaths']

    county_table_headers = county_table_soup.find_all("th")
    county_table_headers_arr = [x.get_text() for x in county_table_headers]
    assert county_table_headers_arr == ['County', 'Tested', 'Positive Cases', 'Deaths']

    #print("Zip first:")
    #print(zip_table_soup.find_all("tr")[1:])
    #print()
    #print("County second:")
    #print(county_table_soup.find_all("tr")[1:])

    df_zip_today = pd.DataFrame(columns=zip_table_headers_arr)
    for tr in zip_table_soup.find_all('tr')[1:]:
        tds = tr.find_all('td')
        df_zip_today = df_zip_today.append(
            {'Zip': tds[0].text, 'Tested': tds[1].text, 'Positive Cases': tds[2].text, 'Deaths': tds[3].text},
            ignore_index=True)

    df_county_today = pd.DataFrame(columns=county_table_headers_arr)
    for tr in county_table_soup.find_all('tr')[1:]:
        tds = tr.find_all('td')
        df_county_today = df_county_today.append(
            {'County': tds[0].text, 'Tested': tds[1].text, 'Positive Cases': tds[2].text, 'Deaths': tds[3].text},
            ignore_index=True)

    df_zip_today.set_index("Zip", inplace=True)
    df_county_today.set_index("County", inplace=True)

    # %%
    # Totals -- Assertions, Comparisons, and Long form production
    df_totals_oldlong = totals_spread.sheet_to_df(index=0)
    assert daily_totals['tests_pos'] >= int(df_totals_oldlong.iloc[0]['tests_pos'])  # Was the number back on 4/18/2020
    assert daily_totals['deaths'] >= int(df_totals_oldlong.iloc[0]['deaths'])
    assert daily_totals['total_tests'] >= int(df_totals_oldlong.iloc[0]['total_tests'])
    #assert daily_totals['recovered'] >= int(df_totals_oldlong.iloc[0]['recovered'])
    print("Daily Totals Assertions passed.")

    totals_changed = False

    if (daily_totals['tests_pos'] != int(df_totals_oldlong.iloc[0]['tests_pos'])) or \
            (daily_totals['deaths'] != int(df_totals_oldlong.iloc[0]['deaths'])) or \
            (daily_totals['total_tests'] != int(df_totals_oldlong.iloc[0]['total_tests'])): # or \
            #(daily_totals['recovered'] != int(df_totals_oldlong.iloc[0]['recovered'])):
        totals_changed = True
        print("Totals were detected as changed.")

    if totals_changed:
        daily_totals['update_date'] = the_date_YEAR
        daily_totals['update_time'] = the_time
        daily_totals['tests_neg'] = daily_totals['total_tests'] - daily_totals['tests_pos']
        daily_totals['new_tests'] = daily_totals['total_tests'] - int(df_totals_oldlong.iloc[0]['total_tests'])
        df_totals_today = pd.DataFrame(daily_totals, index=[0])

        df_totals_newlong = df_totals_today.append(df_totals_oldlong)[
            ["update_date", "tests_pos", "deaths", "total_tests",
             #"recovered",
             "tests_neg", "new_tests"]].reset_index(
            drop=True)
        df_totals_newlong.to_csv(idph_csv_folder / ("Long Totals %s.csv" % the_date_n_time), index=False)
        totals_spread.df_to_sheet(df_totals_newlong, index=False, sheet="IL_long", start="A1", replace=True)

    # %% Assertions Zip - Does the table look as expected?
    assert df_zip_today.index[0] <= "60002"  # First zip code in Illinois
    assert int(df_zip_today.index[-1]) >= 62959  # This was the last zip code reported on 4/12/2020
    assert df_zip_today.shape[0] >= 353  # This was the size of the table on 4/12/2020
    assert df_zip_today.Deaths.str.contains("N/A").all()  # The Death column should be "N/A" for all zip codes.
    print("Zip Assertions passed.")

    # %% Assertions County - Does the table look as expected?
    assert df_county_today.index[0] == "Illinois"  # First "county" reported on their table
    assert df_county_today.index[-1] == "Woodford"  # This was the last county code reported on their table on 4/13/2020
    assert df_county_today.shape[0] >= 90  # This was the size of the table on 4/12/2020
    print("County Assertions passed.")

    # %% Basic Assertions passed -- Drop and rename things
    # Zip table first
    df_zip_today.drop(columns="Deaths", inplace=True)
    df_zip_today.rename(columns={"Positive Cases": 'Positive_Cases'}, inplace=True)

    # County next
    df_county_today.rename(columns={"Positive Cases": "Positive_Cases"}, inplace=True)

    # %%
    # Open the most recent Zip file for comparison
    zip_changed = False

    # Assume the sheet has already been updated for the day, try grabbing and comparing with that first.
    zip_spread_yday = Spread(spread=gsheet_zip_link, creds=credentials)
    df_zip_yday = zip_spread_yday.sheet_to_df(index=0, sheet=zip_spread.find_sheet(the_date_year))
    if "update_time" in df_zip_yday.columns:  # Oh, so it didn't find it for today, let's grab the one from yesterday
        df_zip_yday = zip_spread_yday.sheet_to_df(index=0, sheet=zip_spread.find_sheet(the_date_year_yday))

    # Now explicitly compare tables to see if they're different
    df_zip_yday.set_index("Zip", inplace=True)
    if df_zip_today.equals(df_zip_yday):
        print("Zip values have not yet changed from latest data.")
    else:
        print("Zip values have been detected as different.")
        zip_changed = True

    # %%
    # Open the most recent County file for comparison
    county_changed = False

    # Assume the sheet has already been updated for the day, try grabbing and comparing with that first.
    county_spread_yday = Spread(spread=gsheet_county_link, creds=credentials)
    df_county_yday = county_spread_yday.sheet_to_df(index=0, sheet=county_spread.find_sheet(the_date_year))
    if "update_time" in df_county_yday.columns:  # Oh, so it didn't find it for today, let's grab the one from yesterday
        df_county_yday = county_spread_yday.sheet_to_df(index=0, sheet=county_spread.find_sheet(the_date_year_yday))

    # Now explicitly compare tables to see if they're different
    df_county_yday.set_index("County", inplace=True)
    if df_county_today.equals(df_county_yday):
        print("County values have not yet changed from latest data.")
    else:
        print("County values have been detected as different.")
        county_changed = True

    # %% If they were different, let's save them and upload them to sheets
    print("\nUploading new sheets if changed.")
    if zip_changed:
        df_zip_today.to_csv(idph_csv_folder / ("IDPH Stats Zip %s.csv" % the_date_n_time), index=True)
        zip_spread.df_to_sheet(df_zip_today, index=True, sheet=the_date_year, start='A1', replace=True)
        print("\tNew Zip was uploaded to Sheets.")
    else:
        print("\tZip version was not uploaded.")

    if county_changed:
        df_county_today.to_csv(idph_csv_folder / ("IDPH Stats County %s.csv" % the_date_n_time), index=True)
        county_spread.df_to_sheet(df_county_today, index=True, sheet=the_date_year, start='A1', replace=True)
        print("\tNew County was uploaded to Sheets.")
    else:
        print("\tCounty version was not uploaded.")

    # %% Now deal with production of Long version
    print("\nProducing Zip and County long versions.")

    # Bring in mapping tools
    census_df = pd.read_csv(geo_folder / "Illinois_Census_200414_1816.csv")
    nofo_map_dict = dict(zip(census_df.CountyName, census_df.NOFO_Region))
    metro_map_dict = dict(zip(census_df.CountyName, census_df.Metro_area))

    if zip_changed:
        df_zip_oldlong = zip_spread.sheet_to_df(index=0, sheet=zip_spread.find_sheet("long"))
        df_zip_today['update_date'] = the_date_YEAR
        df_zip_today['update_time'] = the_time
        df_zip_today.reset_index(inplace=True)
        df_zip_newlong = df_zip_today.append(df_zip_oldlong)[
            ["update_date", "update_time", "Zip", "Positive_Cases", "Tested"]].reset_index(drop=True)
        df_zip_newlong.to_csv(idph_csv_folder / ("Long Zip %s.csv" % the_date_n_time), index=False)
        zip_spread.df_to_sheet(df_zip_newlong, index=False, sheet="long", start="A1", replace=True)
        print("\tZip Long version made and uploaded.")
    else:
        print("\tZip version was not uploaded.")

    if True:   ## CHANGED THIS was county_changed
        df_county_oldlong = county_spread.sheet_to_df(index=0, sheet=county_spread.find_sheet("long"))
        df_county_today['update_date'] = the_date_YEAR
        df_county_today['update_time'] = the_time
        # Add NOFO Region
        df_county_today['NOFO_Region'] = df_county_today.index.map(nofo_map_dict)
        df_county_today.loc[df_county_today['NOFO_Region'].isna(), "NOFO_Region"] = df_county_today.index[
            df_county_today['NOFO_Region'].isna()]
        # Add Metro Area
        df_county_today['Metro_Area'] = df_county_today.index.map(metro_map_dict)
        # Chicago and Illinois are not mapped on the Metro mapping file.
        df_county_today.loc[df_county_today.index == "Chicago", "Metro_Area"] = 'Chicago'
        df_county_today.loc[df_county_today.index == "Illinois", "Metro_Area"] = 'Illinois'
        # Leaving unlabeled metro areas as Nulls.

        df_county_today.reset_index(inplace=True)
        df_county_newlong = df_county_today.append(df_county_oldlong)[
            ["update_date", "update_time", "County", "Positive_Cases", "Deaths", "Tested", "NOFO_Region",
             "Metro_Area"]].reset_index(drop=True)
        df_county_newlong.to_csv(idph_csv_folder / ("Long County %s.csv" % the_date_n_time), index=False)
        county_spread.df_to_sheet(df_county_newlong, index=False, sheet="long", start="A1", replace=True)
        print("\tLong editions made and uploaded.")
    else:
        print("\tCounty version was not uploaded.")

    # %% Now producing NOFO Region and Metro Area Rollups directly from County_long and Zip_long directly from gsheets
    if zip_changed or county_changed:
        # cln = county_long_now
        cln = county_spread.sheet_to_df(index=None, sheet=county_spread.find_sheet("long"))
        # I'm not converting the hourly one because as datetimes it includes the wrong day.
        cln.update_date = pd.to_datetime(cln.update_date)
        cln.Tested = cln.Tested.apply(lambda x: "0" if x == "" else x)
        cln.Positive_Cases, cln.Deaths, cln.Tested = cln.Positive_Cases.astype(int), cln.Deaths.astype(
            int), cln.Tested.astype(int)

        # County data is ready for roll-up and upload to NOFO_long
        cln_nofo = cln.groupby(by=["update_date", "update_time", "NOFO_Region"]).sum()
        cln_nofo = cln_nofo.sort_index(axis=0, level=[-1, -3], sort_remaining=False)
        totals_spread.df_to_sheet(cln_nofo, index=True, sheet="NOFO_long", start="A1", replace=True)

        # County data is ready for roll-up and upload to Metro_long
        cln_metro = cln.groupby(by=["update_date", "update_time", "Metro_Area"]).sum()
        cln_metro = cln_metro.sort_index(axis=0, level=[-1, -3], sort_remaining=False)
        cln_metro = cln_metro.drop(axis=0, labels="", level=-1)
        totals_spread.df_to_sheet(cln_metro, index=True, sheet="Metro_long", start="A1", replace=True)

    # %%
    driver.close()
    print("End of " + the_time + " run.")

    return zip_changed or county_changed
Example #29
        if j == 'City':
            dicc[j] = literal_eval(i[j])
        else:
            dicc[j] = i[j]
    resto.append(dicc)


# Credentials
cred = conf.get_config('C:\\Users\\santiago.curat\\Pandas\\PEYA',
                       'PedidosYa-8b8c4d19f61c.json')

# Roster CABA
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster CABA'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_caba = sheet.sheet_to_df(index=0, header_rows=1)

# Roster Resto
sheet_id = '1JNywQTVzEQKRwqrJRkpzjiXx5Ly-FldtBMfeSYHuL7w'
wks_name = 'Roster Resto'
sheet = Spread(sheet_id, wks_name, config=cred)
reporte_resto = sheet.sheet_to_df(index=0, header_rows=1)

# Get the columns
cols_caba = reporte_caba.columns
cols_resto = reporte_resto.columns

# Create the lists
caba = []
resto = []
reporte_caba.apply(a_listas_caba, axis=1)
Example #30
#TIER PRICING CALCULATOR
import pandas as pd
from gspread_pandas import Spread

pricing = Spread('price', 'Plexchat Pricing')
pricing.open_sheet(0)
price_input = pricing.sheet_to_df(header_rows=1,
                                  index=True).apply(pd.to_numeric,
                                                    errors='ignore')

tier_1_MAU = int(price_input.loc["Tier 1", "MAU"])
tier_2_MAU = int(price_input.loc["Tier 2", "MAU"])
tier_3_MAU = int(price_input.loc["Tier 3", "MAU"])
tier_4_MAU = int(price_input.loc["Tier 4", "MAU"])

tier1_cloud_cost = price_input.loc["Tier 1", "COST_USER"]
tier2_cloud_cost = price_input.loc["Tier 2", "COST_USER"]
tier3_cloud_cost = price_input.loc["Tier 3", "COST_USER"]
tier4_cloud_cost = price_input.loc["Tier 4", "COST_USER"]

estimated_MAU = int(price_input.loc["ESTIMATED MAU", "MAU"])

tiers_MAU = {
    "Tier 1": tier_1_MAU,
    "Tier 2": tier_2_MAU,
    "Tier 3": tier_3_MAU,
    "Tier 4": tier_4_MAU
}

tiers_price_per_user = {
    "Tier 1": price_input.loc["Tier 1", "PRICE_USER"],