def main(): """Go Main Go.""" config = get_config() drive = get_driveclient(config, "td") res = ( drive.files() .list( q=( "'1MA6spcXyu_TeyZYkUSizks9fuQTSLC7m' in parents and " "mimeType='application/vnd.google-apps.folder'" ) ) .execute() ) rows = [] for item in res["items"]: siteid, typename = item["title"].rsplit("_", 1) rows.append({"siteid": siteid, "res": typename, "id": item["id"]}) df = pd.DataFrame(rows) df = df.pivot("siteid", "res", "id") pgconn = get_dbconn("td") cursor = pgconn.cursor() for siteid, row in df.iterrows(): cursor.execute( "UPDATE meta_site_history SET drive_maps_folder = %s, " "drive_photos_folder = %s where siteid = %s", (row["maps"], row["photos"], siteid), ) if cursor.rowcount != 1: LOG.info("failed update for |%s|", siteid) cursor.close() pgconn.commit()
def main(): """Go Main Go""" config = util.get_config() spr_client = util.get_spreadsheet_client(config) drive = util.get_driveclient(config) # Fake last conditional to make it easy to reprocess one site... res = (drive.files().list(q=("title contains 'Soil Texture Data'"), maxResults=999).execute()) HEADERS = [ "uniqueid", "plotid", "depth", "tillage", "rotation", "soil6", "nitrogen", "drainage", "rep", "subsample", "landscape", "notes", "herbicide", "sampledate", ] sz = len(res["items"]) for i, item in enumerate(res["items"]): if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue spreadsheet = util.Spreadsheet(spr_client, item["id"]) spreadsheet.get_worksheets() for year in spreadsheet.worksheets: print('%3i/%3i sheet "%s" for "%s"' % (i + 1, sz, year, item["title"])) lf = spreadsheet.worksheets[year].get_list_feed() for rownum, entry in enumerate(lf.entry): dirty = False data = entry.to_dict() for key in ["soil13", "soil14"]: if key not in data: continue value = data[key] if rownum == 1 and value == "%": print("updating % to g/kg") entry.set_value(key, "g/kg") dirty = True continue if rownum >= 2: try: newvalue = float(value) * 10.0 except Exception: continue print("%s updating %s to %s" % (key, value, newvalue)) entry.set_value(key, "%.4f" % (newvalue, )) dirty = True if dirty: util.exponential_backoff(spr_client.update, entry)
def main(): """Go Main!""" pgconn = get_dbconn("sustainablecorn") cursor = pgconn.cursor() drive = util.get_driveclient(util.get_config(), "cscap") spr_client = util.get_spreadsheet_client(util.get_config()) res = drive.files().list(q="title contains 'Plot Identifiers'").execute() for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue site = item["title"].split()[0] print(site) cursor.execute( """SELECT distinct plotid from agronomic_data WHERE site = %s""", (site,), ) agr_plotids = [row[0] for row in cursor] cursor.execute( """SELECT distinct plotid from soil_data WHERE site = %s""", (site,), ) soil_plotids = [row[0] for row in cursor] spreadsheet = util.Spreadsheet(spr_client, item["id"]) spreadsheet.get_worksheets() sheet = spreadsheet.worksheets["Sheet 1"] for entry in sheet.get_list_feed().entry: dirty = False data = entry.to_dict() res = "yes" if data["plotid"] not in agr_plotids: res = "no" # print("%s %s" % (data['plotid'], agr_plotids)) if data["agro"] != res: print( " AGR plotid: %s :: %s -> %s" % (data["plotid"], data["agro"], res) ) entry.set_value("agro", res) dirty = True res = "yes" if data["plotid"] not in soil_plotids: res = "no" # print("%s %s" % (data['plotid'], soil_plotids)) if data["soil"] != res: print( " SOIL plotid: %s :: %s -> %s" % (data["plotid"], data["soil"], res) ) entry.set_value("soil", res) dirty = True if dirty: spr_client.update(entry)
def main(): """Go!""" config = util.get_config() sheets = util.get_sheetsclient(config, "cscap") drive = util.get_driveclient(config) res = (drive.files().list( q=("title contains 'Soil Bulk Density' or " "title contains 'Soil Nitrate Data' or " "title contains 'Soil Texture Data' or " "title contains 'Agronomic Data'"), maxResults=999, ).execute()) results = [] for item in tqdm(res["items"]): if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue title = item["title"] f = sheets.spreadsheets().get(spreadsheetId=item["id"], includeGridData=True) j = util.exponential_backoff(f.execute) for sheet in j["sheets"]: sheet_title = sheet["properties"]["title"] for griddata in sheet["data"]: startcol = griddata.get("startColumn", 1) startrow = griddata.get("startRow", 1) header = [] for row, rowdata in enumerate(griddata["rowData"]): if "values" not in rowdata: # empty sheet continue for col, celldata in enumerate(rowdata["values"]): if row == 0: header.append(celldata.get("formattedValue", "n/a")) if celldata.get("note") is not None: results.append({ "title": title, "header": header[col], "sheet_title": sheet_title, "row": row + startrow + 1, "col": col + startcol + 1, "note": celldata["note"], }) df = pd.DataFrame(results) df.to_csv("notes.csv", sep="|")
def main(): """Go Main Go.""" config = util.get_config() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient() res = (drive_client.files().list( q="title contains 'Agronomic Data'").execute()) for item in res["items"]: spreadsheet = util.Spreadsheet(spr_client, item["id"]) for yr in ["2011", "2012", "2013", "2014", "2015"]: spreadsheet.worksheets[yr].del_column("AGR392")
def main(): """Go Main!""" pgconn = psycopg2.connect(database='sustainablecorn', host='iemdb', user='******') cursor = pgconn.cursor() drive = util.get_driveclient(util.get_config(), 'cscap') spr_client = util.get_spreadsheet_client(util.get_config()) res = drive.files().list(q="title contains 'Plot Identifiers'").execute() for item in res['items']: if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue site = item['title'].split()[0] print(site) cursor.execute("""SELECT distinct plotid from agronomic_data WHERE site = %s""", (site, )) agr_plotids = [row[0] for row in cursor] cursor.execute("""SELECT distinct plotid from soil_data WHERE site = %s""", (site, )) soil_plotids = [row[0] for row in cursor] spreadsheet = util.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() sheet = spreadsheet.worksheets['Sheet 1'] for entry in sheet.get_list_feed().entry: dirty = False data = entry.to_dict() res = 'yes' if data['plotid'] not in agr_plotids: res = 'no' # print("%s %s" % (data['plotid'], agr_plotids)) if data['agro'] != res: print(" AGR plotid: %s :: %s -> %s" % (data['plotid'], data['agro'], res)) entry.set_value('agro', res) dirty = True res = 'yes' if data['plotid'] not in soil_plotids: res = 'no' # print("%s %s" % (data['plotid'], soil_plotids)) if data['soil'] != res: print(" SOIL plotid: %s :: %s -> %s" % (data['plotid'], data['soil'], res)) entry.set_value('soil', res) dirty = True if dirty: spr_client.update(entry)
def main(): """Go Main Go.""" config = util.get_config() drive = util.get_driveclient(config) res = drive.files().list(q="title contains 'Agronomic Data'").execute() for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue LOG.debug(item["title"]) spread = Spread(item["id"], config=config["cscap"]["service_account"]) for sheet in spread.sheets: df = spread.sheet_to_df(index=None, sheet=sheet) LOG.debug("%s %s", sheet.title, len(df.index))
def main(): """Go Main Go""" config = util.get_config() spr_client = util.get_spreadsheet_client(config) drive = util.get_driveclient(config) # Fake last conditional to make it easy to reprocess one site... res = drive.files().list(q=("title contains 'Soil Texture Data'"), maxResults=999).execute() HEADERS = ['uniqueid', 'plotid', 'depth', 'tillage', 'rotation', 'soil6', 'nitrogen', 'drainage', 'rep', 'subsample', 'landscape', 'notes', 'herbicide', 'sampledate'] sz = len(res['items']) for i, item in enumerate(res['items']): if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue spreadsheet = util.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() for year in spreadsheet.worksheets: print('%3i/%3i sheet "%s" for "%s"' % (i + 1, sz, year, item['title'])) lf = spreadsheet.worksheets[year].get_list_feed() for rownum, entry in enumerate(lf.entry): dirty = False data = entry.to_dict() for key in ['soil13', 'soil14']: if key not in data: continue value = data[key] if rownum == 1 and value == '%': print("updating % to g/kg") entry.set_value(key, 'g/kg') dirty = True continue if rownum >= 2: try: newvalue = float(value) * 10. except Exception as _exp: continue print("%s updating %s to %s" % (key, value, newvalue)) entry.set_value(key, "%.4f" % (newvalue, )) dirty = True if dirty: util.exponential_backoff(spr_client.update, entry)
def main(): """Go Main!""" config = util.get_config() spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient(config) treat_feed = spr_client.get_list_feed(config["cscap"]["treatkey"], "od6") treatments, treatment_names = util.build_treatments(treat_feed) res = ( drive_client.files() .list(q="title contains 'Plot Identifiers'") .execute() ) for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue spreadsheet = util.Spreadsheet(spr_client, item["id"]) print("Processing '%s'..." % (item["title"],)) spreadsheet.get_worksheets() worksheet = spreadsheet.worksheets["Sheet 1"] for entry in worksheet.get_list_feed().entry: data = entry.to_dict() sitekey = data.get("uniqueid").lower() if sitekey is None: continue trt = treatments[sitekey] if data[COLKEY] is not None and data[COLKEY] != "": continue if len(trt[TRTKEY]) != 2: print("can't deal with this: %s" % (trt[TRTKEY],)) break newval = treatment_names.get(trt[TRTKEY][1], "") entry.set_value(COLKEY, newval) print( ("Setting plotid: %s uniqueid: %s column:%s to %s") % (data.get("plotid"), sitekey, COLKEY, newval) ) spr_client.update(entry)
def main(): """Go Main Go.""" config = utils.get_config() drive = utils.get_driveclient(config, 'cscap') perms = drive.permissions().list( fileId=config['cscap']['basefolder']).execute() for item in perms.get('items', []): # Unclear what type of permission this is that does not have this # set, maybe a file with an allow for anybody that has the link to it if 'emailAddress' not in item: continue email = item['emailAddress'].lower() res = input("%s id: %s role: %s revoke?[y]" % (email, item['id'], item['role'])) if res == "": drive.permissions().update(fileId=config['cscap']['basefolder'], permissionId=item['id'], body={ 'role': 'reader' }).execute() print("Del")
def main(): """Do Main""" pgconn = psycopg2.connect(database="sustainablecorn") plotdf = read_sql( """SELECT upper(uniqueid) as uniqueid, upper(plotid) as plotid from plotids""", pgconn, index_col=None, ) plotdf["ghg"] = "no" drive = utils.get_driveclient(utils.get_config(), "cscap") spr_client = utils.get_spreadsheet_client(utils.get_config()) X = { "2011": "1DSHcfeBNJArVowk0CG_YvzI0YQnHIZXhcxOjBi2fPM4", "2012": "1ax_N80tIBBKEnWDnrGxsK4KUa1ssokwMxQZNVHEWWM8", "2013": "1UY5JYKlBHDElwljnEC-1tF7CozplAfyGpbkbd0OFqsA", "2014": "12NqffqVMQ0M4PMT_CP5hYfmydC-vzXQ0lFHbKwwfqzg", "2015": "1FxKx0GDJxv_8fIjKe2xRJ58FGILLlUSXcb6EuSLQSrI", } years = X.keys() years.sort() unknown = ["UniqueID_PlotID", "-_-"] rows = [] for year in years: spreadsheet = utils.Spreadsheet(spr_client, X.get(year)) for worksheet in spreadsheet.worksheets: lf = spreadsheet.worksheets[worksheet].get_list_feed() for entry in lf.entry: d = entry.to_dict() if d.get("uniqueid") is None or d.get("plotid") is None: continue rows.append(d) df2 = plotdf[(plotdf["uniqueid"] == d["uniqueid"].upper()) & (plotdf["plotid"] == d["plotid"].upper())] if len(df2.index) == 0: key = "%s_%s" % (d["uniqueid"], d["plotid"]) if key in unknown: continue unknown.append(key) print(("%s[%s] Unknown uniqueid: |%s| plotid: |%s|") % (year, worksheet, d["uniqueid"], d["plotid"])) else: idx = plotdf[ (plotdf["uniqueid"] == d["uniqueid"].upper()) & (plotdf["plotid"] == d["plotid"].upper())].index plotdf.at[idx, "ghg"] = "yes" df = pd.DataFrame(rows) writer = pd.ExcelWriter("output.xlsx") df.to_excel(writer, "Sheet1") writer.save() res = drive.files().list(q="title contains 'Plot Identifiers'").execute() for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue site = item["title"].split()[0] print(site) spreadsheet = utils.Spreadsheet(spr_client, item["id"]) spreadsheet.get_worksheets() sheet = spreadsheet.worksheets["Sheet 1"] for entry in sheet.get_list_feed().entry: data = entry.to_dict() df2 
= plotdf[(plotdf["uniqueid"] == site) & (plotdf["plotid"] == data["plotid"])] res = "no" if len(df2.index) == 1: res = df2["ghg"].values[0] if data["ghg"] != res: print(" GHG plotid: %s :: %s -> %s" % (data["plotid"], data["ghg"], res)) entry.set_value("ghg", res) spr_client.update(entry)
def drive_changelog(regime, yesterday, html):
    """Append a Drive change report for one project to *html*.

    Pages through the Drive changes feed starting at the stored
    ``changestamp`` for *regime*, keeps files modified since *yesterday*
    that live under a known base folder, and appends an HTML table row
    (plus per-revision author rows) for each.  The newest changestamp is
    written back to CONFIG and persisted via util.save_config.

    Returns the augmented html string.
    """
    drive = util.get_driveclient(CONFIG, regime)
    folders = util.get_folders(drive)
    start_change_id = CONFIG[regime]["changestamp"]

    html += """<p><table border="1" cellpadding="3" cellspacing="0">
<thead>
<tr><th>Folder</th><th>Resource</th></tr>
</thead>
<tbody>"""
    largestChangeId = -1
    hits = 0
    page_token = None
    changestamp = None
    param = {'includeDeleted': False, 'maxResults': 1000}
    while True:
        if start_change_id:
            param['startChangeId'] = start_change_id
        if page_token:
            param['pageToken'] = page_token
        print(("[%s] start_change_id: %s largestChangeId: %s page_token: %s"
               ) % (regime, start_change_id, largestChangeId, page_token))
        response = drive.changes().list(**param).execute()
        largestChangeId = response['largestChangeId']
        page_token = response.get('nextPageToken')
        for item in response['items']:
            # Skip folders, forms, and sites docs.
            if item['file']['mimeType'] in [FMIME, FORM_MTYPE, SITES_MYTPE]:
                continue
            changestamp = item['id']
            if item['deleted']:
                continue
            # don't do more work when this file actually did not change
            modifiedDate = datetime.datetime.strptime(
                item['file']['modifiedDate'][:19], '%Y-%m-%dT%H:%M:%S')
            modifiedDate = modifiedDate.replace(tzinfo=pytz.timezone("UTC"))
            if modifiedDate < yesterday:
                continue
            # Need to see which base folder this file is in!
            isproject = False
            for parent in item['file']['parents']:
                if parent['id'] not in folders:
                    print(('[%s] file: %s has unknown parent: %s'
                           ) % (regime, item['id'], parent['id']))
                    continue
                isproject = True
            if not isproject:
                print(('[%s] %s (%s) skipped as basefolders are: %s'
                       ) % (regime, repr(item['file']['title']),
                            item['file']['mimeType'],
                            item['file']['parents']))
                continue
            uri = item['file']['alternateLink']
            # Strip non-ascii from the title for the report body.
            title = item['file']['title'].encode(
                'ascii', 'ignore').decode('ascii')
            localts = modifiedDate.astimezone(LOCALTZ)
            hits += 1
            pfolder = item['file']['parents'][0]['id']
            html += """
<tr>
<td><a href="https://docs.google.com/folderview?id=%s&usp=drivesdk">%s</a></td>
<td><a href="%s">%s</a></td></tr>
""" % (pfolder, folders[pfolder]['title'], uri, title)
            hit = False
            if 'version' in item['file']:
                lastmsg = ""
                try:
                    revisions = drive.revisions().list(
                        fileId=item['file']['id']).execute()
                except Exception as _exp:
                    print(('[%s] file %s (%s) failed revisions'
                           ) % (regime, title, item['file']['mimeType']))
                    revisions = {'items': []}
                for item2 in revisions['items']:
                    # print pprint(item2)
                    md = datetime.datetime.strptime(
                        item2['modifiedDate'][:19], '%Y-%m-%dT%H:%M:%S')
                    md = md.replace(tzinfo=pytz.timezone("UTC"))
                    if md < yesterday:
                        continue
                    localts = md.astimezone(LOCALTZ)
                    # for some reason, some revisions have no user associated
                    # with it.  So just skip for now
                    # http://stackoverflow.com/questions/1519072
                    if 'lastModifyingUser' not in item2:
                        continue
                    luser = item2['lastModifyingUser']
                    hit = True
                    display_name = luser['displayName']
                    # NOTE(review): revisions can lack emailAddress; the
                    # newer variant of this function uses .get() -- confirm
                    # whether this KeyError is ever hit in practice.
                    email_address = luser['emailAddress']
                    if display_name == CONFIG['service_account']:
                        display_name = "daryl's magic"
                        email_address = "*****@*****.**"
                    thismsg = """
<tr><td colspan="2"><img src="%s" style="height:25px;"/> %s by %s (%s)</td></tr>
""" % ((luser['picture']['url'] if 'picture' in luser else ''),
                           localts.strftime("%-d %b %-I:%M %p"), display_name,
                           email_address)
                    # Collapse consecutive identical author rows.
                    if thismsg != lastmsg:
                        html += thismsg
                        lastmsg = thismsg
            # Now we check revisions
            if not hit:
                luser = item['file'].get('lastModifyingUser', dict())
                html += """
<tr><td colspan="2"><img src="%s" style="height:25px;"/> %s by %s (%s)</td></tr>
""" % (luser['picture']['url'] if 'picture' in luser else '',
                       localts.strftime("%-d %b %-I:%M %p"),
                       luser.get('displayName', 'n/a'),
                       luser.get('emailAddress', 'n/a'))
        if not page_token:
            break
    if changestamp is not None:
        CONFIG[regime]['changestamp'] = changestamp
    if hits == 0:
        html += """<tr><td colspan="5">No Changes Found...</td></tr>\n"""
    html += """</tbody></table>"""
    util.save_config(CONFIG)
    return html
"""One-off: dump the worksheets feed of a single CSCAP spreadsheet."""
import datetime

import pytz
import pyiem.cscap_utils as util

config = util.get_config()
FOLDERS = {}
drive = util.get_driveclient(config, "cscap")
sprclient = util.get_spreadsheet_client(config)
# BUG FIX: this was a Python 2 print statement, a SyntaxError on Python 3.
print(sprclient.GetWorksheets("1UeEJ0rTQaP0H2nidUDaNVu9D5Mw26BuaHInkAJnF6ms"))
""" Add a column to the soil nitrate sheets """ import pyiem.cscap_utils as util config = util.get_config() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient() res = (drive_client.files().list( q="title contains 'Soil Nitrate Data'").execute()) newcols = [["SOIL22 Soil Ammonium (Optional)", "mg per kg soil"]] for item in res["items"]: feed2 = spr_client.GetWorksheets(item["id"]) for entry2 in feed2.entry: worksheet = entry2.id.text.split("/")[-1] feed3 = spr_client.get_list_feed(item["id"], worksheet) row = feed3.entry[0] data = row.to_dict() plusone = int(entry2.col_count.text) + 1 entry2.col_count.text = str(plusone) spr_client.update(entry2) # Add a column? for i in range(len(newcols)): cell = spr_client.get_cell(item["id"], worksheet, 1, plusone - i)
"""SWROC sites changed to HICKS""" import pyiem.cscap_utils as util config = util.get_config() spr_client = util.get_spreadsheet_client(config) drive = util.get_driveclient(config) # Fake last conditional to make it easy to reprocess one site... res = drive.files().list(q=("(title contains 'Soil Bulk Density' or " "title contains 'Soil Nitrate Data' or " "title contains 'Soil Texture Data' or " "title contains 'Agronomic Data' or " "title contains 'Plot Identifiers') and " "title contains 'HICKS'"), maxResults=999).execute() sz = len(res['items']) for i, item in enumerate(res['items']): if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue spreadsheet = util.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() for year in spreadsheet.worksheets: print('%3i/%3i sheet "%s" for "%s"' % (i + 1, sz, year, item['title'])) lf = spreadsheet.worksheets[year].get_list_feed() for entry in lf.entry: dirty = False data = entry.to_dict() if data.get('uniqueid') is None: continue value = data['uniqueid']
def drive_changelog(regime, yesterday, html):
    """Append a Drive change report for one project to *html*.

    Pages through the Drive changes feed starting at the stored
    ``changestamp`` for *regime*, keeps files created or modified since
    *yesterday* that live under a known base folder, and appends an HTML
    table row (plus per-revision author rows) for each.  The newest
    changestamp is written back to CONFIG and persisted.

    Returns the augmented html string.
    """
    drive = util.get_driveclient(CONFIG, regime)
    folders = util.get_folders(drive)
    start_change_id = CONFIG[regime]["changestamp"]

    html += """<p><table border="1" cellpadding="3" cellspacing="0">
<thead>
<tr><th>Folder</th><th>Resource</th></tr>
</thead>
<tbody>"""
    largestChangeId = -1
    hits = 0
    page_token = None
    changestamp = None
    param = {"includeDeleted": False, "maxResults": 1000}
    while True:
        if start_change_id:
            param["startChangeId"] = start_change_id
        if page_token:
            param["pageToken"] = page_token
        LOG.debug(
            "[%s] start_change_id: %s largestChangeId: %s page_token: %s",
            regime,
            start_change_id,
            largestChangeId,
            page_token,
        )
        response = drive.changes().list(**param).execute()
        largestChangeId = response["largestChangeId"]
        page_token = response.get("nextPageToken")
        for item in response["items"]:
            # Skip folders, forms, and sites docs.
            if item["file"]["mimeType"] in [FMIME, FORM_MTYPE, SITES_MYTPE]:
                continue
            changestamp = item["id"]
            if item["deleted"]:
                continue
            # Files copied in could have a createdDate of interest, but old
            # modification date
            created = datetime.datetime.strptime(
                item["file"]["createdDate"][:19], "%Y-%m-%dT%H:%M:%S"
            ).replace(tzinfo=datetime.timezone.utc)
            # don't do more work when this file actually did not change
            modifiedDate = datetime.datetime.strptime(
                item["file"]["modifiedDate"][:19], "%Y-%m-%dT%H:%M:%S"
            ).replace(tzinfo=datetime.timezone.utc)
            if modifiedDate < yesterday and created < yesterday:
                continue
            # Need to see which base folder this file is in!
            isproject = False
            for parent in item["file"]["parents"]:
                if parent["id"] not in folders:
                    LOG.info(
                        "[%s] file: %s has unknown parent: %s",
                        regime,
                        item["id"],
                        parent["id"],
                    )
                    continue
                isproject = True
            if not isproject:
                LOG.info(
                    "[%s] %s (%s) skipped as basefolders are: %s",
                    regime,
                    repr(item["file"]["title"]),
                    item["file"]["mimeType"],
                    item["file"]["parents"],
                )
                continue
            uri = item["file"]["alternateLink"]
            # Strip non-ascii from the title for the report body.
            title = (
                item["file"]["title"].encode("ascii", "ignore").decode("ascii")
            )
            localts = modifiedDate.astimezone(LOCALTZ)
            hits += 1
            pfolder = item["file"]["parents"][0]["id"]
            html += """
<tr>
<td><a href="https://docs.google.com/folderview?id=%s&usp=drivesdk">%s</a></td>
<td><a href="%s">%s</a></td></tr>
""" % (
                pfolder,
                folders[pfolder]["title"],
                uri,
                title,
            )
            hit = False
            if "version" in item["file"]:
                lastmsg = ""
                try:
                    revisions = (
                        drive.revisions()
                        .list(fileId=item["file"]["id"])
                        .execute()
                    )
                except Exception:
                    LOG.info(
                        "[%s] file %s (%s) failed revisions",
                        regime,
                        title,
                        item["file"]["mimeType"],
                    )
                    revisions = {"items": []}
                for item2 in revisions["items"]:
                    md = datetime.datetime.strptime(
                        item2["modifiedDate"][:19], "%Y-%m-%dT%H:%M:%S"
                    )
                    md = md.replace(tzinfo=pytz.timezone("UTC"))
                    if md < yesterday:
                        continue
                    localts = md.astimezone(LOCALTZ)
                    # for some reason, some revisions have no user associated
                    # with it.  So just skip for now
                    # http://stackoverflow.com/questions/1519072
                    if "lastModifyingUser" not in item2:
                        continue
                    luser = item2["lastModifyingUser"]
                    hit = True
                    display_name = luser["displayName"]
                    email_address = luser.get("emailAddress", "unknown")
                    # Mask the automation account in the report.
                    if display_name == CONFIG["service_account"]:
                        display_name = "daryl's magic"
                        email_address = "*****@*****.**"
                    thismsg = """
<tr><td colspan="2"><img src="%s" style="height:25px;"/> %s by %s (%s)</td></tr>
""" % (
                        (
                            luser["picture"]["url"]
                            if "picture" in luser
                            else ""
                        ),
                        localts.strftime("%-d %b %-I:%M %p"),
                        display_name,
                        email_address,
                    )
                    # Collapse consecutive identical author rows.
                    if thismsg != lastmsg:
                        html += thismsg
                        lastmsg = thismsg
            # Now we check revisions
            if not hit:
                luser = item["file"].get("lastModifyingUser", dict())
                html += """
<tr><td colspan="2"><img src="%s" style="height:25px;"/> %s by %s (%s)</td></tr>
""" % (
                    luser["picture"]["url"] if "picture" in luser else "",
                    localts.strftime("%-d %b %-I:%M %p"),
                    luser.get("displayName", "n/a"),
                    luser.get("emailAddress", "n/a"),
                )
        if not page_token:
            break
    if changestamp is not None:
        CONFIG[regime]["changestamp"] = changestamp
    if hits == 0:
        html += """<tr><td colspan="5">No Changes Found...</td></tr>\n"""
    html += """</tbody></table>"""
    util.save_config(CONFIG)
    return html
# NOTE(review): 'cursor' and 'utils' are defined outside this view.
# Counter of revoked permissions; not incremented in the visible span.
removed = 0
config = utils.get_config()
for project in ['td', 'cscap']:
    # Map the project appid to its website access level.
    cursor.execute("""
        SELECT access_level from website_access_levels where appid = %s
    """, (project, ))
    access_level = cursor.fetchone()[0]
    CURRENT = []
    cursor.execute("""
        SELECT email from website_users WHERE access_level = %s
    """, (access_level, ))
    for row in cursor:
        CURRENT.append(row[0])
    drive = utils.get_driveclient(config, project)
    perms = drive.permissions().list(
        fileId=config[project]['basefolder']).execute()
    for item in perms.get('items', []):
        # Unclear what type of permission this is that does not have this
        # set, maybe a file with an allow for anybody that has the link to it
        if 'emailAddress' not in item:
            continue
        email = item['emailAddress'].lower()
        if email not in CURRENT:
            # Drive grants access the website does not know about; add them.
            print(("Adding email: '%s' project: '%s' for datateam access"
                   ) % (email, project))
            cursor.execute("""INSERT into website_users(email, access_level)
            VALUES (%s, %s)""", (email, access_level))
        else:
            # Entries left in CURRENT afterwards lack Drive access.
            CURRENT.remove(email)
def main(): """Go Main""" pgconn = psycopg2.connect(database='sustainablecorn') plotdf = read_sql(""" SELECT upper(uniqueid) as uniqueid, plotid from plotids """, pgconn, index_col=None) plotdf['ipm_usb'] = 'no' drive = utils.get_driveclient(utils.get_config(), 'cscap') spr_client = utils.get_spreadsheet_client(utils.get_config()) res = drive.files().list( q=("'0B4fyEPcRW7IscDcweEwxUFV3YkU' in parents and " "title contains 'USB'")).execute() U = {} rows = [] for item in res['items']: if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue uniqueid = item['title'].strip().split()[-1] spreadsheet = utils.Spreadsheet(spr_client, item['id']) for worksheet in spreadsheet.worksheets: lf = spreadsheet.worksheets[worksheet].get_list_feed() for entry in lf.entry: d = entry.to_dict() rows.append(dict(uniqueid=uniqueid, plotid=d['plotid'])) plotipm = pd.DataFrame(rows) for i, row in plotipm.iterrows(): df2 = plotdf[(plotdf['uniqueid'] == row['uniqueid']) & (plotdf['plotid'] == row['plotid'])] if len(df2.index) == 0: key = "%s_%s" % (row['uniqueid'], row['plotid']) if key in U: continue U[key] = True print("Missing uniqueid: |%s| plotid: |%s|" % (row['uniqueid'], row['plotid'])) else: plotdf.at[df2.index, 'ipm_usb'] = 'yes' res = drive.files().list(q="title contains 'Plot Identifiers'").execute() for item in res['items']: if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue site = item['title'].split()[0] print site spreadsheet = utils.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() sheet = spreadsheet.worksheets['Sheet 1'] for entry in sheet.get_list_feed().entry: data = entry.to_dict() df2 = plotdf[(plotdf['uniqueid'] == site) & (plotdf['plotid'] == data['plotid'])] res = 'no' if len(df2.index) == 1: res = df2['ipm_usb'].values[0] # print res, site, data['plotid'], df2 if data['ipmusb'] != res: print(" IPM_USB plotid: %s :: %s -> %s" % (data['plotid'], data['ipmusb'], res)) entry.set_value('ipmusb', res) 
spr_client.update(entry)
def main(): """Go""" config = util.get_config() pgconn = psycopg2.connect(database='sustainablecorn', host=config['database']['host']) pcursor = pgconn.cursor() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient(config) res = drive_client.files( ).list(q="title contains 'Soil Nitrate Data'").execute() # Load up what data we have for this year current = {} pcursor.execute(""" SELECT uniqueid, plotid, varname, depth, subsample, sampledate from soil_data WHERE year = %s and varname in %s """, (YEAR, tuple(DOMAIN))) for row in pcursor: key = "%s|%s|%s|%s|%s|%s" % row current[key] = True for item in res['items']: if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue spreadsheet = util.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() worksheet = spreadsheet.worksheets.get(YEAR) if worksheet is None: # print("Missing Year: %s from %s" % (YEAR, spreadsheet.title)) continue worksheet.get_cell_feed() siteid = item['title'].split()[0] # print 'Processing %s Soil Nitrate Year %s' % (siteid, YEAR), if worksheet.get_cell_value(1, 1) != 'plotid': print(('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping' ) % (siteid, YEAR, worksheet.get_cell_value(1, 1))) continue startcol = 3 if worksheet.get_cell_value(1, 2) == 'depth': depthcol = 2 elif worksheet.get_cell_value(1, 3) == 'depth': depthcol = 3 startcol = 4 if worksheet.get_cell_value(1, 2) == 'location': locationcol = 2 else: locationcol = None for row in range(3, worksheet.rows+1): plotid = worksheet.get_cell_value(row, 1) depth = worksheet.get_cell_value(row, depthcol) if depth.find(" to ") == -1: print(("harvest_soil_nitrate found invalid depth: %s %s %s" ) % (depth, siteid, YEAR)) continue if plotid is None or depth is None: continue subsample = "1" if locationcol is not None: subsample = worksheet.get_cell_value(row, locationcol) for col in range(startcol, worksheet.cols+1): if worksheet.get_cell_value(1, col) is None: 
print(("h_soil_nitrate site: %s year: %s col: %s is null" ) % (siteid, YEAR, col)) continue colheading = worksheet.get_cell_value(1, col).strip() if not colheading.startswith('SOIL'): print(('Invalid colheading: %s site: %s year: %s' ) % (colheading, siteid, YEAR)) continue # Attempt to tease out the sampledate tokens = colheading.split() varname = tokens[0] datetest = tokens[1] if len(datetest.split("/")) == 3: date = datetime.datetime.strptime(datetest, '%m/%d/%Y') else: if row == 3: print(("h_soil_nitrate %s[%s] unknown sample date %s" ) % (siteid, YEAR, repr(colheading))) date = DUMMY_DATES.get(datetest, None) if date is None and row == 3: print(("FIXME h_soil_nitrate %s[%s] " "double unknown date %s" ) % (siteid, YEAR, repr(colheading))) inval = worksheet.get_cell_value(row, col) val = util.cleanvalue(inval) if inval is not None and val is None: print(("harvest_soil_nitrate found None. " "site: %s year: %s " " row: %s col: %s varname: %s" ) % (siteid, YEAR, row, col, varname)) if varname not in DOMAIN: print(("harvest_soil_nitrate %s[%s] " "found additional var: %s" ) % (siteid, YEAR, varname)) DOMAIN.append(varname) key = ("%s|%s|%s|%s|%s|%s" ) % (siteid, plotid, varname, depth, subsample, date if date is None else date.strftime("%Y-%m-%d")) if key in current: del current[key] continue try: pcursor.execute(""" INSERT into soil_data(uniqueid, plotid, varname, year, depth, value, subsample, sampledate) values (%s, %s, %s, %s, %s, %s, %s, %s) """, (siteid, plotid, varname, YEAR, depth, val, subsample, date)) except Exception as exp: print(('site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK' ) % (siteid, YEAR)) print(exp) print(('%s %s %s %s %s %s' ) % (siteid, plotid, varname, depth, date, val)) sys.exit() for key in current: (siteid, plotid, varname, depth, subsample, date) = key.split("|") if date != 'None': datesql = " and sampledate = '%s' " % (date, ) else: datesql = " and sampledate is null " print(('h_soil_nitrate rm %s %s %s %s %s %s %s' ) % (YEAR, 
siteid, plotid, varname, depth, subsample, date)) pcursor.execute(""" DELETE from soil_data where uniqueid = %s and plotid = %s and varname = %s and year = %s and depth = %s and subsample = %s """ + datesql + """ """, (siteid, plotid, varname, YEAR, depth, subsample)) # print "...done" pcursor.close() pgconn.commit() pgconn.close()
"""Gio discovered a mismatch between AG codes and headers """ import pyiem.cscap_utils as util import pandas as pd config = util.get_config() drive = util.get_driveclient(config, "cscap") sheets = util.get_sheetsclient(config, "cscap") res = drive.files().list(q="title contains 'Agronomic Data'").execute() IGNORE = [ "Rep", "Tillage", "Rotation", "Drainage", "PlotID", "ROW", "COLUMN", "UniqueID", "Nitrogen", "Landscape", ] def build_xref(): f = sheets.spreadsheets().get( spreadsheetId="1PKK-vWuOryYFOSYSgt4TosrjIDX_F-opHOvrEo5q-i4", includeGridData=True, ) j = util.exponential_backoff(f.execute)
"""Append the data-availability flag columns to Plot Identifiers sheets."""
import pyiem.cscap_utils as util

drive = util.get_driveclient(util.get_config(), "cscap")
spr_client = util.get_spreadsheet_client(util.get_config())

listing = drive.files().list(q="title contains 'Plot Identifiers'").execute()
for doc in listing["items"]:
    if doc["mimeType"] != "application/vnd.google-apps.spreadsheet":
        continue
    print(doc["title"])
    book = util.Spreadsheet(spr_client, doc["id"])
    book.get_worksheets()
    front = book.worksheets["Sheet 1"]
    for colname in ("AGRO", "SOIL", "GHG", "IPM_CSCAP", "IPM_USB"):
        front.add_column(colname)
) if len(res["items"]) == 0: body = { "title": title, "mimeType": "application/vnd.google-apps.spreadsheet", "parents": [{"id": colfolder}], } print("Creating Tile Flow Sheet: %s in %s" % (title, colfolder)) res = drive.files().insert(body=body).execute() return res["id"] return res["items"][0]["id"] config = util.get_config() ssclient = util.get_spreadsheet_client(config) drive = util.get_driveclient(config) msheet = util.Spreadsheet(ssclient, config["td"]["site_measurements"]) msheet.get_worksheets() sheet = msheet.worksheets["Plot ID"] sheet.get_list_feed() sites = {} pis = {} for entry in sheet.list_feed.entry: row = entry.to_dict() if row["tileflowandtilenitrate-nyesno"] == "NO": continue site = row["siteid"] d = sites.setdefault(site, []) d.append(row["plotid"]) pis[site] = row["leadpi"]
# NOTE(review): 'cursor' and 'utils' come from outside this view; this is
# the same website/Drive access reconciliation loop seen earlier in the file.
removed = 0
config = utils.get_config()
for project in ['td', 'cscap']:
    # Resolve the project's website access level by appid.
    cursor.execute("""
        SELECT access_level from website_access_levels where appid = %s
    """, (project, ))
    access_level = cursor.fetchone()[0]
    CURRENT = []
    cursor.execute("""
        SELECT email from website_users WHERE access_level = %s
    """, (access_level, ))
    for row in cursor:
        CURRENT.append(row[0])
    drive = utils.get_driveclient(config, project)
    perms = drive.permissions().list(
        fileId=config[project]['basefolder']).execute()
    for item in perms.get('items', []):
        # Unclear what type of permission this is that does not have this
        # set, maybe a file with an allow for anybody that has the link to it
        if 'emailAddress' not in item:
            continue
        email = item['emailAddress'].lower()
        if email not in CURRENT:
            # Drive grants access the website does not know about; add them.
            print(("Adding email: '%s' project: '%s' for datateam access"
                   ) % (email, project))
            cursor.execute("""INSERT into website_users(email, access_level)
            VALUES (%s, %s)""", (email, access_level))
        else:
            # Entries left in CURRENT afterwards lack Drive access.
            CURRENT.remove(email)
def main():
    """Harvest Soil Nitrate Data sheets into the soil_data table.

    Scans Google Drive for 'Soil Nitrate Data' spreadsheets, reads the
    worksheet named for the module-level ``YEAR``, inserts any cell values
    not already in the database, and deletes database rows that no longer
    appear in the sheets.

    Bug fixes vs the previous revision:
    * the ``plotid``/``depth`` None-check now happens BEFORE calling
      ``depth.find`` (an empty depth cell previously raised AttributeError)
    * a sheet with no recognizable "depth" header column is now skipped
      with a diagnostic instead of raising NameError on ``depthcol``
    """
    config = util.get_config()
    pgconn = psycopg2.connect(
        database="sustainablecorn", host=config["database"]["host"]
    )
    pcursor = pgconn.cursor()

    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)

    res = (
        drive_client.files()
        .list(q="title contains 'Soil Nitrate Data'")
        .execute()
    )

    # Load up what data we have for this year; keys mirror the ones built
    # below so that matches can be crossed off and leftovers deleted.
    current = {}
    pcursor.execute(
        """
        SELECT uniqueid, plotid, varname, depth, subsample, sampledate
        from soil_data WHERE year = %s and varname in %s
        """,
        (YEAR, tuple(DOMAIN)),
    )
    for row in pcursor:
        key = "%s|%s|%s|%s|%s|%s" % row
        current[key] = True

    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        worksheet = spreadsheet.worksheets.get(YEAR)
        if worksheet is None:
            continue
        worksheet.get_cell_feed()
        # Site id is the first token of the spreadsheet title
        siteid = item["title"].split()[0]
        if worksheet.get_cell_value(1, 1) != "plotid":
            print(
                ('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping')
                % (siteid, YEAR, worksheet.get_cell_value(1, 1))
            )
            continue
        # Locate the optional depth/location columns; data starts after them
        startcol = 3
        depthcol = None
        if worksheet.get_cell_value(1, 2) == "depth":
            depthcol = 2
        elif worksheet.get_cell_value(1, 3) == "depth":
            depthcol = 3
            startcol = 4
        if depthcol is None:
            # BUGFIX: previously fell through to a NameError on depthcol
            print(
                ("harvest_soil_nitrate: %s[%s] has no depth column, skip")
                % (siteid, YEAR)
            )
            continue
        if worksheet.get_cell_value(1, 2) == "location":
            locationcol = 2
        else:
            locationcol = None
        for row in range(3, worksheet.rows + 1):
            plotid = worksheet.get_cell_value(row, 1)
            depth = worksheet.get_cell_value(row, depthcol)
            # BUGFIX: None-check must precede depth.find() below
            if plotid is None or depth is None:
                continue
            if depth.find(" to ") == -1:
                print(
                    ("harvest_soil_nitrate found invalid depth: %s %s %s")
                    % (depth, siteid, YEAR)
                )
                continue
            subsample = "1"
            if locationcol is not None:
                subsample = worksheet.get_cell_value(row, locationcol)
            for col in range(startcol, worksheet.cols + 1):
                if worksheet.get_cell_value(1, col) is None:
                    print(
                        ("h_soil_nitrate site: %s year: %s col: %s is null")
                        % (siteid, YEAR, col)
                    )
                    continue
                colheading = worksheet.get_cell_value(1, col).strip()
                if not colheading.startswith("SOIL"):
                    print(
                        ("Invalid colheading: %s site: %s year: %s")
                        % (colheading, siteid, YEAR)
                    )
                    continue
                # Attempt to tease out the sampledate
                tokens = colheading.split()
                varname = tokens[0]
                datetest = tokens[1]
                if len(datetest.split("/")) == 3:
                    date = datetime.datetime.strptime(datetest, "%m/%d/%Y")
                else:
                    # Only complain once per sheet (first data row)
                    if row == 3:
                        print(
                            ("h_soil_nitrate %s[%s] unknown sample date %s")
                            % (siteid, YEAR, repr(colheading))
                        )
                    date = DUMMY_DATES.get(datetest, None)
                    if date is None and row == 3:
                        print(
                            (
                                "FIXME h_soil_nitrate %s[%s] "
                                "double unknown date %s"
                            )
                            % (siteid, YEAR, repr(colheading))
                        )
                inval = worksheet.get_cell_value(row, col)
                val = util.cleanvalue(inval)
                if inval is not None and val is None:
                    print(
                        (
                            "harvest_soil_nitrate found None. "
                            "site: %s year: %s "
                            " row: %s col: %s varname: %s"
                        )
                        % (siteid, YEAR, row, col, varname)
                    )
                if varname not in DOMAIN:
                    print(
                        (
                            "harvest_soil_nitrate %s[%s] "
                            "found additional var: %s"
                        )
                        % (siteid, YEAR, varname)
                    )
                    DOMAIN.append(varname)
                key = ("%s|%s|%s|%s|%s|%s") % (
                    siteid,
                    plotid,
                    varname,
                    depth,
                    subsample,
                    date if date is None else date.strftime("%Y-%m-%d"),
                )
                if key in current:
                    # Already in the database; cross it off and move on
                    del current[key]
                    continue
                try:
                    pcursor.execute(
                        """
                        INSERT into soil_data(uniqueid, plotid, varname,
                        year, depth, value, subsample, sampledate)
                        values (%s, %s, %s, %s, %s, %s, %s, %s)
                        """,
                        (
                            siteid,
                            plotid,
                            varname,
                            YEAR,
                            depth,
                            val,
                            subsample,
                            date,
                        ),
                    )
                except Exception as exp:
                    print(
                        ("site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK")
                        % (siteid, YEAR)
                    )
                    print(exp)
                    print(
                        ("%s %s %s %s %s %s")
                        % (siteid, plotid, varname, depth, date, val)
                    )
                    sys.exit()

    # Anything left in current exists in the DB but not in the sheets: purge
    for key in current:
        (siteid, plotid, varname, depth, subsample, date) = key.split("|")
        if date != "None":
            datesql = " and sampledate = '%s' " % (date,)
        else:
            datesql = " and sampledate is null "
        print(
            ("h_soil_nitrate rm %s %s %s %s %s %s %s")
            % (YEAR, siteid, plotid, varname, depth, subsample, date)
        )
        pcursor.execute(
            """
            DELETE from soil_data where uniqueid = %s and plotid = %s
            and varname = %s and year = %s and depth = %s
            and subsample = %s """
            + datesql
            + """
            """,
            (siteid, plotid, varname, YEAR, depth, subsample),
        )
    pcursor.close()
    pgconn.commit()
    pgconn.close()
def main():
    """Go Main

    Marks each plot in the plotids table with whether it appears in any
    'USB' IPM spreadsheet, then pushes the resulting yes/no flag back into
    the 'ipmusb' column of every site's Plot Identifiers sheet.
    """
    pgconn = psycopg2.connect(database="sustainablecorn")
    plotdf = read_sql(
        """
        SELECT upper(uniqueid) as uniqueid, plotid from plotids
        """,
        pgconn,
        index_col=None,
    )
    # Default everything to "no"; flipped to "yes" when found in a USB sheet
    plotdf["ipm_usb"] = "no"
    drive = utils.get_driveclient(utils.get_config(), "cscap")
    spr_client = utils.get_spreadsheet_client(utils.get_config())
    # All 'USB' spreadsheets within the hard-coded parent folder
    res = (drive.files().list(
        q=("'0B4fyEPcRW7IscDcweEwxUFV3YkU' in parents and "
           "title contains 'USB'")).execute())
    # U de-duplicates "missing plot" warnings; rows collects found plots
    U = {}
    rows = []
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        # Site id is assumed to be the last token of the sheet title
        uniqueid = item["title"].strip().split()[-1]
        spreadsheet = utils.Spreadsheet(spr_client, item["id"])
        for worksheet in spreadsheet.worksheets:
            lf = spreadsheet.worksheets[worksheet].get_list_feed()
            for entry in lf.entry:
                d = entry.to_dict()
                rows.append(dict(uniqueid=uniqueid, plotid=d["plotid"]))
    plotipm = pd.DataFrame(rows)
    for _i, row in plotipm.iterrows():
        df2 = plotdf[(plotdf["uniqueid"] == row["uniqueid"]) &
                     (plotdf["plotid"] == row["plotid"])]
        if df2.empty:
            # Plot seen in a USB sheet but unknown to the database; warn once
            key = "%s_%s" % (row["uniqueid"], row["plotid"])
            if key in U:
                continue
            U[key] = True
            print("Missing uniqueid: |%s| plotid: |%s|" %
                  (row["uniqueid"], row["plotid"]))
        else:
            plotdf.at[df2.index, "ipm_usb"] = "yes"

    # Second pass: sync the computed flag into each Plot Identifiers sheet
    res = drive.files().list(q="title contains 'Plot Identifiers'").execute()
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        site = item["title"].split()[0]
        print(site)
        spreadsheet = utils.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        sheet = spreadsheet.worksheets["Sheet 1"]
        for entry in sheet.get_list_feed().entry:
            data = entry.to_dict()
            df2 = plotdf[(plotdf["uniqueid"] == site) &
                         (plotdf["plotid"] == data["plotid"])]
            # NOTE(review): `res` is reused here as the yes/no flag,
            # shadowing the Drive listing above -- confusing but harmless
            # since the listing is not needed past this point.
            res = "no"
            if len(df2.index) == 1:
                res = df2["ipm_usb"].values[0]
            if data["ipmusb"] != res:
                print(" IPM_USB plotid: %s :: %s -> %s" %
                      (data["plotid"], data["ipmusb"], res))
                entry.set_value("ipmusb", res)
                spr_client.update(entry)
def main():
    """Go Main

    Rebuilds the plotids database table from the per-site "Plot
    Identifiers" Google Sheets: deletes all rows, then re-inserts one row
    per sheet entry.  Aborts (before commit, so the DELETE is discarded)
    when far fewer rows would be re-added than were removed.
    """
    config = util.get_config()
    pgconn = get_dbconn("sustainablecorn")
    pcursor = pgconn.cursor()

    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)
    res = (
        drive_client.files()
        .list(q="title contains 'Plot Identifiers'")
        .execute()
    )
    # Sheet column header -> database column name
    translate = {"column": "col"}
    # Treatment columns get these code prefixes (e.g. "1" -> "TIL1")
    lookup = {
        "tillage": "TIL",
        "rotation": "ROT",
        "herbicide": "HERB",
        "drainage": "DWM",
        "nitrogen": "NIT",
        "landscape": "LND",
    }
    pcursor.execute("""DELETE from plotids""")
    removed = pcursor.rowcount
    added = 0
    sheets = 0
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        sheets += 1
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        # A one off
        worksheet = spreadsheet.worksheets.get("Sheet 1_export")
        if worksheet is None:
            worksheet = spreadsheet.worksheets["Sheet 1"]
        for entry2 in worksheet.get_list_feed().entry:
            data = entry2.to_dict()
            cols = []
            vals = []
            for key in data.keys():
                val = data[key]
                if val is None:
                    continue
                if key in lookup:
                    if data[key] is not None:
                        # Strip bracket annotations, keep the first token
                        val = (
                            data[key]
                            .strip()
                            .replace("[", "")
                            .replace("]", "")
                            .split()[0]
                        )
                        if val not in ["N/A", "n/a"]:
                            val = "%s%s" % (lookup.get(key, ""), val)
                if key == "uniqueid":
                    val = val.upper()
                # Skip nitrate columns and gdata-internal keys
                if key.startswith("no3") or key.startswith("_"):
                    continue
                vals.append(val)
                cols.append(translate.get(key, key))
            if not cols:
                print("No columns for '%s'?" % (item["title"],))
                continue
            # NOTE(review): a row lacking uniqueid is only warned about,
            # the INSERT below still proceeds -- confirm this is intended
            if "uniqueid" not in cols:
                print("No uniqueid column for '%s'" % (item["title"],))
            sql = """
                INSERT into plotids(%s) VALUES (%s)
            """ % (
                ",".join(cols),
                ",".join(["%s"] * len(cols)),
            )
            try:
                pcursor.execute(sql, vals)
            except Exception as exp:
                print(exp)
                print(item["title"])
                print(cols)
                sys.exit()
            added += 1
            # One-time correction of missing nitrogen entries
            # if data['landscape'] == 'N/A':
            #     print("Updating %s %s for landscape" % (data['uniqueid'],
            #                                             data['plotid']))
            #     entry2.set_value('landscape', 'n/a')
            #     spr_client.update(entry2)
    print(
        ("harvest_plotids, removed: %s, added: %s, sheets: %s")
        % (removed, added, sheets)
    )
    # Exiting here happens before commit(), so the DELETE is rolled back
    if removed > (added + 10):
        print("harvest_plotids, aborting due to large difference")
        sys.exit()
    pcursor.close()
    pgconn.commit()
    pgconn.close()
"""Hmmm, unsure.""" import sys import copy import pyiem.cscap_utils as util YEAR = sys.argv[1] config = util.get_config() spr_client = util.get_spreadsheet_client(config) sdc_feed = spr_client.get_list_feed(config["cscap"]["sdckey"], "od6") sdc, sdc_names = util.build_sdc(sdc_feed) drive_client = util.get_driveclient(config) ALLOWED = [ "SOIL26", "SOIL27", "SOIL28", "SOIL6", "SOIL11", "SOIL12", "SOIL13", "SOIL14", ] res = (drive_client.files().list( q="title contains 'Soil Texture Data Data'").execute()) for item in res["items"]:
def main():
    """Delete the Google Drive file whose id was given on the command line."""
    drive = util.get_driveclient(util.get_config())
    file_id = sys.argv[1]
    response = drive.files().delete(fileId=file_id).execute()
    print(response)
Harvest the Agronomic Data into the ISU Database
"""
from __future__ import print_function
import sys

import psycopg2
import pyiem.cscap_utils as util

config = util.get_config()
pgconn = psycopg2.connect(database='td',
                          host=config['database']['host'])

# Get me a client, stat
spr_client = util.get_spreadsheet_client(config)
drive_client = util.get_driveclient(config, "td")


def delete_entries(current, siteid):
    """Delete leftover agronomic_data rows for this site.

    current: dict keyed "plotid|varname" of rows found in the database but
    no longer present in the spreadsheet.
    NOTE(review): relies on module-level `pcursor` and `YEAR`, which are
    defined outside this chunk.
    """
    for key in current:
        (plotid, varname) = key.split("|")
        print(('harvest_agronomic REMOVE %s %s %s'
               ) % (siteid, plotid, varname))
        pcursor.execute("""DELETE from agronomic_data where
        uniqueid = %s and plotid = %s and varname = %s and year = %s
        """, (siteid, plotid, varname, YEAR))


res = drive_client.files().list(q="title contains 'Crop Yield Data'").execute()
# NOTE(review): the body of this loop continues beyond this chunk
for item in res['items']:
"""Remove a column from all Agronomic Sheets!""" import pyiem.cscap_utils as util config = util.get_config() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient() res = drive_client.files().list(q="title contains 'Agronomic Data'").execute() for item in res['items']: spreadsheet = util.Spreadsheet(spr_client, item['id']) for yr in ["2011", "2012", "2013", "2014", "2015"]: spreadsheet.worksheets[yr].del_column("AGR392")