def do(spreadkey, sheetlabel, tablename, cols):
    """Harvest one worksheet into a (re)created database table.

    Drops ``tablename`` if it exists, recreates it with one varchar column
    per entry in ``cols`` (names run through ``cleankey``), grants read
    access, then inserts every row of the worksheet's list feed.

    Args:
        spreadkey (str): Google Spreadsheet key.
        sheetlabel (str): worksheet label within the spreadsheet.
        tablename (str): table to (re)create.  NOTE: interpolated directly
            into SQL, so it must come from trusted code, never user input.
        cols (list): spreadsheet column keys to harvest.
    """
    cursor = pgconn.cursor()
    cursor.execute("""DROP TABLE IF EXISTS %s""" % (tablename, ))
    spread = util.Spreadsheet(spr_client, spreadkey)
    listfeed = spr_client.get_list_feed(
        spreadkey, spread.worksheets[sheetlabel].id)
    # The table columns are named with the *cleaned* keys, so use the same
    # names for the INSERT.  (BUGFIX: the original INSERT used the raw
    # ``cols`` keys, which breaks whenever cleankey() alters a name.)
    colnames = [cleankey(key) for key in cols]
    insert_sql = "INSERT into %s (%s) VALUES (%s)" % (
        tablename, ",".join(colnames), ",".join(["%s"] * len(colnames)))
    for i, entry in enumerate(listfeed.entry):
        row = entry.to_dict()
        if i == 0:
            # Create the table once we know we have at least one row
            sql = "CREATE TABLE %s (%s)" % (
                tablename,
                ",".join("%s varchar" % (name, ) for name in colnames))
            cursor.execute(sql)
            cursor.execute("""
                GRANT SELECT on %s to nobody,apache
            """ % (tablename, ))
        values = []
        for name in colnames:
            val = row[name]
            # Missing cells get a sentinel string rather than NULL
            if val is None:
                val = "Unknown"
            values.append(val.strip())
        cursor.execute(insert_sql, values)
    cursor.close()
    pgconn.commit()
def process6(spreadkey):
    """Harvest water table depth (wat4) columns from every worksheet.

    Returns a dict mapping plot id -> DataFrame with ``valid`` and
    ``depth`` columns.
    """
    client = util.get_spreadsheet_client(util.get_config())
    spread = util.Spreadsheet(client, spreadkey)
    records = []
    for label in spread.worksheets:
        feed = spread.worksheets[label].get_list_feed()
        for idx, item in enumerate(feed.entry):
            # the first feed row holds units, not data
            if idx > 0:
                records.append(item.to_dict())
    frame = pd.DataFrame(records)
    frame['valid'] = pd.to_datetime(
        frame['date'], format='%m/%d/%Y %H:%M:%S')
    result = {}
    print(frame.columns)
    for column in frame.columns:
        if column.find('wat4') == -1:
            print('skipping column %s' % (repr(column), ))
            continue
        plotid = column.replace('wat4watertabledepth', '').upper()
        frame['depth'] = pd.to_numeric(
            frame['%swat4watertabledepth' % (plotid.lower(), )],
            errors='coerce') * 1.
        # TODO: watch the units here!
        result[plotid] = frame[['valid', 'depth']].copy()
    return result
def main():
    """Go Main Go

    Walk every 'Soil Texture Data' spreadsheet on Drive and convert any
    soil13/soil14 values expressed in percent to g/kg (multiply by 10),
    updating the unit row label from '%' to 'g/kg' as well.
    """
    config = util.get_config()
    spr_client = util.get_spreadsheet_client(config)
    drive = util.get_driveclient(config)
    # Fake last conditional to make it easy to reprocess one site...
    res = (drive.files().list(q=("title contains 'Soil Texture Data'"),
                              maxResults=999).execute())
    # NOTE(review): HEADERS is built but never referenced below
    HEADERS = [
        "uniqueid", "plotid", "depth", "tillage", "rotation", "soil6",
        "nitrogen", "drainage", "rep", "subsample", "landscape", "notes",
        "herbicide", "sampledate",
    ]
    sz = len(res["items"])
    for i, item in enumerate(res["items"]):
        # only spreadsheets; skip folders and other docs
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        for year in spreadsheet.worksheets:
            print('%3i/%3i sheet "%s" for "%s"' % (i + 1, sz, year,
                                                   item["title"]))
            lf = spreadsheet.worksheets[year].get_list_feed()
            for rownum, entry in enumerate(lf.entry):
                dirty = False
                data = entry.to_dict()
                for key in ["soil13", "soil14"]:
                    if key not in data:
                        continue
                    value = data[key]
                    # feed row 1 carries the units label
                    if rownum == 1 and value == "%":
                        print("updating % to g/kg")
                        entry.set_value(key, "g/kg")
                        dirty = True
                        continue
                    # data rows start at feed row 2
                    if rownum >= 2:
                        try:
                            newvalue = float(value) * 10.0
                        except Exception:
                            # non-numeric cell (blank, 'n/a', ...): leave it
                            continue
                        print("%s updating %s to %s" % (key, value,
                                                        newvalue))
                        entry.set_value(key, "%.4f" % (newvalue, ))
                        dirty = True
                if dirty:
                    # backoff wrapper guards against API rate limiting
                    util.exponential_backoff(spr_client.update, entry)
def main():
    """Go Main!

    Sync the agro/soil yes-no flags on every site's 'Plot Identifiers'
    sheet against which plotids actually have agronomic and soil data in
    the database.
    """
    pgconn = get_dbconn("sustainablecorn")
    cursor = pgconn.cursor()
    drive = util.get_driveclient(util.get_config(), "cscap")
    spr_client = util.get_spreadsheet_client(util.get_config())
    listing = drive.files().list(
        q="title contains 'Plot Identifiers'").execute()
    for item in listing["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        site = item["title"].split()[0]
        print(site)
        cursor.execute(
            """SELECT distinct plotid from agronomic_data
            WHERE site = %s""",
            (site,),
        )
        agr_plotids = [row[0] for row in cursor]
        cursor.execute(
            """SELECT distinct plotid from soil_data
            WHERE site = %s""",
            (site,),
        )
        soil_plotids = [row[0] for row in cursor]
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        sheet = spreadsheet.worksheets["Sheet 1"]
        for entry in sheet.get_list_feed().entry:
            dirty = False
            data = entry.to_dict()
            # compare desired flag against what the sheet currently says
            flag = "yes" if data["plotid"] in agr_plotids else "no"
            if data["agro"] != flag:
                print(
                    "   AGR plotid: %s :: %s -> %s"
                    % (data["plotid"], data["agro"], flag)
                )
                entry.set_value("agro", flag)
                dirty = True
            flag = "yes" if data["plotid"] in soil_plotids else "no"
            if data["soil"] != flag:
                print(
                    "   SOIL plotid: %s :: %s -> %s"
                    % (data["plotid"], data["soil"], flag)
                )
                entry.set_value("soil", flag)
                dirty = True
            if dirty:
                spr_client.update(entry)
def main():
    """Go Main Go.

    Remove the AGR392 column from the 2011-2015 worksheets of every
    'Agronomic Data' spreadsheet found on Drive.
    """
    config = util.get_config()
    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient()
    listing = drive_client.files().list(
        q="title contains 'Agronomic Data'").execute()
    years = ["2011", "2012", "2013", "2014", "2015"]
    for item in listing["items"]:
        spread = util.Spreadsheet(spr_client, item["id"])
        # NOTE(review): assumes worksheets are populated at construction
        # and every sheet has all five years -- confirm against util API
        for year in years:
            spread.worksheets[year].del_column("AGR392")
def do_notes():
    """Copy per-year site notes from CSCAP metadata into the TD Notes tab."""
    spread = util.Spreadsheet(
        sprclient, '1tQvw-TQFtBI6xcsbZpaCHtYF7oKZEXNXy-CZHBr8Lh8')
    sheet = spread.worksheets['Research Site Metadata']
    for row in sheet.get_list_feed().entry:
        d = row.to_dict()
        if d['uniqueid'] not in WANTED:
            continue
        # one TD Notes entry per calendar year 2011..2015
        for year in range(2011, 2016):
            newrow = gdata.spreadsheets.data.ListEntry()
            newrow.set_value('uniqueid', WANTED[d['uniqueid']])
            newrow.set_value('calendaryear', str(year))
            newrow.set_value('cropyear', str(year))
            newrow.set_value('notes', d['notes%s' % (year, )])
            newrow.set_value('editedby', '*****@*****.**')
            newrow.set_value('updated', '4/20/2016 14:00')
            sprclient.add_list_entry(
                newrow, TD_ID, td_spread.worksheets['Notes'].id)
def process5(spreadkey):
    """ SERF, round 2

    Pull the 2011-2015 worksheet and return a dict of plotid ->
    DataFrame(valid, depth) for plots 1 through 8.
    """
    client = util.get_spreadsheet_client(util.get_config())
    spread = util.Spreadsheet(client, spreadkey)
    feed = spread.worksheets['2011-2015'].get_list_feed()
    frame = pd.DataFrame([item.to_dict() for item in feed.entry])
    frame['valid'] = pd.to_datetime(frame['valid'])
    result = {}
    for plot in range(1, 9):
        plotid = str(plot)
        colname = 'plot%swatertablemm' % (plotid, )
        frame[colname] = pd.to_numeric(frame[colname], errors='coerce')
        result[plotid] = frame[['valid', colname]].copy()
        result[plotid].columns = ['valid', 'depth']
    return result
def main():
    """Go Main!

    Fill in the missing treatment column (COLKEY) on every site's
    'Plot Identifiers' sheet, using the human-readable name of the site's
    second TRTKEY treatment.
    """
    config = util.get_config()
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)
    treat_feed = spr_client.get_list_feed(config["cscap"]["treatkey"], "od6")
    treatments, treatment_names = util.build_treatments(treat_feed)
    res = (
        drive_client.files()
        .list(q="title contains 'Plot Identifiers'")
        .execute()
    )
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        print("Processing '%s'..." % (item["title"],))
        spreadsheet.get_worksheets()
        worksheet = spreadsheet.worksheets["Sheet 1"]
        for entry in worksheet.get_list_feed().entry:
            data = entry.to_dict()
            uniqueid = data.get("uniqueid")
            # BUGFIX: test for a missing uniqueid BEFORE calling .lower();
            # the original lowered first, so the None guard could never
            # fire and a blank cell raised AttributeError instead.
            if uniqueid is None:
                continue
            sitekey = uniqueid.lower()
            trt = treatments[sitekey]
            # already has a value, leave it alone
            if data[COLKEY] is not None and data[COLKEY] != "":
                continue
            if len(trt[TRTKEY]) != 2:
                print("can't deal with this: %s" % (trt[TRTKEY],))
                break
            newval = treatment_names.get(trt[TRTKEY][1], "")
            entry.set_value(COLKEY, newval)
            print(
                ("Setting plotid: %s uniqueid: %s column:%s to %s")
                % (data.get("plotid"), sitekey, COLKEY, newval)
            )
            spr_client.update(entry)
"SOIL26", "SOIL27", "SOIL28", "SOIL6", "SOIL11", "SOIL12", "SOIL13", "SOIL14", ] res = (drive_client.files().list( q="title contains 'Soil Texture Data Data'").execute()) for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue spreadsheet = util.Spreadsheet(spr_client, item["id"]) sitekey = item["title"].split()[0].lower() print("------------> %s [%s] [%s]" % (YEAR, sitekey, item["title"])) if YEAR not in spreadsheet.worksheets: print("%s does not have Year: %s in worksheet" % (sitekey, YEAR)) continue worksheet = spreadsheet.worksheets[YEAR] worksheet.get_list_feed() if len(worksheet.list_feed.entry) == 0: print(" EMPTY sheet, skipping") continue entry2 = worksheet.list_feed.entry[0] data = entry2.to_dict() keys = data.keys() shouldhave = copy.deepcopy(sdc[YEAR][sitekey]) error = False
body = { "title": title, "mimeType": "application/vnd.google-apps.spreadsheet", "parents": [{"id": colfolder}], } print("Creating Tile Flow Sheet: %s in %s" % (title, colfolder)) res = drive.files().insert(body=body).execute() return res["id"] return res["items"][0]["id"] config = util.get_config() ssclient = util.get_spreadsheet_client(config) drive = util.get_driveclient(config) msheet = util.Spreadsheet(ssclient, config["td"]["site_measurements"]) msheet.get_worksheets() sheet = msheet.worksheets["Plot ID"] sheet.get_list_feed() sites = {} pis = {} for entry in sheet.list_feed.entry: row = entry.to_dict() if row["tileflowandtilenitrate-nyesno"] == "NO": continue site = row["siteid"] d = sites.setdefault(site, []) d.append(row["plotid"]) pis[site] = row["leadpi"] # Use SDC to figure out years
def main():
    """Do Main Things

    Recompute the elemental nutrient columns (*elem) for every synthetic
    fertilizer row of the Field Operations sheet.

    Option B (no usable product rate): nutrient cells already hold
    lbs/acre amounts, converted to kg/ha via KGHA_LBA.
    Option A (product rate present): nutrient cells hold percentages of
    the product rate.  Phosphate (P2O5) converts to elemental P with
    0.437, potash (K2O) to elemental K with 0.830.
    """
    sprclient = util.get_spreadsheet_client(util.get_config())
    spreadsheet = util.Spreadsheet(sprclient, BACKEND)
    spreadsheet.get_worksheets()
    sheet = spreadsheet.worksheets["Field Operations"]
    for entry in sheet.get_list_feed().entry:
        row = entry.to_dict()
        if row["operation"] != "fertilizer_synthetic":
            continue
        if (row["productrate"] is None or row["productrate"] == "-1.0"
                or row["productrate"] == ""):
            option = "B"
            # Option B, values are in lbs per acre
            phosphoruse = 0.0
            potassium = 0.0
            nitrogen = float2(row["nitrogen"]) * KGHA_LBA
            if row["phosphorus"] is not None and float2(row["phosphorus"]) > 0:
                phosphoruse = float2(row["phosphorus"]) * KGHA_LBA
            if row["phosphate"] is not None and float2(row["phosphate"]) > 0:
                phosphoruse = float2(row["phosphate"]) * KGHA_LBA * 0.437
            if row["potassium"] is not None and float2(row["potassium"]) > 0:
                potassium = float2(row["potassium"]) * KGHA_LBA
            if row["potash"] is not None and float2(row["potash"]) > 0:
                # BUGFIX: use the lenient float2() like every other
                # column; the original called bare float() here
                potassium = float2(row["potash"]) * KGHA_LBA * 0.830
            sulfur = float2(row["sulfur"]) * KGHA_LBA
            zinc = float2(row["zinc"]) * KGHA_LBA
            magnesium = float2(row["magnesium"]) * KGHA_LBA
            calcium = float2(row["calcium"]) * KGHA_LBA
            iron = float2(row["iron"]) * KGHA_LBA
        else:
            option = "A"
            # Option A, straight percentages
            prate = float2(row["productrate"])
            nitrogen = prate * float2(row["nitrogen"]) / 100.0
            phosphoruse = 0.0
            potassium = 0.0
            if row["phosphorus"] is not None and float2(row["phosphorus"]) > 0:
                phosphoruse = prate * float2(row["phosphorus"]) / 100.0
            if row["phosphate"] is not None and float2(row["phosphate"]) > 0:
                phosphoruse = prate * float2(row["phosphate"]) / 100.0 * 0.437
            if row["potassium"] is not None and float2(row["potassium"]) > 0:
                potassium = prate * float2(row["potassium"]) / 100.0
            if row["potash"] is not None and float2(row["potash"]) > 0:
                # BUGFIX: float() -> float2() for consistency, as above
                potassium = prate * float2(row["potash"]) / 100.0 * 0.830
            sulfur = prate * float2(row["sulfur"]) / 100.0
            zinc = prate * float2(row["zinc"]) / 100.0
            magnesium = prate * float2(row["magnesium"]) / 100.0
            calcium = prate * float2(row["calcium"]) / 100.0
            iron = prate * float2(row["iron"]) / 100.0
        print(("Option: %s\n nitrogen: Old: %s -> New: %s")
              % (option, row["nitrogenelem"], nitrogen))
        entry.set_value("nitrogenelem", "%.2f" % (nitrogen, ))
        entry.set_value("phosphoruselem", "%.2f" % (phosphoruse, ))
        entry.set_value("potassiumelem", "%.2f" % (potassium, ))
        entry.set_value("sulfurelem", "%.2f" % (sulfur, ))
        entry.set_value("zincelem", "%.2f" % (zinc, ))
        entry.set_value("magnesiumelem", "%.2f" % (magnesium, ))
        entry.set_value("calciumelem", "%.2f" % (calcium, ))
        entry.set_value("ironelem", "%.2f" % (iron, ))
        # if option == 'B':
        util.exponential_backoff(sprclient.update, entry)
def main():
    """Go Main

    Flag which plotids appear in the USB spreadsheets, then sync the
    'ipmusb' column on every site's 'Plot Identifiers' sheet to match.
    """
    pgconn = psycopg2.connect(database="sustainablecorn")
    plotdf = read_sql(
        """
        SELECT upper(uniqueid) as uniqueid, plotid from plotids
        """,
        pgconn,
        index_col=None,
    )
    plotdf["ipm_usb"] = "no"
    drive = utils.get_driveclient(utils.get_config(), "cscap")
    spr_client = utils.get_spreadsheet_client(utils.get_config())
    res = (drive.files().list(
        q=("'0B4fyEPcRW7IscDcweEwxUFV3YkU' in parents and "
           "title contains 'USB'")).execute())
    # U de-duplicates "missing" warnings
    U = {}
    rows = []
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        # site id is the last token of the sheet title
        uniqueid = item["title"].strip().split()[-1]
        spreadsheet = utils.Spreadsheet(spr_client, item["id"])
        for worksheet in spreadsheet.worksheets:
            lf = spreadsheet.worksheets[worksheet].get_list_feed()
            for entry in lf.entry:
                d = entry.to_dict()
                rows.append(dict(uniqueid=uniqueid, plotid=d["plotid"]))
    plotipm = pd.DataFrame(rows)
    for _i, row in plotipm.iterrows():
        df2 = plotdf[(plotdf["uniqueid"] == row["uniqueid"])
                     & (plotdf["plotid"] == row["plotid"])]
        if df2.empty:
            key = "%s_%s" % (row["uniqueid"], row["plotid"])
            if key in U:
                continue
            U[key] = True
            print("Missing uniqueid: |%s| plotid: |%s|"
                  % (row["uniqueid"], row["plotid"]))
        else:
            # BUGFIX: .at requires a scalar label; df2.index is an Index,
            # so use .loc for label-array assignment
            plotdf.loc[df2.index, "ipm_usb"] = "yes"
    res = drive.files().list(q="title contains 'Plot Identifiers'").execute()
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        site = item["title"].split()[0]
        print(site)
        spreadsheet = utils.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        sheet = spreadsheet.worksheets["Sheet 1"]
        for entry in sheet.get_list_feed().entry:
            data = entry.to_dict()
            df2 = plotdf[(plotdf["uniqueid"] == site)
                         & (plotdf["plotid"] == data["plotid"])]
            # renamed from 'res' to avoid shadowing the drive listing
            flag = "no"
            if len(df2.index) == 1:
                flag = df2["ipm_usb"].values[0]
            if data["ipmusb"] != flag:
                print("   IPM_USB plotid: %s :: %s -> %s"
                      % (data["plotid"], data["ipmusb"], flag))
                entry.set_value("ipmusb", flag)
                spr_client.update(entry)
"""One off.""" import datetime import pyiem.cscap_utils as util sprclient = util.get_spreadsheet_client(util.get_config()) spread = util.Spreadsheet(sprclient, "1FxKx0GDJxv_8fIjKe2xRJ58FGILLlUSXcb6EuSLQSrI") spread.get_worksheets() sheet = spread.worksheets["BRADFORD.A"] for entry in sheet.get_list_feed().entry: d = entry.to_dict() if d["ghg2"] in [None, "Sampling Date", "-"]: continue ts = datetime.datetime.strptime(d["ghg2"][:15], "%a %b %d %Y") entry.set_value("ghg200", ts.strftime("%m/%d/%Y")) sprclient.update(entry)
def main():
    """Do Main

    Flag which plotids appear in the per-year GHG spreadsheets, dump all
    GHG rows to output.xlsx, then sync the 'ghg' column on every site's
    'Plot Identifiers' sheet to match.
    """
    pgconn = psycopg2.connect(database="sustainablecorn")
    plotdf = read_sql(
        """SELECT upper(uniqueid) as uniqueid, upper(plotid) as plotid
        from plotids""",
        pgconn,
        index_col=None,
    )
    plotdf["ghg"] = "no"
    drive = utils.get_driveclient(utils.get_config(), "cscap")
    spr_client = utils.get_spreadsheet_client(utils.get_config())
    # year -> GHG spreadsheet key
    X = {
        "2011": "1DSHcfeBNJArVowk0CG_YvzI0YQnHIZXhcxOjBi2fPM4",
        "2012": "1ax_N80tIBBKEnWDnrGxsK4KUa1ssokwMxQZNVHEWWM8",
        "2013": "1UY5JYKlBHDElwljnEC-1tF7CozplAfyGpbkbd0OFqsA",
        "2014": "12NqffqVMQ0M4PMT_CP5hYfmydC-vzXQ0lFHbKwwfqzg",
        "2015": "1FxKx0GDJxv_8fIjKe2xRJ58FGILLlUSXcb6EuSLQSrI",
    }
    # BUGFIX: X.keys() is a view on py3 with no .sort(); sorted() works
    # on both py2 and py3
    years = sorted(X.keys())
    unknown = ["UniqueID_PlotID", "-_-"]
    rows = []
    for year in years:
        spreadsheet = utils.Spreadsheet(spr_client, X.get(year))
        for worksheet in spreadsheet.worksheets:
            lf = spreadsheet.worksheets[worksheet].get_list_feed()
            for entry in lf.entry:
                d = entry.to_dict()
                if d.get("uniqueid") is None or d.get("plotid") is None:
                    continue
                rows.append(d)
                df2 = plotdf[
                    (plotdf["uniqueid"] == d["uniqueid"].upper())
                    & (plotdf["plotid"] == d["plotid"].upper())]
                if len(df2.index) == 0:
                    key = "%s_%s" % (d["uniqueid"], d["plotid"])
                    if key in unknown:
                        continue
                    unknown.append(key)
                    print(("%s[%s] Unknown uniqueid: |%s| plotid: |%s|")
                          % (year, worksheet, d["uniqueid"], d["plotid"]))
                else:
                    # reuse df2's index instead of refiltering; .loc (not
                    # .at) is the label-array setter
                    plotdf.loc[df2.index, "ghg"] = "yes"
    df = pd.DataFrame(rows)
    writer = pd.ExcelWriter("output.xlsx")
    df.to_excel(writer, "Sheet1")
    writer.save()
    res = drive.files().list(q="title contains 'Plot Identifiers'").execute()
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        site = item["title"].split()[0]
        print(site)
        spreadsheet = utils.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        sheet = spreadsheet.worksheets["Sheet 1"]
        for entry in sheet.get_list_feed().entry:
            data = entry.to_dict()
            df2 = plotdf[(plotdf["uniqueid"] == site)
                         & (plotdf["plotid"] == data["plotid"])]
            # renamed from 'res' to avoid shadowing the drive listing
            flag = "no"
            if len(df2.index) == 1:
                flag = df2["ghg"].values[0]
            if data["ghg"] != flag:
                print("   GHG plotid: %s :: %s -> %s"
                      % (data["plotid"], data["ghg"], flag))
                entry.set_value("ghg", flag)
                spr_client.update(entry)
# Load what the database currently has for this YEAR, keyed by
# uniqueid|plotid|varname|depth|subsample, so sheet content can be diffed
current = {}
pcursor.execute(
    """SELECT uniqueid, plotid, varname, depth, subsample, value
    from soil_data WHERE year = %s and varname in %s """,
    (YEAR, tuple(DOMAIN)))
for row in pcursor:
    key = "%s|%s|%s|%s|%s" % row[:5]
    # NOTE(review): row[4] is the subsample column, not value (row[5]) --
    # confirm this is intentional
    current[key] = row[4]
res = drive_client.files().list(
    q="title contains 'Soil Texture Data'").execute()
for item in res['items']:
    # only spreadsheets; skip folders and other docs
    if item['mimeType'] != 'application/vnd.google-apps.spreadsheet':
        continue
    spreadsheet = util.Spreadsheet(spr_client, item['id'])
    spreadsheet.get_worksheets()
    if YEAR not in spreadsheet.worksheets:
        # print(("Missing %s from %s") % (YEAR, spreadsheet.title))
        continue
    worksheet = spreadsheet.worksheets[YEAR]
    worksheet.get_cell_feed()
    siteid = item['title'].split()[0]
    # print 'Processing %s Soil Texture Year %s' % (siteid, YEAR)
    # require the expected header layout before harvesting
    if (worksheet.get_cell_value(1, 1) != 'plotid' or
            worksheet.get_cell_value(1, 2) != 'depth'):
        print(('harvest_soil_texture %s[%s] headers: "%s","%s", skipping')
              % (siteid, YEAR, worksheet.get_cell_value(1, 1),
                 worksheet.get_cell_value(1, 2)))
        continue
    # NOTE(review): chunk appears truncated here -- the per-row harvest
    # logic continues beyond this view
def main():
    """Go Main

    Rebuild the plotids database table from every 'Plot Identifiers'
    spreadsheet: delete all rows, then insert one row per sheet entry,
    prefixing treatment codes (TIL/ROT/...) and skipping no3/underscore
    columns.  Aborts if far fewer rows were added than removed.
    """
    config = util.get_config()
    pgconn = get_dbconn("sustainablecorn")
    pcursor = pgconn.cursor()
    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)
    res = (
        drive_client.files()
        .list(q="title contains 'Plot Identifiers'")
        .execute()
    )
    # sheet column name -> database column name
    translate = {"column": "col"}
    # treatment columns get this prefix glued onto their value
    lookup = {
        "tillage": "TIL",
        "rotation": "ROT",
        "herbicide": "HERB",
        "drainage": "DWM",
        "nitrogen": "NIT",
        "landscape": "LND",
    }
    pcursor.execute("""DELETE from plotids""")
    removed = pcursor.rowcount
    added = 0
    sheets = 0
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        sheets += 1
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        # A one off
        worksheet = spreadsheet.worksheets.get("Sheet 1_export")
        if worksheet is None:
            worksheet = spreadsheet.worksheets["Sheet 1"]
        for entry2 in worksheet.get_list_feed().entry:
            data = entry2.to_dict()
            cols = []
            vals = []
            for key in data.keys():
                val = data[key]
                if val is None:
                    continue
                if key in lookup:
                    if data[key] is not None:
                        # strip bracketed annotation, keep first token
                        val = (
                            data[key]
                            .strip()
                            .replace("[", "")
                            .replace("]", "")
                            .split()[0]
                        )
                        if val not in ["N/A", "n/a"]:
                            val = "%s%s" % (lookup.get(key, ""), val)
                if key == "uniqueid":
                    val = val.upper()
                # skip nitrate columns and gdata-internal keys
                if key.startswith("no3") or key.startswith("_"):
                    continue
                vals.append(val)
                cols.append(translate.get(key, key))
            if not cols:
                print("No columns for '%s'?" % (item["title"],))
                continue
            if "uniqueid" not in cols:
                print("No uniqueid column for '%s'" % (item["title"],))
            sql = """
                INSERT into plotids(%s) VALUES (%s)
            """ % (
                ",".join(cols),
                ",".join(["%s"] * len(cols)),
            )
            try:
                pcursor.execute(sql, vals)
            except Exception as exp:
                print(exp)
                print(item["title"])
                print(cols)
                sys.exit()
            added += 1
            # One-time correction of missing nitrogen entries
            # if data['landscape'] == 'N/A':
            #     print("Updating %s %s for landscape" % (data['uniqueid'],
            #                                             data['plotid']))
            #     entry2.set_value('landscape', 'n/a')
            #     spr_client.update(entry2)
    print(
        ("harvest_plotids, removed: %s, added: %s, sheets: %s")
        % (removed, added, sheets)
    )
    # safety valve: bail (without commit) on a suspiciously large shrink
    if removed > (added + 10):
        print("harvest_plotids, aborting due to large difference")
        sys.exit()
    pcursor.close()
    pgconn.commit()
    pgconn.close()
"""Copy management entries from CSCAP to TD""" import pyiem.cscap_utils as util import gdata.spreadsheets.data CSCAP_ID = '1CdPi6IEnO3T35t-OFQajKYzFLpu7hwOaWdL2FG1_kVA' TD_ID = '1m797EoMbtAvasqum-pB5ykVMeHsGWwcVvre8MbvE-xM' sprclient = util.get_spreadsheet_client(util.get_config()) cscap_spread = util.Spreadsheet(sprclient, CSCAP_ID) cscap_spread.get_worksheets() td_spread = util.Spreadsheet(sprclient, TD_ID) td_spread.get_worksheets() WANTED = {'SERF': 'SERF_IA', 'DPAC': 'DPAC', 'STJOHNS': 'STJOHNS'} def get_cscap_sheet(sheetname): sheet = cscap_spread.worksheets[sheetname] return sheet.get_list_feed() def get_td_sheet(sheetname): sheet = td_spread.worksheets[sheetname] return sheet.get_list_feed() def one2one(inname, outname, operations): sheetin = get_cscap_sheet(inname)
def main():
    """Go Main

    Rebuild five management database tables (operations, management,
    pesticides, dwm, notes) from the corresponding tabs of the manstore
    spreadsheet: delete-all then reinsert every non-units row.
    """
    config = util.get_config()
    pgconn = psycopg2.connect(
        database="sustainablecorn",
        user="******",
        host=config["database"]["host"],
    )
    pcursor = pgconn.cursor()
    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    spread = util.Spreadsheet(spr_client, config["cscap"]["manstore"])
    # sheet column name -> database column name
    translate = {"date": "valid"}
    tabs = ["Field Operations", "Management", "Pesticides", "DWM", "Notes"]
    tablenames = ["operations", "management", "pesticides", "dwm", "notes"]
    for sheetkey, table in zip(tabs, tablenames):
        # table name comes from the fixed list above, not user input
        pcursor.execute("""DELETE from """ + table)
        deleted = pcursor.rowcount
        sheet = spread.worksheets[sheetkey]
        added = 0
        for rownum, entry in enumerate(sheet.get_list_feed().entry):
            # Skip the first row of units
            if rownum == 0:
                continue
            d = entry.to_dict()
            cols = []
            vals = []
            for key in d.keys():
                # gio* columns are spreadsheet bookkeeping, not data
                if key.startswith("gio"):
                    continue
                val = d[key]
                # date columns: placeholder strings become NULL
                if key in [
                    "date",
                    "biomassdate1",
                    "biomassdate2",
                    "outletdate",
                ]:
                    val = val if val not in ["unknown", "N/A", "n/a"] else None
                vals.append(val)
                cols.append(translate.get(key, key))
            sql = """
                INSERT into %s(%s) VALUES (%s)
            """ % (
                table,
                ",".join(cols),
                ",".join(["%s"] * len(cols)),
            )
            try:
                pcursor.execute(sql, vals)
            except Exception as exp:
                print("CSCAP harvest_management traceback")
                print(exp)
                for a, b in zip(cols, vals):
                    print("  |%s| -> |%s|" % (a, b))
                # abort without committing the partial harvest
                return
            added += 1
        print(
            ("harvest_management %16s added:%4s deleted:%4s")
            % (sheetkey, added, deleted)
        )
    pcursor.close()
    pgconn.commit()
    pgconn.close()
def main():
    """Go

    Harvest YEAR's soil nitrate values from every 'Soil Nitrate Data'
    spreadsheet into soil_data.  Rows already present in the database are
    left alone; database rows no longer found in any sheet are deleted.
    """
    config = util.get_config()
    pgconn = psycopg2.connect(
        database="sustainablecorn", host=config["database"]["host"]
    )
    pcursor = pgconn.cursor()
    # Get me a client, stat
    spr_client = util.get_spreadsheet_client(config)
    drive_client = util.get_driveclient(config)
    res = (
        drive_client.files()
        .list(q="title contains 'Soil Nitrate Data'")
        .execute()
    )
    # Load up what data we have for this year
    current = {}
    pcursor.execute(
        """
        SELECT uniqueid, plotid, varname, depth, subsample, sampledate
        from soil_data WHERE year = %s and varname in %s
    """,
        (YEAR, tuple(DOMAIN)),
    )
    for row in pcursor:
        # keys seen again in the sheets get removed from current; whatever
        # remains at the end is stale and gets deleted
        key = "%s|%s|%s|%s|%s|%s" % row
        current[key] = True
    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        spreadsheet = util.Spreadsheet(spr_client, item["id"])
        spreadsheet.get_worksheets()
        worksheet = spreadsheet.worksheets.get(YEAR)
        if worksheet is None:
            continue
        worksheet.get_cell_feed()
        siteid = item["title"].split()[0]
        # sanity-check the header layout before harvesting
        if worksheet.get_cell_value(1, 1) != "plotid":
            print(
                ('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping')
                % (siteid, YEAR, worksheet.get_cell_value(1, 1))
            )
            continue
        startcol = 3
        # NOTE(review): if neither col 2 nor col 3 is labelled 'depth',
        # depthcol is never bound and the loop below raises NameError
        if worksheet.get_cell_value(1, 2) == "depth":
            depthcol = 2
        elif worksheet.get_cell_value(1, 3) == "depth":
            depthcol = 3
            startcol = 4
        if worksheet.get_cell_value(1, 2) == "location":
            locationcol = 2
        else:
            locationcol = None
        # data starts on sheet row 3 (row 1 headers, row 2 units)
        for row in range(3, worksheet.rows + 1):
            plotid = worksheet.get_cell_value(row, 1)
            depth = worksheet.get_cell_value(row, depthcol)
            # depths must be a range like '0 to 30'
            if depth.find(" to ") == -1:
                print(
                    ("harvest_soil_nitrate found invalid depth: %s %s %s")
                    % (depth, siteid, YEAR)
                )
                continue
            if plotid is None or depth is None:
                continue
            subsample = "1"
            if locationcol is not None:
                subsample = worksheet.get_cell_value(row, locationcol)
            for col in range(startcol, worksheet.cols + 1):
                if worksheet.get_cell_value(1, col) is None:
                    print(
                        ("h_soil_nitrate site: %s year: %s col: %s is null")
                        % (siteid, YEAR, col)
                    )
                    continue
                colheading = worksheet.get_cell_value(1, col).strip()
                if not colheading.startswith("SOIL"):
                    print(
                        ("Invalid colheading: %s site: %s year: %s")
                        % (colheading, siteid, YEAR)
                    )
                    continue
                # Attempt to tease out the sampledate
                tokens = colheading.split()
                varname = tokens[0]
                datetest = tokens[1]
                if len(datetest.split("/")) == 3:
                    date = datetime.datetime.strptime(datetest, "%m/%d/%Y")
                else:
                    # only warn once per column (first data row)
                    if row == 3:
                        print(
                            ("h_soil_nitrate %s[%s] unknown sample date %s")
                            % (siteid, YEAR, repr(colheading))
                        )
                    date = DUMMY_DATES.get(datetest, None)
                    if date is None and row == 3:
                        print(
                            (
                                "FIXME h_soil_nitrate %s[%s] "
                                "double unknown date %s"
                            )
                            % (siteid, YEAR, repr(colheading))
                        )
                inval = worksheet.get_cell_value(row, col)
                val = util.cleanvalue(inval)
                if inval is not None and val is None:
                    print(
                        (
                            "harvest_soil_nitrate found None. "
                            "site: %s year: %s "
                            " row: %s col: %s varname: %s"
                        )
                        % (siteid, YEAR, row, col, varname)
                    )
                # grow DOMAIN on the fly so later sheets match too
                if varname not in DOMAIN:
                    print(
                        (
                            "harvest_soil_nitrate %s[%s] "
                            "found additional var: %s"
                        )
                        % (siteid, YEAR, varname)
                    )
                    DOMAIN.append(varname)
                key = ("%s|%s|%s|%s|%s|%s") % (
                    siteid,
                    plotid,
                    varname,
                    depth,
                    subsample,
                    date if date is None else date.strftime("%Y-%m-%d"),
                )
                if key in current:
                    # already in the database, nothing to do
                    del current[key]
                    continue
                try:
                    pcursor.execute(
                        """
                        INSERT into soil_data(uniqueid, plotid, varname,
                        year, depth, value, subsample, sampledate)
                        values (%s, %s, %s, %s, %s, %s, %s, %s)
                        """,
                        (
                            siteid,
                            plotid,
                            varname,
                            YEAR,
                            depth,
                            val,
                            subsample,
                            date,
                        ),
                    )
                except Exception as exp:
                    print(
                        ("site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK")
                        % (siteid, YEAR)
                    )
                    print(exp)
                    print(
                        ("%s %s %s %s %s %s")
                        % (siteid, plotid, varname, depth, date, val)
                    )
                    sys.exit()
    # anything left in current vanished from the sheets: delete it
    for key in current:
        (siteid, plotid, varname, depth, subsample, date) = key.split("|")
        if date != "None":
            datesql = " and sampledate = '%s' " % (date,)
        else:
            datesql = " and sampledate is null "
        print(
            ("h_soil_nitrate rm %s %s %s %s %s %s %s")
            % (YEAR, siteid, plotid, varname, depth, subsample, date)
        )
        pcursor.execute(
            """
            DELETE from soil_data where uniqueid = %s and plotid = %s
            and varname = %s and year = %s and depth = %s
            and subsample = %s
        """
            + datesql
            + """
        """,
            (siteid, plotid, varname, YEAR, depth, subsample),
        )
    pcursor.close()
    pgconn.commit()
    pgconn.close()