def test_cleanvalue(): """ see what we can do with cleaning strings""" assert abs(10.54 - csu.cleanvalue("10.54%")) < 0.01 assert csu.cleanvalue("Did NOt Collect") == "did not collect" assert csu.cleanvalue("<0.2") == "< 0.2" assert csu.cleanvalue(" ") is None assert csu.cleanvalue("NA") == "n/a" assert abs(csu.cleanvalue("10") - 10.0) < 0.0001 csu.cleanvalue("QQQ") assert "QQQ" in csu.CLEANVALUE_COMPLAINED
val = worksheet.get_cell_value(1, col) # print("considering col:%s val:'%s'" % (col, repr(val))) if val is None: continue if val.lower().replace(" ", "").find("plotid") == 0: plotidcol = col # print("Plot ID column is: %s val: %s" % (plotidcol, val)) if val.find("AGR") != 0: continue varname = val.split()[0] for row in range(3, worksheet.rows+1): plotid = worksheet.get_cell_value(row, plotidcol) if plotid is None: continue inval = worksheet.get_cell_value(row, col) val = util.cleanvalue(inval) if inval is not None and val is None: print(("harvest_agronomic found None. site: %s year: %s " " row: %s col: %s varname: %s" ) % (siteid, YEAR, row, col, varname)) # print row, col, plotid, varname, YEAR, val try: pcursor.execute(""" INSERT into agronomic_data (uniqueid, plotid, varname, year, value) values (%s, %s, %s, %s, %s) RETURNING value """, (siteid, plotid, varname, YEAR, val)) if pcursor.rowcount == 1: newvals += 1 except Exception, exp: print('HARVEST_AGRONOMIC TRACEBACK')
# if depth not in allowed_depths: # print 'site: %s year: %s has illegal depth: %s' % (siteid, YEAR, # depth) # continue if plotid is None or depth is None: continue subsample = "1" for col in range(3, worksheet.cols + 1): if worksheet.get_cell_value(1, col) is None: print( ('harvest_soil_texture Year: %s Site: %s Col: %s is null') % (YEAR, siteid, col)) continue varname = worksheet.get_cell_value(1, col).strip().split()[0] inval = worksheet.get_cell_value(row, col) val = util.cleanvalue(inval) if inval is not None and val is None: print(("harvest_soil_texture found None. site: %s year: %s " " row: %s col: %s varname: %s") % (siteid, YEAR, row, col, varname)) if varname == 'subsample': subsample = "%.0f" % (float(val), ) continue elif varname[:4] != 'SOIL': print(('Invalid varname: %s site: %s year: %s') % (worksheet.get_cell_value(1, col).strip(), siteid, YEAR)) continue # if subsample != "1": # continue try: pcursor.execute(
def main(): """Go""" config = util.get_config() pgconn = psycopg2.connect( database="sustainablecorn", host=config["database"]["host"] ) pcursor = pgconn.cursor() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient(config) res = ( drive_client.files() .list(q="title contains 'Soil Nitrate Data'") .execute() ) # Load up what data we have for this year current = {} pcursor.execute( """ SELECT uniqueid, plotid, varname, depth, subsample, sampledate from soil_data WHERE year = %s and varname in %s """, (YEAR, tuple(DOMAIN)), ) for row in pcursor: key = "%s|%s|%s|%s|%s|%s" % row current[key] = True for item in res["items"]: if item["mimeType"] != "application/vnd.google-apps.spreadsheet": continue spreadsheet = util.Spreadsheet(spr_client, item["id"]) spreadsheet.get_worksheets() worksheet = spreadsheet.worksheets.get(YEAR) if worksheet is None: continue worksheet.get_cell_feed() siteid = item["title"].split()[0] if worksheet.get_cell_value(1, 1) != "plotid": print( ('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping') % (siteid, YEAR, worksheet.get_cell_value(1, 1)) ) continue startcol = 3 if worksheet.get_cell_value(1, 2) == "depth": depthcol = 2 elif worksheet.get_cell_value(1, 3) == "depth": depthcol = 3 startcol = 4 if worksheet.get_cell_value(1, 2) == "location": locationcol = 2 else: locationcol = None for row in range(3, worksheet.rows + 1): plotid = worksheet.get_cell_value(row, 1) depth = worksheet.get_cell_value(row, depthcol) if depth.find(" to ") == -1: print( ("harvest_soil_nitrate found invalid depth: %s %s %s") % (depth, siteid, YEAR) ) continue if plotid is None or depth is None: continue subsample = "1" if locationcol is not None: subsample = worksheet.get_cell_value(row, locationcol) for col in range(startcol, worksheet.cols + 1): if worksheet.get_cell_value(1, col) is None: print( ("h_soil_nitrate site: %s year: %s col: %s is null") % (siteid, YEAR, col) ) continue colheading = worksheet.get_cell_value(1, col).strip() if not colheading.startswith("SOIL"): print( ("Invalid colheading: %s site: %s year: %s") % (colheading, siteid, YEAR) ) continue # Attempt to tease out the sampledate tokens = colheading.split() varname = tokens[0] datetest = tokens[1] if len(datetest.split("/")) == 3: date = datetime.datetime.strptime(datetest, "%m/%d/%Y") else: if row == 3: print( ("h_soil_nitrate %s[%s] unknown sample date %s") % (siteid, YEAR, repr(colheading)) ) date = DUMMY_DATES.get(datetest, None) if date is None and row == 3: print( ( "FIXME h_soil_nitrate %s[%s] " "double unknown date %s" ) % (siteid, YEAR, repr(colheading)) ) inval = worksheet.get_cell_value(row, col) val = util.cleanvalue(inval) if inval is not None and val is None: print( ( "harvest_soil_nitrate found None. " "site: %s year: %s " " row: %s col: %s varname: %s" ) % (siteid, YEAR, row, col, varname) ) if varname not in DOMAIN: print( ( "harvest_soil_nitrate %s[%s] " "found additional var: %s" ) % (siteid, YEAR, varname) ) DOMAIN.append(varname) key = ("%s|%s|%s|%s|%s|%s") % ( siteid, plotid, varname, depth, subsample, date if date is None else date.strftime("%Y-%m-%d"), ) if key in current: del current[key] continue try: pcursor.execute( """ INSERT into soil_data(uniqueid, plotid, varname, year, depth, value, subsample, sampledate) values (%s, %s, %s, %s, %s, %s, %s, %s) """, ( siteid, plotid, varname, YEAR, depth, val, subsample, date, ), ) except Exception as exp: print( ("site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK") % (siteid, YEAR) ) print(exp) print( ("%s %s %s %s %s %s") % (siteid, plotid, varname, depth, date, val) ) sys.exit() for key in current: (siteid, plotid, varname, depth, subsample, date) = key.split("|") if date != "None": datesql = " and sampledate = '%s' " % (date,) else: datesql = " and sampledate is null " print( ("h_soil_nitrate rm %s %s %s %s %s %s %s") % (YEAR, siteid, plotid, varname, depth, subsample, date) ) pcursor.execute( """ DELETE from soil_data where uniqueid = %s and plotid = %s and varname = %s and year = %s and depth = %s and subsample = %s """ + datesql + """ """, (siteid, plotid, varname, YEAR, depth, subsample), ) pcursor.close() pgconn.commit() pgconn.close()
def test_cleanvalue(self): """ see what we can do with cleaning strings""" self.assertAlmostEquals(10.54, cleanvalue("10.54%"), 2) self.assertEquals(cleanvalue('Did NOt Collect'), 'did not collect') self.assertEquals(cleanvalue('<0.2'), '< 0.2') self.assertTrue(cleanvalue(' ') is None)
def test_cleanvalue(): """ see what we can do with cleaning strings""" assert abs(10.54 - cleanvalue("10.54%")) < 0.01 assert cleanvalue('Did NOt Collect') == 'did not collect' assert cleanvalue('<0.2') == '< 0.2' assert cleanvalue(' ') is None
def main(): """Go""" config = util.get_config() pgconn = psycopg2.connect(database='sustainablecorn', host=config['database']['host']) pcursor = pgconn.cursor() # Get me a client, stat spr_client = util.get_spreadsheet_client(config) drive_client = util.get_driveclient(config) res = drive_client.files( ).list(q="title contains 'Soil Nitrate Data'").execute() # Load up what data we have for this year current = {} pcursor.execute(""" SELECT uniqueid, plotid, varname, depth, subsample, sampledate from soil_data WHERE year = %s and varname in %s """, (YEAR, tuple(DOMAIN))) for row in pcursor: key = "%s|%s|%s|%s|%s|%s" % row current[key] = True for item in res['items']: if item['mimeType'] != 'application/vnd.google-apps.spreadsheet': continue spreadsheet = util.Spreadsheet(spr_client, item['id']) spreadsheet.get_worksheets() worksheet = spreadsheet.worksheets.get(YEAR) if worksheet is None: # print("Missing Year: %s from %s" % (YEAR, spreadsheet.title)) continue worksheet.get_cell_feed() siteid = item['title'].split()[0] # print 'Processing %s Soil Nitrate Year %s' % (siteid, YEAR), if worksheet.get_cell_value(1, 1) != 'plotid': print(('harvest_soil_nitrate: %s[%s] cell(1,1)="%s", skipping' ) % (siteid, YEAR, worksheet.get_cell_value(1, 1))) continue startcol = 3 if worksheet.get_cell_value(1, 2) == 'depth': depthcol = 2 elif worksheet.get_cell_value(1, 3) == 'depth': depthcol = 3 startcol = 4 if worksheet.get_cell_value(1, 2) == 'location': locationcol = 2 else: locationcol = None for row in range(3, worksheet.rows+1): plotid = worksheet.get_cell_value(row, 1) depth = worksheet.get_cell_value(row, depthcol) if depth.find(" to ") == -1: print(("harvest_soil_nitrate found invalid depth: %s %s %s" ) % (depth, siteid, YEAR)) continue if plotid is None or depth is None: continue subsample = "1" if locationcol is not None: subsample = worksheet.get_cell_value(row, locationcol) for col in range(startcol, worksheet.cols+1): if worksheet.get_cell_value(1, col) is None: print(("h_soil_nitrate site: %s year: %s col: %s is null" ) % (siteid, YEAR, col)) continue colheading = worksheet.get_cell_value(1, col).strip() if not colheading.startswith('SOIL'): print(('Invalid colheading: %s site: %s year: %s' ) % (colheading, siteid, YEAR)) continue # Attempt to tease out the sampledate tokens = colheading.split() varname = tokens[0] datetest = tokens[1] if len(datetest.split("/")) == 3: date = datetime.datetime.strptime(datetest, '%m/%d/%Y') else: if row == 3: print(("h_soil_nitrate %s[%s] unknown sample date %s" ) % (siteid, YEAR, repr(colheading))) date = DUMMY_DATES.get(datetest, None) if date is None and row == 3: print(("FIXME h_soil_nitrate %s[%s] " "double unknown date %s" ) % (siteid, YEAR, repr(colheading))) inval = worksheet.get_cell_value(row, col) val = util.cleanvalue(inval) if inval is not None and val is None: print(("harvest_soil_nitrate found None. " "site: %s year: %s " " row: %s col: %s varname: %s" ) % (siteid, YEAR, row, col, varname)) if varname not in DOMAIN: print(("harvest_soil_nitrate %s[%s] " "found additional var: %s" ) % (siteid, YEAR, varname)) DOMAIN.append(varname) key = ("%s|%s|%s|%s|%s|%s" ) % (siteid, plotid, varname, depth, subsample, date if date is None else date.strftime("%Y-%m-%d")) if key in current: del current[key] continue try: pcursor.execute(""" INSERT into soil_data(uniqueid, plotid, varname, year, depth, value, subsample, sampledate) values (%s, %s, %s, %s, %s, %s, %s, %s) """, (siteid, plotid, varname, YEAR, depth, val, subsample, date)) except Exception as exp: print(('site: %s year: %s HARVEST_SOIL_NITRATE TRACEBACK' ) % (siteid, YEAR)) print(exp) print(('%s %s %s %s %s %s' ) % (siteid, plotid, varname, depth, date, val)) sys.exit() for key in current: (siteid, plotid, varname, depth, subsample, date) = key.split("|") if date != 'None': datesql = " and sampledate = '%s' " % (date, ) else: datesql = " and sampledate is null " print(('h_soil_nitrate rm %s %s %s %s %s %s %s' ) % (YEAR, siteid, plotid, varname, depth, subsample, date)) pcursor.execute(""" DELETE from soil_data where uniqueid = %s and plotid = %s and varname = %s and year = %s and depth = %s and subsample = %s """ + datesql + """ """, (siteid, plotid, varname, YEAR, depth, subsample)) # print "...done" pcursor.close() pgconn.commit() pgconn.close()