# performance data which aren't in the master list from TMI, note it and add # them anyway. perffields = ['clubnumber', 'clubname', 'district', 'area', 'division', 'eligibility', 'color', 'membase', 'activemembers', 'goalsmet'] curs.execute("SELECT clubnumber, clubname, district, area, division, clubstatus as eligibility, color, membase, activemembers, goalsmet FROM clubperf WHERE entrytype = 'L' and district = %s", (parms.district,)) for info in curs.fetchall(): clubnum = Club.stringify(info[0]) try: club = clubs[clubnum] club.addvalues(info, perffields) except KeyError: print('Club %s (%d) not in current CLUBS table, patching in.' % (info[1], info[0])) clubs[clubnum] = Club(info, perffields) clubs[clubnum].charterdate = '' # Now patch in suspension dates curs.execute("SELECT clubnumber, suspenddate FROM distperf WHERE entrytype = 'L' and district = %s", (parms.district,)) for (clubnum, suspenddate) in curs.fetchall(): if clubnum in clubs: clubs[clubnum].addvalues(['suspended'],[suspenddate]) # And read in the alignment. reader = csv.DictReader(open(parms.infile, 'r')) alignfields = ['newarea', 'newdivision', 'likelytoclose', 'meetingday', 'meetingtime', 'place', 'address', 'city', 'state', 'zip', 'latitude', 'longitude'] for row in reader: newarea = row['newarea']
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """Load one day's club list from a CSV file into the ``clubs`` table.

    The header line is cleaned with ``cleanheaders`` and matched against the
    columns reported by ``describe clubs``; file columns the table does not
    have are dropped from every row.  Each remaining row is cleaned up,
    turned into a ``Club`` and compared with the club history for the day
    before ``cdate``:

    * a club that is new or has changed gets a new ``clubs`` row, and every
      changed item is recorded in ``clubchanges``;
    * an unchanged club only has its ``lastdate`` moved forward to ``cdate``.

    Finally ``cdate`` is recorded in the ``loaded`` table.

    Args:
        infile: file-like object yielding the CSV text.
        conn: database connection; only ``conn.cursor()`` is used.
        cdate: date of the data, as a ``YYYY-MM-DD`` string.
        firsttime: when true, clubs missing from yesterday's history are not
            logged as "New Club" in ``clubchanges``.

    Returns:
        None.  Returns early, without touching the database, when the file
        is empty or its header has no ``clubnumber`` column.

    Side effects:
        Adds the values returned by ``curs.execute`` for the ``clubs``
        inserts/updates to the global ``changecount``.  Calls
        ``sys.exit(3)`` if a club still compares as different from itself
        right after being stored in the history.
    """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    # An empty file has no header line at all; there is nothing to load.
    hline = next(reader, None)
    if hline is None:
        print("no header line in clubs file for %s" % cdate)
        return
    headers = cleanheaders(hline)

    try:
        headers.index("clubnumber")
    except ValueError:
        # Stay silent when the first cell starts with '{"Message"'
        # (presumably an error payload instead of CSV -- TODO confirm);
        # otherwise report the unusable header.  Either way, give up.
        if not hline or not hline[0].startswith('{"Message"'):
            print("'clubnumber' not in '%s'" % hline)
        return

    # Index of the "prospectiveclub" column in the raw file, or None when the
    # file has no such column.  (The sentinel must be None: the row test
    # below checks "is not None", and a False sentinel would index row[0].)
    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = None

    # Find out what fields we have in the database itself
    curs.execute("describe clubs")
    dbfields = {field[0] for field in curs.fetchall()}

    inform("clubs for", cdate, suppress=1)
    allheaders = list(headers)

    # Convert between Toastmasters' names for address and location and ours;
    # they've changed it a few times.  *sigh*
    if "address1" in allheaders:
        addrcol1 = allheaders.index("address1")
    else:
        addrcol1 = allheaders.index("location")
    if "address2" in allheaders:
        addrcol2 = allheaders.index("address2")
    else:
        addrcol2 = allheaders.index("address")
    allheaders[addrcol1] = "place"
    allheaders[addrcol2] = "address"
    expectedheaderscount = len(allheaders)
    allheaders.append("firstdate")
    allheaders.append("lastdate")  # For now...
    areacol = allheaders.index("area")
    divisioncol = allheaders.index("division")
    statecol = allheaders.index("state")

    # Now, suppress anything in the file that's not in the database:
    dbheaders = []
    suppress = []
    for i, name in enumerate(allheaders):
        if name in dbfields:
            dbheaders.append(name)
        else:
            suppress.append(i)
    # Delete from the right so earlier deletions don't shift later indices.
    suppress.reverse()

    def shifted(col):
        """Map a raw-file column index to its index after suppression."""
        return col - sum(1 for dropped in suppress if dropped < col)

    # The cleanup below runs on rows that have already lost the suppressed
    # columns, so the column positions have to be adjusted to match.
    addrcol1 = shifted(addrcol1)
    addrcol2 = shifted(addrcol2)
    areacol = shifted(areacol)
    divisioncol = shifted(divisioncol)
    statecol = shifted(statecol)

    Club.setfieldnames(dbheaders)

    # We need to get clubs for the most recent update so we know whether to
    # update an entry or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, "%Y-%m-%d") - timedelta(1), "%Y-%m-%d")
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        # Checked on the raw row, before any columns are removed.
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs
        for i in suppress:
            del row[i]

        # NOTE(review): this test runs after the suppressed columns were
        # deleted but compares against the raw header count, and 16/17 are
        # hard-coded positions -- confirm both against the current layout.
        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            row[16] = row[16] + "," + row[17]
            del row[17]

        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one
        # line, with spaces normalized.
        # NOTE(review): splitting on a single space puts every word of the
        # place on its own line -- confirm the intended separator.
        row[addrcol1] = "\n".join(
            [part.strip() for part in row[addrcol1].split(" ")])
        row[addrcol2] = normalize(row[addrcol2])

        # Toastmasters is currently reversing the "Area" and "Division"
        # items.  "Area" should be a number; if not, swap the two.
        try:
            int(row[areacol])
        except ValueError:
            row[areacol], row[divisioncol] = row[divisioncol], row[areacol]

        # Collapse state names into their abbreviations
        if row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        # tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters
        if club.clubstatus.startswith(("Open", "None")):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if club.area in ("0A", "0D") or club.division in ("0D", "0A"):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        if not club.charterdate.strip():
            # This is a prospective club that Toastmasters didn't mark
            # properly; ignore it.
            continue
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = (
            "1" if (club.allowsonlineattendance != "") else "0")

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude: anything absent from
        # the table or not parseable as a float becomes 0.0.
        for coord in ("latitude", "longitude"):
            value = 0.0
            if coord in dbheaders:
                try:
                    value = float(getattr(club, coord))
                except ValueError:
                    pass
            setattr(club, coord, value)

        # Sometimes, Toastmasters gets the latitude and longitude backwards.
        # If that turns out to create an impossible location (which it will
        # in California), let's swap them.
        if abs(club.latitude) > 90.0:
            club.latitude, club.longitude = club.longitude, club.latitude

        # And put it into the database if need be.  The last two headers
        # (firstdate, lastdate) are left out of the comparison.
        known = club.clubnumber in clubhist
        if known:
            changes = different(
                club, clubhist[club.clubnumber], dbheaders[:-2])
        else:
            changes = []

        if not known and not firsttime:
            # This is a new (or reinstated) club; note it in the changes
            # database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if not known or changes:
            club.firstdate = club.lastdate
            # Encode newlines in the place as double-semicolons for the
            # database
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]
            # And then put the place back into normal form
            club.place = club.place.replace(";;", "\n")

            thestr = ("INSERT IGNORE INTO clubs (" + ",".join(dbheaders) +
                      ") VALUES (" + ",".join(["%s"] * len(values)) + ");")

            # Best effort: a failed insert is reported and loading goes on.
            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                print(e)

            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)

            clubhist[club.clubnumber] = club
            # Sanity check: a club must compare equal to itself.
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate, ),
    )
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """Load the club list for ``cdate`` from CSV into the ``clubs`` table.

    Reads the header, keeps only the columns that ``describe clubs`` reports,
    and for every data row builds a cleaned-up ``Club``.  The club is
    compared (via ``different``) with the entry returned by
    ``Club.getClubsOn`` for the previous day:

    * new or changed clubs are inserted into ``clubs`` and each changed item
      is written to ``clubchanges``;
    * unchanged clubs only get ``lastdate`` updated to ``cdate``.

    When the loop ends, ``cdate`` is marked as loaded in the ``loaded``
    table.

    Args:
        infile: file-like object yielding the CSV text.
        conn: database connection; only ``conn.cursor()`` is used.
        cdate: date of the data, as a ``YYYY-MM-DD`` string.
        firsttime: when true, clubs absent from yesterday's history are not
            recorded as "New Club" in ``clubchanges``.

    Returns:
        None.  Returns early, with no database writes, when the file is
        empty or the header has no ``clubnumber`` column.

    Side effects:
        Increments the global ``changecount`` by whatever ``curs.execute``
        returns for the ``clubs`` inserts and updates.  Calls
        ``sys.exit(3)`` when a club compares as different from itself
        immediately after being stored in the history.
    """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    # No header line means an empty file: nothing to do.
    hline = next(reader, None)
    if hline is None:
        print("no header line in clubs file for %s" % cdate)
        return
    headers = cleanheaders(hline)

    try:
        headers.index("clubnumber")
    except ValueError:
        # A first cell starting with '{"Message"' is skipped quietly
        # (presumably an error payload rather than CSV -- TODO confirm);
        # any other unusable header is reported.  Both cases abort the load.
        if not hline or not hline[0].startswith('{"Message"'):
            print("'clubnumber' not in '%s'" % hline)
        return

    # Raw-file position of the "prospectiveclub" column; None if absent.
    # None (not False) is required because the per-row test checks
    # "is not None" -- a False sentinel would be used as index 0.
    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = None

    # Find out what fields we have in the database itself
    curs.execute("describe clubs")
    dbfields = {column[0] for column in curs.fetchall()}

    inform("clubs for", cdate, suppress=1)
    fileheaders = list(headers)

    # Convert between Toastmasters' names for address and location and ours;
    # they've changed it a few times.  *sigh*
    addrcol1 = fileheaders.index(
        "address1" if "address1" in fileheaders else "location")
    addrcol2 = fileheaders.index(
        "address2" if "address2" in fileheaders else "address")
    fileheaders[addrcol1] = "place"
    fileheaders[addrcol2] = "address"
    expectedheaderscount = len(fileheaders)
    fileheaders.append("firstdate")
    fileheaders.append("lastdate")  # For now...
    areacol = fileheaders.index("area")
    divisioncol = fileheaders.index("division")
    statecol = fileheaders.index("state")

    # Now, suppress anything in the file that's not in the database:
    dbheaders = [name for name in fileheaders if name in dbfields]
    # Highest index first, so deleting one column never moves another that
    # is still waiting to be deleted.
    suppress = [
        i for i, name in enumerate(fileheaders) if name not in dbfields
    ][::-1]

    def shifted(col):
        """Map a raw-file column index to its index after suppression."""
        return col - sum(1 for dropped in suppress if dropped < col)

    # Rows are cleaned up after the suppressed columns are gone, so move the
    # column positions left by the number of columns dropped before them.
    addrcol1 = shifted(addrcol1)
    addrcol2 = shifted(addrcol2)
    areacol = shifted(areacol)
    divisioncol = shifted(divisioncol)
    statecol = shifted(statecol)

    Club.setfieldnames(dbheaders)

    # We need to get clubs for the most recent update so we know whether to
    # update an entry or start a new one.
    today = datetime.strptime(cdate, "%Y-%m-%d")
    yesterday = datetime.strftime(today - timedelta(1), "%Y-%m-%d")
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        # Tested against the raw row, before columns are removed.
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs
        for i in suppress:
            del row[i]

        # NOTE(review): evaluated after column suppression yet compared with
        # the raw header count; the 16/17 positions are hard-coded --
        # confirm both still match the file layout.
        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            row[16] = row[16] + "," + row[17]
            del row[17]

        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one
        # line, with spaces normalized.
        # NOTE(review): a single-space separator breaks the place into one
        # word per line -- confirm this is the intended separator.
        row[addrcol1] = "\n".join(
            [piece.strip() for piece in row[addrcol1].split(" ")])
        row[addrcol2] = normalize(row[addrcol2])

        # Toastmasters is currently reversing the "Area" and "Division"
        # items.  "Area" should be a number; if not, swap the two.
        try:
            int(row[areacol])
        except ValueError:
            row[areacol], row[divisioncol] = row[divisioncol], row[areacol]

        # Collapse state names into their abbreviations
        if row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        # tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters
        if club.clubstatus.startswith(("Open", "None")):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if club.area in ("0A", "0D") or club.division in ("0D", "0A"):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = (
            "1" if (club.allowsonlineattendance != "") else "0"
        )

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude: a coordinate that is
        # not a table column, or does not parse as a float, becomes 0.0.
        for coord in ("latitude", "longitude"):
            value = 0.0
            if coord in dbheaders:
                try:
                    value = float(getattr(club, coord))
                except ValueError:
                    pass
            setattr(club, coord, value)

        # Sometimes, Toastmasters gets the latitude and longitude backwards.
        # If that turns out to create an impossible location (which it will
        # in California), let's swap them.
        if abs(club.latitude) > 90.0:
            club.latitude, club.longitude = club.longitude, club.latitude

        # And put it into the database if need be.  firstdate/lastdate (the
        # last two headers) do not take part in the comparison.
        known = club.clubnumber in clubhist
        if known:
            changes = different(
                club, clubhist[club.clubnumber], dbheaders[:-2])
        else:
            changes = []

        if not known and not firsttime:
            # This is a new (or reinstated) club; note it in the changes
            # database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if not known or changes:
            club.firstdate = club.lastdate
            # Encode newlines in the place as double-semicolons for the
            # database
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]
            # And then put the place back into normal form
            club.place = club.place.replace(";;", "\n")

            thestr = (
                "INSERT IGNORE INTO clubs ("
                + ",".join(dbheaders)
                + ") VALUES ("
                + ",".join(["%s"] * len(values))
                + ");"
            )

            # Best effort: report a failed insert and keep loading.
            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                print(e)

            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)

            clubhist[club.clubnumber] = club
            # Sanity check: the stored club must compare equal to itself.
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate,),
    )
def doDailyClubs(infile, conn, cdate, firsttime=False): """ infile is a file-like object """ global changecount from datetime import datetime, timedelta curs = conn.cursor() reader = csv.reader(infile) hline = reader.next() headers = cleanheaders(hline) try: clubcol = headers.index('clubnumber') except ValueError: if not hline[0].startswith('{"Message"'): print "'clubnumber' not in '%s'" % hline return try: prospectiveclubcol = headers.index('prospectiveclub') except ValueError: prospectiveclubcol = False # Find out what fields we have in the database itself dbfields = [] curs.execute("describe clubs") for l in curs.fetchall(): dbfields.append(l[0]) inform("clubs for", cdate, suppress=1) dbheaders = [p for p in headers] # Convert between Toastmasters' names for address and location and ours; they've changed it a few times. *sigh* if 'address1' in dbheaders: addrcol1 = dbheaders.index('address1') else: addrcol1 = dbheaders.index('location') if 'address2' in dbheaders: addrcol2 = dbheaders.index('address2') else: addrcol2 = dbheaders.index('address') dbheaders[addrcol1] = 'place' dbheaders[addrcol2] = 'address' expectedheaderscount = len(dbheaders) dbheaders.append('firstdate') dbheaders.append('lastdate') # For now... areacol = dbheaders.index('area') divisioncol = dbheaders.index('division') statecol = dbheaders.index('state') # Now, suppress anything in the file that's not in the database: suppress = [] oldheaders = dbheaders dbheaders = [] for i in xrange(len(oldheaders)): if oldheaders[i] in dbfields: dbheaders.append(oldheaders[i]) else: suppress.append(i) suppress.reverse() # We remove these columns from the input Club.setfieldnames(dbheaders) # We need to get clubs for the most recent update so we know whether to update an entry # or start a new one. 
yesterday = datetime.strftime(datetime.strptime(cdate, '%Y-%m-%d') - timedelta(1),'%Y-%m-%d') clubhist = Club.getClubsOn(curs, date=yesterday) for row in reader: if len(row) < expectedheaderscount: break # we're finished if prospectiveclubcol is not None and row[prospectiveclubcol]: continue # Ignore prospective clubs for i in suppress: del row[i] # Convert to unicode. Toastmasters usually uses UTF-8 but occasionally uses Windows CP1252 on the wire. try: row = [unicode(t.strip(), "utf8") for t in row] except UnicodeDecodeError: row = [unicode(t.strip(), "CP1252") for t in row] if len(row) > expectedheaderscount: # Special case...Millbrae somehow snuck two club websites in! row[16] = row[16] + ',' + row[17] del row[17] #print row[addrcol1] #print row[addrcol2] # Now, clean up the address: # Address line 1 is "place" information and can be multiple lines. # Address line 2 is the real address and should be treated as one line, with spaces normalized. place = '\n'.join([x.strip() for x in row[addrcol1].split(' ')]) row[addrcol1] = place address = normalize(row[addrcol2]) row[addrcol2] = address # Toastmasters is currently reversing the "Area" and "Division" items. "Area" should be a # number; if not, swap the two. 
try: thearea = row[areacol] thedivision = row[divisioncol] areanum = int(row[areacol]) except ValueError: row[areacol] = thedivision row[divisioncol] = thearea # Collapse state names into their abbreviations if row[statecol] in statelist: row[statecol] = statelist[row[statecol]] # Get the right number of items into the row by setting today as the # tentative first and last date row.append(cdate) row.append(cdate) # And create the object club = Club(row) # Now, clean up things coming from Toastmasters if club.clubstatus.startswith('Open') or club.clubstatus.startswith('None'): club.clubstatus = 'Open' else: club.clubstatus = 'Restricted' # Clean up the club and district numbers and the area club.clubnumber = cleanitem(club.clubnumber) club.district = cleanitem(club.district) club.area = cleanitem(club.area) # If a club is partially unassigned, mark it as completely unassigned. if (club.area == '0A') or (club.area == '0D') or (club.division == '0D') or (club.division == '0A'): club.area = '0A' club.division = '0D' # Clean up the charter date club.charterdate = cleandate(club.charterdate) # Clean up advanced status club.advanced = '1' if (club.advanced != '') else '0' # Clean up online status club.allowsonlineattendance = '1' if (club.allowsonlineattendance != '') else '0' # Now, take care of missing latitude/longitude if ('latitude') in dbheaders: try: club.latitude = float(club.latitude) except ValueError: club.latitude = 0.0 else: club.latitude = 0.0 if ('longitude') in dbheaders: try: club.longitude = float(club.longitude) except ValueError: club.longitude = 0.0 else: club.longitude = 0.0 # Sometimes, Toastmasters gets the latitude and longitude backwards # If that turns out to create an impossible location (which it will in California), # let's swap them. 
if abs(club.latitude) > 90.0: (club.latitude, club.longitude) = (club.longitude, club.latitude) # And put it into the database if need be if club.clubnumber in clubhist: changes = different(club, clubhist[club.clubnumber], dbheaders[:-2]) else: changes = [] if club.clubnumber not in clubhist and not firsttime: # This is a new (or reinstated) club; note it in the changes database. curs.execute('INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")', (club.clubnumber, cdate)) if club.clubnumber not in clubhist or changes: club.firstdate = club.lastdate # Encode newlines in the place as double-semicolons for the database club.place = club.place.replace('\n',';;') values = [club.__dict__[x] for x in dbheaders] # And then put the place back into normal form club.place = club.place.replace(';;','\n') thestr = 'INSERT IGNORE INTO clubs (' + ','.join(dbheaders) + ') VALUES (' + ','.join(['%s' for each in values]) + ');' try: changecount += curs.execute(thestr, values) except Exception, e: print e # Capture changes for (item, old, new) in changes: if (item == 'place'): # Clean up the place (old and new) for the database old = old.replace('\n', ';;') new = new.replace('\n', ';;') try: curs.execute('INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)', (club.clubnumber, cdate, item, old, new)) except Exception, e: print e clubhist[club.clubnumber] = club if different(club, clubhist[club.clubnumber], dbheaders[:-2]): print 'it\'s different after being set.' sys.exit(3)
# them anyway. perffields = ['clubnumber', 'clubname', 'district', 'area', 'division', 'eligibility', 'color', 'membase', 'activemembers', 'goalsmet'] print parms.date curs.execute("SELECT clubnumber, clubname, district, area, division, clubstatus as eligibility, color, membase, activemembers, goalsmet FROM clubperf WHERE asof = %s", (parms.date,)) for info in curs.fetchall(): clubnum = Club.stringify(info[0]) try: club = clubs[clubnum] club.addvalues(info, perffields) except KeyError: print 'Club %s (%d) not in CLUBS table, patching in.' % (info[1], info[0]) clubs[clubnum] = Club(info, perffields) clubs[clubnum].charterdate = '' # Now patch in suspension dates curs.execute("SELECT clubnumber, suspdate FROM distperf WHERE asof = %s", (parms.date,)) for (clubnum, suspdate) in curs.fetchall(): if clubnum in clubs: clubs[clubnum].addvalues(['suspended'],[suspdate]) # Now, onward to the alignment. All we care about is the new area and new division, if any. csvfile = open('alignment.csv', 'rbU')