# performance data which aren't in the master list from TMI, note it and add
    # them anyway.

    perffields = ['clubnumber', 'clubname', 'district', 'area', 'division', 'eligibility', 'color', 'membase', 'activemembers', 'goalsmet']

    curs.execute("SELECT clubnumber, clubname, district, area, division, clubstatus as eligibility, color, membase, activemembers, goalsmet FROM clubperf WHERE entrytype = 'L' and district = %s", (parms.district,))

    for info in curs.fetchall():
        clubnum = Club.stringify(info[0])
        try:
            club = clubs[clubnum]
            club.addvalues(info, perffields)
        except KeyError:
            print('Club %s (%d) not in current CLUBS table, patching in.' % (info[1], info[0]))
            clubs[clubnum] = Club(info, perffields)
            clubs[clubnum].charterdate = ''
            
    # Now patch in suspension dates

    curs.execute("SELECT clubnumber, suspenddate FROM distperf WHERE entrytype = 'L' and district = %s", (parms.district,))
    for (clubnum, suspenddate) in curs.fetchall():
        if clubnum in clubs:
            clubs[clubnum].addvalues(['suspended'],[suspenddate])
          

    # And read in the alignment.  
    reader = csv.DictReader(open(parms.infile, 'r'))
    alignfields = ['newarea', 'newdivision', 'likelytoclose', 'meetingday', 'meetingtime', 'place', 'address', 'city',
            'state', 'zip', 'latitude', 'longitude']
    for row in reader:
        newarea = row['newarea']
Exemple #2
0
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """ infile is a file-like object """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = next(reader)
    headers = cleanheaders(hline)

    try:
        clubcol = headers.index("clubnumber")
    except ValueError:
        if not hline[0].startswith('{"Message"'):
            print("'clubnumber' not in '%s'" % hline)
        return

    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = False

    # Find out what fields we have in the database itself
    dbfields = []
    curs.execute("describe clubs")
    for l in curs.fetchall():
        dbfields.append(l[0])

    inform("clubs for", cdate, suppress=1)
    dbheaders = [p for p in headers]

    # Convert between Toastmasters' names for address and location and ours; they've changed it a few times.  *sigh*
    if "address1" in dbheaders:
        addrcol1 = dbheaders.index("address1")
    else:
        addrcol1 = dbheaders.index("location")
    if "address2" in dbheaders:
        addrcol2 = dbheaders.index("address2")
    else:
        addrcol2 = dbheaders.index("address")

    dbheaders[addrcol1] = "place"
    dbheaders[addrcol2] = "address"
    expectedheaderscount = len(dbheaders)
    dbheaders.append("firstdate")
    dbheaders.append("lastdate")  # For now...

    areacol = dbheaders.index("area")
    divisioncol = dbheaders.index("division")
    statecol = dbheaders.index("state")

    # Now, suppress anything in the file that's not in the database:
    suppress = []
    oldheaders = dbheaders
    dbheaders = []
    for i in range(len(oldheaders)):
        if oldheaders[i] in dbfields:
            dbheaders.append(oldheaders[i])
        else:
            suppress.append(i)
    suppress.reverse()  # We remove these columns from the input

    Club.setfieldnames(dbheaders)

    # We need to get clubs for the most recent update so we know whether to update an entry
    #   or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, "%Y-%m-%d") - timedelta(1), "%Y-%m-%d")
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs

        for i in suppress:
            del row[i]

        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            row[16] = row[16] + "," + row[17]
            del row[17]

        # print row[addrcol1]
        # print row[addrcol2]
        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one line, with spaces normalized.
        place = "\n".join([x.strip() for x in row[addrcol1].split("  ")])
        row[addrcol1] = place
        address = normalize(row[addrcol2])
        row[addrcol2] = address

        # Toastmasters is currently reversing the "Area" and "Division" items.  "Area" should be a
        #    number; if not, swap the two.
        try:
            thearea = row[areacol]
            thedivision = row[divisioncol]
            areanum = int(row[areacol])
        except ValueError:
            row[areacol] = thedivision
            row[divisioncol] = thearea

        # Collapse state names into their abbreviations
        if row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        #   tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters

        if club.clubstatus.startswith("Open") or club.clubstatus.startswith(
                "None"):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if ((club.area == "0A") or (club.area == "0D")
                or (club.division == "0D") or (club.division == "0A")):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        if not club.charterdate.strip():
            continue  # This is a prospective club that Toastmasters didn't mark properly; ignore it.
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = ("1" if
                                       (club.allowsonlineattendance != "") else
                                       "0")

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude
        if ("latitude") in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0

        if ("longitude") in dbheaders:
            try:
                club.longitude = float(club.longitude)
            except ValueError:
                club.longitude = 0.0
        else:
            club.longitude = 0.0

        # Sometimes, Toastmasters gets the latitude and longitude backwards
        # If that turns out to create an impossible location (which it will in California),
        #    let's swap them.
        if abs(club.latitude) > 90.0:
            (club.latitude, club.longitude) = (club.longitude, club.latitude)

        # And put it into the database if need be
        if club.clubnumber in clubhist:
            changes = different(club, clubhist[club.clubnumber],
                                dbheaders[:-2])
        else:
            changes = []

        if club.clubnumber not in clubhist and not firsttime:
            # This is a new (or reinstated) club; note it in the changes database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if club.clubnumber not in clubhist or changes:
            club.firstdate = club.lastdate
            # Encode newlines in the place as double-semicolons for the database
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]

            # And then put the place back into normal form
            club.place = club.place.replace(";;", "\n")

            thestr = ("INSERT IGNORE INTO clubs (" + ",".join(dbheaders) +
                      ") VALUES (" + ",".join(["%s"
                                               for each in values]) + ");")

            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                print(e)
            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)
            clubhist[club.clubnumber] = club
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate, ),
    )
Exemple #3
0
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """ infile is a file-like object """
    global changecount
    from datetime import datetime, timedelta

    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = next(reader)
    headers = cleanheaders(hline)

    try:
        clubcol = headers.index("clubnumber")
    except ValueError:
        if not hline[0].startswith('{"Message"'):
            print("'clubnumber' not in '%s'" % hline)
        return

    try:
        prospectiveclubcol = headers.index("prospectiveclub")
    except ValueError:
        prospectiveclubcol = False

    # Find out what fields we have in the database itself
    dbfields = []
    curs.execute("describe clubs")
    for l in curs.fetchall():
        dbfields.append(l[0])

    inform("clubs for", cdate, suppress=1)
    dbheaders = [p for p in headers]

    # Convert between Toastmasters' names for address and location and ours; they've changed it a few times.  *sigh*
    if "address1" in dbheaders:
        addrcol1 = dbheaders.index("address1")
    else:
        addrcol1 = dbheaders.index("location")
    if "address2" in dbheaders:
        addrcol2 = dbheaders.index("address2")
    else:
        addrcol2 = dbheaders.index("address")

    dbheaders[addrcol1] = "place"
    dbheaders[addrcol2] = "address"
    expectedheaderscount = len(dbheaders)
    dbheaders.append("firstdate")
    dbheaders.append("lastdate")  # For now...

    areacol = dbheaders.index("area")
    divisioncol = dbheaders.index("division")
    statecol = dbheaders.index("state")

    # Now, suppress anything in the file that's not in the database:
    suppress = []
    oldheaders = dbheaders
    dbheaders = []
    for i in range(len(oldheaders)):
        if oldheaders[i] in dbfields:
            dbheaders.append(oldheaders[i])
        else:
            suppress.append(i)
    suppress.reverse()  # We remove these columns from the input

    Club.setfieldnames(dbheaders)

    # We need to get clubs for the most recent update so we know whether to update an entry
    #   or start a new one.
    yesterday = datetime.strftime(
        datetime.strptime(cdate, "%Y-%m-%d") - timedelta(1), "%Y-%m-%d"
    )
    clubhist = Club.getClubsOn(curs, date=yesterday)

    for row in reader:
        if len(row) < expectedheaderscount:
            break  # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue  # Ignore prospective clubs

        for i in suppress:
            del row[i]


        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            row[16] = row[16] + "," + row[17]
            del row[17]

        # print row[addrcol1]
        # print row[addrcol2]
        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one line, with spaces normalized.
        place = "\n".join([x.strip() for x in row[addrcol1].split("  ")])
        row[addrcol1] = place
        address = normalize(row[addrcol2])
        row[addrcol2] = address

        # Toastmasters is currently reversing the "Area" and "Division" items.  "Area" should be a
        #    number; if not, swap the two.
        try:
            thearea = row[areacol]
            thedivision = row[divisioncol]
            areanum = int(row[areacol])
        except ValueError:
            row[areacol] = thedivision
            row[divisioncol] = thearea

        # Collapse state names into their abbreviations
        if row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]

        # Get the right number of items into the row by setting today as the
        #   tentative first and last date
        row.append(cdate)
        row.append(cdate)

        # And create the object
        club = Club(row)

        # Now, clean up things coming from Toastmasters

        if club.clubstatus.startswith("Open") or club.clubstatus.startswith("None"):
            club.clubstatus = "Open"
        else:
            club.clubstatus = "Restricted"

        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        # If a club is partially unassigned, mark it as completely unassigned.
        if (
            (club.area == "0A")
            or (club.area == "0D")
            or (club.division == "0D")
            or (club.division == "0A")
        ):
            club.area = "0A"
            club.division = "0D"

        # Clean up the charter date
        club.charterdate = cleandate(club.charterdate)

        # Clean up advanced status
        club.advanced = "1" if (club.advanced != "") else "0"

        # Clean up online status
        club.allowsonlineattendance = (
            "1" if (club.allowsonlineattendance != "") else "0"
        )

        # Add missing schemes to any URLs
        club.fixURLSchemes()

        # Now, take care of missing latitude/longitude
        if ("latitude") in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0

        if ("longitude") in dbheaders:
            try:
                club.longitude = float(club.longitude)
            except ValueError:
                club.longitude = 0.0
        else:
            club.longitude = 0.0

        # Sometimes, Toastmasters gets the latitude and longitude backwards
        # If that turns out to create an impossible location (which it will in California),
        #    let's swap them.
        if abs(club.latitude) > 90.0:
            (club.latitude, club.longitude) = (club.longitude, club.latitude)

        # And put it into the database if need be
        if club.clubnumber in clubhist:
            changes = different(club, clubhist[club.clubnumber], dbheaders[:-2])
        else:
            changes = []

        if club.clubnumber not in clubhist and not firsttime:
            # This is a new (or reinstated) club; note it in the changes database.
            curs.execute(
                'INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")',
                (club.clubnumber, cdate),
            )

        if club.clubnumber not in clubhist or changes:
            club.firstdate = club.lastdate
            # Encode newlines in the place as double-semicolons for the database
            club.place = club.place.replace("\n", ";;")
            values = [club.__dict__[x] for x in dbheaders]

            # And then put the place back into normal form
            club.place = club.place.replace(";;", "\n")

            thestr = (
                "INSERT IGNORE INTO clubs ("
                + ",".join(dbheaders)
                + ") VALUES ("
                + ",".join(["%s" for each in values])
                + ");"
            )

            try:
                changecount += curs.execute(thestr, values)
            except Exception as e:
                print(e)
            # Capture changes
            for (item, old, new) in changes:
                if item == "place":
                    # Clean up the place (old and new) for the database
                    old = old.replace("\n", ";;")
                    new = new.replace("\n", ";;")
                try:
                    curs.execute(
                        "INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)",
                        (club.clubnumber, cdate, item, old, new),
                    )
                except Exception as e:
                    print(e)
            clubhist[club.clubnumber] = club
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print("it's different after being set.")
                sys.exit(3)
        else:
            # update the lastdate
            changecount += curs.execute(
                "UPDATE clubs SET lastdate = %s WHERE clubnumber = %s AND lastdate = %s;",
                (cdate, club.clubnumber, clubhist[club.clubnumber].lastdate),
            )

    # If all the files were processed, today's work is done.
    curs.execute(
        'INSERT IGNORE INTO loaded (tablename, loadedfor) VALUES ("clubs", %s)',
        (cdate,),
    )
Exemple #4
0
def doDailyClubs(infile, conn, cdate, firsttime=False):
    """ infile is a file-like object """
    global changecount
    from datetime import datetime, timedelta
    
    curs = conn.cursor()
    reader = csv.reader(infile)

    hline = reader.next()
    headers = cleanheaders(hline)

    try:
        clubcol = headers.index('clubnumber')    
    except ValueError:
        if not hline[0].startswith('{"Message"'):
            print "'clubnumber' not in '%s'" % hline
        return
        
    try:
        prospectiveclubcol = headers.index('prospectiveclub')
    except ValueError:
        prospectiveclubcol = False
        
    # Find out what fields we have in the database itself
    dbfields = []
    curs.execute("describe clubs")
    for l in curs.fetchall():
        dbfields.append(l[0])
        
    inform("clubs for", cdate, suppress=1)
    dbheaders = [p for p in headers]
    
    # Convert between Toastmasters' names for address and location and ours; they've changed it a few times.  *sigh*
    if 'address1' in dbheaders:
        addrcol1 = dbheaders.index('address1')
    else:
        addrcol1 = dbheaders.index('location')
    if 'address2' in dbheaders:
        addrcol2 = dbheaders.index('address2')
    else:
        addrcol2 = dbheaders.index('address')
        
    dbheaders[addrcol1] = 'place'
    dbheaders[addrcol2] = 'address'
    expectedheaderscount = len(dbheaders)
    dbheaders.append('firstdate')
    dbheaders.append('lastdate')     # For now...
    
    areacol = dbheaders.index('area')
    divisioncol = dbheaders.index('division')
    statecol = dbheaders.index('state')
    
    # Now, suppress anything in the file that's not in the database:
    suppress = []
    oldheaders = dbheaders
    dbheaders = []
    for i in xrange(len(oldheaders)):
        if oldheaders[i] in dbfields:
            dbheaders.append(oldheaders[i])
        else:
            suppress.append(i)
    suppress.reverse()   # We remove these columns from the input
    
        
    Club.setfieldnames(dbheaders)

    # We need to get clubs for the most recent update so we know whether to update an entry 
    #   or start a new one.
    yesterday = datetime.strftime(datetime.strptime(cdate, '%Y-%m-%d') - timedelta(1),'%Y-%m-%d')
    clubhist = Club.getClubsOn(curs, date=yesterday)
   
    for row in reader:
        if len(row) < expectedheaderscount:
            break     # we're finished
        if prospectiveclubcol is not None and row[prospectiveclubcol]:
            continue   # Ignore prospective clubs

        for i in suppress:
            del row[i]

        # Convert to unicode.  Toastmasters usually uses UTF-8 but occasionally uses Windows CP1252 on the wire.
        try:
            row = [unicode(t.strip(), "utf8") for t in row]
        except UnicodeDecodeError:
            row = [unicode(t.strip(), "CP1252") for t in row]
         
        if len(row) > expectedheaderscount:
            # Special case...Millbrae somehow snuck two club websites in!
            row[16] = row[16] + ',' + row[17]
            del row[17]
            
        #print row[addrcol1]
        #print row[addrcol2]
        # Now, clean up the address:
        # Address line 1 is "place" information and can be multiple lines.
        # Address line 2 is the real address and should be treated as one line, with spaces normalized.
        place = '\n'.join([x.strip() for x in row[addrcol1].split('  ')]) 
        row[addrcol1] = place
        address = normalize(row[addrcol2])
        row[addrcol2] = address
        
        # Toastmasters is currently reversing the "Area" and "Division" items.  "Area" should be a
        #    number; if not, swap the two.
        try:
            thearea = row[areacol]
            thedivision = row[divisioncol]
            areanum = int(row[areacol])
        except ValueError:
            row[areacol] = thedivision
            row[divisioncol] = thearea
            
        # Collapse state names into their abbreviations
        if row[statecol] in statelist:
            row[statecol] = statelist[row[statecol]]
            
        # Get the right number of items into the row by setting today as the 
        #   tentative first and last date
        row.append(cdate)
        row.append(cdate)
        
        # And create the object
        club = Club(row)



        
        # Now, clean up things coming from Toastmasters

        if club.clubstatus.startswith('Open') or club.clubstatus.startswith('None'):
            club.clubstatus = 'Open'
        else:
            club.clubstatus = 'Restricted'
        
        # Clean up the club and district numbers and the area
        club.clubnumber = cleanitem(club.clubnumber)
        club.district = cleanitem(club.district)
        club.area = cleanitem(club.area)

        
        # If a club is partially unassigned, mark it as completely unassigned.
        if (club.area == '0A') or (club.area == '0D') or (club.division == '0D') or (club.division == '0A'):
            club.area = '0A'
            club.division = '0D'

    
        # Clean up the charter date
        club.charterdate = cleandate(club.charterdate)

    
        # Clean up advanced status
        club.advanced = '1' if (club.advanced != '') else '0'
        
        # Clean up online status
        club.allowsonlineattendance = '1' if (club.allowsonlineattendance != '') else '0'
    
        # Now, take care of missing latitude/longitude
        if ('latitude') in dbheaders:
            try:
                club.latitude = float(club.latitude)
            except ValueError:
                club.latitude = 0.0
        else:
            club.latitude = 0.0
            
        if ('longitude') in dbheaders:
           try:
               club.longitude = float(club.longitude)
           except ValueError:
               club.longitude = 0.0
        else:
           club.longitude = 0.0
           
        # Sometimes, Toastmasters gets the latitude and longitude backwards
        # If that turns out to create an impossible location (which it will in California),
        #    let's swap them.
        if abs(club.latitude) > 90.0:
            (club.latitude, club.longitude) = (club.longitude, club.latitude)

        # And put it into the database if need be
        if club.clubnumber in clubhist:
            changes = different(club, clubhist[club.clubnumber], dbheaders[:-2])
        else:
            changes = []

        
        if club.clubnumber not in clubhist and not firsttime:
            # This is a new (or reinstated) club; note it in the changes database.
            curs.execute('INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, "New Club", "", "")', (club.clubnumber, cdate))
                
        if club.clubnumber not in clubhist or changes:
            club.firstdate = club.lastdate
            # Encode newlines in the place as double-semicolons for the database
            club.place = club.place.replace('\n',';;')
            values = [club.__dict__[x] for x in dbheaders]
            
            # And then put the place back into normal form
            club.place = club.place.replace(';;','\n')
            
        
            thestr = 'INSERT IGNORE INTO clubs (' + ','.join(dbheaders) + ') VALUES (' + ','.join(['%s' for each in values]) + ');'

            try:
                changecount += curs.execute(thestr, values)
            except Exception, e:
                print e
            # Capture changes
            for (item, old, new) in changes:
                if (item == 'place'):
                    # Clean up the place (old and new) for the database
                    old = old.replace('\n', ';;')
                    new = new.replace('\n', ';;')
                try:
                    curs.execute('INSERT IGNORE INTO clubchanges (clubnumber, changedate, item, old, new) VALUES (%s, %s, %s, %s, %s)', (club.clubnumber, cdate, item, old, new))
                except Exception, e:
                     print e
            clubhist[club.clubnumber] = club
            if different(club, clubhist[club.clubnumber], dbheaders[:-2]):
                print 'it\'s different after being set.'
                sys.exit(3) 
Exemple #5
0
# them anyway.

perffields = ['clubnumber', 'clubname', 'district', 'area', 'division', 'eligibility', 'color', 'membase', 'activemembers', 'goalsmet']

print parms.date
curs.execute("SELECT clubnumber, clubname, district, area, division, clubstatus as eligibility, color, membase, activemembers, goalsmet FROM clubperf WHERE asof = %s", (parms.date,))

for info in curs.fetchall():
    clubnum = Club.stringify(info[0])
    try:
        club = clubs[clubnum]
        club.addvalues(info, perffields)
    except KeyError:
        print 'Club %s (%d) not in CLUBS table, patching in.' % (info[1], info[0])
        clubs[clubnum] = Club(info, perffields)
        clubs[clubnum].charterdate = ''
        
# Now patch in suspension dates

curs.execute("SELECT clubnumber, suspdate FROM distperf WHERE asof = %s", (parms.date,))
for (clubnum, suspdate) in curs.fetchall():
    if clubnum in clubs:
        clubs[clubnum].addvalues(['suspended'],[suspdate])
      
           




# Now, onward to the alignment.  All we care about is the new area and new division, if any.
csvfile = open('alignment.csv', 'rbU')