def main(locname, relLoc="./"):
    """Merge the rosters of every ``20*`` directory under relLoc into one file.

    Every path matched by ``relLoc + "20*"`` is expected to contain a
    ``profiles.csv`` and a ``users.csv``.  A roster is built for each of them
    and the rosters are merged: when the same key appears in more than one
    roster, the entry from the roster that was loaded first is kept.  The
    merged roster is written to ``relLoc + "FullRoster.csv"``.

    Args:
        locname: handed unchanged to ``geo.readIdToLoc``; the result is the
            location mapping given to ``bcr.buildRosterDict``.
        relLoc: prefix prepended to both the glob pattern and the output
            file name (normally a directory path ending in a separator).

    Raises:
        IndexError: if nothing matches the glob pattern.
    """
    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)

    def load_roster(dirname):
        # Each input is closed as soon as its dictionary has been built
        # (assumes builddict reads the whole file before returning).
        with open(dirname + "/profiles.csv", "r") as pfile:
            pDict = prof.builddict(csv.reader(pfile))
        with open(dirname + "/users.csv", "r") as ufile:
            uDict = user.builddict(csv.reader(ufile))
        return bcr.buildRosterDict(pDict, uDict, locD)

    flist = glob.glob(relLoc + "20*")
    # pop() takes the last match; that roster seeds the merged result.
    fullR = load_roster(flist.pop())
    for f in flist:
        addR = load_roster(f)
        for i in addR:
            # Entries already present win; later rosters only fill gaps.
            if i not in fullR:
                fullR[i] = addR[i]

    outname = relLoc + "FullRoster.csv"
    bcr.writeRoster(fullR, outname)
def main(locname, relLoc="./"):
    """Build one combined roster from all ``20*`` directories under relLoc.

    Each path matched by ``relLoc + '20*'`` must hold ``profiles.csv`` and
    ``users.csv``.  The last match returned by glob is loaded first and forms
    the base roster; the remaining matches are then loaded in glob order and
    contribute only those keys the combined roster does not have yet.  The
    result is written to ``relLoc + 'FullRoster.csv'``.

    Args:
        locname: passed to ``geo.readIdToLoc`` to obtain the location mapping
            used by ``bcr.buildRosterDict``.
        relLoc: prefix for the glob pattern and for the output file name.

    Raises:
        IndexError: if the glob pattern matches nothing.
    """
    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)
    flist = glob.glob(relLoc + '20*')
    # The popped (last) match is processed first, then the rest in order.
    fname = flist.pop()

    fullR = None
    for dirname in [fname] + flist:
        # The with-blocks close each input once its dictionary exists
        # (assumes builddict consumes the reader before returning).
        with open(dirname + '/profiles.csv', 'r') as pfile:
            pDict = prof.builddict(csv.reader(pfile))
        with open(dirname + '/users.csv', 'r') as ufile:
            uDict = user.builddict(csv.reader(ufile))
        roster = bcr.buildRosterDict(pDict, uDict, locD)

        if fullR is None:
            # First roster becomes the accumulator itself.
            fullR = roster
            continue
        for i in roster:
            if i not in fullR:
                fullR[i] = roster[i]

    outname = relLoc + 'FullRoster.csv'
    bcr.writeRoster(fullR, outname)
''' import glob import sys import csv import classData.user as user course = sys.argv[1] flist = glob.glob('harvardx*/' + course + '/users.csv') if len(flist) < 2: exit() f = iter(flist).next() flist.remove(f) ufile = open(f, 'r') oldDict = user.builddict(ufile) ufile.close() out = csv.writer(open(course+'diffs.csv', 'w')) for f in flist: ufile = open(f, 'r') newDict = user.builddict(csv.reader(ufile)) ufile.close() out.writerow(['In older course list, not new']) i = 0 for u in iter(oldDict): if u not in newDict: out.writerow([u, oldDict[u].username]) i += 1 out.writerow(['Total deleted between files: ', str(i)]) i = 0 out.writerow(['In new course list, not old'])
#!/usr/bin/env python
'''
Looks for differences between the users listed in the users file and those in the
certificates file. In particular, looks for any users not found in the users file
who have received a certificate.

This was used for basic sanity testing on the invariant that if a user received a
certificate, he or she should also be in the users file. There are odd cases where
this may not be the case, caused by the habit of the edX group to delete a user
(which may have been changed, we have asked for this), but if there are large
numbers (order 10s) then something is probably wrong.
'''

import csv
from classData import certificates, user

if __name__ == '__main__':
    # Inputs are read from the current directory and closed as soon as the
    # corresponding dictionary has been built.
    with open('users.csv', 'r') as ufile:
        udict = user.builddict(csv.reader(ufile))
    with open('certificates.csv', 'r') as cfile:
        cDict = certificates.builddict(csv.reader(cfile))

    # Ids holding a 'downloadable' certificate that are absent from the users file.
    certsMissing = [c for c in cDict
                    if cDict[c].status == 'downloadable' and c not in udict]

    if certsMissing:
        # Parenthesised single-argument form prints the same text on Python 2 and 3.
        print('found ' + str(len(certsMissing)) + ' certificates with no associated user')
        with open('certsAndusers.csv', 'w') as ofile:
            outfile = csv.writer(ofile)
            outfile.writerow(['Missing user ids that have certificates'])
            for u in certsMissing:
                outfile.writerow([u])
if __name__ == '__main__':
    # Usage: <course> <week1 directory> <week2 directory>
    ck_course = sys.argv[1]
    wk1 = sys.argv[2]
    wk2 = sys.argv[3]
    week1 = wk1 + '/' + ck_course
    week2 = wk2 + '/' + ck_course
    userFile = '/users.csv'
    certFile = '/certificates.csv'
    enroll = '/enrollment.csv'

    # NOTE(review): the "1" dictionaries are read from week2 (third argument)
    # and the "2" dictionaries from week1 (second argument). This mirrors the
    # original code; confirm the crossed numbering is intended.
    # Every input is closed as soon as its dictionary has been built.
    with open(week2 + userFile, 'r') as fin:
        u1dict = user.builddict(csv.reader(fin))
    with open(week1 + userFile, 'r') as fin:
        u2dict = user.builddict(csv.reader(fin))
    with open(week2 + certFile, 'r') as fin:
        c1dict = certificates.builddict(csv.reader(fin))
    with open(week1 + certFile, 'r') as fin:
        c2dict = certificates.builddict(csv.reader(fin))
    with open(week2 + enroll, 'r') as fin:
        e1dict = course_enrollment.builddict(csv.reader(fin))
    with open(week1 + enroll, 'r') as fin:
        e2dict = course_enrollment.builddict(csv.reader(fin))

    OneNotTwo = compareUsers(u1dict, u2dict)
    TwoNotOne = compareUsers(u2dict, u1dict)

    # Flag with 'y' every user in a difference set who holds a
    # 'downloadable' certificate in the matching certificates file.
    for u in OneNotTwo:
        if u in c1dict and c1dict[u].status == 'downloadable':
            OneNotTwo[u] = 'y'
    for u in TwoNotOne:
        if u in c2dict and c2dict[u].status == 'downloadable':
            TwoNotOne[u] = 'y'
from classData import user, userprofile as userp
import sys
import csv


def split(name):
    """
    Splits a string of form firstname lastname into two strings, returning a list
    containing those names.

    The split is made at the last space in name: everything before it is the
    first name, everything after it the last name. A name containing no space
    is returned whole as the first name, with an empty string as the last name.
    """
    first, sep, last = name.rpartition(' ')
    if not sep:
        # No space at all: slicing around rfind()'s -1 would drop the final
        # character from the first name and copy the whole name into last.
        return [name, '']
    return [first, last]


if __name__ == '__main__':
    csv.field_size_limit(sys.maxsize)
    # Inputs are closed as soon as their dictionaries have been built.
    with open('users.csv', 'r') as ufile:
        udict = user.builddict(csv.reader(ufile))
    with open('profiles.csv', 'r') as pfile:
        pdict = userp.builddict(csv.reader(pfile))

    with open('mailAddresses.csv', 'w') as ofile:
        outfile = csv.writer(ofile)
        for uid in udict:
            # Users without a profile still get a row, under a placeholder name.
            if uid in pdict:
                name = pdict[uid].name
            else:
                name = 'Missing Profile'
            first, last = split(name)
            outfile.writerow([first, last, udict[uid].email])
script is run, and takes as arguments the file names of the two user files to be compared. ''' import csv import sys from classData import certificates, user if __name__ == '__main__': f1 = csv.reader(open(sys.argv[1], 'r')) f2 = csv.reader(open(sys.argv[2], 'r')) f3 = csv.writer(open('additions.csv', 'w')) f4 = csv.reader(open('certificates.csv', 'r')) f3.writerow(['id', 'in certificate file']) f3.writerow(['User ids in first file, not in second']) u1 = user.builddict(f1) u2 = user.builddict(f2) cdict = certificates.builddict(f4) for key in u1.iterkeys(): if u1[key].id not in u2: if key in cdict: f3.writerow([key, 'Yes']) else: f3.writerow([key, 'No']) f3.writerow(['User ids in second file, not in first']) for key in u2.iterkeys(): if u2[key].id not in u1: if key in cdict: f3.writerow([key, 'Yes']) else:
entries in the profiles and enrollment file have entries in the user file. The program takes no arguments, but assumes that the files are name users.csv, profiles.csv, enrollment.csv, and produces a userDiffs.csv file. ''' import csv from classData import user from classData import userprofile as uprofile from classData import course_enrollment as ce if __name__ == '__main__': csv.field_size_limit(1000000) uIn = csv.reader(open('users.csv', 'r')) uDict = user.builddict(uIn) upIn = csv.reader(open('profiles.csv', 'r')) upDict = uprofile.builddict(upIn) ceIn = csv.reader(open('enrollment.csv', 'r')) ceDict = ce.builddict(ceIn) of = csv.writer(open('userDiffs.csv', 'w')) of.writerow(['ids in user file, not in profiles file']) for u in iter(uDict): if u not in upDict: of.writerow([u]) of.writerow(['ids in profiles file, not in user file']) for p in iter(upDict): if p not in uDict: of.writerow([p])
rl = rosterLine(sid, name, uname, maddr, cnt, age, edu, gender) retDict[sid] = rl ofile.close() return retDict def writeRoster(rDict, filein): """ Write a roster dictionary to a .csv file Write a class roster dictionary to a .csv file named by the input parameter. """ ofile = open(filein, 'w') rf = csv.writer(ofile) rf.writerow(['Student ID', 'Name', 'User Name', 'Email', 'Country', 'Age', 'Education Level', 'Gender']) for s in iter(rDict): wl = rDict[s] rf.writerow([wl.sid, wl.name, wl.uname, wl.maddr, wl.cnt, wl.age, wl.ed, wl.gender]) ofile.close() if __name__ == '__main__': csv.field_size_limit(sys.maxsize) cl_name = sys.argv[1] proDict = profile.builddict(csv.reader(open(cl_name + '/profiles.csv', 'r'))) uDict = user.builddict(csv.reader(open(cl_name + '/users.csv', 'r'))) loc_name = sys.argv[2] locDict = geo.readIdToLoc(loc_name) rDict = buildRosterDict(proDict, uDict, locDict) writeRoster(rDict, cl_name + '/class_roster.csv')
self.uname = uname self.country = '' self.classList = [className] if __name__ == '__main__': geoFile = csv.reader(open(sys.argv[1], 'r')) geoDict = loc.builddict(geoFile) fList = glob.glob('*/users.csv') classDict = {} for d in fList: filein = open(d, 'r') fin = csv.reader(filein) cName = d[ :10] fin.next() udict = user.builddict(fin) for u in iter(udict): if u in classDict: classDict[u].numClasses += 1 classDict[u].classList.append(cName) if udict[u].username != classDict[u].uname: classDict[u].uname = 'Duplicate user name' else: classDict[u] = userClasses(udict[u].username, cName) if udict[u].username in geoDict: classDict[u].country = geoDict[udict[u].username] filein.close() outf = csv.writer(open('studentClassList.csv', 'w')) outf.writerow(['user Id', 'User name', 'country', 'number of classes', 'classes'])