コード例 #1
0
def main(locname, relLoc="./"):
    """
    Build one combined roster from every '20*' directory under relLoc.

    For each directory matched by glob(relLoc + '20*'), profiles.csv and
    users.csv are read and turned into a roster dictionary with
    bcr.buildRosterDict. The rosters are merged and written to
    relLoc + 'FullRoster.csv'. The directory the glob lists last seeds the
    merge; entries from the remaining directories are added only for ids
    that are not already present.

    Parameters:
        locname: file name handed to geo.readIdToLoc to load the location map
        relLoc: path prefix for both the '20*' glob and the output file

    Raises:
        IndexError: if no directory matches the glob (list.pop on an empty
            list), exactly as before.
    """
    def _rosterFor(dirname):
        # Build the roster for one directory. The 'with' blocks make sure
        # both input files are closed as soon as they have been parsed,
        # instead of staying open for the rest of the run.
        with open(dirname + '/profiles.csv', 'r') as pfile:
            pDict = prof.builddict(csv.reader(pfile))
        with open(dirname + '/users.csv', 'r') as ufile:
            uDict = user.builddict(csv.reader(ufile))
        return bcr.buildRosterDict(pDict, uDict, locD)

    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)
    flist = glob.glob(relLoc + '20*')
    fullR = _rosterFor(flist.pop())

    for f in flist:
        addR = _rosterFor(f)
        for i in addR:
            # Keep the entry already in the roster; only fill in new ids.
            if i not in fullR:
                fullR[i] = addR[i]

    outname = relLoc + 'FullRoster.csv'
    bcr.writeRoster(fullR, outname)
コード例 #2
0
def main():
    """
    Build global id -> name and name -> id maps from every */*/users.csv.

    Writes globalid2name.csv and globalname2id.csv in the current directory.
    An id seen with two different user names, or a user name seen with two
    different ids, is recorded through addDup and written to nameDups.csv /
    idDups.csv; when there are none a message is printed instead.
    """
    ulist = glob.glob('*/*/users.csv')
    idDict = {}
    nameDict = {}
    dupNameDict = {}
    dupIdDict = {}

    for fname in ulist:
        # Close each input as soon as it is parsed. The original closed only
        # the last file, and failed on an unbound name when nothing matched.
        with open(fname, 'r') as fin:
            udict = user.builddict(csv.reader(fin))
        for u in udict:
            uname = udict[u].username
            if u not in idDict:
                idDict[u] = uname
            elif idDict[u] != uname:
                addDup(dupNameDict, u, idDict[u], uname)
            if uname not in nameDict:
                nameDict[uname] = u
            elif nameDict[uname] != u:
                addDup(dupIdDict, uname, nameDict[uname], u)

    with open('globalid2name.csv', 'w') as outfile:
        idOut = csv.writer(outfile)
        idOut.writerow(['User ID', 'User Name'])
        for i in idDict:
            idOut.writerow([i, idDict[i]])

    with open('globalname2id.csv', 'w') as outfile:
        nameOut = csv.writer(outfile)
        nameOut.writerow(['User Name', 'User ID'])
        for n in nameDict:
            nameOut.writerow([n, nameDict[n]])

    if dupNameDict:
        with open('nameDups.csv', 'w') as outfile:
            nameDupOut = csv.writer(outfile)
            for u in dupNameDict:
                # writerow must be called; the original subscripted it.
                nameDupOut.writerow([u, dupNameDict[u]])
    else:
        print("No duplicate names found")

    if dupIdDict:
        with open('idDups.csv', 'w') as outfile:
            idDupOut = csv.writer(outfile)
            for u in dupIdDict:
                # csv writers expose writerow, not write.
                idDupOut.writerow([u, dupIdDict[u]])
    else:
        print("No duplicate ids found")
コード例 #3
0
def main():
    """
    Build global id -> name and name -> id maps from every */*/users.csv.

    Each users file is parsed with user.builddict and folded into the four
    dictionaries by buildMaps. The id and name maps are written to
    globalid2name.csv and globalname2id.csv; non-empty duplicate maps are
    written to nameDups.csv / idDups.csv, otherwise a message is printed.
    """
    ulist = glob.glob('*/*/users.csv')
    idDict = {}
    nameDict = {}
    dupNameDict = {}
    dupIdDict = {}

    for fname in ulist:
        # Close each input as soon as it is parsed. The original closed only
        # the last file, and failed on an unbound name when nothing matched.
        with open(fname, 'r') as fin:
            udict = user.builddict(csv.reader(fin))
        buildMaps(udict, idDict, nameDict, dupNameDict, dupIdDict)

    with open('globalid2name.csv', 'w') as outfile:
        idOut = csv.writer(outfile)
        idOut.writerow(['User ID', 'User Name'])
        for i in idDict:
            idOut.writerow([i, idDict[i]])

    with open('globalname2id.csv', 'w') as outfile:
        nameOut = csv.writer(outfile)
        nameOut.writerow(['User Name', 'User ID'])
        for n in nameDict:
            nameOut.writerow([n, nameDict[n]])

    if dupNameDict:
        with open('nameDups.csv', 'w') as outfile:
            nameDupOut = csv.writer(outfile)
            for u in dupNameDict:
                # writerow must be called; the original subscripted it.
                nameDupOut.writerow([u, dupNameDict[u]])
    else:
        print("No duplicate names found")

    if dupIdDict:
        with open('idDups.csv', 'w') as outfile:
            idDupOut = csv.writer(outfile)
            for u in dupIdDict:
                # csv writers expose writerow, not write.
                idDupOut.writerow([u, dupIdDict[u]])
    else:
        print("No duplicate ids found")
コード例 #4
0
# courseName, profileFile, userFile and countryFile are all read below as
# sys.argv[1]..sys.argv[4], so at least five argv entries are required.
# The previous check (< 2) let a short command line through and failed
# later with an IndexError instead of showing the usage text.
if len(sys.argv) < 5:
    print(
        'Usage: buildAnonProfile.py courseName profileFile userFile countryFile certFile'
    )
    sys.exit()

csv.field_size_limit(1000000)

# Output file is named after the course.
out_name = sys.argv[1] + 'anonProfile.csv'
o1 = csv.writer(open(out_name, 'w'))

# Profile dictionary.
ufile = csv.reader(open(sys.argv[2], 'r'))
uprof = prof.builddict(ufile)

# User dictionary.
udfile = csv.reader(open(sys.argv[3], 'r'))
udict = user.builddict(udfile)

# Location dictionary.
countryFile = csv.reader(open(sys.argv[4], 'r'))
locDict = geo.builddict(countryFile)

# The certificates file is optional; 'certs' records whether it was given.
certs = False
if len(sys.argv) > 5:
    certfile = csv.reader(open(sys.argv[5], 'r'))
    certDict = cs.builddict(certfile)
    certs = True

students = uprof.keys()
for s in students:
    p = uprof[s]
    if (s in udict):
        usrName = udict[s].username
コード例 #5
0
# Course directory name, taken from the first command-line argument.
ck_course = sys.argv[1]

# The two dump directories being compared; the names carry the dump dates.
dump2 = 'harvardx-2013-06-16'
dump1 = 'harvardx-2013-06-02'
# Per-course file paths, appended to a dump directory name below.
userFile = '/' + ck_course + '/users.csv'
certFile = '/' + ck_course + '/certificates.csv'
enroll = '/' + ck_course + '/enrollment.csv'
# csv readers for the users, certificates and enrollment file of each dump.
uf1 = csv.reader(open(dump1 + userFile, 'r'))
uf2 = csv.reader(open(dump2 + userFile, 'r'))
cf1 = csv.reader(open(dump1 + certFile, 'r'))
cf2 = csv.reader(open(dump2 + certFile, 'r'))
ef1 = csv.reader(open(dump1 + enroll, 'r'))
ef2 = csv.reader(open(dump2 + enroll, 'r'))

# Parse each reader into a dictionary with the matching module's builddict.
u1dict = user.builddict(uf1)
u2dict = user.builddict(uf2)
c1dict = certificates.builddict(cf1)
c2dict = certificates.builddict(cf2)
e1dict = course_enrollment.builddict(ef1)
e2dict = course_enrollment.builddict(ef2)

# presumably the users present in the first argument but missing from the
# second -- confirm against compareUsers, which is defined elsewhere
OneNotTwo = compareUsers(u1dict, u2dict)
TwoNotOne = compareUsers(u2dict, u1dict)

# Flag with 'y' every such user whose dump-1 certificate status is
# 'downloadable'.
for u in iter(OneNotTwo):
    if u in c1dict and c1dict[u].status == 'downloadable':
        OneNotTwo[u] = 'y'

for u in iter(TwoNotOne):
    if u in c2dict and c2dict[u].status == 'downloadable':
コード例 #6
0
ファイル: buildClassRoster.py プロジェクト: Taranveer/moocRP
        rl = rosterLine(sid, name, uname, maddr, cnt, age, edu, gender)
        retDict[sid] = rl
    ofile.close()
    return retDict

def writeRoster(rDict, filein):
    '''
    Write a roster dictionary to a .csv file

    Write a class roster dictionary to a .csv file named by the input parameter.
    A header row is written first, followed by one row per entry of rDict.

    Parameters:
        rDict: dictionary whose values expose the attributes sid, name, uname,
            maddr, cnt, age, ed and gender
        filein: name of the file to write; it is created or overwritten
    '''
    # The with-block closes the file even when writing a row raises (for
    # example when an entry lacks one of the expected attributes).
    with open(filein, 'w') as ofile:
        rf = csv.writer(ofile)
        rf.writerow(['Student ID', 'Name', 'User Name', 'Email', 'Country', 'Age', 'Education Level', 'Gender'])
        for wl in rDict.values():
            rf.writerow([wl.sid, wl.name, wl.uname, wl.maddr, wl.cnt, wl.age, wl.ed, wl.gender])
    

if __name__ == '__main__':
    # Usage: <script> class_directory id_to_location_file
    # Builds the roster for one class directory and writes it to
    # <class_directory>/class_roster.csv.
    csv.field_size_limit(sys.maxsize)
    cl_name = sys.argv[1]

    # Read each input inside a with-block so the file is closed once parsed.
    with open(cl_name + '/profiles.csv', 'r') as pfile:
        proDict = profile.builddict(csv.reader(pfile))
    with open(cl_name + '/users.csv', 'r') as ufile:
        uDict = user.builddict(csv.reader(ufile))
    loc_name = sys.argv[2]
    locDict = geo.readIdToLoc(loc_name)
    rDict = buildRosterDict(proDict, uDict, locDict)
    writeRoster(rDict, cl_name + '/class_roster.csv')
    
コード例 #7
0
ファイル: corrUsers.py プロジェクト: Taranveer/moocRP
Looks at the user, enrollment, and profile file in the directory in which 
the script is run to ensure that all of the entries in the user file
have entries in the enrollment and profiles file, and that all of the
entries in the profiles and enrollment file have entries in the user file.
"""


import csv
import user
import demographics.userprofile as uprofile
import course_enrollment as ce

# Raise the csv field size limit before reading the input files.
csv.field_size_limit(1000000)
# Parse users.csv from the current directory.
uIn = csv.reader(open("users.csv", "r"))
uDict = user.builddict(uIn)

# Parse profiles.csv from the current directory.
upIn = csv.reader(open("profiles.csv", "r"))
upDict = uprofile.builddict(upIn)

# Parse enrollment.csv from the current directory.
ceIn = csv.reader(open("enrollment.csv", "r"))
ceDict = ce.builddict(ceIn)

# All differences are reported in userDiffs.csv.
of = csv.writer(open("userDiffs.csv", "w"))

# Section 1: one row per key of the user dictionary that is missing from the
# profile dictionary.
of.writerow(["ids in user file, not in profiles file"])
for u in iter(uDict):
    if u not in upDict:
        of.writerow([u])

# Heading row for the next section of the report.
of.writerow(["ids in profiles file, not in user file"])
コード例 #8
0
ファイル: checkUsersTimes.py プロジェクト: Taranveer/moocRP
            
# Course directory name, taken from the first command-line argument.
ck_course = sys.argv[1]

# The two dump directories being compared; the names carry the dump dates.
dump2 = 'harvardx-2013-06-16'
dump1 = 'harvardx-2013-06-02'
# Per-course file paths, appended to a dump directory name below.
userFile = '/' + ck_course + '/users.csv'
certFile = '/' + ck_course + '/certificates.csv'
enroll = '/' + ck_course + '/enrollment.csv'
# csv readers for the users, certificates and enrollment file of each dump.
uf1 = csv.reader(open(dump1 + userFile, 'r'))
uf2 = csv.reader(open(dump2 + userFile, 'r'))
cf1 = csv.reader(open(dump1 + certFile, 'r'))
cf2 = csv.reader(open(dump2 + certFile, 'r'))
ef1 = csv.reader(open(dump1 + enroll, 'r'))
ef2 = csv.reader(open(dump2 + enroll, 'r'))

# Parse each reader into a dictionary with the matching module's builddict.
u1dict = user.builddict(uf1)
u2dict = user.builddict(uf2)
c1dict = certificates.builddict(cf1)
c2dict = certificates.builddict(cf2)
e1dict = course_enrollment.builddict(ef1)
e2dict = course_enrollment.builddict(ef2)

# presumably the users present in the first argument but missing from the
# second -- confirm against compareUsers, which is defined elsewhere
OneNotTwo = compareUsers(u1dict, u2dict)
TwoNotOne = compareUsers(u2dict, u1dict)

# Flag with 'y' every such user whose dump-1 certificate status is
# 'downloadable'.
for u in iter(OneNotTwo):
    if u in c1dict and c1dict[u].status =='downloadable':
        OneNotTwo[u] = 'y'

for u in iter(TwoNotOne):
    if u in c2dict and c2dict[u].status == 'downloadable':
コード例 #9
0
        self.country = ''
        self.classList = [className]
    

# Location dictionary built from the file named by the first command-line
# argument; it is looked up by user name below.
geoFile = csv.reader(open(sys.argv[1], 'r'))
geoDict = loc.builddict(geoFile)

# Every entry in the current directory whose name starts with an upper-case
# letter is treated as a class directory.
dirList = glob.glob('[A-Z]*')
classDict = {}

for d in dirList:
    filein = open(d+'/users.csv', 'r')
    fin = csv.reader(filein)
    cName = d
    # Advance past the first row before parsing.
    # NOTE(review): presumably the header row -- confirm user.builddict does
    # not skip a header itself, otherwise the first user is lost.
    fin.next()
    udict = user.builddict(fin)
    for u in iter(udict):
        if u in classDict:
            # Already seen in another class: count this class as well.
            classDict[u].numClasses += 1
            classDict[u].classList.append(cName)
            # A differing user name for the same key replaces the stored name
            # with a marker string.
            if udict[u].username != classDict[u].uname:
                classDict[u].uname = 'Duplicate user name'
        else:
            # First sighting: create the record and fill in the country when
            # the user name is present in the location dictionary.
            classDict[u] = userClasses(udict[u].username, cName)
            if udict[u].username in geoDict:
                classDict[u].country = geoDict[udict[u].username]

    filein.close()

# Output file and its header row.
outf = csv.writer(open('studentClassList.csv', 'w'))
outf.writerow(['user Id', 'User name','country', 'number of classes', 'classes'])
コード例 #10
0
ファイル: makeIdCountryFile.py プロジェクト: Taranveer/moocRP
        retDict[username] = country
    return retDict

# Prompt for the three inputs unless all of them were given on the command
# line (user-name-to-country file, user file, class name).
if len(sys.argv) <= 3:
    cFileName = utils.getFileName('user name to country file')
    userFileName = utils.getFileName('user file')
    clName = raw_input("Please enter the name of the class : ")
else:
    cFileName, userFileName, clName = sys.argv[1:4]

# user name -> country
cfile = csv.reader(open(cFileName, 'r'))
nameDict = buildNameCountry(cfile)
# user dictionary parsed from the user file
ufile = csv.reader(open(userFileName, 'r'))
userDict = user.builddict(ufile)

# The output file is named after the class.
clfName = clName + '_id_country.csv'
outfile = csv.writer(open(clfName, 'w'))
outfile.writerow(['User id', 'Country'])

users = userDict.keys()
for u in users:
    userName = userDict[u].username
    # Report user names without a known country and move on.
    if userName not in nameDict:
        print('unknown userName ' + userName)
        continue
    country = nameDict[userName]
    outfile.writerow([u, country])
コード例 #11
0

if __name__ == '__main__':
    # Build one combined roster from every '20*' directory in the current
    # directory and write it to FullRoster.csv. The location file name comes
    # from the command line when given, otherwise from a prompt.
    if len(sys.argv) > 1:
        locname = sys.argv[1]
    else:
        locname = utils.getFileName('Enter name of the id=>location file :')

    csv.field_size_limit(sys.maxsize)
    locD = geo.readIdToLoc(locname)
    flist = glob.glob('20*')
    # The directory listed last seeds the roster (IndexError if none match,
    # as before); the others only contribute ids that are still missing.
    fname = flist.pop()
    fullR = None

    for f in [fname] + flist:
        # with-blocks close each input once parsed instead of leaking it.
        with open(f + '/profiles.csv', 'r') as pfile:
            pDict = prof.builddict(csv.reader(pfile))
        with open(f + '/users.csv', 'r') as ufile:
            uDict = user.builddict(csv.reader(ufile))
        addR = bcr.buildRosterDict(pDict, uDict, locD)
        if fullR is None:
            fullR = addR
            continue
        for i in addR:
            if i not in fullR:
                fullR[i] = addR[i]

    outname = 'FullRoster.csv'
    bcr.writeRoster(fullR, outname)
コード例 #12
0
ファイル: diffUsers.py プロジェクト: Taranveer/moocRP
'''

import glob
import sys
import csv
import user

# Course name from the command line; one users.csv is expected per dump
# directory matching 'harvardx*'.
course = sys.argv[1]
flist = glob.glob('harvardx*/' + course + '/users.csv')
# At least two files are needed for a comparison.
if len(flist) < 2:
    sys.exit()

# The first match is the baseline and is removed from the list of files to
# compare against.
f = flist.pop(0)
# Wrap the file in csv.reader, as every other builddict call does; the raw
# file object was previously passed straight to user.builddict.
with open(f, 'r') as ufile:
    oldDict = user.builddict(csv.reader(ufile))
out = csv.writer(open(course+'diffs.csv', 'w'))
for f in flist:
    ufile = open(f, 'r')
    newDict = user.builddict(csv.reader(ufile))
    ufile.close()
    out.writerow(['In older course list, not new'])
    i = 0
    for u in iter(oldDict):
        if u not in newDict:
            out.writerow([u, oldDict[u].username])
            i += 1
    out.writerow(['Total deleted between files: ', str(i)])
    i = 0
    out.writerow(['In new course list, not old'])
コード例 #13
0
ファイル: compUser.py プロジェクト: yoanyombapro1234/moocRP
script is run, and takes as arguments the file names of the two user
files to be compared.
'''

import user
import csv
import sys
import certificates

# f1, f2: the two user files named on the command line.
# f3: report written to additions.csv.
# f4: certificates.csv from the current directory.
f1 = csv.reader(open(sys.argv[1], 'r'))
f2 = csv.reader(open(sys.argv[2], 'r'))
f3 = csv.writer(open('additions.csv', 'w'))
f4 = csv.reader(open('certificates.csv', 'r'))
# Column header row, then the heading of the first section.
f3.writerow(['id', 'in certificate file'])
f3.writerow(['User ids in first file, not in second'])
u1 = user.builddict(f1)
u2 = user.builddict(f2)
cdict = certificates.builddict(f4)

# For every entry of the first file whose id attribute is not a key of the
# second, write the key and whether it appears in the certificate dictionary.
# NOTE(review): the membership test uses u1[key].id while the row uses key --
# presumably the two are equal; confirm against user.builddict.
for key in u1.iterkeys():
    if u1[key].id not in u2:
        if key in cdict:
            f3.writerow([key, 'Yes'])
        else:
            f3.writerow([key, 'No'])

# Heading of the second section.
f3.writerow(['User ids in second file, not in first'])

for key in u2.iterkeys():
    if u2[key].id not in u1:
        if key in cdict:
コード例 #14
0
    Write a roster dictionary to a .csv file
    
    Write a class roster dictionary to a .csv file named by the input parameter. 
    '''
    ofile = open(filein, 'w')
    rf = csv.writer(ofile)
    rf.writerow([
        'Student ID', 'Name', 'User Name', 'Email', 'Country', 'Age',
        'Education Level', 'Gender'
    ])
    for s in iter(rDict):
        wl = rDict[s]
        rf.writerow([
            wl.sid, wl.name, wl.uname, wl.maddr, wl.cnt, wl.age, wl.ed,
            wl.gender
        ])
    ofile.close()


if __name__ == '__main__':
    # Usage: <script> class_directory id_to_location_file
    # Builds the roster for one class directory and writes it to
    # <class_directory>/class_roster.csv.
    csv.field_size_limit(sys.maxsize)
    cl_name = sys.argv[1]

    # Read each input inside a with-block so the file is closed once parsed.
    with open(cl_name + '/profiles.csv', 'r') as pfile:
        proDict = profile.builddict(csv.reader(pfile))
    with open(cl_name + '/users.csv', 'r') as ufile:
        uDict = user.builddict(csv.reader(ufile))
    loc_name = sys.argv[2]
    locDict = geo.readIdToLoc(loc_name)
    rDict = buildRosterDict(proDict, uDict, locDict)
    writeRoster(rDict, cl_name + '/class_roster.csv')
コード例 #15
0
'''

import glob
import sys
import csv
import user

# Course name from the command line; one users.csv is expected per dump
# directory matching 'harvardx*'.
course = sys.argv[1]
flist = glob.glob('harvardx*/' + course + '/users.csv')
# At least two files are needed for a comparison.
if len(flist) < 2:
    sys.exit()

# The first match is the baseline and is removed from the list of files to
# compare against.
f = flist.pop(0)
# Wrap the file in csv.reader, as every other builddict call does; the raw
# file object was previously passed straight to user.builddict.
with open(f, 'r') as ufile:
    oldDict = user.builddict(csv.reader(ufile))
out = csv.writer(open(course + 'diffs.csv', 'w'))
for f in flist:
    ufile = open(f, 'r')
    newDict = user.builddict(csv.reader(ufile))
    ufile.close()
    out.writerow(['In older course list, not new'])
    i = 0
    for u in iter(oldDict):
        if u not in newDict:
            out.writerow([u, oldDict[u].username])
            i += 1
    out.writerow(['Total deleted between files: ', str(i)])
    i = 0
    out.writerow(['In new course list, not old'])
コード例 #16
0
    '''
    retDict= {}
    fin.readrow()
    for name, iden in fin:
        retDict[name] = iden
    return retDict

if __name__ == '__main__':
    # Build global id -> user name and user name -> id maps from every
    # */*/users.csv and write them to globalid2name.csv / globalname2id.csv.
    # When the same key occurs in several files the last file read wins.
    ulist = glob.glob('*/*/users.csv')
    idDict = {}
    nameDict = {}

    for fname in ulist:
        # Close each input as soon as it is parsed. The original closed only
        # the last file, and failed on an unbound name when nothing matched.
        with open(fname, 'r') as fin:
            udict = user.builddict(csv.reader(fin))
        for u in udict:
            idDict[u] = udict[u].username
            nameDict[udict[u].username] = u

    with open('globalid2name.csv', 'w') as idFile:
        idOut = csv.writer(idFile)
        idOut.writerow(['User ID', 'User Name'])
        for i in idDict:
            idOut.writerow([i, idDict[i]])

    with open('globalname2id.csv', 'w') as nameFile:
        nameOut = csv.writer(nameFile)
        nameOut.writerow(['User Name', 'User ID'])
        for n in nameDict:
            nameOut.writerow([n, nameDict[n]])
コード例 #17
0
ファイル: buildAnonProfile.py プロジェクト: Taranveer/moocRP

# courseName, profileFile, userFile and countryFile are all read below as
# sys.argv[1]..sys.argv[4], so at least five argv entries are required.
# The previous check (< 2) let a short command line through and failed
# later with an IndexError instead of showing the usage text.
if len(sys.argv) < 5:
    print('Usage: buildAnonProfile.py courseName profileFile userFile countryFile certFile')
    sys.exit()

csv.field_size_limit(1000000)

# Output file is named after the course.
out_name = sys.argv[1] + 'anonProfile.csv'
o1 = csv.writer(open(out_name, 'w'))

# Profile dictionary.
ufile = csv.reader(open(sys.argv[2], 'r'))
uprof = prof.builddict(ufile)

# User dictionary.
udfile = csv.reader(open(sys.argv[3], 'r'))
udict = user.builddict(udfile)

# Location dictionary.
countryFile = csv.reader(open(sys.argv[4], 'r'))
locDict = geo.builddict(countryFile)

# The certificates file is optional; 'certs' records whether it was given.
certs = False
if len(sys.argv) > 5:
    certfile = csv.reader(open(sys.argv[5], 'r'))
    certDict = cs.builddict(certfile)
    certs = True
    

students = uprof.keys()
for s in students:
    p = uprof[s]
    if (s in udict):
コード例 #18
0
ファイル: scrapeEmail.py プロジェクト: qjyzwlz/HarvardX-Tools
#!/usr/bin/env python

import user
import certificates
import os
import csv


def getFileName(prompt):
    """
    Ask for a file name until the user enters a path that exists.

    prompt is inserted into the question shown to the user. The entered
    name is returned once os.path.exists accepts it.
    """
    question = "Please enter file name for " + prompt + " : "
    fname = raw_input(question)
    while not os.path.exists(fname):
        print("file entered does not exist, please retry")
        fname = raw_input(question)
    return fname


if __name__ == "__main__":
    # Write the e-mail address of every user to 'allmail', and of every user
    # that also appears in the certificates file to 'certMail'.
    f1name = getFileName("Enter name of the user file")
    f2name = getFileName("Enter the name of the certificates file")
    # with-blocks close the inputs once parsed and guarantee that both
    # output files are flushed and closed when the script finishes.
    with open(f1name, "r") as f1:
        udict = user.builddict(csv.reader(f1))
    with open(f2name, "r") as f2:
        cdict = certificates.builddict(csv.reader(f2))
    with open("allmail", "w") as out1:
        with open("certMail", "w") as out2:
            for u in udict:
                out1.write(udict[u].email + "\n")
                if u in cdict:
                    out2.write(udict[u].email + "\n")
コード例 #19
0
ファイル: corrUsers.py プロジェクト: yoanyombapro1234/moocRP
Checks to ensure that users appear in the enrollment and profiles file

Looks at the user, enrollment, and profile file in the directory in which 
the script is run to ensure that all of the entries in the user file
have entries in the enrollment and profiles file, and that all of the
entries in the profiles and enrollment file have entries in the user file.
'''

import csv
import user
import demographics.userprofile as uprofile
import course_enrollment as ce

# Raise the csv field size limit before reading the input files.
csv.field_size_limit(1000000)
# Parse users.csv from the current directory.
uIn = csv.reader(open('users.csv', 'r'))
uDict = user.builddict(uIn)

# Parse profiles.csv from the current directory.
upIn = csv.reader(open('profiles.csv', 'r'))
upDict = uprofile.builddict(upIn)

# Parse enrollment.csv from the current directory.
ceIn = csv.reader(open('enrollment.csv', 'r'))
ceDict = ce.builddict(ceIn)

# All differences are reported in userDiffs.csv.
of = csv.writer(open('userDiffs.csv', 'w'))

# Section 1: one row per key of the user dictionary that is missing from the
# profile dictionary.
of.writerow(['ids in user file, not in profiles file'])
for u in iter(uDict):
    if u not in upDict:
        of.writerow([u])

# Heading row for the next section of the report.
of.writerow(['ids in profiles file, not in user file'])
コード例 #20
0
#!/usr/bin/env python

import user
import certificates
import os
import csv


def getFileName(prompt):
    """
    Ask for a file name until the user enters a path that exists.

    prompt is inserted into the question shown to the user. The entered
    name is returned once os.path.exists accepts it.
    """
    question = "Please enter file name for " + prompt + ' : '
    fname = raw_input(question)
    while not os.path.exists(fname):
        print("file entered does not exist, please retry")
        fname = raw_input(question)
    return fname


if __name__ == '__main__':
    # Write the e-mail address of every user to 'allmail', and of every user
    # that also appears in the certificates file to 'certMail'.
    f1name = getFileName('Enter name of the user file')
    f2name = getFileName('Enter the name of the certificates file')
    # with-blocks close the inputs once parsed and guarantee that both
    # output files are flushed and closed when the script finishes.
    with open(f1name, 'r') as f1:
        udict = user.builddict(csv.reader(f1))
    with open(f2name, 'r') as f2:
        cdict = certificates.builddict(csv.reader(f2))
    with open('allmail', 'w') as out1:
        with open('certMail', 'w') as out2:
            for u in udict:
                out1.write(udict[u].email + '\n')
                if u in cdict:
                    out2.write(udict[u].email + '\n')
コード例 #21
0
ファイル: compUser.py プロジェクト: Taranveer/moocRP
script is run, and takes as arguments the file names of the two user
files to be compared.
'''

import user
import csv
import sys
import certificates

# f1, f2: the two user files named on the command line.
# f3: report written to additions.csv.
# f4: certificates.csv from the current directory.
f1 = csv.reader(open(sys.argv[1], 'r'))
f2 = csv.reader(open(sys.argv[2], 'r'))
f3 = csv.writer(open('additions.csv', 'w'))
f4 = csv.reader(open('certificates.csv', 'r'))
# Column header row, then the heading of the first section.
f3.writerow(['id', 'in certificate file'])
f3.writerow(['User ids in first file, not in second'])
u1 = user.builddict(f1)
u2 = user.builddict(f2)
cdict = certificates.builddict(f4)

# For every entry of the first file whose id attribute is not a key of the
# second, write the key and whether it appears in the certificate dictionary.
# NOTE(review): the membership test uses u1[key].id while the row uses key --
# presumably the two are equal; confirm against user.builddict.
for key in u1.iterkeys():
    if u1[key].id not in u2:
        if key in cdict:
            f3.writerow([key, 'Yes'])
        else:
            f3.writerow([key, 'No'])
            
# Heading of the second section.
f3.writerow(['User ids in second file, not in first'])

for key in u2.iterkeys():
    if u2[key].id not in u1:
        if key in cdict:
コード例 #22
0
    return retDict


# Prompt for the three inputs unless all of them were given on the command
# line (user-name-to-country file, user file, class name).
if len(sys.argv) <= 3:
    cFileName = utils.getFileName('user name to country file')
    userFileName = utils.getFileName('user file')
    clName = raw_input("Please enter the name of the class : ")
else:
    cFileName, userFileName, clName = sys.argv[1:4]

# user name -> country
cfile = csv.reader(open(cFileName, 'r'))
nameDict = buildNameCountry(cfile)
# user dictionary parsed from the user file
ufile = csv.reader(open(userFileName, 'r'))
userDict = user.builddict(ufile)

# The output file is named after the class.
clfName = clName + '_id_country.csv'
outfile = csv.writer(open(clfName, 'w'))
outfile.writerow(['User id', 'Country'])

users = userDict.keys()
for u in users:
    userName = userDict[u].username
    # Report user names without a known country and move on.
    if userName not in nameDict:
        print('unknown userName ' + userName)
        continue
    country = nameDict[userName]
    outfile.writerow([u, country])
コード例 #23
0
        self.country = ''
        self.classList = [className]


# Location dictionary built from the file named by the first command-line
# argument; it is looked up by user name below.
geoFile = csv.reader(open(sys.argv[1], 'r'))
geoDict = loc.builddict(geoFile)

# Every entry in the current directory whose name starts with an upper-case
# letter is treated as a class directory.
dirList = glob.glob('[A-Z]*')
classDict = {}

for d in dirList:
    filein = open(d + '/users.csv', 'r')
    fin = csv.reader(filein)
    cName = d
    # Advance past the first row before parsing.
    # NOTE(review): presumably the header row -- confirm user.builddict does
    # not skip a header itself, otherwise the first user is lost.
    fin.next()
    udict = user.builddict(fin)
    for u in iter(udict):
        if u in classDict:
            # Already seen in another class: count this class as well.
            classDict[u].numClasses += 1
            classDict[u].classList.append(cName)
            # A differing user name for the same key replaces the stored name
            # with a marker string.
            if udict[u].username != classDict[u].uname:
                classDict[u].uname = 'Duplicate user name'
        else:
            # First sighting: create the record and fill in the country when
            # the user name is present in the location dictionary.
            classDict[u] = userClasses(udict[u].username, cName)
            if udict[u].username in geoDict:
                classDict[u].country = geoDict[udict[u].username]

    filein.close()

# Writer for the output file.
outf = csv.writer(open('studentClassList.csv', 'w'))
outf.writerow(