def run(projectName):
    """Compare thread lengths with and without top-contributor posts.

    For every course whose '<course>_contribution.csv' file can be read from
    the project's results directory, print the median and mean thread length
    for both groups of threads.  After all courses, print the average of the
    per-course differences (top-contributor value minus the other value).

    Courses whose contribution file raises IOError are skipped silently.

    Args:
        projectName: Sub-directory of the results directory holding the
            per-course contribution files.
    """
    # NOTE(review): block structure was reconstructed from flattened source;
    # the per-course report is assumed to end with the blank-line print.
    projectDir = os.path.join(FileSystem.getResultsDir(), projectName)
    medianGaps = []
    meanGaps = []

    for dataset in FileSystem.loadCourseDatasetInfo():
        contributionFile = os.path.join(
            projectDir, dataset.name + '_contribution.csv')
        try:
            with open(contributionFile) as handle:
                userIds = [line.strip() for line in handle]
        except IOError:
            continue

        topIds = getTopFivePercent(userIds)

        # The forum queries below run after the database switch for this course.
        DBSetup.switch(dataset)
        withTop, withoutTop = isolateThreadLengths(
            ForumThreads.objects.all(), ForumPosts.objects.all(), topIds)

        medianWith = median(withTop)
        medianWithout = median(withoutTop)
        meanWith = mean(withTop)
        meanWithout = mean(withoutTop)

        medianGaps.append(medianWith - medianWithout)
        meanGaps.append(meanWith - meanWithout)

        print(dataset.name)
        report = (
            ('Median thread length for threads with posts by top contributors: ',
             medianWith),
            ('Median thread length for threads without posts by top contributors: ',
             medianWithout),
            ('Mean thread length for threads with posts by top contributors: ',
             meanWith),
            ('Mean thread length for threads without posts by top contributors: ',
             meanWithout),
        )
        for label, value in report:
            print(label + str(value))
        print(' ')

    print('Average difference between median thread lengths: '
          + str(mean(medianGaps)))
    print('Average difference between mean thread lengths: '
          + str(mean(meanGaps)))
def run(projectName):
    """Write post-position histograms for top and non-top contributors.

    For every course whose '<course>_contribution.csv' file can be read from
    the project's results directory, a per-course section is written to
    'topContributorPositions.txt', followed by a section aggregated over all
    processed courses.  The accumulated continuous histograms are then
    normalized and written to 'normalizedPositionHistTC.csv' and
    'normalizedPositionHistNonTC.csv', one '<bin>, <value>' line per bin.

    Courses whose contribution file raises IOError are skipped.

    Args:
        projectName: Sub-directory of the results directory that holds the
            contribution files and receives the output files.
    """
    courseDatasets = FileSystem.loadCourseDatasetInfo()
    resultsDir = os.path.join(FileSystem.getResultsDir(), projectName)
    outputPath = os.path.join(resultsDir, 'topContributorPositions.txt')

    cumulativeResultsTC = {}
    cumulativeResultsNonTC = {}
    cumulativeContHistTC = NUMBINS * [0]
    cumulativeContHistNonTC = NUMBINS * [0]

    # A context manager guarantees the summary file is flushed and closed even
    # if a course fails part-way through (the original leaked the handle).
    with open(outputPath, 'wt') as ofid:
        # NOTE(review): loop extent reconstructed from flattened source; the
        # per-course section is assumed to end after the non-TC summary.
        for course in courseDatasets:
            print(course.name)
            path = os.path.join(resultsDir, course.name + '_contribution.csv')
            try:
                with open(path) as fid:
                    forumUserIds = [r.strip() for r in fid.readlines()]
            except IOError:
                # No readable contribution file for this course: skip it.
                continue

            topUserIds = getTopFivePercent(forumUserIds)
            DBSetup.switch(course)
            forumData = CourseForums()
            (resultsTC, resultsNonTC,
             continuousHistTC, continuousHistNonTC) = tallyPositions(
                forumData, topUserIds)

            # Fold this course's tallies into the running totals.
            cumulativeResultsTC = addResultsDict(cumulativeResultsTC, resultsTC)
            cumulativeResultsNonTC = addResultsDict(
                cumulativeResultsNonTC, resultsNonTC)
            cumulativeContHistTC = addResultsList(
                cumulativeContHistTC, continuousHistTC)
            cumulativeContHistNonTC = addResultsList(
                cumulativeContHistNonTC, continuousHistNonTC)

            ofid.write('--------------------------------------------\n')
            ofid.write('Course: ' + course.name + '\n')
            ofid.write('Top contributor post position histogram\n')
            summarization(ofid, resultsTC, 10)
            ofid.write('\n\n')
            ofid.write('Non top contributor post position histogram\n')
            summarization(ofid, resultsNonTC, 10)

        ofid.write('**************************************\n')
        ofid.write('Aggregated over courses:\n')
        ofid.write('Top contributor post position histogram\n')
        summarization(ofid, cumulativeResultsTC, 20)
        ofid.write('\n\n')
        ofid.write('Non top contributor post position histogram\n')
        summarization(ofid, cumulativeResultsNonTC, 20)

    normalizedCumulativeContHistTC = normalize(cumulativeContHistTC)
    normalizedCumulativeContHistNonTC = normalize(cumulativeContHistNonTC)

    outputPathTC = os.path.join(resultsDir, 'normalizedPositionHistTC.csv')
    with open(outputPathTC, 'wt') as ofid:
        for i in range(NUMBINS):
            ofid.write(
                str(i) + ', ' + str(normalizedCumulativeContHistTC[i]) + '\n')

    outputPathNonTC = os.path.join(resultsDir, 'normalizedPositionHistNonTC.csv')
    with open(outputPathNonTC, 'wt') as ofid:
        for i in range(NUMBINS):
            ofid.write(
                str(i) + ', ' + str(normalizedCumulativeContHistNonTC[i]) + '\n')
def _writeNormalizedHist(path, histogram):
    """Write the first NUMBINS entries of histogram to path as '<bin>, <value>' lines."""
    with open(path, 'wt') as ofid:
        for i in range(NUMBINS):
            ofid.write(str(i) + ', ' + str(histogram[i]) + '\n')


def run(projectName):
    """Write post-position histograms for top and non-top contributors.

    For every course whose '<course>_contribution.csv' file can be read from
    the project's results directory, a per-course section is written to
    'topContributorPositions.txt', followed by a section aggregated over all
    processed courses.  The accumulated continuous histograms are then
    normalized and written to 'normalizedPositionHistTC.csv' and
    'normalizedPositionHistNonTC.csv'.

    Courses whose contribution file raises IOError are skipped.

    Args:
        projectName: Sub-directory of the results directory that holds the
            contribution files and receives the output files.
    """
    courseDatasets = FileSystem.loadCourseDatasetInfo()
    resultsDir = os.path.join(FileSystem.getResultsDir(), projectName)
    outputPath = os.path.join(resultsDir, 'topContributorPositions.txt')

    cumulativeResultsTC = {}
    cumulativeResultsNonTC = {}
    cumulativeContHistTC = NUMBINS * [0]
    cumulativeContHistNonTC = NUMBINS * [0]

    # Use a context manager so the summary file is closed even when a helper
    # raises mid-run; the original relied on a trailing close() call.
    with open(outputPath, 'wt') as ofid:
        # NOTE(review): loop extent reconstructed from flattened source; the
        # per-course section is assumed to end after the non-TC summary.
        for course in courseDatasets:
            print(course.name)
            path = os.path.join(resultsDir, course.name + '_contribution.csv')
            try:
                with open(path) as fid:
                    forumUserIds = [r.strip() for r in fid.readlines()]
            except IOError:
                # No readable contribution file for this course: skip it.
                continue

            topUserIds = getTopFivePercent(forumUserIds)
            DBSetup.switch(course)
            forumData = CourseForums()
            (resultsTC, resultsNonTC,
             continuousHistTC, continuousHistNonTC) = tallyPositions(
                forumData, topUserIds)

            # Fold this course's tallies into the running totals.
            cumulativeResultsTC = addResultsDict(cumulativeResultsTC, resultsTC)
            cumulativeResultsNonTC = addResultsDict(
                cumulativeResultsNonTC, resultsNonTC)
            cumulativeContHistTC = addResultsList(
                cumulativeContHistTC, continuousHistTC)
            cumulativeContHistNonTC = addResultsList(
                cumulativeContHistNonTC, continuousHistNonTC)

            ofid.write('--------------------------------------------\n')
            ofid.write('Course: ' + course.name + '\n')
            ofid.write('Top contributor post position histogram\n')
            summarization(ofid, resultsTC, 10)
            ofid.write('\n\n')
            ofid.write('Non top contributor post position histogram\n')
            summarization(ofid, resultsNonTC, 10)

        ofid.write('**************************************\n')
        ofid.write('Aggregated over courses:\n')
        ofid.write('Top contributor post position histogram\n')
        summarization(ofid, cumulativeResultsTC, 20)
        ofid.write('\n\n')
        ofid.write('Non top contributor post position histogram\n')
        summarization(ofid, cumulativeResultsNonTC, 20)

    # Both histograms are normalized before either CSV is written, matching
    # the original call order.
    normalizedCumulativeContHistTC = normalize(cumulativeContHistTC)
    normalizedCumulativeContHistNonTC = normalize(cumulativeContHistNonTC)

    _writeNormalizedHist(
        os.path.join(resultsDir, 'normalizedPositionHistTC.csv'),
        normalizedCumulativeContHistTC)
    _writeNormalizedHist(
        os.path.join(resultsDir, 'normalizedPositionHistNonTC.csv'),
        normalizedCumulativeContHistNonTC)