def generateWeeklyWC(cname, prefix, smartStopWords):
    """Generate one word cloud for every sufficiently active week of a course.

    Walks every week from the course's minimum week through its maximum week
    (inclusive). For each week with at least 20 posts, the week's posts are
    fetched as one string, counted with getWordCounts, and the resulting
    word/count arrays are handed to generateWC.

    Args:
        cname: Course identifier passed to every PostGetter query.
        prefix: Prefix used to build the per-week labels
            '<prefix>_week<week>_stop' and '<prefix>_week<week>'.
        smartStopWords: Forwarded unchanged to getWordCounts.

    Returns:
        None. The work happens in getWordCounts / generateWC.
    """
    # Weeks with fewer posts than this are skipped entirely.
    minPostsPerWeek = 20

    getter = PostGetter()
    firstWeek = getter.getMinWeekForCourse(cname)
    lastWeek = getter.getMaxWeekForCourse(cname)

    # lastWeek + 1 because range() excludes its stop value and the final week
    # must be included.
    for week in range(firstWeek, lastWeek + 1):
        if getter.getNumPosts(cname, week) < minPostsPerWeek:
            continue

        weekText = getter.getWeeklyDataForCourseAsString(cname, week)

        # The same "stop" label is handed to both getWordCounts and
        # generateWC (presumably an output name -- confirm against callees).
        stopLabel = '%s_week%s_stop' % (prefix, week)
        plainLabel = '%s_week%s' % (prefix, week)

        # Only the word and count arrays are needed; the dictionary that
        # getWordCounts returns first is not used here.
        (_wordCounts, w_, c_) = getWordCounts(
            weekText, stopLabel, plainLabel, smartStopWords)
        generateWC(w_, c_, stopLabel)
def generatePenalizeWeeklyWindows(cname, prefix, diff):
    """Generate weekly word clouds re-weighted against a window of prior weeks.

    Phase 1 computes word counts (getWordCounts with smartStopWords=True) for
    every week in range(minWeek, maxWeek) and stores them by week.

    Phase 2 visits every week after the first. Starting from that week's
    counts, it walks backwards through the preceding weeks (nearest first) and
    replaces the running dictionary with
    weightWords(previousWeekCounts, runningCounts, 0.5) at each step. The walk
    is limited by a countdown that starts at `diff` and stops when it reaches
    zero, so at most diff - 1 preceding weeks are applied. A `diff` of 0 or
    less never reaches zero, so every week back to minWeek is applied.

    The final dictionary is converted with dictToArray and, if it holds more
    than three words, passed to generateWC under the label
    '<prefix>_week<week>_stop_DIFF_EXP_PENALTY_DIFF<diff>'.

    Args:
        cname: Course identifier passed to every PostGetter query.
        prefix: Prefix used when building the per-week labels.
        diff: Window size controlling how many preceding weeks are applied
            (see above); also embedded in the output label.

    Returns:
        None.

    NOTE(review): both loops use range(..., maxWeek), so the course's maximum
    week itself is never processed -- confirm this is intended.
    NOTE(review): weightWords is defined elsewhere; presumably it down-weights
    words shared with the earlier week -- confirm.
    """
    # Week that receives extra debug tracing on stdout.
    traceWeek = 201412
    penalty = 0.5
    smartStopWords = True

    getter = PostGetter()
    minWeek = getter.getMinWeekForCourse(cname)
    maxWeek = getter.getMaxWeekForCourse(cname)

    # Phase 1: per-week word-count dictionaries.
    countsByWeek = {}
    for week in range(minWeek, maxWeek):
        weekText = getter.getWeeklyDataForCourseAsString(cname, week)
        (weekCounts, _words, _counts) = getWordCounts(
            weekText,
            '%s_week%s_stop' % (prefix, week),
            '%s_week%s' % (prefix, week),
            smartStopWords)
        countsByWeek[week] = weekCounts

    # Phase 2: re-weight each week against its preceding window.
    for week in range(minWeek + 1, maxWeek):
        currWordCountDict = countsByWeek[week]
        if week == traceWeek:
            print("CURRRRRRRRRRRRR")
            print(currWordCountDict)

        remaining = diff
        for prevWeek in range(week - 1, minWeek - 1, -1):
            # The countdown is decremented before the check, so diff == 1
            # stops immediately and applies no preceding week at all.
            remaining -= 1
            if remaining == 0:
                break
            currWordCountDict = weightWords(
                countsByWeek[prevWeek], currWordCountDict, penalty)
            print(currWordCountDict)
            if week == traceWeek:
                print(currWordCountDict)

        if week == traceWeek:
            print("CURRRRRRRRRRRRR")
            print(currWordCountDict)

        # Convert the running dictionary rather than a variable assigned only
        # inside the loop above: when the loop applies no week (diff == 1)
        # such a variable would be unbound, while otherwise both refer to the
        # same object.
        (w_, c_) = dictToArray(currWordCountDict)
        print(w_)
        print(c_)
        print(diff)
        if len(w_) > 3:
            generateWC(
                w_, c_,
                '%s_week%s_stop_DIFF_EXP_PENALTY_DIFF%s' % (prefix, week, diff))
def main():
    """Command-line entry point.

    Expects the course name as the first command-line argument. Without it, a
    usage message is printed and the process exits with status 1. Otherwise
    the full text of the course is fetched and printed, and week-over-week
    difference word clouds are generated via generateDiff with the prefix
    'WEEKL_DIFF_lmath' and smart stop words enabled.
    """
    # The PostGetter is created before argument validation.
    getter = PostGetter()

    cliArgs = sys.argv[1:]
    if not cliArgs:
        print('Usage: python wordcloud.py <cname> sample course names: Engineering/CVX101/Winter2014,Medicine/HRP258/Statistics_in_Medicine')
        sys.exit(1)

    # Further sample course names: Education/EDUC115N/How_to_Learn_Math
    cname = cliArgs[0]

    courseText = getter.getAllDataForCourseAsString(cname)
    print(courseText)

    # Alternative modes that were previously wired in here: a full-course
    # cloud (getWordCounts + generateWC with the 'wc_cvx_full' labels) and
    # generateWeeklyWC(cname, 'WEEKLYwc_cvx', smartStopWords).
    smartStopWords = True
    generateDiff(cname, 'WEEKL_DIFF_lmath', smartStopWords)
def generateDiff(cname, prefix, smartStopWords):
    """Generate a word cloud of the difference between each pair of adjacent weeks.

    For every week W in range(minWeek, maxWeek), the pair (W, W + 1) is
    considered. Pairs where either week has fewer than 20 posts are skipped.
    Otherwise both weeks' texts are fetched and counted with getWordCounts,
    the two dictionaries are combined with getDiff(earlier, later), and the
    result is rendered by generateWC under the label
    '<prefix>_week<W + 1>_stop_DIFF'.

    Args:
        cname: Course identifier passed to every PostGetter query.
        prefix: Prefix used when building the per-week labels.
        smartStopWords: Forwarded unchanged to getWordCounts.

    Returns:
        None.
    """
    getter = PostGetter()
    firstWeek = getter.getMinWeekForCourse(cname)
    lastWeek = getter.getMaxWeekForCourse(cname)

    for earlierWeek in range(firstWeek, lastWeek):
        laterWeek = earlierWeek + 1

        earlierPosts = getter.getNumPosts(cname, earlierWeek)
        laterPosts = getter.getNumPosts(cname, laterWeek)
        if earlierPosts < 20 or laterPosts < 20:
            # Too little activity in one of the two weeks to compare them.
            continue

        earlierText = getter.getWeeklyDataForCourseAsString(cname, earlierWeek)
        laterText = getter.getWeeklyDataForCourseAsString(cname, laterWeek)

        # Only the dictionaries are needed; the word/count arrays returned
        # alongside them are discarded.
        (earlierCounts, _ew, _ec) = getWordCounts(
            earlierText,
            '%s_week%s_stop' % (prefix, earlierWeek),
            '%s_week%s' % (prefix, earlierWeek),
            smartStopWords)
        (laterCounts, _lw, _lc) = getWordCounts(
            laterText,
            '%s_week%s_stop' % (prefix, laterWeek),
            '%s_week%s' % (prefix, laterWeek),
            smartStopWords)

        (w_, c_) = dictToArray(getDiff(earlierCounts, laterCounts))
        generateWC(w_, c_, '%s_week%s_stop_DIFF' % (prefix, laterWeek))
def generatePenalizeWeeks(cname, prefix, diff):
    """Generate word clouds of each week re-weighted against the week before it.

    For every week W in range(minWeek, maxWeek), the pair (W, W + 1) is
    considered. Pairs where either week has fewer than 20 posts are skipped.
    Otherwise both weeks are counted with getWordCounts (smartStopWords=True),
    combined with weightWords(earlier, later, 0.5), and the result is
    rendered by generateWC under the label
    '<prefix>_week<W + 1>_stop_DIFFPENALTY<diff>'.

    Args:
        cname: Course identifier passed to every PostGetter query.
        prefix: Prefix used when building the per-week labels.
        diff: Only embedded in the output label; it does not influence which
            weeks are compared.

    Returns:
        None.
    """
    # Fixed settings for every comparison made by this function.
    smartStopWords = True
    penalty = 0.5

    getter = PostGetter()
    firstWeek = getter.getMinWeekForCourse(cname)
    lastWeek = getter.getMaxWeekForCourse(cname)

    for earlierWeek in range(firstWeek, lastWeek):
        laterWeek = earlierWeek + 1

        earlierPosts = getter.getNumPosts(cname, earlierWeek)
        laterPosts = getter.getNumPosts(cname, laterWeek)
        if earlierPosts < 20 or laterPosts < 20:
            # Skip pairs where either week has too few posts.
            continue

        earlierText = getter.getWeeklyDataForCourseAsString(cname, earlierWeek)
        laterText = getter.getWeeklyDataForCourseAsString(cname, laterWeek)

        (earlierCounts, _ew, _ec) = getWordCounts(
            earlierText,
            '%s_week%s_stop' % (prefix, earlierWeek),
            '%s_week%s' % (prefix, earlierWeek),
            smartStopWords)
        (laterCounts, _lw, _lc) = getWordCounts(
            laterText,
            '%s_week%s_stop' % (prefix, laterWeek),
            '%s_week%s' % (prefix, laterWeek),
            smartStopWords)

        weighted = weightWords(earlierCounts, laterCounts, penalty)
        (w_, c_) = dictToArray(weighted)
        generateWC(
            w_, c_,
            '%s_week%s_stop_DIFFPENALTY%s' % (prefix, laterWeek, diff))
def generateDiffConfigurableWeeks(cname, prefix, diff):
    """Generate word clouds of a week diffed against the `diff` weeks before it.

    For every start week W in range(minWeek, maxWeek), the target week is
    W + diff. Iteration stops as soon as the target exceeds maxWeek. Pairs
    where the start or target week has fewer than 20 posts are skipped.

    The target week's counts (getWordCounts with smartStopWords=True) are
    then successively replaced by getDiff(weekCounts, runningCounts) for
    every week in range(W, W + diff), in ascending order. The final
    dictionary is converted with dictToArray and rendered by generateWC under
    the label '<prefix>_week<W + 1>_stop_DIFFCONF<diff>'.

    Args:
        cname: Course identifier passed to every PostGetter query.
        prefix: Prefix used when building the per-week labels.
        diff: Distance in weeks between the start and target week; also
            embedded in the output label. With diff <= 0 no earlier week is
            applied and the target week's own counts are rendered.

    Returns:
        None.

    NOTE(review): the output label uses W + 1, not the target week W + diff;
    for diff != 1 the label therefore does not name the target week. Left
    unchanged because existing output names may be relied upon -- confirm.
    """
    minPostsPerWeek = 20
    smartStopWords = True

    getter = PostGetter()
    minWeek = getter.getMinWeekForCourse(cname)
    maxWeek = getter.getMaxWeekForCourse(cname)

    for startWeek in range(minWeek, maxWeek):
        targetWeek = startWeek + diff
        if targetWeek > maxWeek:
            # Later start weeks would only push the target further out.
            break

        startPosts = getter.getNumPosts(cname, startWeek)
        targetPosts = getter.getNumPosts(cname, targetWeek)
        if startPosts < minPostsPerWeek or targetPosts < minPostsPerWeek:
            continue

        targetText = getter.getWeeklyDataForCourseAsString(cname, targetWeek)
        (runningCounts, _tw, _tc) = getWordCounts(
            targetText,
            '%s_week%s_stop' % (prefix, targetWeek),
            '%s_week%s' % (prefix, targetWeek),
            smartStopWords)

        # Fold every week of the window into the running dictionary.
        for windowWeek in range(startWeek, targetWeek):
            windowText = getter.getWeeklyDataForCourseAsString(
                cname, windowWeek)
            (windowCounts, _ww, _wc) = getWordCounts(
                windowText,
                '%s_week%s_stop' % (prefix, windowWeek),
                '%s_week%s' % (prefix, windowWeek),
                smartStopWords)
            runningCounts = getDiff(windowCounts, runningCounts)

        # Convert the running dictionary itself so an empty window
        # (diff <= 0) cannot reference an unassigned or stale loop result.
        (w_, c_) = dictToArray(runningCounts)
        generateWC(
            w_, c_,
            '%s_week%s_stop_DIFFCONF%s' % (prefix, startWeek + 1, diff))