Exemple #1
0
def generateWeeklyWC (cname, prefix, smartStopWords):
  """Render one word cloud for every sufficiently active week of a course.

  Every week from the course's minimum week to its maximum week
  (inclusive) is visited. Weeks with fewer than 20 posts are skipped.
  For each remaining week the week's text is passed to getWordCounts and
  the second and third elements of its result are handed to generateWC
  under the name '<prefix>_week<week>_stop'.

  Args:
    cname: course name forwarded to every PostGetter query.
    prefix: prefix for the names passed to getWordCounts / generateWC.
    smartStopWords: forwarded unchanged to getWordCounts.
  """
  getter = PostGetter()
  firstWeek = getter.getMinWeekForCourse(cname)
  lastWeek = getter.getMaxWeekForCourse(cname)

  for week in range(firstWeek, lastWeek + 1):
    # Only weeks with at least 20 posts get a cloud.
    if getter.getNumPosts(cname, week) >= 20:
      weekText = getter.getWeeklyDataForCourseAsString(cname, week)
      stopName = '%s_week%s_stop'%(prefix,week)
      plainName = '%s_week%s'%(prefix, week)
      (_counts, words, freqs) = getWordCounts(weekText, stopName, plainName, smartStopWords)
      generateWC(words, freqs, stopName)
Exemple #2
0
def generatePenalizeWeeklyWindows (cname, prefix, diff):
  """Generate weekly word clouds weighted against a window of earlier weeks.

  First collects the word-count dict (first element returned by
  getWordCounts) for every week in range(minWeek, maxWeek). Then, for each
  week after the first, the week's dict is repeatedly combined with the
  dicts of preceding weeks through weightWords(prev, current, 0.5),
  walking backwards from week-1. The result is converted with dictToArray
  and, when it has more than 3 words, rendered with generateWC.

  Args:
    cname: course name forwarded to every PostGetter query.
    prefix: prefix for the names passed to getWordCounts / generateWC.
    diff: window size. The counter starts at diff and is decremented
      before each merge, stopping at 0, so at most diff-1 earlier weeks
      are merged (every earlier week when diff <= 0).

  NOTE(review): both loops use range(minWeek, maxWeek), so maxWeek itself
  is never processed here -- confirm whether that is intended.
  """
  p = PostGetter ()
  minWeek = p.getMinWeekForCourse (cname)
  maxWeek = p.getMaxWeekForCourse (cname)
  # Week for which extra debug output is printed.
  debugWeek = 201412
  smartStopWords = True

  # week -> word-count dict for that week
  d = {}
  for week in range(minWeek, maxWeek):
    currText = p.getWeeklyDataForCourseAsString(cname, week)
    (currWordCountDict, currw_, currc_) = getWordCounts(currText,'%s_week%s_stop'%(prefix,week),'%s_week%s'%(prefix, week), smartStopWords)
    d[week] = currWordCountDict

  for week in range(minWeek+1, maxWeek):
    currWordCountDict = d[week]

    if week == debugWeek:
      print("CURRRRRRRRRRRRR")
      print(currWordCountDict)

    ctr = diff
    for i in range (week-1, minWeek-1, -1):
      ctr = ctr-1
      if (ctr == 0):
        break
      # Fold the earlier week into the running result.
      currWordCountDict = weightWords (d[i], currWordCountDict, 0.5)
      print(currWordCountDict)
      if week == debugWeek:
        print(currWordCountDict)

    if week == debugWeek:
      print("CURRRRRRRRRRRRR")
      print(currWordCountDict)

    # Use the running result rather than a name bound only inside the loop:
    # with diff == 1 no merge happens and the old `diffDict` was unbound.
    (w_,c_)=dictToArray (currWordCountDict)
    print(w_)
    print(c_)
    print(diff)

    if (len(w_) > 3):
      generateWC(w_,c_,'%s_week%s_stop_DIFF_EXP_PENALTY_DIFF%s'%(prefix,week,diff))
Exemple #3
0
def main():
  """Command-line entry point: print a course's text and build its diff clouds.

  Expects the course name as the first command-line argument. Without it
  a usage message is printed and the process exits with status 1.
  Otherwise the full course text is fetched and printed, and generateDiff
  is run for the course with the prefix 'WEEKL_DIFF_lmath' and smart stop
  words enabled.
  """
  getter = PostGetter()

  args = sys.argv[1:]
  if not args:
    print('Usage: python wordcloud.py <cname> sample course names: Engineering/CVX101/Winter2014,Medicine/HRP258/Statistics_in_Medicine')
    sys.exit(1)
  cname = args[0]

  print(getter.getAllDataForCourseAsString(cname))

  # Alternative pipelines previously tried here (currently disabled):
  #   getWordCounts(text, 'wc_cvx_full_stop', 'wc_cvx_full', True) + generateWC
  #   generateWeeklyWC(cname, 'WEEKLYwc_cvx', True)
  generateDiff (cname, 'WEEKL_DIFF_lmath', True)
Exemple #4
0
def generateDiff (cname, prefix, smartStopWords):
  """Render a word cloud of the difference between each pair of adjacent weeks.

  For every week w in range(minWeek, maxWeek), the pair (w, w+1) is
  considered. Pairs where either week has fewer than 20 posts are
  skipped. Otherwise both weeks' word-count dicts are obtained from
  getWordCounts, combined with getDiff, converted with dictToArray and
  rendered by generateWC as '<prefix>_week<w+1>_stop_DIFF'.

  Args:
    cname: course name forwarded to every PostGetter query.
    prefix: prefix for the names passed to getWordCounts / generateWC.
    smartStopWords: forwarded unchanged to getWordCounts.
  """
  source = PostGetter()
  firstWeek = source.getMinWeekForCourse(cname)
  lastWeek = source.getMaxWeekForCourse(cname)

  for earlier in range(firstWeek, lastWeek):
    later = earlier + 1
    earlierPosts = source.getNumPosts(cname, earlier)
    laterPosts = source.getNumPosts(cname, later)
    # Both weeks need at least 20 posts to be compared.
    if earlierPosts >= 20 and laterPosts >= 20:
      earlierText = source.getWeeklyDataForCourseAsString(cname, earlier)
      laterText = source.getWeeklyDataForCourseAsString(cname, later)

      (earlierCounts, _ew, _ec) = getWordCounts(
          earlierText,
          '%s_week%s_stop'%(prefix,earlier),
          '%s_week%s'%(prefix, earlier),
          smartStopWords)
      (laterCounts, _lw, _lc) = getWordCounts(
          laterText,
          '%s_week%s_stop'%(prefix,later),
          '%s_week%s'%(prefix, later),
          smartStopWords)

      (words, freqs) = dictToArray(getDiff(earlierCounts, laterCounts))
      generateWC(words, freqs, '%s_week%s_stop_DIFF'%(prefix,later))
Exemple #5
0
def generatePenalizeWeeks (cname, prefix, diff):
  """Render word clouds of each week weighted against the week before it.

  For every week w in range(minWeek, maxWeek), the pair (w, w+1) is
  considered. Pairs where either week has fewer than 20 posts are
  skipped. Otherwise both weeks' word-count dicts are obtained from
  getWordCounts (smart stop words enabled), combined with
  weightWords(current, next, 0.5), converted with dictToArray and
  rendered by generateWC as '<prefix>_week<w+1>_stop_DIFFPENALTY<diff>'.

  Args:
    cname: course name forwarded to every PostGetter query.
    prefix: prefix for the names passed to getWordCounts / generateWC.
    diff: only used as a suffix in the generated output name.
  """
  posts = PostGetter ()
  startWeek = posts.getMinWeekForCourse (cname)
  endWeek = posts.getMaxWeekForCourse (cname)
  useSmartStops = True
  weight = 0.5

  for thisWeek in range(startWeek, endWeek):
    followingWeek = thisWeek + 1
    thisCount = posts.getNumPosts(cname, thisWeek)
    followingCount = posts.getNumPosts(cname, followingWeek)
    # Skip the pair unless both weeks reach the 20-post minimum.
    if not (thisCount >= 20 and followingCount >= 20):
      continue

    thisText = posts.getWeeklyDataForCourseAsString(cname, thisWeek)
    followingText = posts.getWeeklyDataForCourseAsString(cname, followingWeek)

    thisResult = getWordCounts(thisText, '%s_week%s_stop'%(prefix,thisWeek), '%s_week%s'%(prefix, thisWeek), useSmartStops)
    (thisDict, _tw, _tc) = thisResult
    followingResult = getWordCounts(followingText, '%s_week%s_stop'%(prefix,followingWeek), '%s_week%s'%(prefix, followingWeek), useSmartStops)
    (followingDict, _fw, _fc) = followingResult

    weighted = weightWords (thisDict, followingDict, weight)
    (words, freqs) = dictToArray (weighted)
    generateWC(words, freqs, '%s_week%s_stop_DIFFPENALTY%s'%(prefix,followingWeek,diff))
Exemple #6
0
def generateDiffConfigurableWeeks (cname, prefix, diff):
  """Render word clouds of a week diffed against the `diff` weeks before it.

  For every start week w in range(minWeek, maxWeek) the target week is
  w + diff; iteration stops once the target exceeds maxWeek. Pairs where
  the start or target week has fewer than 20 posts are skipped. The
  target week's word-count dict is then passed through
  getDiff(weekDict, running) once for each week in range(w, w + diff),
  each result becoming the new running dict. The final dict is converted
  with dictToArray and rendered by generateWC as
  '<prefix>_week<w+1>_stop_DIFFCONF<diff>'.

  Args:
    cname: course name forwarded to every PostGetter query.
    prefix: prefix for the names passed to getWordCounts / generateWC.
    diff: distance in weeks between the start week and the target week.

  NOTE(review): the output name is labelled with w+1 rather than the
  target week w+diff -- confirm this is intended for diff != 1.
  """
  p = PostGetter ()
  minWeek = p.getMinWeekForCourse (cname)
  maxWeek = p.getMaxWeekForCourse (cname)
  smartStopWords = True

  for week in range(minWeek, maxWeek):
    curr = week
    target = week+diff
    if target > maxWeek:
      break
    currNumPosts = p.getNumPosts(cname, curr)
    targetNumPosts = p.getNumPosts(cname, target)
    if (currNumPosts < 20 or targetNumPosts < 20):
      continue

    targetText = p.getWeeklyDataForCourseAsString (cname, target)
    (runningDict, targetw_, targetc_) = getWordCounts(targetText,'%s_week%s_stop'%(prefix,target),'%s_week%s'%(prefix, target), smartStopWords)

    # Diff the running dict against each week in [curr, target) in order.
    for i in range (curr, target):
      currText = p.getWeeklyDataForCourseAsString(cname, i)
      (currWordCountDict, currw_, currc_) = getWordCounts(currText,'%s_week%s_stop'%(prefix,i),'%s_week%s'%(prefix, i), smartStopWords)
      runningDict = getDiff (currWordCountDict, runningDict)

    # Read the running dict, not a name bound only inside the loop: when the
    # range above is empty (diff <= 0) the old `diffDict` was unbound.
    (w_,c_)=dictToArray (runningDict)
    generateWC(w_,c_,'%s_week%s_stop_DIFFCONF%s'%(prefix,week+1,diff))