예제 #1
0
def main(top_pct,pct_tweets,iterations):
  """Refine two sentiment dictionaries, save the merged result, and score it.

  Two seed dictionaries are loaded from 'sentmtListsm.txt'. For each of
  `iterations` rounds the training tweets are parsed against each dictionary
  with tweetParser and the result is fed to improvePD, which yields the
  dictionary used in the next round. The two dictionaries are merged with
  combine(), written to 'newparser.txt' as one "key value" line per entry,
  and finally used to score the held-out tweets.

  Args:
    top_pct: passed to improvePD as its first threshold argument.
    pct_tweets: passed to improvePD as its second threshold argument.
    iterations: number of refinement rounds. With 0 rounds the seed
      dictionaries are merged and written unchanged.
  """
  # Single-argument print produces the same output under Python 2 and 3.
  print("%s %s %s" % (top_pct, pct_tweets, iterations))

  # NOTE(review): p_/n_ presumably stand for positive/negative - confirm
  # against sentParser.createSimpDict.
  p_pd, n_pd = sentParser.createSimpDict('sentmtListsm.txt',2)

  tweets = tweetCleaner.cleanTweets('/scratch/nschult1/prunedTweets.txt')
  # NOTE(review): tweets[0] ends up in neither slice - confirm that skipping
  # the first entry is intentional (e.g. a header line) and not an off-by-one.
  gs_test_tweets = tweets[1:50]
  train_tweets = tweets[50:]

  combined = None
  for _ in range(iterations):
    p_twd, p_num_tweets = tweetParser(train_tweets,p_pd)
    n_twd, n_num_tweets = tweetParser(train_tweets,n_pd)

    # The trailing flag differs between the two calls (False vs True).
    p_pd = improvePD(p_twd,p_pd,p_num_tweets,top_pct,pct_tweets,False)
    n_pd = improvePD(n_twd,n_pd,n_num_tweets,top_pct,pct_tweets,True)

    # Kept inside the loop so combine() is called exactly as often as before.
    combined = combine(p_pd, n_pd)

  if combined is None:
    # No refinement round ran; previously this path hit a NameError because
    # the merged dictionary was only ever bound inside the loop.
    combined = combine(p_pd, n_pd)

  # 'with' guarantees the file is closed even if a write raises.
  with open('newparser.txt','w') as f:
    for key in combined:
      f.write('%s %f\n' %(key,combined[key]))

  # The score is not used here; the call is kept for whatever output the
  # scorer produces itself.
  tweetScorer.scorer(gs_test_tweets,combined,sentParser.createEDict(), True, True)
예제 #2
0
def main():
  """Score the pruned tweet file with the dictionary stored in 'newparser.txt'.

  Builds a dictionary from 'newparser.txt' via sentParser.createSimpDict,
  passes it to scorer() together with sentParser.createEDict(), and prints
  the score that scorer() returns.
  """
  txt='newparser.txt'
  total_score=scorer('/scratch/nschult1/prunedTweets.txt', sentParser.createSimpDict(txt,1),
      sentParser.createEDict(), True, True)
  # Single-argument print keeps the original output (label, two spaces,
  # value) and behaves the same under Python 2 and 3.
  print("Total Score:  %s" % (total_score,))