# Fragment: tail of sentiment_of_tweet(text, abb_list, sentiment). The scores
# accumulated per new term are averaged and written out, then every tweet in
# the input collection is scored and stored.
    for key in accum_term.keys():
        total_sum = 0
        for term_score in accum_term[key]:
            total_sum = total_sum + term_score
        term_value = int(total_sum / len(accum_term[key]))  # truncated mean

        add_to_senti_file[key] = term_value

    # add the newly scored words to a file (or to the database)
    with open('new_words.txt', 'w') as f:
        for key, value in add_to_senti_file.items():
            f.write(key + '\t' + str(value) + '\n')

    return score, actual_tweet, tweet_id


res = mongo.load_from_mongo('input', 'data')
for i in res:
    a, b, c = sentiment_of_tweet(i, abb_list, sentiment)
    g = {}
    g['_id'] = c
    g['text'] = b
    g['sentiment'] = a
    mongo.save_to_mongo(g, 'output_final', 'with_ourscores')
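# ---------------------------------------------------------------------------
# The scripts in this section import save_to_mongo/load_from_mongo from a
# local mongo module that is not shown here. Below is a minimal sketch of
# what it could look like, assuming a local MongoDB instance and the pymongo
# driver; the (db, collection) argument order is inferred from the call
# sites, so treat this as an illustration rather than the original helper.
# ---------------------------------------------------------------------------
import pymongo


def save_to_mongo(data, mongo_db, mongo_db_coll, host='localhost', port=27017):
    # upsert a single document (dict) into the given database/collection
    client = pymongo.MongoClient(host, port)
    client[mongo_db][mongo_db_coll].save(data)


def load_from_mongo(mongo_db, mongo_db_coll, host='localhost', port=27017):
    # return every document in the given database/collection as a list
    client = pymongo.MongoClient(host, port)
    return list(client[mongo_db][mongo_db_coll].find())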
import re

import requests
from requests.auth import HTTPProxyAuth
from bs4 import BeautifulSoup

from mongo import save_to_mongo

proxy = {'http': 'http://192.168.1.103:3128'}
auth = HTTPProxyAuth('ipg_2011101', 'chitti chinni')

url = "http://en.wikipedia.org/wiki/List_of_emoticons"
print url
r = requests.get(url, proxies=proxy, auth=auth)
soup = BeautifulSoup(r.content)

# in the first wikitable the <td> cells alternate between the emoticon
# glyphs and their meaning
table = soup.find_all('table', {'class': 'wikitable'})
rows = table[0].find_all('td')
for i in range(0, len(rows), 2):
    icons = rows[i].text.split()
    meaning = rows[i + 1].text
    # strip citation markers such as [12] from the meaning text
    meaning = re.sub(r'\[', '', meaning)
    meaning = re.sub(r'\]', '', meaning)
    meaning = re.sub(r'\d', '', meaning)
    print meaning
    g = {}
    g['icons'] = icons
    g['meaning'] = meaning
    save_to_mongo(g, 'emoticons', 'list')
import csv

from mongo import save_to_mongo

# load the labelled Sanders twitter corpus into the input collection
fp = open("E:\\7th_Sem\\senti\\sanders-twitter-0.2\\sanders-twitter-0.2\\full-corpus.csv", "r")
reader = csv.reader(fp, delimiter=",", quotechar='"', escapechar="\\")

count = 0
for row in reader:
    count += 1
    g = {}
    g["category"] = row[0]
    g["sentiment"] = row[1]
    g["_id"] = count
    g["created_at"] = row[3]
    g["text"] = row[4]
    save_to_mongo(g, "input", "data")
fp.close()
# Fragment: body of sentiment(text, abb_list). A simple word-list scorer
# that flips the sign of the score when the tweet is negated with "not".
    regex = re.compile(r"not\b")
    if regex.search(tweet):
        negated = True
        tweet = re.sub(r"not\b", "", tweet)

    # preprocessing may increase accuracy
    tweet_id = text['_id']
    tweet = re.sub(r"(?:\@|https?\://)\S+", "", tweet)  # drop @mentions and URLs
    tweet = re.sub(r"\b\d+\b", "", tweet)               # drop standalone numbers
    tweet = tweet.strip().lower()

    word_list = tweet.split()
    for k in word_list:
        if k in pos_words:
            score = score + 1
        elif k in neg_words:
            score = score - 1
    if negated:
        score = -score
    return score, actual_text, tweet_id


res = mongo.load_from_mongo('input', 'data')
for i in res:
    a, b, c = sentiment(i, abb_list)
    g = {}
    g['_id'] = c
    g['text'] = b
    g['sentiment'] = a
    mongo.save_to_mongo(g, 'output_final', 'without_scores')
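# ---------------------------------------------------------------------------
# pos_words and neg_words are used above but never built in this fragment.
# A minimal sketch of how they could be loaded, assuming plain-text lexicon
# files with one word per line; the helper and the file names are
# placeholders, not part of the original code.
# ---------------------------------------------------------------------------
def load_word_list(filename):
    # read one lexicon entry per line, skipping blank lines
    with open(filename, 'r') as f:
        return [line.strip() for line in f if line.strip()]


pos_words = load_word_list('positive-words.txt')  # hypothetical file name
neg_words = load_word_list('negative-words.txt')  # hypothetical file name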
from mongo import load_from_mongo
from mongo import save_to_mongo

stopwords_file = 'stopwords.txt'
stop_lis = []


def stopwords_list(filename):
    with open(filename, 'r') as f:
        for line in f:
            line = line.replace('\n', '')
            stop_lis.append(line)


stopwords_list(stopwords_file)


def remove_stopwords(text):
    text = ' '.join([word for word in text.split()
                     if word not in stop_lis])
    return text


docs_before = load_from_mongo("hindu", "docs1")
for each in docs_before:
    each["text"] = remove_stopwords(each["text"])
    save_to_mongo(each, "hindu_modified", "docs1")
import requests
from requests.auth import HTTPProxyAuth
from bs4 import BeautifulSoup

from mongo import save_to_mongo

proxy = {'http': 'http://192.168.1.103:3128'}
auth = HTTPProxyAuth('ipg_2011101', 'chitti chinni')

# noslang.com groups its dictionary into one page per leading character:
# /dictionary/1/ for digits, then /dictionary/a/ .. /dictionary/z/
pages = ['1', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
         'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
for page in pages:
    url = "http://www.noslang.com/dictionary/%s/" % page
    print url
    r = requests.get(url, proxies=proxy, auth=auth)
    soup = BeautifulSoup(r.content)
    table = soup.find_all('table', {'width': "768"})
    tab = table[0].find_all('td', {'width': '608'})
    # each <dt> holds an abbreviation and the matching <dd> its expansion
    abbrs = tab[0].find_all('dt')
    meanings = tab[0].find_all('dd')
    for abbr, meaning in zip(abbrs, meanings):
        g = {}
        g[abbr.text] = meaning.text
        save_to_mongo(g, 'abbrivations', 'list')
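# ---------------------------------------------------------------------------
# The abb_list passed to the sentiment functions earlier is presumably built
# from this abbrivations collection. A minimal sketch of how the stored
# slang dictionary could be assembled and applied to a tweet before scoring;
# the expand_abbreviations helper is an illustration, not original code.
# ---------------------------------------------------------------------------
from mongo import load_from_mongo

# merge the one-pair documents into a single abbreviation -> expansion dict
abb_list = {}
for doc in load_from_mongo('abbrivations', 'list'):
    doc.pop('_id', None)  # drop the MongoDB id, keep only the slang pair
    abb_list.update(doc)


def expand_abbreviations(tweet, abb_list):
    # replace each slang token with its full form when it is known
    return ' '.join(abb_list.get(word, word) for word in tweet.split())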