    # average the scores accumulated for each term, truncated to an int
    for key in accum_term.keys():
        term_value = int(sum(accum_term[key]) / len(accum_term[key]))
        add_to_senti_file[key] = term_value
    # persist the newly scored words to a file
    with open('new_words.txt', 'w') as f:
        for key, value in add_to_senti_file.items():
            f.write(key + '\t' + str(value) + '\n')

    
    
    return score, actual_tweet, tweet_id

res = mongo.load_from_mongo('input', 'data')
for i in res:
    a, b, c = sentiment_of_tweet(i, abb_list, sentiment)
    g = {}
    g['_id'] = c
    g['text'] = b
    g['sentiment'] = a
    mongo.save_to_mongo(g, 'output_final', 'with_ourscores')
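
Every snippet in this collection imports save_to_mongo / load_from_mongo from a local mongo helper module that is never shown. A minimal sketch of what it presumably looks like, assuming pymongo and a MongoDB server on localhost (the argument order is inferred from the calls above):

# Hypothetical reconstruction of the local mongo helper module; assumes
# pymongo and a MongoDB server running on localhost:27017.
from pymongo import MongoClient

_client = MongoClient('localhost', 27017)


def save_to_mongo(doc, db_name, collection_name):
    # insert a single document into db_name.collection_name
    _client[db_name][collection_name].insert_one(doc)


def load_from_mongo(db_name, collection_name):
    # return a cursor over all documents in db_name.collection_name
    return _client[db_name][collection_name].find()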
Example #3
import re

import requests
from requests.auth import HTTPProxyAuth
from bs4 import BeautifulSoup
from mongo import save_to_mongo

proxy = {'http': 'http://192.168.1.103:3128'}
auth = HTTPProxyAuth('ipg_2011101', 'chitti   chinni')

# scrape the emoticon tables from Wikipedia
url = "http://en.wikipedia.org/wiki/List_of_emoticons"
print url
r = requests.get(url, proxies=proxy, auth=auth)
soup = BeautifulSoup(r.content, 'html.parser')
table = soup.find_all('table', {'class': 'wikitable'})
rows = table[0].find_all('td')

# table cells come in (icons, meaning) pairs
for i in range(0, len(rows), 2):
    icons = rows[i].text.split()
    meaning = rows[i + 1].text
    # strip footnote brackets and reference digits such as [5]
    meaning = re.sub(r'[\[\]\d]', '', meaning)
    print meaning
    g = {}
    g['icons'] = icons
    g['meaning'] = meaning
    save_to_mongo(g, 'emoticons', 'list')
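
For reference, the stored documents can be read back into a flat lookup; a short sketch using the same assumed mongo helper:

# Sketch: fold the scraped documents into an icon -> meaning dict.
from mongo import load_from_mongo

emoticon_meaning = {}
for doc in load_from_mongo('emoticons', 'list'):
    for icon in doc['icons']:
        emoticon_meaning[icon] = doc['meaning']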
Example #4
import csv

from mongo import save_to_mongo
from mongo import load_from_mongo

# load the Sanders twitter corpus into the input.data collection;
# row layout: category, sentiment, (tweet id, unused), created_at, text
fp = open("E:\\7th_Sem\\senti\\sanders-twitter-0.2\\sanders-twitter-0.2\\full-corpus.csv", "r")
reader = csv.reader(fp, delimiter=",", quotechar='"', escapechar="\\")
count = 0
for row in reader:
    count += 1
    g = {}
    g["category"] = row[0]
    g["sentiment"] = row[1]
    g["_id"] = count  # running count used as the document id
    g["created_at"] = row[3]
    g["text"] = row[4]
    save_to_mongo(g, "input", "data")


# Fragment reconstructed as a function; the original header and the
# score/tweet initialisation are not shown, so those lines are assumptions.
# pos_words and neg_words are assumed to be loaded elsewhere.
import re


def sentiment(text, abb_list):
    score = 0
    negated = False
    actual_text = text['text']
    tweet = actual_text

    # flag negation and strip the negation word itself
    regex = re.compile(r"not\b")
    if regex.search(tweet):
        negated = True
        tweet = re.sub(r"not\b", "", tweet)
    # preprocessing may increase accuracy

    tweet_id = text['_id']
    tweet = re.sub(r"(?:\@|https?\://)\S+", "", tweet)  # drop @mentions and URLs
    tweet = re.sub(r"\b\d+\b", "", tweet)  # drop standalone numbers
    tweet = tweet.strip().lower()

    # +1 per positive word, -1 per negative word; negation flips the total
    for k in tweet.split():
        if k in pos_words:
            score = score + 1
        elif k in neg_words:
            score = score - 1
    if negated:
        score = -score
    return score, actual_text, tweet_id
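
A quick standalone check of the scoring logic on a hand-made document, with the word lists stubbed to illustrative values:

# Stubbed word lists, illustrative values only.
pos_words = ['good', 'great']
neg_words = ['bad', 'awful']

doc = {'_id': 1, 'text': 'not a good day'}
# "good" scores +1, then the leading "not" flips it: prints (-1, 'not a good day', 1)
print sentiment(doc, [])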


res = load_from_mongo('input', 'data')
for i in res:
    a, b, c = sentiment(i, abb_list)
    g = {}
    g['_id'] = c
    g['text'] = b
    g['sentiment'] = a
    save_to_mongo(g, 'output_final', 'without_scores')
from mongo import load_from_mongo
from mongo import save_to_mongo

stopwords_file = 'stopwords.txt'
stop_lis = []


def stopwords_list(filename):
    # read one stopword per line into the global stop_lis
    with open(filename, 'r') as f:
        for line in f:
            stop_lis.append(line.rstrip('\n'))


stopwords_list(stopwords_file)


def remove_stopwords(text):
    # rebuild the text keeping only words outside the stopword list
    return ' '.join([word for word in text.split() if word not in stop_lis])


docs_before = load_from_mongo("hindu", "docs1")
for each in docs_before:
    each["text"] = remove_stopwords(each["text"])
    save_to_mongo(each, "hindu_modified", "docs1")

Example #8
import requests
from requests.auth import HTTPProxyAuth
from bs4 import BeautifulSoup
from mongo import save_to_mongo

proxy = {'http': 'http://192.168.1.103:3128'}
auth = HTTPProxyAuth('ipg_2011101', 'chitti   chinni')

# dictionary index pages to crawl
lis = [
    1, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
]

for page in lis:
    url = "http://www.noslang.com/dictionary/%s/" % (page)
    print url

    r = requests.get(url, proxies=proxy, auth=auth)
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find_all('table', {'width': "768"})
    tab = table[0].find_all('td', {'width': '608'})
    # slang terms sit in <dt> tags, definitions in the matching <dd> tags
    lst1 = tab[0].find_all('dt')
    lst2 = tab[0].find_all('dd')
    for term, meaning in zip(lst1, lst2):
        g = {}
        g[term.text] = meaning.text
        save_to_mongo(g, 'abbrivations', 'list')
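
The abb_list passed to the sentiment functions earlier is never built in these snippets; presumably it comes from this 'abbrivations' collection. A possible sketch:

# Sketch: flatten the scraped {slang: meaning} documents into one dict,
# assuming this is where abb_list comes from.
from mongo import load_from_mongo

abb_list = {}
for doc in load_from_mongo('abbrivations', 'list'):
    for slang, meaning in doc.items():
        if slang != '_id':  # skip the MongoDB id field
            abb_list[slang] = meaning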