from textblob import TextBlob

def duygu_analizi():
    # Sentiment analysis of Turkish announcements: translate each line of
    # Duyuru.txt to English and record its polarity ("Kutupluk") and
    # subjectivity ("Öznellik") in Duyuru_Analiz.txt.
    with open('Duyuru.txt', 'r', encoding='utf-8') as file2, \
         open('Duyuru_Analiz.txt', 'w', encoding='utf-8') as file5:
        for duyuru in file2.readlines():
            blob1 = TextBlob(duyuru)
            try:
                blob_eng = blob1.translate(to="en")
                file5.write(str(blob1.strip()) +
                            "\n(Kutupluk: " + str(blob_eng.sentiment.polarity) +
                            " Öznellik: " + str(blob_eng.sentiment.subjectivity) + ')\n')
            except Exception:
                # translation can fail (network errors, untranslatable lines); skip those
                continue
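# A minimal driver sketch (not part of the original script). Note that
# TextBlob.translate() calls an online translation service, so it needs
# network access; it was deprecated in textblob 0.16 and removed in later
# releases, so pin an older textblob or swap in another translator if needed.
if __name__ == '__main__':
    duygu_analizi()
    with open('Duyuru_Analiz.txt', encoding='utf-8') as f:
        print(f.read())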
import re
from textblob import TextBlob
from nltk.stem import PorterStemmer

def preprocessing(tweet, sentiment):
    # Clean one tweet and append it, with its sentiment label, to the
    # module-level output files fp1 (tweet text) and fp2 (labels).
    tweet = re.sub(r'(www\.[^\s]+)', '', str(tweet))       # remove URLs
    tweet = re.sub(r'https?:\/\/.*\/\w*', '', str(tweet))  # remove hyperlinks
    tweet = re.sub(r'&\w*', '', str(tweet))                # remove &-entities
    tweet = re.sub(r'@[^\s]+', '', tweet)                  # remove @mentions
    tweet = re.sub(r'#\w*', '', str(tweet))                # remove hashtags
    tweet = re.sub(r'\$\w*', '', str(tweet))               # remove tickers
    tweet = tweet.strip()                                  # trim leading/trailing whitespace
    tweet = tweet.lower()                                  # normalize case
    # Expand negated contractions before the alphabet-only filter strips the apostrophes.
    negations_dic = {
        "isn't": "is not", "aren't": "are not", "wasn't": "was not",
        "weren't": "were not", "haven't": "have not", "hasn't": "has not",
        "hadn't": "had not", "won't": "will not", "wouldn't": "would not",
        "don't": "do not", "doesn't": "does not", "didn't": "did not",
        "can't": "can not", "couldn't": "could not", "shouldn't": "should not",
        "mightn't": "might not", "mustn't": "must not"
    }
    t = re.compile(r'\b(' + '|'.join(negations_dic.keys()) + r')\b')
    tweet = t.sub(lambda x: negations_dic[x.group()], str(tweet))
    tweet = re.sub('[^a-zA-Z]', ' ', str(tweet))           # keep alphabetic characters only
    tweet = TextBlob(tweet).correct()                      # spelling correction
    tweet = re.sub(r'[\s]+', ' ', str(tweet))              # collapse repeated whitespace
    tweet = tweet.strip()
    tweet = tweet.split()
    ps = PorterStemmer()                                   # strip suffixes such as "ing", "ly", "s"
    tweet = [ps.stem(word) for word in tweet]
    tweet = ' '.join(tweet)
    if len(tweet.split()) != 0:                            # skip tweets that cleaned down to nothing
        fp2.writelines(sentiment + '\n')
        fp1.writelines(tweet + '\n')
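# fp1 and fp2 are module-level file handles that preprocessing() writes to.
# A sketch of how they might be wired up (the file names and the sample
# tweet here are assumptions, not the project's actual ones):
fp1 = open('clean_tweets.txt', 'w', encoding='utf-8')   # cleaned tweet text
fp2 = open('labels.txt', 'w', encoding='utf-8')         # matching sentiment labels
preprocessing("I can't believe it's raining!!! www.example.com", "negative")
fp1.close()
fp2.close()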
from textblob import TextBlob

# Translate each Turkish trigram to English, score it with TextBlob, and
# route it to a positive, negative, or neutral file by polarity.
with open("SentiPhraseNet/Train_Test_Unknown_Trigrams.txt", 'r', encoding='utf-8') as f1, \
     open("dynmi_pos_tri.txt", 'a', encoding='utf-8') as f2, \
     open("dynmi_neg_tri.txt", 'a', encoding='utf-8') as f3, \
     open("dynmi_neu_tri.txt", 'a', encoding='utf-8') as f4:
    for line in f1:
        blob = TextBlob(line)
        tran_blob = blob.translate(to='en')
        pol = tran_blob.sentiment.polarity
        print(blob, pol)
        row = str(blob).strip('\n') + "\t" + str(tran_blob) + "\t" + str(pol) + "\n"
        if pol > 0.0:
            f2.write(row)
        elif pol < 0.0:
            f3.write(row)
        else:
            f4.write(row)
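# Note: as in duygu_analizi() above, translate() can raise (for example,
# textblob.exceptions.NotTranslated when the text comes back unchanged),
# which would abort this loop mid-file. A suggested hardening, not the
# original behavior, would guard the call:
#
#     try:
#         tran_blob = blob.translate(to='en')
#     except Exception:
#         continue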
import re
from textblob import TextBlob
from rake_nltk import Rake
# Assumed to be provided elsewhere in the project: a loaded spaCy model `nlp`
# and the helpers paraphrase() and pos(); pos() populates the global `output` set.

def answer(input, filename):
    # Build fill-in-the-blank questions either from a file or from a raw
    # input string. Returns a list of (blanked_sentence, answer) items.
    temp = []
    contents = []
    cindex = -1
    if filename:
        with open(filename, 'r') as f:
            data_in = f.read()
    else:
        data_in = input
    # Prepend a dummy heading so the first real numbered heading opens a new block.
    data_in = "1.0 Placeholder\n" + data_in
    data_in = re.sub(r'\n{2,}', '\n', data_in)
    # Drop everything from the "Summary" section onward.
    if re.search('Summary', data_in):
        data_in = data_in[:re.search('Summary', data_in).start()]
    # Skip any front matter before the first "<chapter>.1" heading.
    if re.search(r'[0-9]{1,2}\.1', data_in):
        data_in = data_in[re.search(r'[0-9]{1,2}\.1', data_in).start():]
    # Remove figure references and stand-alone Roman numerals.
    data_in = re.sub(r"\b(FIG)\.\s[0-9\.]{2,}s?\b", "", data_in, flags=re.IGNORECASE)
    data_in = re.sub(r"(^|\s)(I|(II)|(III)|(IV)|(V)|(VI)|(VII)|(VIII)|(IX)|X)($|\s)",
                     "", data_in, flags=re.IGNORECASE)
    paras = [i.strip() for i in data_in.split("\n") if i != ""]
    # Group paragraphs under their numbered section headings.
    for ele in paras:
        if re.search(r"^[0-9]{1,2}\.[0-9]", ele):
            cindex += 1
            contents.append([])
        else:
            if cindex == 0:  # text under the placeholder heading is ignored
                continue
            contents[cindex].append(ele)
    if input:
        contents = [[input]]
    for text in contents:
        if input:
            # Blank out PERSON and LAW entities found by spaCy.
            d = nlp(str(input))
            temp_text = str(input)
            land = [(x.text, x.label_) for x in d.ents]
            final = [ent for ent, label in land if label in ("PERSON", "LAW")]
            for i in final:
                temp.append((temp_text.replace(i, "____________"), i))
        global output
        output = set()
        if len(text) == 0:
            continue
        text = ' '.join(text)
        text = re.sub(r'\’', '', text)
        text = TextBlob(text.lower())
        # Paraphrase the section sentence by sentence.
        par_text = TextBlob("")
        for sentence in text.sentences:
            par_text += paraphrase(sentence)
            par_text += " "
        par_text = str(par_text).strip()
        # Generate questions for the top 20 ranked key phrases.
        r = Rake()
        r.extract_keywords_from_text(par_text)
        par_text = TextBlob(par_text)
        counter = 0
        for i in r.get_ranked_phrases():
            counter += 1
            if counter > 20:
                break
            for sentence in par_text.sentences:
                if i in sentence:
                    pos(sentence, i)  # adds its results to the global `output`
        temp.extend(output)
    return temp
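# A usage sketch (assumes the module-level dependencies noted above are in
# place: the spaCy model `nlp` plus the project's paraphrase() and pos()
# helpers; the sample sentence is an assumption for illustration):
if __name__ == '__main__':
    for item in answer("Isaac Newton proposed the law of universal gravitation.", None):
        print(item)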
def Getsearch(self):
    # Method of the GUI class; assumes module imports: re, tweepy,
    # tweepy.OAuthHandler, textblob.TextBlob, nltk.stem.PorterStemmer.
    # Fetch tweets matching the GUI search term, clean each one with the
    # same pipeline as preprocessing(), display it, and save it to tweets.csv.
    auth = OAuthHandler(self.API_KEY, self.API_SECRET_KEY)
    auth.set_access_token(self.ACESS_TOKEN_KEY, self.ACESS_TOKEN_SECRET_KEY)
    api = tweepy.API(auth)
    searchTerm = str(self.search1.get())
    NoOfTerms = int(self.search2.get())
    tweets = tweepy.Cursor(api.search, q=searchTerm, lang="en").items(NoOfTerms)
    xx = [tweet.text for tweet in tweets]
    # Expand negated contractions before the alphabet-only filter strips the apostrophes.
    negations_dic = {
        "isn't": "is not", "aren't": "are not", "wasn't": "was not",
        "weren't": "were not", "haven't": "have not", "hasn't": "has not",
        "hadn't": "had not", "won't": "will not", "wouldn't": "would not",
        "don't": "do not", "doesn't": "does not", "didn't": "did not",
        "can't": "can not", "couldn't": "could not", "shouldn't": "should not",
        "mightn't": "might not", "mustn't": "must not"
    }
    t = re.compile(r'\b(' + '|'.join(negations_dic.keys()) + r')\b')
    ps = PorterStemmer()
    with open("tweets.csv", 'w') as fp1:
        for tweet in xx:
            tweet = re.sub(r'(www\.[^\s]+)', '', str(tweet))       # remove URLs
            tweet = re.sub(r'https?:\/\/.*\/\w*', '', str(tweet))  # remove hyperlinks
            tweet = re.sub(r'&\w*', '', str(tweet))                # remove &-entities
            tweet = re.sub(r'@[^\s]+', '', str(tweet))             # remove @mentions
            tweet = re.sub(r'#\w*', '', str(tweet))                # remove hashtags
            tweet = re.sub(r'\$\w*', '', str(tweet))               # remove tickers
            tweet = tweet.strip()                                  # trim leading/trailing whitespace
            tweet = tweet.lower()                                  # normalize case
            tweet = t.sub(lambda x: negations_dic[x.group()], str(tweet))
            tweet = re.sub('[^a-zA-Z]', ' ', str(tweet))           # keep alphabetic characters only
            tweet = TextBlob(tweet).correct()                      # spelling correction
            tweet = re.sub(r'\b\w{1,2}\b', '', str(tweet))         # remove words with 2 or fewer letters
            tweet = re.sub(r'[\s]+', ' ', str(tweet))              # collapse repeated whitespace
            tweet = tweet.strip()
            tweet = tweet.split()
            tweet = ' '.join(ps.stem(word) for word in tweet)      # strip suffixes such as "ing", "ly", "s"
            self.TxtBox.insert(0.0, "\n" + tweet + "\n")
            fp1.writelines(tweet + '\n')
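# Compatibility note: API.search was renamed to API.search_tweets in tweepy 4.x,
# so on a current tweepy install the cursor line above would read:
#
#     tweets = tweepy.Cursor(api.search_tweets, q=searchTerm, lang="en").items(NoOfTerms)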