Example #1
from textblob import TextBlob


def duygu_analizi():
    # "Duygu analizi" = sentiment analysis: translate each Turkish announcement
    # to English, then score polarity (Kutupluk) and subjectivity (Öznellik).
    file2 = open('Duyuru.txt', 'r', encoding='utf-8')
    file5 = open('Duyuru_Analiz.txt', 'w', encoding='utf-8')
    for duyuru in file2.readlines():
        blob1 = TextBlob(duyuru)
        try:
            blob_eng = blob1.translate(to="en")  # needs a textblob release that still ships translate()
            file5.write(str(blob1.strip()) +
                        "\n(Kutupluk: " + str(blob_eng.sentiment.polarity) +
                        " Öznellik: " + str(blob_eng.sentiment.subjectivity) + ')\n')
        except Exception:
            continue  # skip lines the translator rejects (e.g. text already in English)
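
A minimal driver for the function above, assuming the file names from the example and a textblob release that still ships translate() (the translate API was removed from newer textblob versions); the sample line is illustrative only:

# Hypothetical driver; 'Duyuru.txt' ("announcement") is the input the example expects.
with open('Duyuru.txt', 'w', encoding='utf-8') as f:
    f.write('Bugün hava çok güzel.\n')  # one sample Turkish line

duygu_analizi()  # writes Duyuru_Analiz.txt with polarity/subjectivity per line

with open('Duyuru_Analiz.txt', encoding='utf-8') as f:
    print(f.read())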
Example #2
import re
from textblob import TextBlob
from nltk.stem import PorterStemmer


def preprocessing(tweet, sentiment):
    # Cleans one tweet and appends tweet/label lines to the module-level
    # handles fp1/fp2 (see the setup sketch after this example).
    global count
    tweet = re.sub(r'(www\.[^\s]+)', '', str(tweet))  # remove URLs
    tweet = re.sub(r'https?:\/\/.*\/\w*', '', str(tweet))  # remove hyperlinks
    tweet = re.sub(r'&\w*', '', str(tweet))  # remove entities such as &amp;
    tweet = re.sub(r'@[^\s]+', '', tweet)  # remove @mentions
    tweet = re.sub(r'#\w*', '', str(tweet))  # remove hashtags
    tweet = re.sub(r'\$\w*', '', str(tweet))  # remove tickers
    tweet = tweet.strip(' ')  # trim spaces from both ends
    tweet = tweet.lower()  # fold to lower case
    negations_dic = {
        "isn't": "is not",
        "aren't": "are not",
        "wasn't": "was not",
        "weren't": "were not",
        "haven't": "have not",
        "hasn't": "has not",
        "hadn't": "had not",
        "won't": "will not",
        "wouldn't": "would not",
        "don't": "do not",
        "doesn't": "does not",
        "didn't": "did not",
        "can't": "can not",
        "couldn't": "could not",
        "shouldn't": "should not",
        "mightn't": "might not",
        "mustn't": "must not"
    }
    t = re.compile(r'\b(' + '|'.join(negations_dic.keys()) + r')\b')
    tweet = t.sub(lambda x: negations_dic[x.group()], str(tweet))
    tweet = re.sub(r'[^a-zA-Z]', ' ', str(tweet))  # keep letters only
    tweet = TextBlob(tweet).correct()  # spelling correction
    tweet = re.sub(r'[\s]+', ' ', str(tweet))  # collapse runs of whitespace
    tweet = tweet.strip(' ')  # trim spaces from both ends
    tweet = tweet.split()
    ps = PorterStemmer()  # strips suffixes such as "ing", "ly", "s"
    tweet = ' '.join(ps.stem(word) for word in tweet)  # stem each token

    length = len(tweet.split())

    if length != 0:
        fp2.write(sentiment + '\n')  # label line
        fp1.write(tweet + '\n')      # cleaned-tweet line
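
preprocessing() writes through module-level handles fp1/fp2 and declares a global count, so some setup has to exist before it is called. A minimal setup sketch; the output paths clean_tweets.txt and labels.txt are assumptions, not from the original:

count = 0
fp1 = open('clean_tweets.txt', 'w', encoding='utf-8')  # hypothetical cleaned-text file
fp2 = open('labels.txt', 'w', encoding='utf-8')        # hypothetical label file

preprocessing("I don't like delays!! http://t.co/xyz @airline #late", 'negative')

fp1.close()
fp2.close()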
Example #3
from textblob import TextBlob

# Bucket each trigram into a positive, negative, or neutral file according to
# the polarity of its English translation.
with open("SentiPhraseNet/Train_Test_Unknown_Trigrams.txt", 'r', encoding='utf-8') as f1, \
     open("dynmi_pos_tri.txt", 'a', encoding='utf-8') as f2, \
     open("dynmi_neg_tri.txt", 'a', encoding='utf-8') as f3, \
     open("dynmi_neu_tri.txt", 'a', encoding='utf-8') as f4:
    for line in f1:
        blob = TextBlob(line)
        tran_blob = blob.translate(to='en')  # needs a textblob release that still ships translate()
        pol = tran_blob.sentiment.polarity
        print(blob, pol)
        row = str(blob.strip('\n')) + "\t" + str(tran_blob) + "\t" + str(pol) + "\n"
        if pol > 0.0:
            f2.write(row)
        elif pol < 0.0:
            f3.write(row)
        else:
            f4.write(row)
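
The loop above has no guard, so the first line translate() refuses (textblob raises NotTranslated when the translated text comes back identical to the input) aborts the whole run. A hedged helper sketch, using the exception names from textblob.exceptions:

from textblob import TextBlob
from textblob.exceptions import NotTranslated, TranslatorError

def safe_polarity(line):
    # Return the translated polarity, or None when the service balks.
    try:
        return TextBlob(line).translate(to='en').sentiment.polarity
    except (NotTranslated, TranslatorError):
        return None  # line was already English, empty, or the request failed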
Example #4
import re
from rake_nltk import Rake
from textblob import TextBlob

# answer() also expects an `nlp` spaCy pipeline plus the project helpers
# paraphrase() and pos(); see the scaffolding sketch after this example.

def answer(input, filename):
    temp = []
    contents = []
    cindex = -1
    if filename:
        with open(filename, 'r') as f:
            data_in = f.read()
    else:
        data_in = input
        data_in = "1.0 Placeholder\n" + data_in
    
    data_in = re.sub(r'\n{2,}', '\n', data_in)  # collapse runs of blank lines
    match = re.search('Summary', data_in)
    if match:
        data_in = data_in[:match.start()]  # drop everything from "Summary" on
    match = re.search(r'[0-9]{1,2}\.1', data_in)
    if match:
        data_in = data_in[match.start():]  # start at the first "N.1" heading

    data_in = re.sub(r"\b(FIG)\.\s[0-9\.]{2,}s?\b", "", data_in, flags=re.IGNORECASE)  # strip figure references
    data_in = re.sub(r"(^|\s)(I|(II)|(III)|(IV)|(V)|(VI)|(VII)|(VIII)|(IX)|X)($|\s)", "",
                     data_in, flags=re.IGNORECASE)  # strip stray Roman numerals

    
    paras = data_in.split("\n")
    paras = [i.strip() for i in paras if i != ""]

    for ele in paras:
        if re.search(r"^[0-9]{1,2}\.[0-9]", ele):  # a new "N.M" section heading
            cindex += 1
            contents.append([])
        else:
            if cindex == 0:
                continue  # body of the first numbered section is skipped
            contents[cindex].append(ele)

    if input:
        contents = [[input]]  # a raw input string overrides the parsed sections

    for text in contents:
        if input:
            d = nlp(str(input))  # spaCy NER pass
            temp_text = str(input)
            land = [(x.text, x.label_) for x in d.ents]
            final = [ent for ent, label in land if label in ("PERSON", "LAW")]
            for i in final:
                toappend_text = temp_text.replace(i, "____________")  # blank out the entity
                temp.append((toappend_text, i))
        global output
        output = set()
        if len(text) == 0:
            continue
        text = ' '.join(text)
        text = re.sub(r'\’', '', text)  # strip curly apostrophes
        text = text.lower()
        text = TextBlob(text)
        par_text = TextBlob("")
        for sentence in text.sentences:
            par_text += paraphrase(sentence)  # project-specific rewriter
            par_text += " "

        par_text = str(par_text.strip())
        r = Rake()
        r.extract_keywords_from_text(par_text)
        par_text = TextBlob(par_text)
        counter = 0
        for i in r.get_ranked_phrases():  # top-ranked keyphrases first
            counter += 1
            if counter > 20:
                break
            for sentence in par_text.sentences:
                if i in sentence:
                    pos(sentence, i)  # emits (question, answer) pairs into `output`

        temp.extend(output)  # collect the generated (question, answer) pairs
    return temp
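
answer() leans on several names defined elsewhere in the project: an nlp spaCy pipeline, a paraphrase() rewriter, a pos() helper that fills the global output set, and Rake from rake_nltk. A scaffolding sketch with stub implementations; the stubs only illustrate the expected contracts, not the project's real logic:

import spacy
from rake_nltk import Rake  # pip install rake-nltk

nlp = spacy.load('en_core_web_sm')  # assumes the small English model is installed

output = set()

def paraphrase(sentence):
    # Stub: the real helper rewrites the sentence; identity keeps the sketch runnable.
    return sentence

def pos(sentence, phrase):
    # Stub: blank out the keyphrase and record a (question, answer) pair.
    output.add((str(sentence).replace(phrase, '_' * 12), phrase))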
Example #5
    def Getsearch(self):
        # Fetch tweets matching the search box, clean them, then display and save them.
        auth = OAuthHandler(self.API_KEY, self.API_SECRET_KEY)
        auth.set_access_token(self.ACESS_TOKEN_KEY,
                              self.ACESS_TOKEN_SECRET_KEY)
        api = tweepy.API(auth)
        searchTerm = str(self.search1.get())
        NoOfTerms = int(self.search2.get())
        tweets = tweepy.Cursor(api.search, q=searchTerm,
                               lang="en").items(NoOfTerms)
        fp1 = open("tweets.csv", 'w')
        xx = []
        count = 1
        for tweet in tweets:
            xx.append(tweet.text)
        for tweet in xx:
            tweet = re.sub(r'(www\.[^\s]+)', '', str(tweet))  # remove URLs
            tweet = re.sub(r'https?:\/\/.*\/\w*', '', str(tweet))  # remove hyperlinks
            tweet = re.sub(r'&\w*', '', str(tweet))  # remove entities such as &amp;
            tweet = re.sub(r'@[^\s]+', '', str(tweet))  # remove @mentions
            tweet = re.sub(r'#\w*', '', str(tweet))  # remove hashtags
            tweet = re.sub(r'\$\w*', '', str(tweet))  # remove tickers
            tweet = tweet.strip(' ')  # trim spaces from both ends
            tweet = tweet.lower()  # fold to lower case
            negations_dic = {
                "isn't": "is not",
                "aren't": "are not",
                "wasn't": "was not",
                "weren't": "were not",
                "haven't": "have not",
                "hasn't": "has not",
                "hadn't": "had not",
                "won't": "will not",
                "wouldn't": "would not",
                "don't": "do not",
                "doesn't": "does not",
                "didn't": "did not",
                "can't": "can not",
                "couldn't": "could not",
                "shouldn't": "should not",
                "mightn't": "might not",
                "mustn't": "must not"
            }
            t = re.compile(r'\b(' + '|'.join(negations_dic.keys()) + r')\b')
            tweet = t.sub(lambda x: negations_dic[x.group()], str(tweet))
            tweet = re.sub(r'[^a-zA-Z]', ' ', str(tweet))  # keep letters only
            tweet = TextBlob(tweet).correct()  # spelling correction

            tweet = re.sub(r'\b\w{1,2}\b', '', str(tweet))  # drop words of 1-2 letters
            tweet = re.sub(r'[\s]+', ' ', str(tweet))  # collapse runs of whitespace
            tweet = tweet.strip(' ')  # trim spaces from both ends
            tweet = tweet.split()
            ps = PorterStemmer()  # strips suffixes such as "ing", "ly", "s"
            tweet = ' '.join(ps.stem(word) for word in tweet)  # stem each token

            tweetx = "\n" + tweet + "\n"
            self.TxtBox.insert(0.0, tweetx)
            count = count + 1
            fp1.writelines(tweet + '\n')
        fp1.close()
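
Getsearch() is a method on a Tkinter GUI class; the credential attributes (API_KEY, ACESS_TOKEN_KEY, and so on) are set elsewhere on that class, and the method assumes the module-level imports below. API.search was renamed API.search_tweets in tweepy 4.0, so the example as written targets tweepy 3.x; the shim at the end is a hedged sketch for running it on newer tweepy:

import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob
from nltk.stem import PorterStemmer

# Hedged compatibility shim: restore the 3.x name on tweepy >= 4 so
# Cursor(api.search, ...) still resolves.
if not hasattr(tweepy.API, 'search'):
    tweepy.API.search = tweepy.API.search_tweets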