Example #1
def buildTestData():
    xtest = []
    sp = SenticPhrase('')

    for i, data in enumerate(testDS):
        # lexicon score over a window of 5 tokens to the left and right of the aspect
        lexScore = 0
        window = 5
        atPos = int(testSents[i][1].rsplit('-', maxsplit=1)[1])
        for lind in range(atPos - window, atPos):
            if lind >= 0:
                if data[1][lind] in posLex:
                    lexScore += 1
                elif data[1][lind] in negLex or data[1][lind] in negWords:
                    lexScore -= 1
        
        for lind in range(atPos + 1, atPos + window + 1):
            if lind >= len(data[1]):
                break
            else:
                if data[1][lind] in posLex:
                    lexScore += 1
                elif data[1][lind] in negLex or data[1][lind] in negWords:
                    lexScore -= 1         

        senScore = sp.get_polarity(testSents[i][0])        
        protoVec = np.append(testXtidf[i].A[0], [lexScore, senScore])
        
        xtest.append(protoVec)

    xtest = np.array(xtest)
    return xtest
Example #2
def buildTrainingData():
    X1 = []
    yForSk = []

    sp = SenticPhrase('')

    for i, data in enumerate(dataset):
        # lexicon score with a window of 2 tokens to the left and right of the aspect
        lexScore = 0
        window = 2
        atPos = int(sentences[i][1].rsplit('-', maxsplit=1)[1])
        for lind in range(atPos - window, atPos):
            if lind >= 0:
                if data[1][lind] in posLex:
                    lexScore += 1
                elif data[1][lind] in negLex or data[1][lind] in negWords:
                    lexScore -= 1
        
        for lind in range(atPos + 1, atPos + window + 1):
            if lind >= len(data[1]):
                break
            else:
                if data[1][lind] in posLex:
                    lexScore += 1
                elif data[1][lind] in negLex or data[1][lind] in negWords:
                    lexScore -= 1
        
        senScore = sp.get_polarity(sentences[i][0])        
        protoVec = np.append(Xtidf[i].A[0], [lexScore, senScore])
        
        X1.append(protoVec)
        yForSk.append(data[-1])

    X1 = np.array(X1)
    yForSk = np.array(yForSk)
    return X1, yForSk
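
Both builders above compute the same windowed lexicon score: +1 for each positive-lexicon hit and -1 for each negative hit within a few tokens to either side of the aspect position. A minimal standalone sketch of just that step (the helper name and the toy lexicons are illustrative, not part of the original code):

def window_lex_score(tokens, at_pos, window, pos_lex, neg_lex, neg_words=()):
    # +1 for positive-lexicon tokens, -1 for negative-lexicon or negation tokens,
    # looking at most `window` tokens to the left and right of the aspect at `at_pos`
    score = 0
    for lind in range(max(0, at_pos - window), min(len(tokens), at_pos + window + 1)):
        if lind == at_pos:
            continue
        if tokens[lind] in pos_lex:
            score += 1
        elif tokens[lind] in neg_lex or tokens[lind] in neg_words:
            score -= 1
    return score

# toy lexicons, purely for illustration
tokens = ["the", "battery", "life", "is", "great", "but", "support", "is", "awful"]
print(window_lex_score(tokens, at_pos=1, window=3, pos_lex={"great"}, neg_lex={"awful"}))  # -> 1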
Example #3
def sentyment(wordlist):
    sp = SenticPhrase(wordlist)
    sentiment = sp.info(wordlist)['sentiment']
    if sentiment == 'strong negative':
        text_sentiment = -2
    elif sentiment == 'weak negative':
        text_sentiment = -1
    elif sentiment == 'neutral':
        text_sentiment = 0
    elif sentiment == 'weak positive':
        text_sentiment = 1
    elif sentiment == 'strong positive':
        text_sentiment = 2
    else:
        # fall back to neutral for any unexpected label
        text_sentiment = 0
    return text_sentiment
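
sentyment() maps SenticPhrase's five sentiment labels onto integers from -2 to 2. A table-driven equivalent (behaviour as above, including the neutral fallback for an unexpected label; the dictionary and function name are not from the original code) could look like this:

SENTIMENT_TO_INT = {
    'strong negative': -2,
    'weak negative': -1,
    'neutral': 0,
    'weak positive': 1,
    'strong positive': 2,
}

def sentyment_lookup(wordlist):
    # same mapping as sentyment(), expressed as a dictionary lookup
    sp = SenticPhrase(wordlist)
    return SENTIMENT_TO_INT.get(sp.info(wordlist)['sentiment'], 0)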
Example #4
def sentyki(wordlist):
    sp = SenticPhrase(wordlist)
    # missing keys default to 0, mirroring the KeyError fallbacks in the original lookups
    sentics = sp.info(wordlist).get('sentics', {})
    text_aptitude = sentics.get('aptitude', 0)
    text_pleasantness = sentics.get('pleasantness', 0)
    text_attention = sentics.get('attention', 0)
    text_sensitivity = sentics.get('sensitivity', 0)
    return text_aptitude, text_attention, text_pleasantness, text_sensitivity
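
A hypothetical convenience wrapper (the name and the feature order are not from the original code) that packs the outputs of sentyment() and sentyki() into a single flat feature list, in the same spirit as get_sentics() further below:

def sentic_features(wordlist):
    # overall sentiment label mapped to an integer, followed by the four sentic dimensions
    aptitude, attention, pleasantness, sensitivity = sentyki(wordlist)
    return [sentyment(wordlist), aptitude, attention, pleasantness, sensitivity]

# e.g. sentic_features("a quiet, thoughtful film"); the values depend on the SenticNet release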
Example #5
            results[1] = results[1].union(
                set([ancestor.lemma_.lower() for ancestor in token.ancestors if len(ancestor) > 2]).union(
                    set([child.lemma_.lower() for child in token.head.children if child.text != token.text and len(child) > 2])
                )
            )
            results[4] += 1
        elif token.lemma_.lower() in hedging_words:
            results[2] = results[2].union(
                set([ancestor.lemma_.lower() for ancestor in token.ancestors if len(ancestor) > 2]).union(
                    set([child.lemma_.lower() for child in token.head.children if child.text != token.text and len(child) > 2])
                )
            )
            results[5] += 1
    return tuple(results)

# module-level instance; sp.info() is called below with the actual text to analyse
sp = SenticPhrase("Hello, World!")

def get_sentics(sent):
    """
        input: Spacy processed sentence
        output: a tuple containing the polarity score and a list of sentic values 
            (pleasantness, attention, sensitiviy, aptitude )
    """
    info = sp.info(sent)
          
    # Sometimes sentic doesn't returns any sentics values, seems to be only when purely neutral. 
    # Some sort of tag to make sure this is true could help with classiciation! (if all 0's not enough)
    sentics = {"pleasantness":0, "attention":0, "sensitivity":0, "aptitude":0}
    sentics.update(info["sentics"])
    return [info['polarity'], sentics['aptitude'], sentics['attention'], sentics['sensitivity'], sentics['pleasantness']]
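
A brief usage sketch of get_sentics() (the sentence is illustrative; whether sp.info() needs raw text or also accepts a spaCy object is an assumption here, so the sketch passes plain text):

features = get_sentics("The staff were friendly and the room was spotless.")
polarity, aptitude, attention, sensitivity, pleasantness = features
# actual values depend on the SenticNet release backing SenticPhrase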
Example #6
x_train_pad = pad_sequences(x_train_tokens,
                            maxlen=max_tokens,
                            padding=pad,
                            truncating=pad)
x_test_pad = pad_sequences(x_test_tokens,
                           maxlen=max_tokens,
                           padding=pad,
                           truncating=pad)

x_train_pad[0]

# In[11]:

# Create a polarity array for the training comments

x_train_polarityScore = []
for comment in x_train:
    comment = SenticPhrase(comment)
    x_train_polarityScore.append(comment.get_polarity())

x_train_polarityScore = np.array(x_train_polarityScore)

# In[12]:

x_test_polarityScore = []
for comment_test in x_test:
    comment_test = SenticPhrase(comment_test)
    try:
        x_test_polarityScore.append(comment_test.get_polarity())
    except Exception:
        # fall back to neutral polarity when SenticPhrase fails on a comment
        x_test_polarityScore.append(0)

x_test_polarityScore = np.array(x_test_polarityScore)
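
The train and test loops above differ only in the error guard. A small helper (the name is not from the notebook; it reuses SenticPhrase and np as imported there) applies the same guarded polarity lookup to any list of comments and keeps both cells symmetric:

def polarity_scores(comments):
    # SenticPhrase polarity per comment, falling back to 0 when the lookup fails
    scores = []
    for comment in comments:
        try:
            scores.append(SenticPhrase(comment).get_polarity())
        except Exception:
            scores.append(0)
    return np.array(scores)

# x_train_polarityScore = polarity_scores(x_train)
# x_test_polarityScore = polarity_scores(x_test)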
            else:
                dict[name] += 1
        for i in range(1,len(crew1)):
            for name in crew1[i]:
                if name not in dict:
                    dict[name] = 1
                else:
                    dict[name] += 1
        crew1 = []
        for key in dict:
            if dict[key]>=1:
                crew1.append(key)
        crew.append(crew1)

        # accumulate SenticPhrase polarity over this movie's review texts; the mean is appended below
        for i in range(len(movie_content)):
            sp = SenticPhrase(movie_content[i])
            pol += sp.get_polarity()
            n += 1
        polarity.append(pol/n)
    else:
        polarity.append(0.00)
        crew.append([])
print(crew)
print("\n\n",len(crew),len(title))
print('\n\n\n\n\n\t\t\t\t\t\t\t\tMOVIE REVIEW')
for i in range(len(title)):
    print('Title:    ',title[i])
    print('Stars:    ',stars[i])
    print('Polarity: ',polarity[i])
    print('Crew:\n')
    for name in crew[i]: