Beispiel #1
0
######################################################################################
# allTweets_file = open('tweets_negative.txt', 'r')
# allTweetsList = allTweets_file.readlines()

# i = 0
# for tweet in allTweetsList:
# sentence = Sentence(tweet)
# classifier.predict(sentence)
# if(sentence.labels[0].value == 'POSITIVE'):
# score = sentence.labels[0].score
# else:
# score = 0 - sentence.labels[0].score
# allTweetsFeatures[i].append(score)
# i += 1

# Print, one per line, the value cosineSimilarity() yields for every entry of
# posTweetsFeatures when compared against positiveFeatures.
for tweet in posTweetsFeatures:
    similarity = cosineSimilarity(tweet, positiveFeatures)
    print(similarity)
######################################################################################

#negTweets_afteraugment = augment('tweets_negative.txt', 'tweets_augment_neg.txt')
#negTweets_afteraugment = 'tweets_augment_neg.txt'
#negTweets_afterlemma = lemm(negTweets_afteraugment, 'tweets_augment_lemma_neg.txt')
#print('6')
#negTweetsFeatures = np.genfromtxt(fasttext(negTweets_afterlemma))
# Run fasttext() on the raw negative tweets file, parse whatever it returns
# with np.genfromtxt, and keep the result as plain Python lists.
# NOTE(review): np.genfromtxt returns a 1-D array when the parsed input holds
# a single row, in which case the loop below would iterate over scalars
# instead of per-tweet vectors -- confirm the input always has several rows.
negTweetsFeatures = np.genfromtxt(fasttext('tweets_negative.txt')).tolist()

# Print, one per line, the value cosineSimilarity() yields for every negative
# tweet entry when compared against positiveFeatures.
for tweet in negTweetsFeatures:
    similarity = cosineSimilarity(tweet, positiveFeatures)
    print(similarity)
Beispiel #2
0
    aux_emotion_embeddings = get_emotion_embeddings('currentIterTweets.txt')

    i = 0
    for tweetFeature in aux_emotion_embeddings:
        for val in tweetFeature:
            currentIterFeat_withemotion[i].append(val)
        i+=1
        
    iterationLabels = []
    iterationTweets = []
    iterationTweets_withemotion = []
    positiveFeat = []
    negativeFeat = []
    count = 0
    for tweetFeature in currentIterFeat:
        meancosineSim = cosineSimilarity(tweetFeature, positiveTrainingFeatures) 
        #print(meancosineSim)
        #print(tweetFeature)
        #print(meancosineSim)
        if(meancosineSim > minSimilarityThreshold):
            iterationLabels.append(1.0)
            iterationTweets.append(tweetFeature)
            iterationTweets_withemotion.append(currentIterFeat_withemotion[count])
            positiveFeat.append(tweetFeature)
            logger.info(str(1) + ' ' +  str(meancosineSim) + ' ' + currentIterTweets[count])
            #print('yes')
            #print(iterationLabels)
        elif meancosineSim > minSimilarityThreshold - similarityWindowSize:
            #print('pass')
            logger.info(str(-1) + ' ' +  str(meancosineSim) +' ' + currentIterTweets[count])
            pass
 fout.close()
 #log and write to auxTweets
 
 #auxiliaryDataFile = fasttext('auxTweets.txt')
 #currentIterFeat = np.genfromtxt(auxiliaryDataFile)
 
 currentIterFeat = bert('auxTweets.txt', document_embeddings)
 
 iterationLabels = []
 iterationTweets = []
 #iterationTweets_bert = []
 positiveFeat = []
 negativeFeat = []
 count = 0
 for tweetFeature in currentIterFeat:
     poscosineSim = cosineSimilarity(tweetFeature, positiveTrainingFeatures) 
     negcosineSim = cosineSimilarity(tweetFeature, negativeTrainingFeatures)
     #print(meancosineSim)
     #print(tweetFeature)
     #print(meancosineSim)
     # if abs(poscosineSim - negcosineSim) < 0.05:
         # #print('pass')
         # logger.info(str(-1) + ' ' +  str(meancosineSim) +' ' + currentIterTweets[count])
         # pass
     if poscosineSim > negcosineSim:
         iterationLabels.append(1.0)
         iterationTweets.append(tweetFeature)
         #iterationTweets_bert.append(currentIterFeat_bert[count])
         positiveFeat.append(tweetFeature)
         logger.info(str(1) + ' ' +  str(poscosineSim) + ' ' + str(negcosineSim) + ' ' + currentIterTweets[count])
         #print('yes')