Example #1
logger.info("Trial4 Hurricane Matthew -- emotion cv")
logger.info("\n")

#pre_trained_fasttext_model = "cc.en.bin.300"

trainingFile = 'train.txt'
trainLabelsFile = 'train_labels.txt'

validationFile = 'validation.txt'
validationLabelsFile = 'validation_labels.txt'

# Run run_fasttext.py to create feature vectors 
# Run run_process_vec.py to add commas to feature vectors
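# run_process_vec.py is not included in this example; a minimal, hypothetical
# sketch of the "add commas" step it presumably performs (assuming the raw
# fastText output is whitespace-separated) could look like this:
def add_commas_to_vectors(vec_path, out_path):
    """Rewrite a whitespace-separated vector file as comma-separated values."""
    with open(vec_path) as src, open(out_path, 'w') as dst:
        for line in src:
            dst.write(','.join(line.split()) + '\n')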

trainingResultFile = fasttext(trainingFile)
trainFeatures = np.genfromtxt(trainingResultFile)
trainFeatures = trainFeatures.tolist()
trainLabels = np.genfromtxt(trainLabelsFile)

# Second copy of the fastText features; the emotion values are appended to these rows
trainFeatures_withemotion = np.genfromtxt(trainingResultFile)
trainFeatures_withemotion = trainFeatures_withemotion.tolist()

train_emotion_embeddings = get_emotion_embeddings(trainingFile)

# Append each emotion-embedding value to the corresponding tweet's fastText features
i = 0
for tweetFeature in train_emotion_embeddings:
    for val in tweetFeature:
        trainFeatures_withemotion[i].append(val)
    i += 1
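
The element-by-element loop above can also be expressed with numpy. A minimal
equivalent sketch, assuming get_emotion_embeddings returns one fixed-length
vector per training tweet:

# Column-wise concatenation of the fastText features and the emotion embeddings,
# giving one combined feature row per tweet (same result as the loop above).
train_emotion_matrix = np.asarray(train_emotion_embeddings)
trainFeatures_withemotion = np.hstack(
    [np.genfromtxt(trainingResultFile), train_emotion_matrix]
).tolist()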
Example #2
import numpy as np
from numpy import genfromtxt
from logisticRegression_cv import lr
from run_fasttext import fasttext

train_source = np.genfromtxt(fasttext('train.txt'))
train_source_labels = np.genfromtxt('train_labels.txt')
validation_source = np.genfromtxt(fasttext('validation.txt'))
validation_source_labels = np.genfromtxt('validation_labels.txt')

# Optionally pool the train and validation splits into a single dataset
# (see the numpy-based sketch after this example):
# data_source = []
# for x in train_source:
#     data_source.append(x)
# for x in validation_source:
#     data_source.append(x)

# data_source_labels = []
# for y in train_source_labels:
#     data_source_labels.append(y)
# for y in validation_source_labels:
#     data_source_labels.append(y)

scores = lr(train_source, train_source_labels, validation_source,
            validation_source_labels)
print(scores)
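
The commented-out merge above can be done directly with numpy. A minimal sketch,
assuming a pooled run is wanted; whether logisticRegression_cv.lr accepts the
pooled arrays in this form is not shown here:

# Stack the train and validation feature matrices row-wise and concatenate the
# corresponding label vectors in the same order.
data_source = np.concatenate([train_source, validation_source], axis=0)
data_source_labels = np.concatenate([train_source_labels, validation_source_labels], axis=0)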
Example #3
import numpy as np
from cosineSim import cosineSimilarity
from run_fasttext import fasttext
#from augment_synonyms import augment
#from lemmatize import lemm

# from flair.models import TextClassifier
# from flair.data import Sentence
# classifier = TextClassifier.load('en-sentiment')

# positiveTweets_afteraugment = augment('positive_examples.txt', 'tweets_augment.txt')
# positiveTweets_afteraugment = 'positive_examples_augment.txt'

# positiveTweets_afterlemma = lemm(positiveTweets_afteraugment, 'positive_examples_augment_lemma.txt')
# positiveTweets_afterlemma = 'positive_examples_augment_lemma.txt'
positiveFeatures = np.genfromtxt(fasttext('positive_examples.txt'))
positiveFeatures = positiveFeatures.tolist()

##############################################################################
# positive_file = open('positive_examples.txt', 'r')
# positiveTweetList = positive_file.readlines()

# i = 0
# for tweet in positiveTweetList:
#     sentence = Sentence(tweet)
#     classifier.predict(sentence)
#     if(sentence.labels[0].value == 'POSITIVE'):
#         score = sentence.labels[0].score
#     else:
#         score = 0 - sentence.labels[0].score
#     positiveFeatures[i].append(score)
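
The commented-out flair loop above is cut off in this example. A self-contained
sketch of the same idea (a signed sentiment score appended as one extra feature
per tweet), assuming the flair 'en-sentiment' model and one tweet per line in
positive_examples.txt:

from flair.models import TextClassifier
from flair.data import Sentence

classifier = TextClassifier.load('en-sentiment')

with open('positive_examples.txt') as positive_file:
    positiveTweetList = positive_file.readlines()

for i, tweet in enumerate(positiveTweetList):
    sentence = Sentence(tweet)
    classifier.predict(sentence)
    label = sentence.labels[0]
    # Keep the confidence for POSITIVE predictions, negate it otherwise.
    score = label.score if label.value == 'POSITIVE' else -label.score
    positiveFeatures[i].append(score)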