# Loads the lexical resources (stop words, slang table, AFINN lexicon,
# emoticon dictionary) and builds the per-class most-frequent-word lists.
import numpy as np
import nltk  # for pos tags

import features
import polarity
import ngramGenerator
import preprocessing

# Hyper-parameters; not consumed within this part of the script.
# NOTE(review): presumably the kernel and C value of an SVM trained later - confirm.
KERNEL_FUNCTION = 'linear'
C_PARAMETER = 0.6

# A parenthesised single string prints the same text on Python 2 and Python 3.
print("Initializing dictionnaries")
stopWords = preprocessing.getStopWordList('../resources/stopWords.txt')
slangs = preprocessing.loadSlangs('../resources/internetSlangs.txt')
afinn = polarity.loadAfinn('../resources/afinn.txt')
#sentiWordnet=polarity.loadSentiWordnet('../resources/sentiWordnetBig.csv')
emoticonDict = features.createEmoticonDictionary("../resources/emoticon.txt")

print("Bulding Bag of words ...")
# One word list per class; the second argument (3000) is passed straight to
# mostFreqList - presumably the number of words to keep, verify in ngramGenerator.
positive = ngramGenerator.mostFreqList('../data/used/positive1.csv', 3000)
negative = ngramGenerator.mostFreqList('../data/used/negative1.csv', 3000)
neutral = ngramGenerator.mostFreqList('../data/used/neutral1.csv', 3000)

# Drop from `positive` every word that also occurs in `negative` or `neutral`.
# The list is rebuilt in place through slice assignment rather than by calling
# positive.remove() inside a loop over `positive`: removing during iteration
# shifts the remaining items left and silently skips the element that follows
# each removal, so some shared words used to survive the filter.
# The concatenation is built once instead of on every iteration; it stays a
# list (not a set) so no hashability requirement is placed on the entries.
excluded = negative + neutral
positive[:] = [w for w in positive if w not in excluded]
# Cleans the raw positive tweets and writes one processed tweet per line.
import preprocessing
import sys

# to suppress warning - https://stackoverflow.com/questions/14463277/how-to-disable-python-warnings
import warnings

# Only silence warnings when the user passed no explicit -W option.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

stopWords = preprocessing.getStopWordList('../resources/stopWords.txt')
slangs = preprocessing.loadSlangs('../resources/internetSlangs.txt')

# `with` guarantees both handles are closed even if processing a tweet raises;
# the original closed them manually and leaked them on any exception.
with open('../data/sem/positive.tsv', 'r') as f, \
        open('../data/positive_processed.csv', 'w') as fo:
    for line in f:
        # The tweet text is the last tab-separated column of the row.
        columns = line.split('\t')
        # Strip only the line terminator. The previous unconditional [:-1]
        # also chopped a real character off a final line that had no
        # trailing newline.
        tweet = columns[-1].rstrip('\n')
        cleaned = preprocessing.processTweet(tweet, stopWords, slangs)
        filtered = preprocessing.removeStopWords(cleaned, stopWords)
        fo.write(filtered + '\n')

# A parenthesised single string prints the same text on Python 2 and Python 3.
print("positive samples processed")

# Handles for the negative samples. They are deliberately left open here:
# the statements that read from `f`, write to `fo` and close them are not
# part of this excerpt.
# NOTE(review): presumably the same pass as above is repeated for the
# negative samples - confirm against the rest of the script.
f = open('../data/sem/negative.tsv', 'r')
fo = open('../data/negative_processed.csv', 'w')