def make_twitter_scores_lexicon():
    """Build the Twitter score lexicon and write it to 'twitter-scores.json'.

    Every line of the MaxDiff Twitter lexicon file is split on whitespace:
    the first token is parsed as the float score and the second token is
    used as the term.  The term -> score mapping is handed to
    ``util.write_json``.
    """
    source_path = (constants.LEXICONS
                   + "twitter/MaxDiff-Twitter-Lexicon/Maxdiff-Twitter-Lexicon_-1to1.txt")
    scores = {}
    for raw in util.lines(source_path):
        fields = raw.split()
        term = fields[1]
        # Only keep single-token terms.
        # NOTE(review): `term` comes out of a whitespace split, so it can
        # never contain whitespace and this guard always passes -- confirm
        # whether splitting on a tab was intended.
        if len(term.split()) <= 1:
            scores[term] = float(fields[0])
    util.write_json(scores, constants.PROCESSED_LEXICONS + 'twitter-scores.json')
def make_qwn_scores_lexicon():
    """Accumulate per-word scores from the QWN dictionary into 'qwn-scores.json'.

    Each line is split on tabs.  The fourth column is parsed as a float and
    added to the running total of every word listed (", "-separated) in the
    third column.  Entries containing an underscore are ignored, and anything
    from the first "#" onward is dropped from the word before it is used as
    the key.
    """
    source_path = constants.LEXICONS + "qwn/turneyLittman_propSyn_08_mcr30-noAntGloss.dict"
    totals = collections.defaultdict(float)
    for raw in util.lines(source_path):
        columns = raw.split("\t")
        weight = float(columns[3])
        for entry in columns[2].split(", "):
            if "_" in entry:
                # Entries with underscores are skipped
                # (presumably multi-word expressions -- TODO confirm).
                continue
            totals[entry.split("#")[0]] += weight
    util.write_json(totals, constants.PROCESSED_LEXICONS + 'qwn-scores.json')
def make_kuperman_scores_lexicon():
    """Extract single-word ratings from the Kuperman CSV into 'kuperman.json'.

    The first line of the file is skipped.  Each remaining line is split on
    commas; when the second column is exactly one whitespace-delimited token
    it becomes the key and the third column, parsed as a float, its value.
    """
    rows = iter(util.lines(constants.LEXICONS + "kuperman/raw_ratings.csv"))
    next(rows, None)  # discard the first (header) line, if there is one
    ratings = {}
    for row in rows:
        cells = row.split(",")
        word = cells[1]
        if len(word.split()) != 1:
            continue
        ratings[word] = float(cells[2])
    util.write_json(ratings, constants.PROCESSED_LEXICONS + 'kuperman.json')
def make_140_scores_lexicon():
    """Convert the Sentiment140 unigram lexicon into '140-scores.json'.

    Each line is split on whitespace; the first token is the key and the
    second token, parsed as a float, is the value.  A token that appears on
    several lines keeps the value from its last occurrence.
    """
    source_path = constants.LEXICONS + "Sentiment140-Lexicon-v0.1/unigrams-pmilexicon.txt"
    token_rows = (raw.split() for raw in util.lines(source_path))
    scores = {tokens[0]: float(tokens[1]) for tokens in token_rows}
    util.write_json(scores, constants.PROCESSED_LEXICONS + '140-scores.json')
def make_bingliu_lexicon():
    """Build the Bing Liu opinion lexicon and write it to 'bingliu.json'.

    Reads 'positive-words.txt' and 'negative-words.txt' from the
    'bl_opinion_lexicon' directory.  Every non-empty line that does not
    start with ';' is stored as a word mapped to 1 (positive file) or -1
    (negative file).  A word present in both files keeps the negative label
    because the negative file is processed last.

    Non-ASCII characters are dropped from each line.  Lines whose text
    cannot be converted at all are reported with a "skipping" message and
    left out; any other error now propagates instead of being silently
    swallowed.
    """
    polarities = {}
    for polarity, label in (('positive', 1), ('negative', -1)):
        path = constants.LEXICONS + 'bl_opinion_lexicon/{:}-words.txt'.format(polarity)
        for line in util.lines(path):
            try:
                # Round-trip through ASCII so the key is text on both
                # Python 2 and 3 (a bare ``encode`` yields ``bytes`` on
                # Python 3, which breaks the ';' check and JSON keys).
                word = line.strip().encode('ascii', 'ignore').decode('ascii')
            except UnicodeError:
                # Python 2 byte strings with non-ASCII bytes fail the
                # implicit decode inside ``encode``; skip just those lines.
                print("skipping", line)
                continue
            if not word or word.startswith(';'):
                continue
            polarities[word] = label
    util.write_json(polarities, constants.PROCESSED_LEXICONS + 'bingliu.json')
def make_qwn_lexicon():
    """Build a signed polarity lexicon from the QWN dictionary ('qwn.json').

    Each line is split on tabs.  A line whose second column is "neg"
    contributes -1, any other line +1, to every word listed
    (", "-separated) in the third column.  Entries containing an underscore
    are ignored, and anything from the first "#" onward is dropped from the
    word.  Words whose contributions cancel to zero are omitted; the rest
    are mapped to the sign of their total via ``np.sign``.
    """
    totals = collections.defaultdict(float)
    for line in util.lines(constants.LEXICONS + "qwn/turneyLittman_propSyn_08_mcr30-noAntGloss.dict"):
        info = line.split("\t")
        mod = -1 if info[1] == "neg" else 1
        for word in info[2].split(", "):
            if "_" not in word:
                totals[word.split("#")[0]] += mod
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists only
    # on Python 2 and raises AttributeError on Python 3.
    polarities = {word: np.sign(val) for word, val in totals.items() if val != 0}
    util.write_json(polarities, constants.PROCESSED_LEXICONS + 'qwn.json')
def make_mpqa_lexicon():
    """Convert the MPQA subjectivity file into 'mpqa.json'.

    Each line is stripped and split on whitespace.  The word is the value
    after "=" in the third field and the polarity label is the value after
    "=" in the last field.  'neutral' maps to 0, 'positive' maps to 1, and
    every other label maps to -1.
    """
    label_values = {'neutral': 0, 'positive': 1}
    lexicon = {}
    for raw in util.lines(constants.LEXICONS + 'mpqa_subjectivity.txt'):
        fields = raw.strip().split()
        word = fields[2].split("=")[1]
        label = fields[-1].split("=")[1]
        # Any label other than 'neutral'/'positive' is treated as negative.
        lexicon[word] = label_values.get(label, -1)
    util.write_json(lexicon, constants.PROCESSED_LEXICONS + 'mpqa.json')
def make_finance_lexicon():
    """Convert 'finance.csv' into the 'finance.json' polarity lexicon.

    The first line of the CSV is skipped.  Each remaining line is split on
    commas and the lower-cased first column is used as the word.  The word
    is labelled -1 when column index 7 is not '0', otherwise 1 when column
    index 8 is not '0', otherwise 0.
    NOTE(review): columns 7 and 8 are presumably the negative and positive
    indicator columns -- confirm against the CSV header.
    """
    polarities = {}
    # Use a context manager so the file handle is closed even if a
    # malformed row raises part-way through.
    with open(constants.LEXICONS + "finance.csv") as fp:
        fp.readline()  # skip the first (header) line
        for line in fp:
            info = line.split(",")
            word = info[0].lower()
            if info[7] != '0':
                polarities[word] = -1
            elif info[8] != '0':
                polarities[word] = 1
            else:
                polarities[word] = 0
    util.write_json(polarities, constants.PROCESSED_LEXICONS + "finance.json")
def make_concreteness_lexicon(top=75, bottom=25):
    """Turn raw concreteness ratings into a three-way lexicon.

    Reads 'concreteness/raw_ratings.csv', skipping its first line.  For each
    remaining line the first comma-separated column is the word and the
    third column, parsed as a float, its score.  Rows whose first column
    contains more than one whitespace-delimited token are ignored.

    Words scoring strictly above the ``top`` percentile are labelled 1,
    words strictly below the ``bottom`` percentile are labelled -1, and
    everything else 0.  The result is written to 'concreteness.json'.

    Args:
        top: Percentile (as accepted by ``np.percentile``) above which a
            word is labelled 1.
        bottom: Percentile below which a word is labelled -1.
    """
    raw_scores = {}
    # Context manager guarantees the file handle is released.
    with open(constants.LEXICONS + "concreteness/raw_ratings.csv") as fp:
        fp.readline()  # skip the first (header) line
        for line in fp:
            info = line.split(",")
            if len(info[0].split()) > 1:
                continue
            raw_scores[info[0]] = float(info[2])

    # Materialise the values once: on Python 3 ``dict.values()`` is a view
    # that ``np.percentile`` cannot treat as a numeric array.
    scores = list(raw_scores.values())
    pos_thresh = np.percentile(scores, top)
    neg_thresh = np.percentile(scores, bottom)

    polarities = {}
    for word, score in raw_scores.items():
        if score > pos_thresh:
            polarities[word] = 1
        elif score < neg_thresh:
            polarities[word] = -1
        else:
            polarities[word] = 0
    util.write_json(polarities, constants.PROCESSED_LEXICONS + "concreteness.json")
def make_inquirer_lexicon():
    """Convert 'inquirerbasic.csv' into the 'inquirer.json' polarity lexicon.

    Each line is stripped and further split on '\\r', and each resulting
    record is split on commas.  The lower-cased first field is the word.
    When the word contains "#", only records whose part after the first "#"
    is "1" are kept, and the part before it is used as the word.

    The last field is compared with 'Negativ' and the second-to-last with
    'Positiv'.  Records matching both are skipped; otherwise the word is
    labelled -1 (negative), 1 (positive) or 0 (neither).
    """
    lexicon = {}
    for raw in util.lines(constants.LEXICONS + 'inquirerbasic.csv'):
        for record in raw.strip().split('\r'):
            fields = record.split(",")
            entry = fields[0].lower()
            if "#" in entry:
                pieces = entry.split("#")
                if pieces[1] != "1":
                    # Keep only the entry numbered "1" for a word.
                    continue
                entry = pieces[0]
            is_negative = fields[-1] == 'Negativ'
            is_positive = fields[-2] == 'Positiv'
            if is_negative and is_positive:
                # Records carrying both labels are left out entirely.
                continue
            if is_negative:
                lexicon[entry] = -1
            elif is_positive:
                lexicon[entry] = 1
            else:
                lexicon[entry] = 0
    util.write_json(lexicon, constants.PROCESSED_LEXICONS + 'inquirer.json')