def contrastingFeatures(words):
    """Build six contrast features for a space-separated string of words.

    Each token is scored with ``getAffect`` and ``getSentiStrength``.
    Adjacent tokens are concatenated (no separator) into bigram and trigram
    keys, which are looked up in the module-level ``bidict`` and ``tridict``
    tables.

    Returns a list in this order:
    ``[delta_affect, delta_sentiment, poscount, possum, negcount, negsum]``.
    The two deltas are max-minus-min of the per-token scores; the pos/neg
    count and sum tally the n-gram table entries whose value is ``> 0``
    versus those whose value is not.
    """
    tokens = words.split(" ")

    # Score token by token so the two scorers are called in alternation.
    affect_values = []
    senti_values = []
    for token in tokens:
        affect_values.append(getAffect(token))
        senti_values.append(getSentiStrength(token))

    # Keys are plain string concatenations of neighbouring tokens.
    pair_keys = [a + b for a, b in zip(tokens, tokens[1:])]
    triple_keys = [a + b + c for a, b, c in zip(tokens, tokens[1:], tokens[2:])]

    poscount = 0
    possum = 0
    negcount = 0
    negsum = 0

    for key in pair_keys:
        if key not in bidict:
            continue
        if bidict[key] > 0:
            possum += float(bidict[key])
            poscount += 1
        else:
            negsum += float(bidict[key])
            negcount += 1

    for key in triple_keys:
        if key not in tridict:
            continue
        if tridict[key] > 0:
            possum += float(tridict[key])
            poscount += 1
        else:
            negsum += float(tridict[key])
            negcount += 1

    delta_affect = max(affect_values) - min(affect_values)
    delta_sentiment = max(senti_values) - min(senti_values)
    return [delta_affect, delta_sentiment, poscount, possum, negcount, negsum]
def bigram_model(sentences):
    """Estimate bigram transition probabilities from whitespace-split sentences.

    Each sentence is split on whitespace and turned into bigrams with
    ``ngrams(..., 2, pad_left=True, pad_right=True)``.

    Returns a 2-tuple:
      * a nested dict ``{w1: {w2: P(w2 | w1)}}`` where each inner dict's
        values are the follower counts divided by their total, and
      * a plain dict mapping each ``(w1, w2)`` tuple to its raw count.
    """
    transitions = {}
    seen_pairs = []

    for sentence in sentences:
        padded_pairs = ngrams(sentence.split(), 2, pad_left=True, pad_right=True)
        for first, second in padded_pairs:
            seen_pairs.append((first, second))
            followers = transitions.setdefault(first, {})
            followers[second] = followers.get(second, 0) + 1

    # Turn each row of follower counts into relative frequencies.
    for followers in transitions.values():
        row_total = float(sum(followers.values()))
        for second in followers:
            followers[second] /= row_total

    return transitions, dict(Counter(seen_pairs))
def gen_bigrams(tokens):
    """Return the consecutive bigrams of *tokens* as a list of 2-tuples.

    Args:
        tokens: A sliceable sequence (list, tuple, str, ...) of items.

    Returns:
        ``[(tokens[0], tokens[1]), (tokens[1], tokens[2]), ...]``; an empty
        list when *tokens* has fewer than two items.
    """
    # Pairing the sequence with itself shifted by one yields every adjacent
    # pair; zip stops at the shorter input, so no index bookkeeping is needed.
    return list(zip(tokens, tokens[1:]))
def get_bigrams(self):
    """Return every tuple from ``self.get_bigrams_tuple()`` joined by a single space."""
    return [" ".join(pair) for pair in self.get_bigrams_tuple()]
# Normalise each entry of `temp` into an underscore-joined name and register
# every name not already in `bigrams` as a column in `outputfile` and as a
# numeric attribute in `outputfile1`.
# NOTE(review): the replace chain looks designed for the text form of a
# 2-tuple such as "('a', 'b')" -- confirm against where `temp` is built.
for i in temp:
    # Removing unnecessary items!
    # Order matters: existing spaces are dropped first, so the only spaces
    # left after the comma step are the ones that replaced commas.
    i = i.replace(" ", "")
    i = i.replace(",", " ")
    i = i.replace("'", "")
    i = i.replace("(", "")
    i = i.replace(")", "")
    # NOTE(review): `bg` is not read anywhere in the visible lines.
    bg = i.split()
    i = i.replace(" ", "_")
    if i not in bigrams:
        # Append Bigrams
        bigrams.append(str(i))
        # Fill Bigrams as attributes in tsv file
        outputfile.write(str(i))
        outputfile.write("\t")
        # Adding attributes in arff file
        outputfile1.write("@attribute " + str(i) + " Numeric \n")
# After all bigram attributes: declare the class-label attribute in the arff
# file, then end the header row / attribute section in each file.
outputfile1.write("@attribute classlabel {yes,no,CANNOT_DECIDE} \n")
outputfile.write("\n")
outputfile1.write("\n")
# Writing @data line in .arff file!
outputfile1.write("@data\n")