def getDW(self, listOfDicts):
    """Feature: number of difficult words in each row's answerBody.

    Computes `difficult_words` for every answer body and folds the list of
    counts through self.assignValue (feature-scaling step defined elsewhere).
    """
    ts = readability.TextStatistics()
    difficult_counts = [ts.difficult_words(row['answerBody']) for row in listOfDicts]
    return self.assignValue(difficult_counts)
 def getRead(
         self,
         listOfDicts):  #using readabilty index of answerBody as a feature
     listOfRead = []
     ts = readability.TextStatistics()
     for row in listOfDicts:
         listOfRead.append(ts.text_standard(row['answerBody']))
     return self.assignValue(listOfRead)
import information
import textProp
import numpy as np
from sklearn.svm import SVC

# Containers for the SVM training set, filled row-by-row in the loop below.
features_train = []
labels_train = []

# Materialize the dataset, then work on a copy of its rows ordered by
# question id (sorted() leaves dataset.data itself untouched).
dataset.createDataset()
data = sorted(dataset.data, key=lambda k: k['qId'])

count = 0
for row in data:
    featureVector = []
    ts = readability.TextStatistics()
    tp = textProp.TextProperties()
    inf = information.Informativity()
    totalEntropy = 0
    model, stats = inf.markov_model(inf.chars(row['answerBody']), 3)
    for prefix in stats:
        totalEntropy = totalEntropy + inf.entropy(stats, stats[prefix])
    featureVector.append(abs(totalEntropy))  # information
    featureVector.append(
        tp.relevancy(row['answerBody'], row['qBody'],
                     row['qTags']))  # relevancy
    featureVector.append(tp.UniqueWords(row['answerBody']))  # unique
    featureVector.append(tp.NonstopWords(row['answerBody']))  # nonstop
    featureVector.append(tp.subjective(row['answerBody']))  # subjectivity
    featureVector.append(row['answerScore'] * 100)  # answerScore
    featureVector.append(row['answererDownvotes'])  # downvotes