def preprocessing(StemmedDict,fileName):
    """Fill ``StemmedDict`` with the stemmed text of each distinct record.

    Every line of ``fileName`` is stripped and passed through
    ``Cleaner.getProcessedData(line, 1)``. The record id is the first
    ``'\\x01'``-separated field of the cleaned line. For the first line seen
    with a given id, the whole cleaned line is stemmed and stored under that
    id; later lines carrying an id already seen in this call are ignored.

    Args:
        StemmedDict: Mapping mutated in place (id -> stemmed corpus). An
            existing entry is overwritten the first time its id appears in
            the file.
        fileName: Path of the input file, opened in text read mode.

    Returns:
        None. The result is delivered through ``StemmedDict``.
    """
    seen_ids = set()
    # The context manager closes the handle even if cleaning or stemming
    # raises part-way through the file.
    with open(fileName, 'r') as source:
        for raw_line in source:
            lineCleaned = Cleaner.getProcessedData(raw_line.strip(), 1)
            Id = lineCleaned.split('\x01')[0]
            if Id in seen_ids:
                # Duplicate id: the first occurrence wins.
                continue
            seen_ids.add(Id)
            # A fresh stemmer per stored record; none is built for
            # duplicates, whose result would be thrown away anyway.
            lineStem = StemmerClass.Stemmer()
            StemmedDict[Id] = lineStem.getStemmedCorpus(lineCleaned)
def createFeature(fileName, docRumourScore,docFactScore,classLabel):
    """Print one comma-separated feature row per line of ``fileName``.

    Each line is stripped, passed through ``Cleaner.getProcessedData(line, 1)``
    and split on ``'\\x01'``. The emitted columns are, in order:

    1. field 0 (the record id),
    2. ``(docRumourScore[id] - min) / max`` over all rumour scores,
    3. ``(docFactScore[id] - min) / max`` over all fact scores,
    4. ``0`` if field 3 equals ``'false'``, otherwise ``1``,
    5. ``0`` if field 4 equals ``'sd'``, otherwise ``1``,
    6. fields 6, 7, 8 and 9, each divided by field 5 (as floats),
    7. ``classLabel``.

    Args:
        fileName: Path of the input file, opened in text read mode.
        docRumourScore: Mapping of record id -> rumour score.
        docFactScore: Mapping of record id -> fact score.
        classLabel: Value appended (via ``str``) as the last column of
            every row.

    Returns:
        None. Rows are written to standard output.

    Raises:
        ValueError: If a score mapping is empty, or a numeric field cannot
            be parsed as a float.
        KeyError: If a record id is missing from either score mapping.
        IndexError: If a cleaned line has fewer than ten fields.
        ZeroDivisionError: If field 5 is zero, or a score maximum is zero.
    """
    # The context manager closes the handle on every exit path, including
    # the exceptions listed above.
    with open(fileName, 'r') as source:
        # values() rather than itervalues(): same result, and it also works
        # on mappings that do not provide the Python 2 iterator methods.
        rumour_scores = docRumourScore.values()
        fact_scores = docFactScore.values()
        rmax = max(rumour_scores)
        rmin = min(rumour_scores)
        lmax = max(fact_scores)
        lmin = min(fact_scores)

        for raw_line in source:
            lineCleaned = Cleaner.getProcessedData(raw_line.strip(), 1)
            data = lineCleaned.split('\x01')
            doc_id = data[0]
            rumorScore = docRumourScore[doc_id]
            factScore = docFactScore[doc_id]
            licence_flag = 0 if data[3] == 'false' else 1
            definition_flag = 0 if data[4] == 'sd' else 1
            views = float(data[5])

            # NOTE(review): scores are scaled by (x - min) / max, not the
            # usual min-max form (x - min) / (max - min). Kept as-is so the
            # emitted features do not change -- confirm this is intended.
            row = [
                doc_id,
                str((rumorScore - rmin) / rmax),
                str((factScore - lmin) / lmax),
                str(licence_flag),
                str(definition_flag),
            ]
            # Fields 6-9 are expressed relative to field 5.
            row.extend(str(float(data[i]) / views) for i in range(6, 10))
            row.append(str(classLabel))
            # Single-argument print(...) emits the same line whether it is
            # parsed as a statement or as a function call.
            print(','.join(row))