def load_data(self):
    """Load every author XML file under ``self.path`` with its truth label.

    Each directory entry other than ``truth.txt`` and ``.DS_Store`` is
    parsed as XML.  The root element's ``lang`` and ``id`` attributes seed a
    ``Document``; the text of each ``document`` child found by
    ``root.findall('document')`` is added to it.  The matching entry of
    ``self.truth`` (keyed by the root ``id``) is split on ``":::"`` into
    eight fields -- user, gender, age group, and the extroverted, stable,
    agreeable, conscientious and open scores -- which are used to build the
    ``Author`` attached to the document.

    Side effects:
        Sets ``self.X`` to the list of loaded documents and ``self.Y`` to
        the list of raw (unsplit) truth strings, in ``os.listdir`` order.
        Prints a progress line per file and a final completion line.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        KeyError: if a file's ``id`` has no entry in ``self.truth``.
        ValueError: if a truth entry does not have exactly eight fields or
            a trait score is not a valid float.
    """
    skipped = ('truth.txt', '.DS_Store')
    documents, labels = [], []

    for file_name in os.listdir(self.path):
        if file_name in skipped:
            continue

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("loading file -->" + file_name)

        root = ET.parse(os.path.join(self.path, file_name)).getroot()
        author_id = root.get('id')

        document = Document(language=root.get('lang'), name=author_id)
        for node in root.findall('document'):
            document.add_document(node.text)

        truth_line = self.truth[author_id]
        # The user field is unpacked only to validate the field count.
        (_user, gender, age_group, extroverted, stable,
         agreeable, conscientious, openness) = truth_line.split(":::")

        traits = PersonalityTraits(extroverted=float(extroverted),
                                   stable=float(stable),
                                   agreeable=float(agreeable),
                                   conscientious=float(conscientious),
                                   open=float(openness))
        document.author = Author(gender=gender, age_group=age_group,
                                 traits=traits)

        documents.append(document)
        labels.append(truth_line)

    print("done loading files")
    self.X = documents
    self.Y = labels
    return self
def run(self):
    """Predict author attributes for every XML file under ``self.path``.

    Each directory entry other than ``truth.txt`` and ``.DS_Store`` is
    parsed as XML and turned into a ``Document`` (root ``lang``/``id``
    attributes plus the text of each ``document`` child).  Every predictor
    in ``self.model`` is then applied to that single document:

    * ``'age_gender'``: the predicted class is decoded with the predictor's
      label encoder and passed to ``label_extractor``; the resulting
      ``'gender'`` and ``'age_group'`` values are stored on the author.
    * ``'personality'``: class probabilities are rescaled with
      ``change_range(p, 1.0, 0.0, 0.5, -0.5)`` and passed, together with
      the encoder's classes, to ``label_extractor``; the five trait values
      are stored on the author's personality traits.

    Side effects:
        Sets ``self.result`` to a dict mapping each file name without its
        extension to the annotated ``Document``.  Sets ``self.x_test`` to
        the one-element list holding the last processed document, or to an
        empty list when no file was processed (previously this case raised
        ``NameError``).
    """
    skipped = ('truth.txt', '.DS_Store')
    trait_names = ('extroverted', 'agreeable', 'conscientious',
                   'stable', 'open')

    result = {}
    x_test = []  # bound up front so an empty directory does not raise NameError

    for file_name in os.listdir(self.path):
        if file_name in skipped:
            continue

        root = ET.parse(os.path.join(self.path, file_name)).getroot()
        document = Document(language=root.get('lang'), name=root.get('id'))
        for node in root.findall('document'):
            document.add_document(node.text)

        # The classifiers are called with a one-element batch.
        x_test = [document]
        predicted = {}

        # NOTE(review): relies on Document providing an ``author`` object
        # with ``personality_traits``; Document is not visible here -- confirm.
        for predictor in self.model:
            if predictor.name == 'age_gender':
                prediction = predictor.clf.predict(x_test)
                label = list(
                    predictor.label_encoder.inverse_transform(prediction))[0]
                predicted.update(predictor.label_extractor(label))
                document.author.gender = predicted['gender']
                document.author.age_group = predicted['age_group']
            elif predictor.name == 'personality':
                target = predictor.label_encoder.classes_
                probabilities = list(predictor.clf.predict_proba(x_test))[0]
                # Presumably maps a [0, 1] probability onto [-0.5, 0.5];
                # verify against change_range's argument order.
                rescaled = [change_range(p, 1.0, 0.0, 0.5, -0.5)
                            for p in probabilities]
                predicted.update(predictor.label_extractor(target, rescaled))
                traits = document.author.personality_traits
                for trait in trait_names:
                    setattr(traits, trait, predicted[trait])

        result[os.path.splitext(file_name)[0]] = document

    self.x_test = x_test
    self.result = result