def load_data(self):
    """Parse every XML file under ``self.path`` into a labelled Document.

    Each directory entry except ``truth.txt`` and ``.DS_Store`` is parsed
    as XML.  The root element's ``lang`` and ``id`` attributes initialise a
    ``Document``; the text of every direct ``<document>`` child is added to
    it.  The root ``id`` is then looked up in ``self.truth``, whose value
    must be a ``":::"``-separated record with exactly eight fields::

        user:::gender:::age_group:::extroverted:::stable:::agreeable:::conscientious:::open

    The five trait fields are converted to ``float`` and attached to the
    document through an ``Author`` object.

    Side effects:
        Sets ``self.X`` to the list of documents and ``self.Y`` to the
        list of raw truth records, in the order ``os.listdir`` yields the
        files.

    Returns:
        ``self``, so that calls can be chained.

    Raises:
        KeyError: if a file's root ``id`` is missing from ``self.truth``.
        ValueError: if a truth record does not have exactly eight fields
            or a trait field is not a valid float.
    """
    skipped = ('truth.txt', '.DS_Store')
    documents, labels = [], []

    for filename in os.listdir(self.path):
        if filename in skipped:
            continue

        # Single-argument parenthesised print emits the same text on
        # Python 2 (print statement) and Python 3 (print function).
        print("loading file -->" + filename)

        root = ET.parse(os.path.join(self.path, filename)).getroot()
        author_id = root.get('id')

        document = Document(language=root.get('lang'), name=author_id)
        for node in root.findall('document'):
            document.add_document(node.text)

        # Look the record up once; it is both parsed here and stored
        # verbatim as the label.
        record = self.truth[author_id]
        (_user, gender, age_group, extroverted, stable,
         agreeable, conscientious, openness) = record.split(":::")

        traits = PersonalityTraits(extroverted=float(extroverted),
                                   stable=float(stable),
                                   agreeable=float(agreeable),
                                   conscientious=float(conscientious),
                                   open=float(openness))
        document.author = Author(gender=gender, age_group=age_group,
                                 traits=traits)

        documents.append(document)
        labels.append(record)

    print("done loading files")

    self.X = documents
    self.Y = labels
    return self