Example #1
0
    def load_data(self):
        """Load every author file under ``self.path`` into ``self.X`` / ``self.Y``.

        Each directory entry except ``truth.txt`` and ``.DS_Store`` is parsed
        as XML. The root element's ``lang`` and ``id`` attributes seed a
        ``Document``; the text of each ``document`` child element is added to
        it. The matching entry of ``self.truth`` (looked up by the root ``id``)
        is split on ``":::"`` into eight fields -- user, gender, age group and
        five personality scores -- which are used to build the ``Author``
        attached to the document.

        On completion ``self.X`` holds the ``Document`` objects and ``self.Y``
        the corresponding unsplit truth strings, in the order the entries
        were returned by ``os.listdir``.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if a truth entry does not split into exactly eight
                fields, or a personality score is not parseable as a float.
            KeyError: presumably, when a file's ``id`` has no entry in
                ``self.truth`` (assumes ``self.truth`` is a dict -- confirm).
        """
        # Non-data entries that live alongside the XML files.
        skipped = ('truth.txt', '.DS_Store')

        X, Y = [], []
        for filename in os.listdir(self.path):
            if filename in skipped:
                continue
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is also valid Python 3 syntax.
            print("loading file -->" + filename)
            root = ET.parse(os.path.join(self.path, filename)).getroot()
            author_id = root.get('id')

            document = Document(language=root.get('lang'), name=author_id)
            for d in root.findall('document'):
                document.add_document(d.text)

            truth_record = self.truth[author_id]
            # The first field (user) is not needed here; the last is named
            # ``openness`` so the builtin ``open`` is not shadowed.
            (_user, gender, age_group, extroverted, stable, agreeable,
             conscientious, openness) = truth_record.split(":::")

            traits = PersonalityTraits(extroverted=float(extroverted),
                                       stable=float(stable),
                                       agreeable=float(agreeable),
                                       conscientious=float(conscientious),
                                       open=float(openness))
            document.author = Author(gender=gender, age_group=age_group,
                                     traits=traits)
            X.append(document)
            # Y keeps the raw, unsplit truth line for this author.
            Y.append(truth_record)
        print("done loading files")

        self.X = X
        self.Y = Y
        return self