Example #1
0
    def load_data(self):
        """Parse the XML files under ``self.path`` into labelled documents.

        Every directory entry except ``truth.txt`` and ``.DS_Store`` is parsed
        as XML. The text of each ``document`` child of the root element is
        added to a ``Document``, and an ``Author`` built from the matching
        ``self.truth`` entry (keyed by the root's ``id`` attribute) is attached
        to it.

        A truth entry is a ``:::``-separated string with eight fields: user,
        gender, age group, then the extroverted, stable, agreeable,
        conscientious and open scores, which are converted with ``float``.

        Sets ``self.X`` to the list of documents and ``self.Y`` to the raw
        truth strings, in the same order.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if a truth entry does not split into exactly eight
                fields, or a trait score is not a valid float.
        """
        documents, labels = [], []
        for filename in os.listdir(self.path):
            # These two entries are never parsed as author XML.
            if filename == 'truth.txt' or filename == '.DS_Store':
                continue
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("loading file -->" + filename)
            root = ET.parse(os.path.join(self.path, filename)).getroot()
            author_id = root.get('id')
            document = Document(language=root.get('lang'), name=author_id)
            for node in root.findall('document'):
                document.add_document(node.text)

            # Look the truth record up once; it is both parsed and kept as label.
            truth_entry = self.truth[author_id]
            # The first field (user) is not needed here. ``openness`` avoids
            # shadowing the builtin ``open``.
            (_, gender, age_group, extroverted, stable, agreeable,
             conscientious, openness) = truth_entry.split(":::")

            traits = PersonalityTraits(extroverted=float(extroverted),
                                       stable=float(stable),
                                       agreeable=float(agreeable),
                                       conscientious=float(conscientious),
                                       open=float(openness))
            document.author = Author(gender=gender, age_group=age_group,
                                     traits=traits)
            documents.append(document)
            labels.append(truth_entry)
        print("done loading files")

        self.X = documents
        self.Y = labels
        return self
Example #2
0
    def run(self):
        """Predict author attributes for each XML file under ``self.path``.

        Every directory entry except ``truth.txt`` and ``.DS_Store`` is parsed
        as XML and wrapped in a ``Document`` holding the text of each
        ``document`` child of the root element. Each predictor in
        ``self.model`` is then applied according to its ``name``:

        * ``'age_gender'``: the decoded class label is passed to the
          predictor's ``label_extractor``; the resulting ``gender`` and
          ``age_group`` values are stored on ``document.author``.
        * ``'personality'``: the first row of ``predict_proba`` is passed
          through ``change_range`` and handed, together with the encoder's
          classes, to ``label_extractor``.

        The five trait values (``extroverted``, ``agreeable``,
        ``conscientious``, ``stable``, ``open``) are then copied onto
        ``document.author.personality_traits``.

        Sets ``self.result`` to a dict mapping each file name (without
        extension) to its document, and ``self.x_test`` to the one-element
        list holding the last processed document, or ``[]`` when no file was
        processed.

        Raises:
            KeyError: if the predictors did not supply one of the five trait
                values for a document.
        """
        result = {}
        # Bound before the loop so a directory without any processable file no
        # longer raises UnboundLocalError at the final assignment.
        # NOTE(review): only the last document's batch is kept in self.x_test,
        # as in the original -- confirm whether all documents were intended.
        x_test = []
        for filename in os.listdir(self.path):
            # These two entries are never parsed as author XML.
            if filename == 'truth.txt' or filename == '.DS_Store':
                continue
            root = ET.parse(os.path.join(self.path, filename)).getroot()
            document = Document(language=root.get('lang'), name=root.get('id'))
            for node in root.findall('document'):
                document.add_document(node.text)
            x_test = [document]  # single-document batch handed to each predictor

            predicted = {}
            for predictor in self.model:
                if predictor.name == 'age_gender':
                    prediction = predictor.clf.predict(x_test)
                    decoded = list(
                        predictor.label_encoder.inverse_transform(prediction))[0]
                    predicted.update(predictor.label_extractor(decoded))
                    document.author.gender = predicted['gender']
                    document.author.age_group = predicted['age_group']
                elif predictor.name == 'personality':
                    target = predictor.label_encoder.classes_
                    probabilities = list(predictor.clf.predict_proba(x_test))[0]
                    # presumably rescales each probability from [0, 1] to
                    # [-0.5, 0.5] -- confirm against change_range's signature
                    scaled = [change_range(p, 1.0, 0.0, 0.5, -0.5)
                              for p in probabilities]
                    predicted.update(predictor.label_extractor(target, scaled))

            # NOTE(review): assigned unconditionally, so a model without a
            # 'personality' predictor fails here with KeyError.
            traits = document.author.personality_traits
            for trait in ('extroverted', 'agreeable', 'conscientious',
                          'stable', 'open'):
                setattr(traits, trait, predicted[trait])

            result[os.path.splitext(filename)[0]] = document
        self.x_test = x_test
        self.result = result