Example #1
0
    def trainClassifiers(self, xml_file, type):
        """Train the four supervised classifiers and write each one to disk.

        Runs ``self.prepareModels`` and ``self.procesar`` on ``xml_file``,
        loads the data stored under the TF-IDF model file, passes every
        loaded row through ``expand`` and trains one ``SC`` classifier per
        kind (1 through 4) on the resulting vectors.

        Args:
            xml_file: Forwarded unchanged to ``prepareModels`` and
                ``procesar``.
            type: ``1`` selects ``tfidfModel`` and the ``SVM``/``NB``/``ME``/
                ``DT`` output files; any other value selects the
                ``p``-suffixed counterparts.
        """
        self.prepareModels(xml_file, type)
        comentarios = self.procesar(xml_file, type)
        use_primary = type == 1

        stored_rows = load_data_from_disk(tfidfModel if use_primary else tfidfModelp)
        features = [expand(row) for row in stored_rows]
        # The second field of each processed comment is used as its label.
        labels = [comentario[1] for comentario in comentarios]

        if use_primary:
            output_files = [SVM, NB, ME, DT]
        else:
            output_files = [SVMp, NBp, MEp, DTp]

        # SC takes a 1-based classifier kind; it is paired with the output
        # file at the same position in the list above.
        for kind, output_file in enumerate(output_files, start=1):
            trained = SC(features, labels, kind).train()
            write_data_to_disk(output_file, trained)
Example #2
0
    def test(self, comment, type, corpus):
        """Classify the entities of a single comment with the chosen method.

        Args:
            comment: Indexable pair; ``comment[0]`` is the text that gets
                segmented and ``comment[1]`` is a mapping whose ``items()``
                are passed on as the entities.
            type: Method selector. Values 1-4 use the SVM, NB, ME and DT
                classifier files respectively; 5 calls ``__testUnsup``.
            corpus: ``1`` selects the unsuffixed vectorizer and classifier
                files; any other value selects the ``p``-suffixed ones.

        Returns:
            The value returned by the selected helper, or ``None`` when
            ``type`` matches none of 1-5.
        """
        use_primary = corpus == 1

        vectorizer = load_data_from_disk(
            simpleVectorizer if use_primary else simpleVectorizerp
        )
        transformer = load_data_from_disk(
            tfidfVectorizer if use_primary else tfidfVectorizerp
        )

        model = VM()
        model.set_models(vectorizer, transformer)

        texto = comment[0]
        segmentos = Segmentation2().segment_text(texto)
        entities = comment[1].items()

        # Position in this list + 1 is the ``type`` code that selects the file.
        if use_primary:
            classifier_files = [SVM, NB, ME, DT]
        else:
            classifier_files = [SVMp, NBp, MEp, DTp]

        for code, classifier_file in enumerate(classifier_files, start=1):
            if type == code:
                return self.__testClassifier(segmentos, entities, model, classifier_file)

        if type == 5:
            return self.__testUnsup(segmentos, entities)
        return None
Example #3
0
    def __testClassifier(self, segmentos, entities, model, fileClass):
        """Tag each entity found in a segment with that segment's predicted class.

        Every segment is cleaned with ``TextCleaner``, vectorised through
        ``model.get_comment_tf_idf_vector`` and classified; the first element
        of the first classification result is taken as the segment polarity.

        Args:
            segmentos: Iterable of text segments.
            entities: Collection of pairs, iterated once per segment; the
                first item of each pair is the text searched for inside the
                segment.
            model: Object providing ``get_comment_tf_idf_vector``.
            fileClass: Argument handed to ``load_data_from_disk`` to obtain
                the trained classifier.

        Returns:
            A list of ``(entity_text, polarity)`` tuples, ordered by segment
            and then by entity. An entity is listed once for every segment
            that contains it.
        """
        results = []
        classifier = None
        for segmento in segmentos:
            if classifier is None:
                # fileClass does not depend on the segment, so the classifier
                # is loaded a single time. Deferring the load to the first
                # segment keeps empty input free of disk access.
                classifier = SC()
                classifier.set_classifier(load_data_from_disk(fileClass))

            procesado = TextCleaner(segmento).get_processed_comment()
            vector = model.get_comment_tf_idf_vector([procesado])
            polaridad = classifier.classify(vector)[0][0]

            results.extend(
                (entity[0], polaridad)
                for entity in entities
                if entity[0] in segmento
            )
        return results