def trainClassifiers(self, xml_file, type):
    """Train the four supervised classifiers (SVM, NB, ME, DT) for one corpus.

    Prepares the vectorizer/tf-idf models, processes the XML corpus, expands
    the stored tf-idf vectors, and trains one classifier per algorithm,
    persisting each trained model to disk.

    Args:
        xml_file: Path to the XML corpus file.
        type: Corpus selector; 1 selects the primary corpus artifacts
            (tfidfModel, SVM/NB/ME/DT), any other value selects the
            alternate ("p") artifacts.  NOTE(review): parameter shadows the
            builtin `type`; kept for interface compatibility.
    """
    self.prepareModels(xml_file, type)
    comentarios = self.procesar(xml_file, type)

    # The two corpora differ only in which on-disk artifacts they use;
    # select them once instead of duplicating the whole training pipeline.
    if type == 1:
        model_file = tfidfModel
        classifier_files = [SVM, NB, ME, DT]
    else:
        model_file = tfidfModelp
        classifier_files = [SVMp, NBp, MEp, DTp]

    # Expand each stored tf-idf vector into its dense form.
    data_expanded = [expand(vec) for vec in load_data_from_disk(model_file)]
    # Each processed comment is a (text, label) pair; keep only the labels.
    labels = [comentario[1] for comentario in comentarios]

    # SC's third argument is the 1-based algorithm id (1=SVM .. 4=DT),
    # matching the order of classifier_files.
    for algorithm_id, out_file in enumerate(classifier_files, start=1):
        classifier = SC(data_expanded, labels, algorithm_id)
        write_data_to_disk(out_file, classifier.train())
def test(self, comment, type, corpus):
    """Classify the entities mentioned in one comment.

    Segments the comment text, then dispatches to the requested classifier
    (or the unsupervised path) to assign a polarity to each entity.

    Args:
        comment: Pair of (comment_text, entity_dict); the dict maps entity
            names to associated data — presumably positions, TODO confirm.
        type: Classifier selector: 1=SVM, 2=NB, 3=ME, 4=DT, 5=unsupervised.
            Any other value returns None (original behavior preserved).
        corpus: 1 selects the primary corpus artifacts, any other value the
            alternate ("p") artifacts.

    Returns:
        The list of (entity, polarity) results from the chosen tester, or
        None for an unrecognized `type`.
    """
    # The two corpora differ only in which persisted artifacts they use;
    # pick vectorizers and per-algorithm classifier files together.
    if corpus == 1:
        vectorizer = load_data_from_disk(simpleVectorizer)
        transformer = load_data_from_disk(tfidfVectorizer)
        classifier_files = {1: SVM, 2: NB, 3: ME, 4: DT}
    else:
        vectorizer = load_data_from_disk(simpleVectorizerp)
        transformer = load_data_from_disk(tfidfVectorizerp)
        classifier_files = {1: SVMp, 2: NBp, 3: MEp, 4: DTp}

    model = VM()
    model.set_models(vectorizer, transformer)

    comentario = comment[0]
    segmentos = Segmentation2().segment_text(comentario)
    entities = comment[1].items()

    if type == 5:
        return self.__testUnsup(segmentos, entities)
    if type in classifier_files:
        return self.__testClassifier(segmentos, entities, model,
                                     classifier_files[type])
    # Unknown type: fall through and return None, as the original did.
def __testClassifier(self, segmentos, entities, model, fileClass):
    """Assign a supervised polarity to each entity found in the segments.

    For every text segment: clean it, vectorize it with the tf-idf model,
    classify it, and attach the resulting polarity to every entity whose
    name occurs in that segment.

    Args:
        segmentos: Iterable of segment strings from the comment.
        entities: Iterable of (entity_name, data) pairs (a dict items view).
        model: VM instance with vectorizer/transformer already set.
        fileClass: On-disk filename of the trained classifier to use.

    Returns:
        List of (entity_name, polarity) tuples; an entity may appear once
        per segment that mentions it.
    """
    # The classifier file is loop-invariant: load and wrap it ONCE instead
    # of re-reading it from disk for every segment (the original did the
    # load inside the loop).
    classifier = SC()
    classifier.set_classifier(load_data_from_disk(fileClass))

    results = []
    for segmento in segmentos:
        procesado = TextCleaner(segmento).get_processed_comment()
        vector = model.get_comment_tf_idf_vector([procesado])
        # classify() returns a nested sequence; [0][0] is the polarity label.
        polaridadSup = classifier.classify(vector)[0][0]
        # Pair the segment's polarity with every entity it mentions.
        for entity in entities:
            if entity[0] in segmento:
                results.append((entity[0], polaridadSup))
    return results