def trainClassifiers(self, xml_file, type):
    """Train the four supervised classifiers and write each one to disk.

    The tf-idf vectors are read back from disk, expanded with ``expand``
    and paired with the labels taken from the processed comments. One
    classifier per kind (1 to 4) is then trained and persisted.

    Args:
        xml_file: Corpus file forwarded to ``prepareModels`` and
            ``procesar``.
        type: Corpus selector. ``1`` uses ``tfidfModel`` and the
            ``SVM``/``NB``/``ME``/``DT`` output files; any other value uses
            the ``...p`` variants. (The name shadows the builtin but is
            kept because it is part of the public signature.)
    """
    # NOTE(review): presumably prepareModels is what writes the tf-idf model
    # that is loaded below -- confirm against its implementation.
    self.prepareModels(xml_file, type)
    comentarios = self.procesar(xml_file, type)

    # Both corpus types share the same pipeline; only the files differ.
    if type == 1:
        model_file = tfidfModel
        classifier_files = [SVM, NB, ME, DT]
    else:
        model_file = tfidfModelp
        classifier_files = [SVMp, NBp, MEp, DTp]

    data_expanded = [expand(vec) for vec in load_data_from_disk(model_file)]
    # The label is the second field of every processed comment.
    labels = [comentario[1] for comentario in comentarios]

    # SC receives a 1-based classifier kind; the output files are listed in
    # the matching order (presumably SVM, NB, ME, DT -- confirm in SC).
    for kind, classifier_file in enumerate(classifier_files, start=1):
        trained = SC(data_expanded, labels, kind).train()
        write_data_to_disk(classifier_file, trained)
def __testClassifier(self, segmentos, entities, model, fileClass):
    """Classify each segment and tag the entities it mentions.

    Every segment is cleaned with ``TextCleaner``, vectorised through
    ``model.get_comment_tf_idf_vector`` and classified with the classifier
    stored in ``fileClass``. Each entity whose name occurs in the segment
    text is reported together with the polarity predicted for that segment.

    Args:
        segmentos: Iterable of text segments.
        entities: Iterable of indexable items whose first element is the
            entity name searched for inside each segment.
        model: Object exposing ``get_comment_tf_idf_vector``.
        fileClass: Location of the persisted classifier, passed to
            ``load_data_from_disk``.

    Returns:
        A list of ``(entity_name, polarity)`` tuples, in segment order and
        then entity order. An entity appears once per segment containing it.
    """
    results = []
    classifier = None
    for segmento in segmentos:
        procesado = TextCleaner(segmento).get_processed_comment()
        vector = model.get_comment_tf_idf_vector([procesado])

        if classifier is None:
            # The persisted classifier does not depend on the segment, so
            # read it from disk only once. It is loaded lazily so that an
            # empty ``segmentos`` still triggers no disk access.
            classifier = SC()
            classifier.set_classifier(load_data_from_disk(fileClass))

        # The polarity is the first field of the first classification row.
        polaridad = classifier.classify(vector)[0][0]

        for entity in entities:
            nombre = entity[0]
            if nombre in segmento:
                results.append((nombre, polaridad))
    return results