def test_model(self, test_data, empty_solution, evaluate=False):
    """Write this model's predictions and, optionally, its evaluation summary.

    Each step is skipped when its output file already exists, so the
    method can be re-run without repeating finished work.

    Prediction step: when ``self.prediction_file`` is missing, the
    serialized classifier is read from ``self.model_file``, evaluated on
    ``test_data``, and one ``[userid, tweetid, predicted]`` entry is built
    per row returned by ``read_sheet(file_name=empty_solution)``. The
    entries are handed to ``write_the_solution_file``.

    Evaluation step (only when ``evaluate`` is truthy): when
    ``self.evaluation_file`` is missing, ``evaluation.to_summary()`` is
    stored through ``save_file``. The evaluation computed by the
    prediction step is reused when that step ran in this call.

    If a step needs the model but ``self.model_file`` does not exist, a
    message is printed and the method returns without writing anything
    further.

    :param test_data: data set passed to ``Evaluation`` and to
        ``evaluation.test_model``.
    :param empty_solution: file name passed to ``read_sheet``; its rows
        must expose ``'userid'`` and ``'tweetid'`` keys.
    :param evaluate: when truthy, also save the evaluation summary.
    :return: ``None`` on every path.
    :raises IndexError: if ``read_sheet`` yields more rows than there are
        predictions.
    """
    # NOTE(review): Classifier / Evaluation / serialization look like the
    # python-weka-wrapper API -- confirm against the file's imports.
    evaluation = None

    if os.path.isfile(self.prediction_file):
        print('Model ' + self.name + ' already tested.')
    elif not os.path.isfile(self.model_file):
        print('Impossible testing this model. It should be trained first.')
        return
    else:
        print('Starting to test_model model ' + self.name + '.')
        model_weka = Classifier(jobject=serialization.read(self.model_file))
        evaluation = Evaluation(data=test_data)
        evaluation.test_model(classifier=model_weka, data=test_data)
        predictions = evaluation.predictions()
        rows = read_sheet(file_name=empty_solution)
        # Rows and predictions are paired by position. Indexing (rather
        # than popping the head of the list for every row) keeps this
        # linear and still raises IndexError when predictions run short.
        solutions = [
            [row['userid'], row['tweetid'], predictions[index].predicted()]
            for index, row in enumerate(rows)
        ]
        write_the_solution_file(solutions, self.prediction_file)
        print('Model ' + self.name + ' tested.')

    if not evaluate:
        return

    if os.path.isfile(self.evaluation_file):
        print('Model ' + self.name + ' already evaluated.')
        return

    if evaluation is None:
        # Predictions were already on disk, so nothing has been evaluated
        # in this call; the model file may be missing on this path.
        if not os.path.isfile(self.model_file):
            print('Impossible testing this model. It should be trained first.')
            return
        model_weka = Classifier(jobject=serialization.read(self.model_file))
        evaluation = Evaluation(data=test_data)
        evaluation.test_model(classifier=model_weka, data=test_data)

    save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
    print('Model ' + self.name + ' evaluated.')
def train_model(self, training_data):
    """Train and serialize this model, then save its textual description.

    Both steps are skipped when their output file already exists:

    1. If ``self.model_file`` is missing, a ``Classifier`` is created from
       ``self.classname`` and ``self.options``, built on
       ``training_data``, and written to ``self.model_file`` with
       ``serialization.write``.
    2. If ``self.parameter_file`` is missing, ``str(model)`` is stored
       through ``save_file``. The model is read back from
       ``self.model_file`` when it was not trained in this call.

    :param training_data: data set passed to ``build_classifier``.
    :return: ``None``.
    """
    # NOTE(review): Classifier / serialization look like the
    # python-weka-wrapper API -- confirm against the file's imports.
    model_weka = None

    if os.path.isfile(self.model_file):
        print('Model ' + self.name + ' already trained.')
    else:
        print('Starting to train_model model ' + self.name + '.')
        model_weka = Classifier(classname=self.classname, options=self.options)
        model_weka.build_classifier(data=training_data)
        serialization.write(filename=self.model_file, jobject=model_weka)
        print('Model ' + self.name + ' trained and saved.')

    if os.path.isfile(self.parameter_file):
        print('Parameters of the model ' + self.name + ' already saved.')
        return

    if model_weka is None:
        # Training was skipped above, so the classifier has to be loaded
        # from disk before it can be described.
        model_weka = Classifier(jobject=serialization.read(self.model_file))
    save_file(file_name=self.parameter_file, content=str(model_weka))
    print('Parameters of the model ' + self.name + ' saved.')