# Regroup the row-wise test records into one list of values per variable.
for record in x_tester:
    for key in record:
        new_x_tester.setdefault(key, []).append(record[key])
# print(new_x_tester)

probabilities = clf.classifier.predict_proba(new_x_tester)

# Pair every expected value with the second probability column scaled by 100.
results_y = []
predicted_y = []
for index, class_probs in enumerate(probabilities):
    expected = y_tester[index]
    score = class_probs[1] * 100
    print(expected, score)
    results_y.append(expected)
    predicted_y.append(score)

mean_abs_err, mean_squ_err, r2score, kl_divergence = regressionMetrics(
    results_y, predicted_y)
printer.regressionEvaluation(
    mean_abs_err, mean_squ_err, r2score, kl_divergence,
    "Regression Evaluation")

# Disabled coefficient inspection, kept for reference:
# coefs = clf.classifier.named_steps['classifier'].coef_
# feature_names = clf.classifier.named_steps['feats'].get_params()['transformer_list'][0][1].named_steps['feature'].get_feature_names()
# coef_dict = {}
# for coef, feat in zip(coefs, feature_names):
#     coef_dict[feat] = coef
# print(coef_dict)
# for word in coef_dict:
#     print(len(coef_dict[word]))
class SVM:
    """Feature-union + classifier pipeline fitted on a train split.

    The instance copies the data splits and labels from a ``data`` object,
    builds a ``Pipeline`` in :meth:`classify`, and scores the development
    split in :meth:`evaluate`.  ``Pipeline``, ``FeatureUnion`` and
    ``SGDClassifier`` are presumably the scikit-learn classes of the same
    name (their import is outside this block -- TODO confirm).
    """

    # Class-level defaults.  ``__init__`` rebinds all of them on the instance
    # except ``Y_predicted`` and ``features``, which this class never assigns.
    X_train = []
    Y_train = []
    X_development = []
    Y_development = []
    X_test = []
    Y_predicted = []
    labels = []
    features = []

    def __init__(self, data, predict_method, show_fitting):
        """Store the data splits and evaluation settings.

        Args:
            data: Object exposing ``X_train``, ``Y_train``, ``X_development``,
                ``Y_development``, ``X_test`` and ``labels``.
            predict_method: ``'classification'`` or ``'regression'``; selects
                which metrics :meth:`evaluate` computes and which report
                :meth:`printBasicEvaluation` prints.
            show_fitting: Forwarded to ``Printer`` when the model is fitted.
        """
        self.X_train = data.X_train
        self.Y_train = data.Y_train
        self.X_development = data.X_development
        self.Y_development = data.Y_development
        self.X_test = data.X_test
        self.labels = data.labels

        self.predict_method = predict_method
        self.show_fitting = show_fitting

    def classify(self, features, classifier=None):
        """Build the pipeline and fit it on the training split.

        Args:
            features: Transformer list handed to ``FeatureUnion``.
            classifier: Final estimator of the pipeline.  When omitted, an
                ``SGDClassifier`` with hinge loss, ``random_state=42``,
                ``max_iter=50`` and ``tol=None`` is used.
        """
        feature_union = ('feats', FeatureUnion(features))

        # Identity check: ``None`` is a singleton and ``==`` could be
        # overridden by the estimator object.
        if classifier is None:
            classifier = SGDClassifier(
                loss='hinge', random_state=42, max_iter=50, tol=None)

        self.classifier = Pipeline([feature_union, ('classifier', classifier)])
        print(self.classifier)

        self.printer = Printer('Model Fitting', self.show_fitting)
        self.classifier.fit(self.X_train, self.Y_train)
        self.printer.duration()

    def evaluate(self):
        """Predict on the available splits and compute development metrics.

        Predictions are stored in ``Y_development_predicted`` and
        ``Y_test_predicted``; each split is only predicted when it is
        non-empty.  Depending on ``predict_method`` the classification or
        regression metrics are then stored on the instance.  Any other
        ``predict_method`` only produces the predictions.

        Raises:
            ValueError: If metrics are requested but no development
                predictions exist (empty development split).
        """
        if self.X_development:
            self.Y_development_predicted = self.classifier.predict(
                self.X_development)
        if self.X_test:
            self.Y_test_predicted = self.classifier.predict(self.X_test)

        if self.predict_method not in ('classification', 'regression'):
            return

        # Without this guard an empty development split surfaced as an
        # opaque AttributeError on ``Y_development_predicted``.
        if not hasattr(self, 'Y_development_predicted'):
            raise ValueError(
                "Cannot compute %s metrics: the development split is empty, "
                "so there are no development predictions."
                % self.predict_method)

        if self.predict_method == 'classification':
            (self.accuracy, self.precision,
             self.recall, self.f1score) = classificationMetrics(
                self.Y_development, self.Y_development_predicted, self.labels)
        else:
            # NOTE(review): regressionMetrics is called with only two
            # arguments elsewhere in this file; confirm that it accepts the
            # extra ``labels`` argument.
            (self.mean_abs_err, self.mean_squ_err,
             self.r2score, self.kl_divergence) = regressionMetrics(
                self.Y_development, self.Y_development_predicted, self.labels)

    def printBasicEvaluation(self):
        """Print the metrics stored by :meth:`evaluate` via the printer."""
        if self.predict_method == 'classification':
            self.printer.evaluation(
                self.accuracy, self.precision, self.recall, self.f1score,
                "Classification Evaluation")
        elif self.predict_method == 'regression':
            self.printer.regressionEvaluation(
                self.mean_abs_err, self.mean_squ_err, self.r2score,
                self.kl_divergence, "Regression Evaluation")

    def printClassEvaluation(self):
        """Print the per-class report for the development predictions."""
        self.printer.classEvaluation(
            self.Y_development, self.Y_development_predicted, self.labels)