for row in x_tester:
    for var in row:
      if var not in new_x_tester:
        new_x_tester[var] = []
      new_x_tester[var].append(row[var])

  #print(new_x_tester)
  t = clf.classifier.predict_proba(new_x_tester)
  results_y = []
  predicted_y = []
  for t, i in enumerate(t):
    print(y_tester[t], i[1]*100)
    results_y.append(y_tester[t])
    predicted_y.append(i[1]*100)
  mean_abs_err, mean_squ_err, r2score, kl_divergence = regressionMetrics(results_y, predicted_y)
  printer.regressionEvaluation(mean_abs_err, mean_squ_err, r2score, kl_divergence, "Regression Evaluation")

  # coefs = clf.classifier.named_steps['classifier'].coef_
  # feature_names = clf.classifier.named_steps['feats'].get_params()['transformer_list'][0][1].named_steps['feature'].get_feature_names()

  # coef_dict = {}
  
  # for coef, feat in zip(coefs,feature_names):
  #   coef_dict[feat] = coef
  #print(coef_dict)
  

  # print(coef_dict)
  
  # for word in coef_dict:
  #   print(len(coef_dict[word]))
Exemple #2
0
class SVM:
    """Fit a feature-union pipeline on training data and score it on development data.

    The data splits and labels are copied from the ``data`` object given to
    the constructor. ``classify`` builds and fits the pipeline, ``evaluate``
    produces predictions and metrics, and the ``print*`` methods forward the
    stored results to the ``Printer`` created in ``classify``.
    """

    # Class-level fallbacks, kept so that attribute access on the class itself
    # keeps working. Every instance rebinds all of them in __init__, so these
    # lists are never shared between (or mutated through) instances.
    X_train = []
    Y_train = []
    X_development = []
    Y_development = []
    X_test = []

    Y_predicted = []

    labels = []

    features = []

    def __init__(self, data, predict_method: str, show_fitting) -> None:
        """Store the data splits and the evaluation settings.

        Args:
            data: Object exposing ``X_train``, ``Y_train``, ``X_development``,
                ``Y_development``, ``X_test`` and ``labels`` attributes.
            predict_method: ``'classification'`` or ``'regression'``; selects
                which metrics ``evaluate`` computes. Any other value makes
                ``evaluate`` predict without computing metrics.
            show_fitting: Passed unchanged to ``Printer`` in ``classify``.
        """
        self.X_train = data.X_train
        self.Y_train = data.Y_train

        self.X_development = data.X_development
        self.Y_development = data.Y_development
        self.X_test = data.X_test

        self.labels = data.labels

        # Fresh lists per instance: without these, every instance would fall
        # back to (and could mutate) the single list stored on the class.
        self.Y_predicted = []
        self.features = []

        self.predict_method = predict_method
        self.show_fitting = show_fitting

    @staticmethod
    def _has_samples(samples) -> bool:
        """Return True when ``samples`` holds at least one item.

        Uses ``len`` rather than truthiness so that containers whose ``bool()``
        is ambiguous (multi-row arrays, data frames) are handled; objects
        without a length fall back to plain truthiness.
        """
        if samples is None:
            return False
        try:
            return len(samples) > 0
        except TypeError:
            return bool(samples)

    def classify(self, features, classifier=None) -> None:
        """Build the pipeline, print it, and fit it on the training data.

        Args:
            features: Transformer list handed unchanged to ``FeatureUnion``.
            classifier: Final estimator of the pipeline. When omitted, an
                ``SGDClassifier`` with hinge loss, ``random_state=42``,
                ``max_iter=50`` and ``tol=None`` is used.
        """
        feature_union = ('feats', FeatureUnion(features))

        # Identity check: `== None` would dispatch to a custom __eq__ if the
        # supplied estimator defines one.
        if classifier is None:
            classifier = SGDClassifier(loss='hinge',
                                       random_state=42,
                                       max_iter=50,
                                       tol=None)

        self.classifier = Pipeline([feature_union, ('classifier', classifier)])
        print(self.classifier)

        # The printer is created right before fitting and asked for the
        # duration right after it.
        self.printer = Printer('Model Fitting', self.show_fitting)
        self.classifier.fit(self.X_train, self.Y_train)
        self.printer.duration()

    def evaluate(self) -> None:
        """Predict on the development and test sets and compute metrics.

        Predictions are stored in ``Y_development_predicted`` and
        ``Y_test_predicted``; each is only produced when the corresponding
        input set is non-empty. Metrics are computed on the development set
        only and stored as attributes (``accuracy``/``precision``/``recall``/
        ``f1score`` for classification, ``mean_abs_err``/``mean_squ_err``/
        ``r2score``/``kl_divergence`` for regression).

        Raises:
            AttributeError: If metrics are requested but no development
                predictions exist (empty development set and no earlier run).
        """
        if self._has_samples(self.X_development):
            self.Y_development_predicted = self.classifier.predict(
                self.X_development)
            # NOTE(review): dumps the whole development set to stdout; looks
            # like leftover debugging output — confirm before removing.
            print(self.X_development)
        if self._has_samples(self.X_test):
            self.Y_test_predicted = self.classifier.predict(self.X_test)

        if self.predict_method == 'classification':
            self.accuracy, self.precision, self.recall, self.f1score = classificationMetrics(
                self.Y_development, self.Y_development_predicted, self.labels)

        elif self.predict_method == 'regression':
            # NOTE(review): labels is forwarded as a third argument here —
            # confirm regressionMetrics accepts it.
            self.mean_abs_err, self.mean_squ_err, self.r2score, self.kl_divergence = regressionMetrics(
                self.Y_development, self.Y_development_predicted, self.labels)

    def printBasicEvaluation(self) -> None:
        """Print the metrics stored by ``evaluate`` for the configured method.

        Requires ``classify`` (creates the printer) and ``evaluate`` (stores
        the metrics) to have run first.
        """
        if self.predict_method == 'classification':
            self.printer.evaluation(self.accuracy, self.precision, self.recall,
                                    self.f1score, "Classification Evaluation")
        elif self.predict_method == 'regression':
            self.printer.regressionEvaluation(self.mean_abs_err,
                                              self.mean_squ_err, self.r2score,
                                              self.kl_divergence,
                                              "Regression Evaluation")

    def printClassEvaluation(self) -> None:
        """Forward development gold values, predictions and labels to the printer's per-class report."""
        self.printer.classEvaluation(self.Y_development,
                                     self.Y_development_predicted, self.labels)