def test_thresholded_scorers_multilabel_indicator_data():
    """Test that the scorers work with multilabel-indicator format
    for multilabel and multi-output multi-class classifiers.
    """
    X, y = make_multilabel_classification(return_indicator=True,
                                          allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    # np.vstack must receive a real sequence: generator arguments are
    # deprecated in NumPy and rejected by newer releases.
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    # Emulate an estimator that only exposes decision_function by hiding
    # predict_proba and serving the positive-class column of each output.
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]
    y_proba = clf.decision_function(X_test)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    # y_proba is already a list of 1-D arrays, so it can be stacked directly.
    score2 = roc_auc_score(y_test, np.vstack(y_proba).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)
def test_thresholded_scorers_multilabel_indicator_data():
    # Thresholded scorers must accept multilabel-indicator targets, both for
    # multilabel and for multi-output multi-class classifiers.
    X, y = make_multilabel_classification(allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    roc_auc_scorer = get_scorer('roc_auc')

    # Multi-output multi-class predict_proba
    proba_tree = DecisionTreeClassifier()
    proba_tree.fit(X_train, y_train)
    last_class_columns = [p[:, -1] for p in proba_tree.predict_proba(X_test)]
    via_scorer = roc_auc_scorer(proba_tree, X_test, y_test)
    via_metric = roc_auc_score(y_test, np.vstack(last_class_columns).T)
    assert_almost_equal(via_scorer, via_metric)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    df_tree = DecisionTreeClassifier()
    df_tree.fit(X_train, y_train)
    df_tree._predict_proba = df_tree.predict_proba
    df_tree.predict_proba = None

    def _positive_class_scores(X):
        # Stand-in decision_function built from the hidden predict_proba.
        return [p[:, 1] for p in df_tree._predict_proba(X)]

    df_tree.decision_function = _positive_class_scores
    decision_values = df_tree.decision_function(X_test)
    via_scorer = roc_auc_scorer(df_tree, X_test, y_test)
    via_metric = roc_auc_score(y_test, np.vstack(list(decision_values)).T)
    assert_almost_equal(via_scorer, via_metric)

    # Multilabel predict_proba
    ovr_tree = OneVsRestClassifier(DecisionTreeClassifier())
    ovr_tree.fit(X_train, y_train)
    via_scorer = roc_auc_scorer(ovr_tree, X_test, y_test)
    via_metric = roc_auc_score(y_test, ovr_tree.predict_proba(X_test))
    assert_almost_equal(via_scorer, via_metric)

    # Multilabel decision function
    ovr_svc = OneVsRestClassifier(LinearSVC(random_state=0))
    ovr_svc.fit(X_train, y_train)
    via_scorer = roc_auc_scorer(ovr_svc, X_test, y_test)
    via_metric = roc_auc_score(y_test, ovr_svc.decision_function(X_test))
    assert_almost_equal(via_scorer, via_metric)
from sklearn.ensemble import IsolationForest

# In[101]:

model = IsolationForest(n_estimators=50, max_samples='auto',
                        contamination=0.1, max_features=1.0)

# In[102]:

# Remember which columns the model is trained on. The result columns added
# below ('scores', 'anomaly') must never be fed back into the model, otherwise
# the column set no longer matches what the forest was fitted on.
feature_columns = list(df.columns)
model.fit(df[feature_columns])

# In[103]:

df['scores'] = model.decision_function(df[feature_columns])
df['anomaly'] = model.predict(df[feature_columns])
df.head(20)

# In[104]:

# predict() labels outliers with -1; collect those rows.
anomaly = df.loc[df['anomaly'] == -1]
anomaly_index = list(anomaly.index)
print(anomaly)

# # support vector machine model

# In[105]:

model = SVC()
def do_cv(X, y, X_test, y_test, f_sel, modality, n_feats, split):
    """Sweep the number of selected features and return the best refit model.

    For each feature count ``i`` in ``range(2, n_feats, split)`` the first
    ``i`` entries returned by ``select_features`` are passed to ``tr`` (the
    parameter search defined elsewhere in this file). A fresh classifier is
    then refit on the training data with the search's best parameters: a
    ``DecisionTreeClassifier`` when ``modality == 'CNV'``, otherwise an
    ``svm.SVC``. Train/test accuracy and ROC AUC are recorded per feature
    count, printed and plotted.

    Parameters
    ----------
    X, y : training features and labels (copied into DataFrames).
    X_test, y_test : held-out data; ``X_test`` is indexed by feature names
        and ``y_test`` must expose ``.values``.
    f_sel : forwarded unchanged to ``select_features`` and ``tr``.
    modality : ``'CNV'`` selects the decision-tree path, anything else SVM.
    n_feats : exclusive upper bound of the feature-count sweep.
    split : step of the feature-count sweep.

    Returns
    -------
    tuple
        ``(classifier, features, hmap_data)`` for the feature count whose
        search had the highest ``best_score_``; ``hmap_data`` is the stacked
        table of every parameter combination and its mean test score.

    Raises
    ------
    ValueError
        If the sweep ``range(2, n_feats, split)`` is empty.
    """
    is_cnv = modality == 'CNV'
    X2 = pd.DataFrame(X, copy=True)  # copies the original feature dataframe
    y2 = pd.DataFrame(y, copy=True)  # copies the original label dataframe
    feat_selected = select_features(X, y, modality, f_sel, n_feats)

    num_feats = list(range(2, n_feats, split))
    if not num_feats:
        # Previously this surfaced later as an opaque np.argmax error.
        raise ValueError(
            'empty feature sweep: range(2, %r, %r) yields no feature counts'
            % (n_feats, split))

    # Hyper-parameters reported per search; they also form the table header.
    if is_cnv:
        param_keys = ['min_samples_split', 'min_samples_leaf', 'max_features']
    else:
        param_keys = ['kernel', 'C']
    hmap_data = np.array(param_keys + ['num_feats', 'score'])

    y_train_flat = y2.values.ravel()
    y_test_flat = y_test.values.ravel()

    test_acc, train_acc = [], []
    test_auc, train_auc = [], []
    res, best_clfs, best_feats, best_params = [], [], [], []

    for i in num_feats:
        print(i)
        # tr() receives fresh copies so it cannot alter X2 / y2.
        search, fea_ = tr(pd.DataFrame(X2, copy=True),
                          pd.DataFrame(y2, copy=True),
                          modality, f_sel, 'none', feat_selected[0:i])
        res.append(search.best_score_)
        best_feats.append(fea_)
        best_params.append([search.best_params_, i])
        for param, score in zip(search.cv_results_['params'],
                                search.cv_results_['mean_test_score']):
            row = np.array([param[key] for key in param_keys] + [i, score])
            hmap_data = np.vstack((hmap_data, row))

        # Refit a plain classifier with the winning parameters so that
        # .score() reports accuracy regardless of the metric used by tr().
        chosen = search.best_params_
        if is_cnv:
            # TRY WITH/WITHOUT class_weight='balanced'; would also need
            # changing in CV_2.
            clf = DecisionTreeClassifier(
                min_samples_split=chosen['min_samples_split'],
                min_samples_leaf=chosen['min_samples_leaf'],
                max_features=chosen['max_features'],
                class_weight='balanced')
        else:
            clf = svm.SVC(gamma='auto', class_weight='balanced',
                          C=chosen['C'], kernel=chosen['kernel'],
                          probability=True)
        X_fit = X2[fea_]
        X_eval = X_test[fea_]
        clf.fit(X_fit, y_train_flat)
        best_clfs.append(clf)  # this is the list the final clf is picked from

        train_acc.append(clf.score(X_fit, y2))
        test_acc.append(clf.score(X_eval, y_test))

        # Continuous scores for the ROC curves: class-1 probability for the
        # tree, signed margin for the SVM.
        if is_cnv:
            train_scores = clf.predict_proba(X_fit)[:, 1]
            test_scores = clf.predict_proba(X_eval)[:, 1]
        else:
            train_scores = clf.decision_function(X_fit).ravel()
            test_scores = clf.decision_function(X_eval).ravel()
        fpr, tpr, _ = roc_curve(y_train_flat, train_scores)
        train_auc.append(auc(fpr, tpr))
        fpr, tpr, _ = roc_curve(y_test_flat, test_scores)
        test_auc.append(auc(fpr, tpr))

    print(res)
    print(best_feats)
    ndx = np.argmax(res)
    print(hmap_data)
    print('acc', test_acc[ndx])
    print('auc', test_auc[ndx])
    print(best_params[ndx])
    print('max', res[ndx])

    fig = plt.figure()
    ax1 = fig.add_axes([0.1, 0.6, 0.85, .2], ylim=(0, 1))
    ax2 = fig.add_axes([0.1, 0.1, 0.85, .2], ylim=(0, 1))
    # NOTE(review): titles say 'CNV' even when another modality is plotted.
    ax1.plot(num_feats, train_acc, 'r', label='train')
    ax1.plot(num_feats, test_acc, 'b', label='test')
    ax1.set_title('CNV Accuracy', fontsize=15)
    ax1.set_xlabel('Number of Features', fontsize=10)
    ax1.set_ylabel('Accuracy', fontsize=10)
    # 'bottom left' is not a valid legend location; 'lower left' is.
    ax1.legend(loc='lower left')
    ax2.plot(num_feats, train_auc, 'r')
    ax2.plot(num_feats, test_auc, 'b')
    ax2.set_title('CNV AUC', fontsize=15)
    ax2.set_xlabel('Number of Features', fontsize=10)
    ax2.set_ylabel('AUC', fontsize=10)
    plt.show()
    return best_clfs[ndx], best_feats[ndx], hmap_data
y : datatype A dataframe of the response. model : datatype A fitted SVC model. Returns ------- plot A matplotlib plot of "model"s decision function. """ # we need a grid to help us plot the decision function xx1, xx2 = np.meshgrid(np.linspace(X.min()[0]-1, X.max()[0]+1, 200), np.linspace(X.min()[1]-1, X.max()[1]+1, 200)) # use the model to calculate predictions across the grid Z = model.decision_function(np.c_[xx1.ravel(), xx2.ravel()]) Z = Z.reshape(xx1.shape) # make the plot plt.subplots(1,1,figsize=(8,8)) plt.scatter(X[X.columns[0]], X[X.columns[1]], s=140, ec='k', c=y[y.columns[0]], zorder=3) # plot raw data plt.pcolormesh(xx1, xx2, -Z, cmap='RdBu', zorder=0) # plot decision function plt.ylim(0, 5) plt.xlim(0, 5) plt.xlabel(X.columns[0]) plt.ylabel(X.columns[1]) #### <font color="green">Solution 3.4</font> df = pd.DataFrame({'feature_1': [3, 2, 4, 1, 2, 4, 4, 3], 'feature_2': [4, 2, 4, 4, 1, 3, 1, 1], 'response': ['red', 'red', 'red', 'red', 'blue', 'blue', 'blue', 'red']})
class Model:
    """Pair a scikit-learn style estimator with a ``DictVectorizer``.

    Inputs to ``fit``/``predict``/``score`` are passed through the vectorizer
    before they reach the wrapped estimator. ``Model()`` with no arguments
    creates an empty shell to be filled by :meth:`load`.
    """

    model = None
    vectorizer = None

    def __init__(self, model_type=None, model_params=""):
        """Build the estimator named by ``model_type``.

        ``model_params`` is a string of constructor arguments (for example
        ``'kernel="linear"'``) used by the svm, knn, log_regression and
        perceptron types. An unsupported ``model_type`` is reported on stderr
        and leaves ``self.model`` as ``None``.
        """
        if model_type is None:
            self.model = None
            self.vectorizer = None
            return

        # SECURITY NOTE: model_params is spliced into source text and passed
        # to eval(). Only ever call this with trusted, programmer-supplied
        # parameter strings.
        if model_type == "baseline":
            self.model = baseline.Baseline()
        elif model_type == "svm":
            self.model = eval("SVC(" + model_params + ")")
        elif model_type == "knn":
            self.model = eval("KNeighborsClassifier(" + model_params + ")")
        elif model_type == "naive_bayes":
            self.model = MultinomialNB()
        elif model_type == "decision_trees":
            self.model = DecisionTreeClassifier(random_state=0)
        elif model_type == "log_regression":
            self.model = eval("LogisticRegression(" + model_params + ")")
        elif model_type == "perceptron":
            self.model = eval("Perceptron(" + model_params + ")")
        else:
            sys.stderr.write(
                "Model of type " + model_type + " is not supported.\n")

        self.vectorizer = DictVectorizer(sparse=True)

    def fit(self, X, y):
        """Fit the vectorizer on ``X`` and the estimator on the result."""
        X = self.vectorizer.fit_transform(X)
        self.model.fit(X, y)

    def predict(self, x):
        """Return the estimator's predictions for the vectorized ``x``."""
        x = self.vectorizer.transform(x)
        return self.model.predict(x)

    def predict_proba(self, x):
        """Return the estimator's class probabilities for ``x``."""
        x = self.vectorizer.transform(x)
        return self.model.predict_proba(x)

    def predict_loss(self, X):
        """Return a per-sample loss score.

        For a ``Perceptron`` this is the negated decision function; for every
        other estimator it is the first column of ``predict_proba``.
        """
        if self.model.__class__.__name__ == "Perceptron":
            X = self.vectorizer.transform(X)
            return -self.model.decision_function(X)

        probs = self.predict_proba(X)
        return probs[:, 0]

    def score(self, X, y):
        """Return the estimator's ``score`` on the vectorized ``X``."""
        X = self.vectorizer.transform(X)
        return self.model.score(X, y)

    def save(self, file_path):
        """Pickle ``(model, vectorizer)`` to ``file_path``."""
        # Binary mode: pickle data is bytes, and text mode corrupts it on
        # platforms that translate line endings.
        with open(file_path, "wb") as f:
            cPickle.dump((self.model, self.vectorizer), f)

    def load(self, file_path):
        """Restore ``(model, vectorizer)`` from a file written by ``save``.

        Unpickling executes code from the file; only load trusted files.
        """
        with open(file_path, "rb") as f:
            (self.model, self.vectorizer) = cPickle.load(f)

    def print_params(self, file_path):
        """Write the tree in Graphviz format if the model is a decision tree.

        The file is created (empty) for every other model type.
        """
        # export_graphviz returns None when given out_file, so its result
        # must not replace the handle that has to be closed.
        with open(file_path, "w") as f:
            if self.model.__class__.__name__ == "DecisionTreeClassifier":
                tree.export_graphviz(self.model, out_file=f)
'''
DECISION TREES:: nonparametric discriminative learning method. The goal is to
build a binary-tree based model that predicts the target value by learning
simple decision rules from the data. Given training data (X, y), a decision
tree recursively partitions the space such that samples with the same labels
are grouped together. The controlling parameter is max_depth. The loss
functions used are gini/entropy, which measure the impurity of data splits.
'''
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(max_depth=5)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

# Evaluate the classifier on every point of the (xx, yy) mesh, preferring a
# decision function and falling back to the class-1 probability.
grid_points = np.c_[xx.ravel(), yy.ravel()]
if not hasattr(clf, "decision_function"):
    Z = clf.predict_proba(grid_points)[:, 1]
else:
    Z = clf.decision_function(grid_points)

ax = plt.subplot(1, nC, 2)

# Colour the mesh by the classifier output.
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

# Overlay training points, then the (more transparent) test points.
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)

ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
else: ds.ix[i, 'Y3'] = 1 features = ds.loc[:, 'X3':'X7'] X = features.values y = ds['Y3'].values ###################(2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) ###################(3) from sklearn.ensemble import RandomForestClassifier forest = RandomForestClassifier(n_estimators=5, random_state=2) from sklearn.tree import DecisionTreeClassifier tree = DecisionTreeClassifier(max_depth=4, random_state=0) tree.fit(X_train, y_train) ### f1 score from sklearn.metrics import f1_score print("트리 모델의 f1 score: {:.2f}".format(f1_score(y_test, pred_tree))) print("랜덤포레스트 회귀 모델의 f1 score: {:.2f}".format(f1_score(y_test, pred_forest))) from sklearn.metrics import roc_auc_score rf_auc = roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]) tree_auc = roc_auc_score(y_test, tree.decision_function(X_test)) print("랜덤 포레스트의 AUC: {:.3f}".format(rf_auc)) print("SVC의 AUC: {:.3f}".format(tree_auc))
c1 = 1
y1_Best_data = np.array([])
yhat1_Best_data = np.array([])
for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    #clf_Rf = RandomForestClassifier(max_depth=10, random_state=0)
    clf_Rf = DecisionTreeClassifier(max_depth=10)
    clf_Rf.fit(X_train, y_train)
    c1 += 1

    # Pool true labels and predictions over all folds for the confusion
    # matrix computed after the loop.
    y1_Best_data = np.concatenate([y1_Best_data, y_test])
    yhat1 = clf_Rf.predict(X_test)
    yhat1_Best_data = np.concatenate([yhat1_Best_data, yhat1])

    # DecisionTreeClassifier has no decision_function; use the probability of
    # classes_[1], which is the class passed as pos_label below.
    # NOTE(review): the per-fold placement of this ROC plot is assumed; the
    # original indentation was lost.
    score = clf_Rf.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, score,
                                    pos_label=clf_Rf.classes_[1])
    metrics.RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
    plt.show()

CM1 = confusion_matrix(y1_Best_data, yhat1_Best_data)
CM1 = CM1.astype(float)
# Rows of the confusion matrix are true labels, columns are predictions.
TN1 = CM1[0][0]
FN1 = CM1[1][0]
TP1 = CM1[1][1]
FP1 = CM1[0][1]
Sen1 = np.divide(TP1, (TP1 + FN1))
print('Sen1 of Best_data with random forest is: ', Sen1)
Spec1 = np.divide(TN1, (FP1 + TN1))
class Model:
    """Pair a scikit-learn style estimator with a ``DictVectorizer``.

    Inputs to ``fit``/``predict``/``score`` are passed through the vectorizer
    before they reach the wrapped estimator. ``Model()`` with no arguments
    creates an empty shell to be filled by :meth:`load`.
    """

    model = None
    vectorizer = None

    def __init__(self, model_type=None, model_params=""):
        """Build the estimator named by ``model_type``.

        ``model_params`` is a string of constructor arguments (for example
        ``'kernel="linear"'``) used by the svm, knn, log_regression and
        perceptron types. An unsupported ``model_type`` is reported on stderr
        and leaves ``self.model`` as ``None``.
        """
        if model_type is None:
            self.model = None
            self.vectorizer = None
            return

        # SECURITY NOTE: model_params is spliced into source text and passed
        # to eval(). Only ever call this with trusted, programmer-supplied
        # parameter strings.
        if model_type == "baseline":
            self.model = baseline.Baseline()
        elif model_type == "svm":
            self.model = eval("SVC(" + model_params + ")")
        elif model_type == "knn":
            self.model = eval("KNeighborsClassifier(" + model_params + ")")
        elif model_type == "naive_bayes":
            self.model = MultinomialNB()
        elif model_type == "decision_trees":
            self.model = DecisionTreeClassifier(random_state=0)
        elif model_type == "log_regression":
            self.model = eval("LogisticRegression(" + model_params + ")")
        elif model_type == "perceptron":
            self.model = eval("Perceptron(" + model_params + ")")
        else:
            sys.stderr.write(
                "Model of type " + model_type + " is not supported.\n")

        self.vectorizer = DictVectorizer(sparse=True)

    def fit(self, X, y):
        """Fit the vectorizer on ``X`` and the estimator on the result."""
        X = self.vectorizer.fit_transform(X)
        self.model.fit(X, y)

    def predict(self, x):
        """Return the estimator's predictions for the vectorized ``x``."""
        x = self.vectorizer.transform(x)
        return self.model.predict(x)

    def predict_proba(self, x):
        """Return the estimator's class probabilities for ``x``."""
        x = self.vectorizer.transform(x)
        return self.model.predict_proba(x)

    def predict_loss(self, X):
        """Return a per-sample loss score.

        For a ``Perceptron`` this is the negated decision function; for every
        other estimator it is the first column of ``predict_proba``.
        """
        if self.model.__class__.__name__ == "Perceptron":
            X = self.vectorizer.transform(X)
            return -self.model.decision_function(X)

        probs = self.predict_proba(X)
        return probs[:, 0]

    def score(self, X, y):
        """Return the estimator's ``score`` on the vectorized ``X``."""
        X = self.vectorizer.transform(X)
        return self.model.score(X, y)

    def save(self, file_path):
        """Pickle ``(model, vectorizer)`` to ``file_path``."""
        # Binary mode: pickle data is bytes, and text mode corrupts it on
        # platforms that translate line endings.
        with open(file_path, "wb") as f:
            cPickle.dump((self.model, self.vectorizer), f)

    def load(self, file_path):
        """Restore ``(model, vectorizer)`` from a file written by ``save``.

        Unpickling executes code from the file; only load trusted files.
        """
        with open(file_path, "rb") as f:
            (self.model, self.vectorizer) = cPickle.load(f)

    def print_params(self, file_path):
        """Write the tree in Graphviz format if the model is a decision tree.

        The file is created (empty) for every other model type.
        """
        # export_graphviz returns None when given out_file, so its result
        # must not replace the handle that has to be closed.
        with open(file_path, "w") as f:
            if self.model.__class__.__name__ == "DecisionTreeClassifier":
                tree.export_graphviz(self.model, out_file=f)
np.set_printoptions(precision=2)

# Two figures for the same confusion matrix: raw counts first, then the
# normalized version.
plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      title='Confusion matrix, without normalization')

plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      normalize=True,
                      title='Normalized confusion matrix')

plt.show()

#BASIC TEST
# A handful of hand-written sentences pushed through the same feature
# extraction and vectorizer as the training data.
basic_test = [
    "This is just a long sentence, to make sure that it's not how long the sentence is that matters the most",
    'I just love when you make me feel like shit',
    'Life is odd',
    'Just got back to the US !',
    "Isn'it great when your girlfriend dumps you ?",
    "I love my job !",
    'I love my son !',
]

feature_basictest = np.array(
    [feature_extraction.getallfeatureset(tweet) for tweet in basic_test]
)
feature_basictestvec = vector.transform(feature_basictest)

print(basic_test)
print(classifier.predict(feature_basictestvec))
print(classifier.decision_function(feature_basictestvec))
# Input feature layout shared by every model trained here: two joint values,
# gripper xyz, object xyz and the object-minus-gripper offset in x and y.
_TREE_FEATURE_NAMES = [
    'j1', 'j2', 'gx', 'gy', 'gz', 'ox', 'oy', 'oz', 'xrel', 'yrel'
]


def _tree_to_yaml(fitted_tree):
    """Return the structure of a fitted sklearn tree as plain Python values."""
    inner = fitted_tree.tree_
    return {
        'max_depth': inner.max_depth,
        'values': inner.value.tolist(),
        'n_nodes': inner.node_count,
        'children_left': inner.children_left.tolist(),
        'children_right': inner.children_right.tolist(),
        'feature': inner.feature.tolist(),
        'threshold': inner.threshold.tolist(),
    }


def train(object_name, data_dir, output_dir, train_type, classifier_type,
          learned_model=None, debug=False):
    """Train (or re-evaluate) models on recorded transitions and export them.

    The transitions are read with ``load_data_file(object_name, data_dir)``.

    * ``train_type`` of ``'gripper_status'`` or ``'pick_success_probability'``
      fits one binary classifier (``DecisionTreeClassifier`` when
      ``classifier_type == 'DTC'``, otherwise ``LogisticRegression``).
    * a ``train_type`` containing ``'next_state'`` fits one model per action
      and per predicted quantity; ``'action-…'`` / ``'pred-…'`` components of
      ``train_type`` (separated by ``_``) restrict which ones.

    Newly fitted models are dumped with ``joblib`` under ``output_dir`` and
    their parameters written through ``write_config_in_file``. When
    ``learned_model`` is given it is used instead of fitting.

    Parameters
    ----------
    object_name, data_dir : forwarded to ``load_data_file``.
    output_dir : directory prefix for the ``.pkl``/``.yaml``/graph outputs.
    train_type : selects the training mode as described above.
    classifier_type : one of 'DTC', 'ridge', 'SVR', 'DTR', 'DTRM', 'Earth',
        'AdaLinear'; anything else means logistic / linear regression.
    learned_model : optional pre-trained model (single estimator for the
        classifier modes, ``model[action][p]`` for ``next_state``).
    debug : when true, print every training sample the model gets wrong.

    Returns
    -------
    The fitted classifier, a ``{action: {p: model}}`` dict for
    ``next_state``, or ``None`` when ``train_type`` matches no mode.
    """
    from sklearn import linear_model, tree
    from sklearn.svm import SVR
    from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
    from sklearn.ensemble import AdaBoostRegressor
    if classifier_type == 'Earth':
        from pyearth import Earth
    import numpy as np

    # graphviz is optional: without it only the rendered graph is skipped.
    have_graphviz = True
    try:
        import graphviz
    except ImportError:
        have_graphviz = False

    ans = None
    saso_data = load_data_file(object_name, data_dir)

    if train_type == 'gripper_status':
        action_str = 'gs'
        actions = range(CLOSE_ACTION_ID + 1)
        x = []
        y = []
        x_index = []
        for action in actions:
            for sasor in saso_data[action]:
                #x_entry = sasor['touch_prev'] + sasor['init_joint_values']
                # The concatenation builds a new list, so the appends below
                # do not modify the recorded data.
                x_entry = (sasor['next_joint_values'] + sasor['next_gripper']
                           + sasor['next_object'])
                x_entry.append(sasor['next_object'][0]
                               - sasor['next_gripper'][0])
                x_entry.append(sasor['next_object'][1]
                               - sasor['next_gripper'][1])
                x.append(x_entry)
                x_index.append(sasor['index'])
                # label 1 = sample produced by the close action, 0 = open
                y.append(1 if action == CLOSE_ACTION_ID else 0)

    if train_type == 'pick_success_probability':
        action_str = repr(PICK_ACTION_ID)
        x = []
        y = []
        x_index = []
        for sasor in saso_data[PICK_ACTION_ID]:
            #x_entry = sasor['touch_prev'] + sasor['init_joint_values']
            x_entry = (sasor['init_joint_values'] + sasor['init_gripper'][0:3]
                       + sasor['init_object'][0:3])
            x_entry.append(sasor['init_object'][0] - sasor['init_gripper'][0])
            x_entry.append(sasor['init_object'][1] - sasor['init_gripper'][1])
            x.append(x_entry)
            x_index.append(sasor['index'])
            y.append(1 if sasor['reward'] > 0 else 0)

    if train_type in ['pick_success_probability', 'gripper_status']:
        base_path = output_dir + '/' + classifier_type + '-' + action_str
        if learned_model is not None:
            logistic = learned_model
        else:
            print(classifier_type)
            if classifier_type == 'DTC':
                logistic = DecisionTreeClassifier(criterion='entropy')
            else:
                logistic = linear_model.LogisticRegression(max_iter=400,
                                                           C=1.0)
            logistic.fit(x, y)
            joblib.dump(logistic, base_path + '.pkl')
        ans = logistic
        print(logistic.score(x, y))
        print(logistic.get_params())
        print(len(x))
        if classifier_type != 'DTC':
            print(logistic.coef_)
            print(logistic.intercept_)
            yaml_out = {}
            yaml_out['coef'] = logistic.coef_.tolist()[0]
            yaml_out['intercept'] = logistic.intercept_.tolist()[0]
            write_config_in_file(base_path + ".yaml", yaml_out)
        else:
            print(logistic.feature_importances_)
            if have_graphviz:
                dot_data = tree.export_graphviz(
                    logistic,
                    out_file=None,
                    feature_names=_TREE_FEATURE_NAMES,
                    filled=True)
                graph = graphviz.Source(dot_data)
                graph.render(base_path)
            # Arrays are converted to lists so this file has the same plain
            # format as the per-action trees written in the next_state mode.
            write_config_in_file(base_path + ".yaml", _tree_to_yaml(logistic))

        if debug:
            for i, sample in enumerate(x):
                y_bar = logistic.predict([sample])
                if y_bar != y[i]:
                    print(x_index[i])
                    print(sample)
                    print(y[i])
                    print(logistic.predict_proba([sample]))
                    if classifier_type != 'DTC':
                        print(logistic.decision_function([sample]))
                        # Recompute the logistic probability by hand as a
                        # cross-check: 1 / (1 + exp(-(w.x + b))).
                        prob = (np.dot(logistic.coef_[0], sample)
                                + logistic.intercept_[0])
                        print(prob)
                        prob *= -1
                        prob = np.exp(prob)
                        prob += 1
                        prob = np.reciprocal(prob)
                        print(prob)

    if 'next_state' in train_type:
        actions = range(10)
        # predictions can be 18, 7 for gripper pose, 7 for objct pose
        # 2 for joint values
        # 2 for touch values
        predictions = range(NUM_PREDICTIONS)
        for s in train_type.split('_'):
            if 'action' in s:
                actions = s.split('-')[1:]
            if 'pred' in s:
                predictions = s.split('-')[1:]
        ans = {}
        for action_ in actions:
            action = int(action_)
            x = []
            x_index = []
            # One target list / model slot per predictable quantity.
            y = [[] for _ in range(NUM_PREDICTIONS)]
            y_c = [[] for _ in range(NUM_PREDICTIONS)]
            l_reg = [''] * NUM_PREDICTIONS
            for sasor in saso_data[action]:
                if sasor['reward'] > -999:  #discard invalid states
                    x_entry = (sasor['init_joint_values']
                               + sasor['init_gripper'][0:3]
                               + sasor['init_object'][0:3])
                    x_entry.append(sasor['init_object'][0]
                                   - sasor['init_gripper'][0])
                    x_entry.append(sasor['init_object'][1]
                                   - sasor['init_gripper'][1])
                    x.append(x_entry)
                    x_index.append(sasor['index'])
                    for p_ in predictions:
                        p = int(p_)
                        y[p].append(get_prediction_value(sasor, p))
                        y_default = get_default_value(sasor, p)
                        y_c[p].append(is_correct(p, y[p][-1], y_default))
                        # Disabled sanity check kept for reference:
                        #   check_array(x); check_array(y[p]) and, on
                        #   failure, print x[-1], y[p][-1], sasor['index'].
            print(len(x))
            ans[action] = {}
            for p_ in predictions:
                p = int(p_)
                base_path = (output_dir + '/' + classifier_type + "-"
                             + repr(action) + "-" + repr(p))
                if learned_model is not None:
                    l_reg[p] = learned_model[action][p]
                else:
                    if classifier_type == 'ridge':
                        l_reg[p] = linear_model.Ridge(alpha=0.5,
                                                      normalize=True)
                    elif classifier_type == 'SVR':
                        l_reg[p] = SVR(epsilon=0.2)
                    elif classifier_type in ['DTR', 'DTRM']:
                        l_reg[p] = DecisionTreeRegressor()
                    elif classifier_type == 'DTC':
                        l_reg[p] = DecisionTreeClassifier()
                    elif classifier_type == 'Earth':
                        l_reg[p] = Earth()
                    elif classifier_type == 'AdaLinear':
                        l_reg[p] = AdaBoostRegressor(
                            linear_model.LinearRegression())
                    else:
                        l_reg[p] = linear_model.LinearRegression()
                    if classifier_type == 'DTRM':
                        # multi-output tree: all quantities at once
                        l_reg[p].fit(x, np.transpose(np.array(y)))
                    elif classifier_type == 'DTC':
                        l_reg[p].fit(x, y_c[p])
                    else:
                        l_reg[p].fit(x, y[p])
                    joblib.dump(l_reg[p], base_path + '.pkl')
                ans[action][p] = l_reg[p]

                if classifier_type == 'DTRM':
                    print(repr(action) + " " + repr(p) + " " + repr(
                        l_reg[p].score(x, np.transpose(np.array(y)))))
                elif classifier_type == 'DTC':
                    print(repr(action) + " " + repr(p) + " " + repr(
                        l_reg[p].score(x, y_c[p])))
                else:
                    print(repr(action) + " " + repr(p) + " " + repr(
                        l_reg[p].score(x, y[p])))
                print(l_reg[p].get_params())
                if classifier_type not in [
                        'SVR', 'DTR', 'DTRM', 'AdaLinear', 'DTC'
                ]:
                    print(l_reg[p].coef_)
                if classifier_type not in [
                        'DTR', 'DTRM', 'AdaLinear', 'DTC', 'Earth'
                ]:
                    print(l_reg[p].intercept_)
                if classifier_type in ['Earth']:
                    for sample in x:
                        predict_earth(l_reg[p], sample)
                    print(l_reg[p].summary())

                if learned_model is None:
                    if classifier_type in ['DTR', 'DTRM', 'AdaLinear', 'DTC']:
                        print(l_reg[p].feature_importances_)
                        if have_graphviz:
                            dot_data = tree.export_graphviz(
                                l_reg[p],
                                out_file=None,
                                feature_names=_TREE_FEATURE_NAMES,
                                filled=True)
                            graph = graphviz.Source(dot_data)
                            graph.render(base_path)
                        write_config_in_file(base_path + ".yaml",
                                             _tree_to_yaml(l_reg[p]))
                    if classifier_type in ['Earth']:
                        yaml_out = get_yaml_earth(l_reg[p])
                        write_config_in_file(base_path + ".yaml", yaml_out)

                if classifier_type == 'DTRM':
                    # The multi-output tree already covers every quantity:
                    # show one sample prediction and stop iterating.
                    i = 0
                    y_bar = l_reg[p].predict([x[i]])
                    print(x_index[i])
                    print(x[i])
                    y_t = np.transpose(np.array(y))
                    print(repr(y_t[i]) + ' Prediction ' + repr(y_bar))
                    break

                if debug:
                    for i, sample in enumerate(x):
                        y_bar = l_reg[p].predict([sample])
                        if classifier_type == 'DTC':
                            if y_bar != y_c[p][i]:
                                print(x_index[i])
                                print(sample)
                                print(y_c[p][i])
                                print(y[p][i])
                                print(l_reg[p].predict_proba([sample]))
                        else:
                            if is_correct(p, y_bar, y[p][i]) == 0:
                                print(x_index[i])
                                print(sample)
                                print(repr(y[p][i]) + ' Prediction '
                                      + repr(y_bar))
    return ans