def test_thresholded_scorers_multilabel_indicator_data():
    """Test that the 'roc_auc' scorer works with multilabel-indicator targets.

    Four estimator flavours are exercised (multi-output ``predict_proba``,
    multi-output ``decision_function``, multilabel ``predict_proba`` and
    multilabel ``decision_function``).  In each case the value returned by
    the scorer must match ``roc_auc_score`` computed by hand from the
    estimator's own outputs.
    """
    X, y = make_multilabel_classification(return_indicator=True,
                                          allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    # predict_proba yields one array per output; keep the last column of each.
    # np.vstack needs a real sequence: generator input is deprecated since
    # NumPy 1.16 and rejected by later releases.
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    # No such estimator is available here, so one is emulated: predict_proba
    # is hidden and a decision_function built from it is attached instead.
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]

    y_proba = clf.decision_function(X_test)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack(list(y_proba)).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)
def test_thresholded_scorers_multilabel_indicator_data():
    # The 'roc_auc' scorer must accept multilabel-indicator targets, both for
    # multilabel and for multi-output multi-class classifiers.  Every section
    # compares the scorer's value with roc_auc_score computed by hand.
    X, y = make_multilabel_classification(allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # --- multi-output multi-class estimator exposing predict_proba ---
    tree_clf = DecisionTreeClassifier()
    tree_clf.fit(X_train, y_train)
    per_output_proba = tree_clf.predict_proba(X_test)
    from_scorer = get_scorer('roc_auc')(tree_clf, X_test, y_test)
    last_columns = [proba[:, -1] for proba in per_output_proba]
    by_hand = roc_auc_score(y_test, np.vstack(last_columns).T)
    assert_almost_equal(from_scorer, by_hand)

    # --- multi-output multi-class estimator exposing decision_function ---
    # TODO Is there any yet?
    # Emulated: predict_proba is hidden and a decision_function derived from
    # it is attached to the fitted tree.
    faux_clf = DecisionTreeClassifier()
    faux_clf.fit(X_train, y_train)
    faux_clf._predict_proba = faux_clf.predict_proba
    faux_clf.predict_proba = None
    faux_clf.decision_function = (
        lambda X: [proba[:, 1] for proba in faux_clf._predict_proba(X)])

    decisions = faux_clf.decision_function(X_test)
    from_scorer = get_scorer('roc_auc')(faux_clf, X_test, y_test)
    by_hand = roc_auc_score(y_test, np.vstack(list(decisions)).T)
    assert_almost_equal(from_scorer, by_hand)

    # --- multilabel estimator exposing predict_proba ---
    ovr_tree = OneVsRestClassifier(DecisionTreeClassifier())
    ovr_tree.fit(X_train, y_train)
    from_scorer = get_scorer('roc_auc')(ovr_tree, X_test, y_test)
    by_hand = roc_auc_score(y_test, ovr_tree.predict_proba(X_test))
    assert_almost_equal(from_scorer, by_hand)

    # --- multilabel estimator exposing decision_function ---
    ovr_svc = OneVsRestClassifier(LinearSVC(random_state=0))
    ovr_svc.fit(X_train, y_train)
    from_scorer = get_scorer('roc_auc')(ovr_svc, X_test, y_test)
    by_hand = roc_auc_score(y_test, ovr_svc.decision_function(X_test))
    assert_almost_equal(from_scorer, by_hand)
from sklearn.ensemble import IsolationForest

# In[101]:

# Isolation forest of 50 trees, configured with a contamination of 0.1.
model = IsolationForest(n_estimators=50,
                        max_samples='auto',
                        contamination=0.1,
                        max_features=1.0)

# In[102]:

# Pin the columns the forest is trained on.  The result columns added below
# ('scores', 'anomaly') must never be fed back into the model, otherwise the
# feature count no longer matches what fit() saw.
feature_columns = list(df.columns)
model.fit(df[feature_columns])

# In[103]:

df['scores'] = model.decision_function(df[feature_columns])
df['anomaly'] = model.predict(df[feature_columns])
df.head(20)

# In[104]:

# Keep the rows that predict() labelled -1.
anomaly = df.loc[df['anomaly'] == -1]
anomaly_index = list(anomaly.index)
print(anomaly)

# # support vector machine model

# In[105]:

model = SVC()
Beispiel #4
0
def do_cv(X, y, X_test, y_test, f_sel, modality,n_feats,split):
    """Sweep the number of selected features and keep the best classifier.

    Features are ranked once with ``select_features``.  For every feature
    count ``i`` in ``range(2, n_feats, split)`` the top ``i`` features are
    handed to ``tr`` (which, judging by the attributes read here, returns a
    fitted grid-search-like object plus the features it used -- confirm
    against ``tr``).  A final classifier is then refitted on the full
    training data with the best hyper-parameters, and its train/test accuracy
    and ROC AUC are recorded.  The four curves are plotted before returning.

    Parameters
    ----------
    X, y : training features and labels (copied, never modified here).
    X_test, y_test : held-out features and labels (column-indexable by the
        feature names returned by ``tr``).
    f_sel : feature-selection setting, forwarded to ``select_features``/``tr``.
    modality : str
        ``'CNV'`` refits a ``DecisionTreeClassifier``; anything else refits
        an ``svm.SVC``.
    n_feats : int
        Exclusive upper bound on the number of features tried.
    split : int
        Step between successive feature counts.

    Returns
    -------
    tuple
        ``(best_clf, best_features, hmap_data)`` where "best" is the feature
        count with the highest ``best_score_`` and ``hmap_data`` is an array
        with a header row followed by one row per
        (hyper-parameters, feature count, mean CV score).
    """
    max_n_feats = n_feats
    is_cnv = modality == 'CNV'
    X2 = pd.DataFrame(X, copy=True)  # private copy of the training features
    y2 = pd.DataFrame(y, copy=True)  # private copy of the training labels
    feat_selected = select_features(X, y, modality, f_sel, max_n_feats)

    test_acc = []
    train_acc = []
    test_auc = []
    train_auc = []
    num_feats = []

    res = []
    best_clfs = []
    best_feats = []
    best_params = []
    if is_cnv:
        hmap_data = np.array(['min_samples_split','min_samples_leaf','max_features','num_feats','score'])
    else:
        hmap_data = np.array(['kernel', 'C', 'num_feats', 'score'])

    def ranking_scores(fitted, data):
        # Continuous scores for the ROC curve: the tree exposes the
        # probability of the second class, the SVC its decision function.
        if is_cnv:
            return fitted.predict_proba(data)[:, 1]
        return fitted.decision_function(data).ravel()

    for i in range(2, max_n_feats, split):
        print(i)
        num_feats.append(i)
        # Fresh copies per iteration so tr() cannot leak changes between runs.
        X3 = pd.DataFrame(X2, copy=True)
        y3 = pd.DataFrame(y2, copy=True)

        search, fea_ = tr(X3, y3, modality, f_sel, 'none', feat_selected[0:i])
        res.append(search.best_score_)
        best_feats.append(fea_)
        best_params.append([search.best_params_, i])
        for param, score in zip(search.cv_results_['params'],
                                search.cv_results_['mean_test_score']):
            if is_cnv:
                row = [param['min_samples_split'], param['min_samples_leaf'],
                       param['max_features'], i, score]
            else:
                row = [param['kernel'], param['C'], i, score]
            hmap_data = np.vstack((hmap_data, np.array(row)))

        # Refit a plain classifier with the winning hyper-parameters so that
        # score() reports accuracy whatever metric the search optimised.
        # TRY WITH WITHOUT CLASS WEIGHT = BALANCED!!!!! ALSO WOULD NEED TO CHANGE IT IN CV_2
        if is_cnv:
            clf = DecisionTreeClassifier(min_samples_split=search.best_params_['min_samples_split'],
                                         min_samples_leaf=search.best_params_['min_samples_leaf'],
                                         max_features=search.best_params_['max_features'],
                                         class_weight='balanced')
        else:
            clf = svm.SVC(gamma='auto', class_weight='balanced',
                          C=search.best_params_['C'],
                          kernel=search.best_params_['kernel'],
                          probability=True)
        X_fit = X2[fea_]
        clf.fit(X_fit, y2.values.ravel())
        best_clfs.append(clf)  # this is list from which final clf is selected

        # Train/test accuracy and ROC AUC for this feature count.
        X_test2 = X_test[fea_]
        train_acc.append(clf.score(X_fit, y2))
        test_acc.append(clf.score(X_test2, y_test))
        c1, c2, _ = roc_curve(y2.values.ravel(), ranking_scores(clf, X_fit))
        train_auc.append(auc(c1, c2))
        c1, c2, _ = roc_curve(y_test.values.ravel(), ranking_scores(clf, X_test2))
        test_auc.append(auc(c1, c2))

    print(res)
    print(best_feats)
    ndx = np.argmax(res)
    print(hmap_data)
    print('acc',test_acc[ndx])
    print('auc',test_auc[ndx])
    print(best_params[ndx])
    print('max',res[ndx])

    fig = plt.figure()
    ax1 = fig.add_axes([0.1, 0.6, 0.85, .2], ylim=(0, 1))
    ax2 = fig.add_axes([0.1, 0.1, 0.85, .2], ylim=(0, 1))

    ax1.plot(num_feats, train_acc, 'r',label='train')
    ax1.plot(num_feats, test_acc, 'b',label='test')
    # Title follows the modality actually evaluated (was hard-coded to CNV).
    ax1.set_title('{} Accuracy'.format(modality), fontsize=15)
    ax1.set_xlabel('Number of Features', fontsize=10)
    ax1.set_ylabel('Accuracy', fontsize=10)
    # 'bottom left' is not a Matplotlib location string; 'lower left' is.
    ax1.legend(loc='lower left')

    ax2.plot(num_feats, train_auc, 'r')
    ax2.plot(num_feats, test_auc, 'b')
    ax2.set_title('{} AUC'.format(modality), fontsize=15)
    ax2.set_xlabel('Number of Features', fontsize=10)
    ax2.set_ylabel('AUC', fontsize=10)

    plt.show()

    return best_clfs[ndx],best_feats[ndx],hmap_data
    y : datatype
        A dataframe of the response.
    model : datatype
        A fitted SVC model.
    
    Returns
    -------
    plot
        A matplotlib plot of "model"s decision function.
    """
    
    # we need a grid to help us plot the decision function
    xx1, xx2 = np.meshgrid(np.linspace(X.min()[0]-1, X.max()[0]+1, 200),
                           np.linspace(X.min()[1]-1, X.max()[1]+1, 200))
    # use the model to calculate predictions across the grid
    Z = model.decision_function(np.c_[xx1.ravel(), xx2.ravel()])
    Z = Z.reshape(xx1.shape)
    # make the plot
    plt.subplots(1,1,figsize=(8,8))
    plt.scatter(X[X.columns[0]], X[X.columns[1]], s=140, ec='k', c=y[y.columns[0]], zorder=3) # plot raw data
    plt.pcolormesh(xx1, xx2, -Z, cmap='RdBu', zorder=0) # plot decision function
    plt.ylim(0, 5)
    plt.xlim(0, 5)
    plt.xlabel(X.columns[0])
    plt.ylabel(X.columns[1])     

#### <font color="green">Solution 3.4</font>

# Toy data set: two integer features and a colour label for eight samples.
df = pd.DataFrame.from_dict(
    {
        'feature_1': [3, 2, 4, 1, 2, 4, 4, 3],
        'feature_2': [4, 2, 4, 4, 1, 3, 1, 1],
        'response': ['red', 'red', 'red', 'red',
                     'blue', 'blue', 'blue', 'red'],
    }
)
Beispiel #6
0
class Model:
    """Pair a classifier with a ``DictVectorizer`` behind one small API.

    Inputs to ``fit``/``predict``/``score`` are passed through the
    vectorizer before reaching the wrapped classifier.
    """

    # Class-level defaults; __init__ normally replaces both per instance.
    model = None
    vectorizer = None

    def __init__(self, model_type=None, model_params=""):
        """Create the wrapped classifier.

        model_type   -- one of "baseline", "svm", "knn", "naive_bayes",
                        "decision_trees", "log_regression", "perceptron";
                        None creates an empty shell (e.g. before load()).
        model_params -- constructor arguments written as source text, e.g.
                        'kernel="linear"' or 'n_neighbors=3'.  Only used by
                        the svm, knn, log_regression and perceptron types.

        An unknown model_type is reported on stderr and leaves self.model
        as None; the vectorizer is still created.
        """
        if model_type is None:
            self.model = None
            self.vectorizer = None
            return

        # SECURITY NOTE: model_params is spliced into an expression and passed
        # to eval().  It must come from a trusted source (own config/CLI),
        # never from user-supplied data.
        if model_type == "baseline":
            self.model = baseline.Baseline()
        elif model_type == "svm":
            self.model = eval("SVC(" + model_params + ")")
        elif model_type == "knn":
            self.model = eval("KNeighborsClassifier(" + model_params + ")")
        elif model_type == "naive_bayes":
            self.model = MultinomialNB()
        elif model_type == "decision_trees":
            self.model = DecisionTreeClassifier(random_state=0)
        elif model_type == "log_regression":
            self.model = eval("LogisticRegression(" + model_params + ")")
        elif model_type == "perceptron":
            self.model = eval("Perceptron(" + model_params + ")")
        else:
            # Same text the old print-to-stderr statement produced.
            sys.stderr.write("Model of type " + model_type + " is not supported.\n")

        self.vectorizer = DictVectorizer(sparse=True)

    def fit(self, X, y):
        """Fit the vectorizer on X, then the classifier on the result."""
        X = self.vectorizer.fit_transform(X)
        self.model.fit(X, y)

    def predict(self, x):
        """Return the classifier's predictions for x."""
        x = self.vectorizer.transform(x)
        return self.model.predict(x)

    def predict_proba(self, x):
        """Return the classifier's predict_proba output for x."""
        x = self.vectorizer.transform(x)
        return self.model.predict_proba(x)

    def predict_loss(self, X):
        """Return a per-sample loss-like value.

        For a Perceptron this is the negated decision function; for every
        other model it is column 0 of predict_proba.
        """
        if self.model.__class__.__name__ == "Perceptron":
            X = self.vectorizer.transform(X)
            return -self.model.decision_function(X)
        probs = self.predict_proba(X)
        return probs[:, 0]

    def score(self, X, y):
        """Return the classifier's score on (X, y)."""
        X = self.vectorizer.transform(X)
        return self.model.score(X, y)

    def save(self, file_path):
        """Pickle (model, vectorizer) to file_path."""
        # Binary mode: pickle streams are bytes and text mode may corrupt them.
        with open(file_path, "wb") as f:
            cPickle.dump((self.model, self.vectorizer), f)

    def load(self, file_path):
        """Restore (model, vectorizer) from a file written by save().

        NOTE: unpickling executes code from the file; only load trusted files.
        """
        with open(file_path, "rb") as f:
            (self.model, self.vectorizer) = cPickle.load(f)

    def print_params(self, file_path):
        """Write a Graphviz dump of a decision-tree model to file_path.

        For any other model type the file is created empty.
        """
        with open(file_path, "w") as f:
            if self.model.__class__.__name__ == "DecisionTreeClassifier":
                # Do not rebind f to the return value: export_graphviz
                # returns None when given out_file, which made the old
                # f.close() fail and left the real file open.
                tree.export_graphviz(self.model, out_file=f)
Beispiel #7
0
'''
DECISION TREES::
A nonparametric, discriminative learning method. The goal is to build a binary-tree-based model that
predicts the target value by learning simple decision rules from the data. Given training data (X, y),
a decision tree recursively partitions the space such that samples with the same labels are grouped together.

The main controlling parameter is max_depth.
The loss functions used to measure the impurity of data splits are gini/entropy.
'''
from sklearn.tree import DecisionTreeClassifier

# Fit a depth-limited tree and record its score on the test split.
clf = DecisionTreeClassifier(max_depth=5)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

# Evaluate the classifier on every point of the (xx, yy) mesh.  Estimators
# with a decision_function use it; the others fall back to column 1 of
# predict_proba.
mesh_points = np.c_[xx.ravel(), yy.ravel()]
Z = (clf.decision_function(mesh_points)
     if hasattr(clf, "decision_function")
     else clf.predict_proba(mesh_points)[:, 1])

ax = plt.subplot(1, nC, 2)

# Back to the mesh layout, then draw the surface as filled contours
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

# Overlay the training points, then the fainter testing points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
ax.scatter(X_test[:, 0], X_test[:, 1],
           c=y_test, cmap=cm_bright, alpha=0.6)

# Clamp both axes to the extent of the mesh
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
Beispiel #8
0
    else:
        ds.ix[i, 'Y3'] = 1
# Feature matrix: columns X3..X7 of ds; target: column Y3.
features = ds.loc[:, 'X3':'X7']

X = features.values
y = ds['Y3'].values

###################(2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

###################(3)

from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(n_estimators=5, random_state=2)
# The forest has to be fitted before it can predict or be scored.
forest.fit(X_train, y_train)
pred_forest = forest.predict(X_test)

from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(max_depth=4, random_state=0)
tree.fit(X_train, y_train)
pred_tree = tree.predict(X_test)

### f1 score
from sklearn.metrics import f1_score
print("트리 모델의 f1 score: {:.2f}".format(f1_score(y_test, pred_tree)))
print("랜덤포레스트 회귀 모델의 f1 score: {:.2f}".format(f1_score(y_test, pred_forest)))

from sklearn.metrics import roc_auc_score
# Both estimators are ranked by the probability of the second class:
# DecisionTreeClassifier has no decision_function, and the forest object is
# named `forest` (no `rf` is defined in this script).
rf_auc = roc_auc_score(y_test, forest.predict_proba(X_test)[:, 1])
tree_auc = roc_auc_score(y_test, tree.predict_proba(X_test)[:, 1])
print("랜덤 포레스트의 AUC: {:.3f}".format(rf_auc))
# Label corrected: this value is the tree's AUC, not an SVC's.
print("트리 모델의 AUC: {:.3f}".format(tree_auc))
Beispiel #9
0
# K-fold evaluation of a depth-10 decision tree: a ROC curve is drawn for each
# fold, and the pooled predictions feed one confusion matrix at the end.
c1 = 1  # fold counter (1-based)
y1_Best_data = np.array([])     # true labels pooled over all folds
yhat1_Best_data = np.array([])  # predicted labels pooled over all folds
for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    clf_Rf = DecisionTreeClassifier(max_depth=10)
    clf_Rf.fit(X_train, y_train)
    c1 += 1
    y1_Best_data = np.concatenate([y1_Best_data, y_test])
    yhat1 = clf_Rf.predict(X_test)
    yhat1_Best_data = np.concatenate([yhat1_Best_data, yhat1])
    # DecisionTreeClassifier has no decision_function.  Column 1 of
    # predict_proba is the probability of classes_[1], i.e. of the class
    # passed as pos_label below.
    score = clf_Rf.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test,
                                    score,
                                    pos_label=clf_Rf.classes_[1])
    metrics.RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
    plt.show()

# Confusion matrix over the pooled folds (rows: true class, cols: predicted).
CM1 = confusion_matrix(y1_Best_data, yhat1_Best_data)
CM1 = CM1.astype(float)
TN1 = CM1[0][0]
FN1 = CM1[1][0]
TP1 = CM1[1][1]
FP1 = CM1[0][1]
# Sensitivity = TP / (TP + FN); specificity = TN / (FP + TN).
Sen1 = np.divide(TP1, (TP1 + FN1))
print('Sen1 of Best_data with random forest is: ', Sen1)
Spec1 = np.divide(TN1, (FP1 + TN1))
Beispiel #10
0
class Model:
    """Wrap a classifier together with the ``DictVectorizer`` feeding it.

    Every input handed to ``fit``/``predict``/``score`` goes through the
    vectorizer first and only then to the underlying classifier.
    """

    # Class-level defaults; __init__ normally replaces both per instance.
    model = None
    vectorizer = None

    def __init__(self, model_type=None, model_params=""):
        """Build the underlying classifier.

        model_type   -- "baseline", "svm", "knn", "naive_bayes",
                        "decision_trees", "log_regression" or "perceptron";
                        None yields an empty object (e.g. prior to load()).
        model_params -- constructor arguments as source text, for instance
                        'kernel="linear"' or 'n_neighbors=3'.  Read only for
                        the svm, knn, log_regression and perceptron types.

        An unsupported model_type is reported on stderr; self.model then
        stays None while the vectorizer is still created.
        """
        if model_type is None:
            self.model = None
            self.vectorizer = None
            return

        # SECURITY NOTE: model_params ends up inside eval().  Only pass text
        # from a trusted source (own config/CLI), never user-supplied data.
        if model_type == "baseline":
            self.model = baseline.Baseline()
        elif model_type == "svm":
            self.model = eval("SVC(" + model_params + ")")
        elif model_type == "knn":
            self.model = eval("KNeighborsClassifier(" + model_params + ")")
        elif model_type == "naive_bayes":
            self.model = MultinomialNB()
        elif model_type == "decision_trees":
            self.model = DecisionTreeClassifier(random_state=0)
        elif model_type == "log_regression":
            self.model = eval("LogisticRegression(" + model_params + ")")
        elif model_type == "perceptron":
            self.model = eval("Perceptron(" + model_params + ")")
        else:
            # Same text the old print-to-stderr statement produced.
            sys.stderr.write("Model of type " + model_type + " is not supported.\n")

        self.vectorizer = DictVectorizer(sparse=True)

    def fit(self, X, y):
        """Fit the vectorizer on X and the classifier on the vectorized X."""
        X = self.vectorizer.fit_transform(X)
        self.model.fit(X, y)

    def predict(self, x):
        """Vectorize x and return the classifier's predictions."""
        x = self.vectorizer.transform(x)
        return self.model.predict(x)

    def predict_proba(self, x):
        """Vectorize x and return the classifier's predict_proba output."""
        x = self.vectorizer.transform(x)
        return self.model.predict_proba(x)

    def predict_loss(self, X):
        """Return a per-sample loss-like value.

        Perceptron: the negated decision function.
        Any other model: column 0 of predict_proba.
        """
        if self.model.__class__.__name__ == "Perceptron":
            X = self.vectorizer.transform(X)
            return -self.model.decision_function(X)
        probs = self.predict_proba(X)
        return probs[:,0]

    def score(self, X, y):
        """Vectorize X and return the classifier's score against y."""
        X = self.vectorizer.transform(X)
        return self.model.score(X, y)

    def save(self, file_path):
        """Pickle the (model, vectorizer) pair to file_path."""
        # Pickle data is a byte stream, so the file is opened in binary mode.
        with open(file_path, "wb") as f:
            cPickle.dump((self.model, self.vectorizer), f)

    def load(self, file_path):
        """Read back a (model, vectorizer) pair written by save().

        NOTE: unpickling runs code stored in the file; load trusted files only.
        """
        with open(file_path, "rb") as f:
            (self.model, self.vectorizer) = cPickle.load(f)

    def print_params(self, file_path):
        """Export a decision-tree model as Graphviz text to file_path.

        Other model types produce an empty file.
        """
        with open(file_path, "w") as f:
            if self.model.__class__.__name__ == "DecisionTreeClassifier":
                # The handle must not be overwritten with export_graphviz's
                # return value (None when out_file is given): the old code
                # then called close() on None and never closed the file.
                tree.export_graphviz(self.model, out_file=f)
Beispiel #11
0
# Two decimals are enough when printing the matrices below.
np.set_printoptions(precision=2)

# Confusion matrix with raw counts
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    classes=class_names,
    title='Confusion matrix, without normalization',
)

# Same matrix, normalized
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    classes=class_names,
    normalize=True,
    title='Normalized confusion matrix',
)

plt.show()

# BASIC TEST: a handful of hand-written sentences run through the same
# feature extraction and vectorizer before being classified.
basic_test = [
    "This is just a long sentence, to make sure that it's not how long the sentence is that matters the most",
    'I just love when you make me feel like shit',
    'Life is odd',
    'Just got back to the US !',
    "Isn'it great when your girlfriend dumps you ?",
    "I love my job !",
    'I love my son !',
]
feature_basictest = np.array(
    [feature_extraction.getallfeatureset(tweet) for tweet in basic_test]
)
feature_basictestvec = vector.transform(feature_basictest)

# Show the sentences, the predicted labels and the raw decision values.
print(basic_test)
print(classifier.predict(feature_basictestvec))
print(classifier.decision_function(feature_basictestvec))
def train(object_name,
          data_dir,
          output_dir,
          train_type,
          classifier_type,
          learned_model=None,
          debug=False):
    """Fit (or re-evaluate) models on recorded transition data.

    Records are read with ``load_data_file(object_name, data_dir)`` and are
    indexed by action id.  What is trained depends on ``train_type``:

    * ``'gripper_status'`` -- binary classifier over the records of actions
      ``0..CLOSE_ACTION_ID``; label 1 for ``CLOSE_ACTION_ID``, otherwise 0.
    * ``'pick_success_probability'`` -- binary classifier over the
      ``PICK_ACTION_ID`` records; label 1 when ``reward > 0``.
    * any string containing ``'next_state'`` -- one model per
      (action, prediction index).  The string is split on ``'_'``; a token
      containing ``'action'`` or ``'pred'`` restricts the actions or
      prediction indices to the ``'-'``-separated values that follow, e.g.
      ``'next_state_action-1-2_pred-0-3'``.

    Args:
        object_name, data_dir: forwarded to ``load_data_file``.
        output_dir: directory receiving the ``.pkl`` model dumps, the
            ``.yaml`` parameter files and (if graphviz is importable) the
            rendered trees.
        train_type: see above.
        classifier_type: ``'DTC'`` selects a decision-tree classifier for the
            two classification tasks (anything else: logistic regression).
            For ``next_state`` the recognised values are ``'ridge'``,
            ``'SVR'``, ``'DTR'``, ``'DTRM'``, ``'DTC'``, ``'Earth'`` and
            ``'AdaLinear'``; anything else gives plain linear regression.
        learned_model: previously trained model(s).  When given, nothing is
            fitted or dumped; the supplied models are only scored/printed.
        debug: when true, print every sample the model gets wrong.

    Returns:
        The classifier for the two classification tasks, a nested dict
        ``ans[action][p]`` for ``next_state``, or ``None`` when
        ``train_type`` matches none of the above.
    """
    from sklearn import linear_model, tree
    from sklearn.svm import SVR
    from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
    from sklearn.ensemble import AdaBoostRegressor
    if classifier_type == 'Earth':
        from pyearth import Earth
    import numpy as np
    # graphviz is optional: without it the trees are simply not rendered.
    have_graphviz = True
    try:
        import graphviz
    except ImportError:
        have_graphviz = False
    ans = None
    saso_data = load_data_file(object_name, data_dir)
    if train_type == 'gripper_status':
        action_str = 'gs'
        actions = range(CLOSE_ACTION_ID + 1)
        x = []
        y = []
        x_index = []
        for action in actions:
            for sasor in saso_data[action]:
                # Features: joints + gripper + object, then the object's
                # x/y offset from the gripper.
                x_entry = sasor['next_joint_values']
                x_entry = x_entry + sasor['next_gripper'] + sasor['next_object']
                x_entry.append(sasor['next_object'][0] -
                               sasor['next_gripper'][0])
                x_entry.append(sasor['next_object'][1] -
                               sasor['next_gripper'][1])
                x.append(x_entry)
                x_index.append(sasor['index'])
                if action == CLOSE_ACTION_ID:
                    y.append(1)
                else:
                    y.append(0)  #gripper open
    if train_type == 'pick_success_probability':
        action_str = repr(PICK_ACTION_ID)
        x = []
        y = []
        x_index = []
        for sasor in saso_data[PICK_ACTION_ID]:
            # Same layout as above, built from the initial state and using
            # only the first three gripper/object components.
            x_entry = sasor['init_joint_values']
            x_entry = x_entry + sasor['init_gripper'][0:3] + sasor[
                'init_object'][0:3]
            x_entry.append(sasor['init_object'][0] - sasor['init_gripper'][0])
            x_entry.append(sasor['init_object'][1] - sasor['init_gripper'][1])
            x.append(x_entry)
            x_index.append(sasor['index'])
            if sasor['reward'] > 0:
                y.append(1)
            else:
                y.append(0)
    if train_type in ['pick_success_probability', 'gripper_status']:
        if learned_model is not None:
            logistic = learned_model
        else:
            print(classifier_type)
            if classifier_type == 'DTC':
                logistic = DecisionTreeClassifier(criterion='entropy')
            else:
                logistic = linear_model.LogisticRegression(max_iter=400, C=1.0)
            logistic.fit(x, y)
            joblib.dump(
                logistic,
                output_dir + '/' + classifier_type + '-' + action_str + '.pkl')
        ans = logistic
        print(logistic.score(x, y))
        print(logistic.get_params())
        print(len(x))
        if classifier_type != 'DTC':
            print(logistic.coef_)
            print(logistic.intercept_)
            yaml_out = {}
            yaml_out['coef'] = logistic.coef_.tolist()[0]
            yaml_out['intercept'] = logistic.intercept_.tolist()[0]
            write_config_in_file(
                output_dir + '/' + classifier_type + '-' + action_str +
                ".yaml", yaml_out)
        else:
            print(logistic.feature_importances_)

            # Touch not required when object coordinates are known
            feature_names = ['j1', 'j2']
            feature_names = feature_names + [
                'gx', 'gy', 'gz', 'gxx', 'gyy', 'gzz', 'gw'
            ][0:3]
            feature_names = feature_names + [
                'ox', 'oy', 'oz', 'oxx', 'oyy', 'ozz', 'ow'
            ][0:3]
            feature_names = feature_names + ['xrel', 'yrel']
            if have_graphviz:
                dot_data = tree.export_graphviz(logistic,
                                                out_file=None,
                                                feature_names=feature_names,
                                                filled=True)
                graph = graphviz.Source(dot_data)
                graph.render(output_dir + '/' + classifier_type + '-' +
                             action_str)
            # Tree arrays are converted to plain lists, as the next_state
            # branch below already does, so the YAML holds ordinary
            # sequences rather than NumPy objects.
            yaml_out = {}
            yaml_out["max_depth"] = logistic.tree_.max_depth
            yaml_out["values"] = logistic.tree_.value.tolist()
            yaml_out['n_nodes'] = logistic.tree_.node_count
            yaml_out['children_left'] = logistic.tree_.children_left.tolist()
            yaml_out['children_right'] = logistic.tree_.children_right.tolist()
            yaml_out['feature'] = logistic.tree_.feature.tolist()
            yaml_out['threshold'] = logistic.tree_.threshold.tolist()
            write_config_in_file(
                output_dir + '/' + classifier_type + '-' + action_str +
                ".yaml", yaml_out)
        if debug:
            # Dump every misclassified sample.
            for i in range(0, len(x)):
                y_bar = logistic.predict([x[i]])
                if y_bar != y[i]:
                    print(x_index[i])
                    print(x[i])
                    print(y[i])
                    print(logistic.predict_proba([x[i]]))
                    if classifier_type != 'DTC':
                        print(logistic.decision_function([x[i]]))
                        # Recompute the logistic sigmoid 1 / (1 + exp(-z))
                        # by hand from the fitted coefficients.
                        prob = (np.dot(logistic.coef_[0], x[i]) +
                                logistic.intercept_[0])
                        print(prob)
                        prob *= -1
                        prob = np.exp(prob)
                        prob += 1
                        prob = np.reciprocal(prob)
                        print(prob)
    if 'next_state' in train_type:
        actions = range(10)

        #  predictions can be 18, 7 for gripper pose, 7 for objct pose
        # 2 for joint values
        # 2 for touch values
        predictions = range(NUM_PREDICTIONS)

        # Optional "action-a-b" / "pred-p-q" tokens narrow the defaults.
        train_type_array = train_type.split('_')
        for s in train_type_array:
            if 'action' in s:
                actions = s.split('-')[1:]
            if 'pred' in s:
                predictions = s.split('-')[1:]
        ans = {}
        for action_ in actions:
            action = int(action_)
            x = []
            y = []       # y[p]: regression targets for prediction index p
            y_c = []     # y_c[p]: is_correct(...) labels used by 'DTC'
            l_reg = []   # l_reg[p]: model for prediction index p
            l_reg_c = []
            x_index = []
            for i in range(0, NUM_PREDICTIONS):
                y.append([])
                y_c.append([])
                l_reg.append('')
                l_reg_c.append('')
            for sasor in saso_data[action]:
                if sasor['reward'] > -999:  #discard invalid states
                    x_entry = sasor['init_joint_values']
                    x_entry = x_entry + sasor['init_gripper'][0:3] + sasor[
                        'init_object'][0:3]
                    x_entry.append(sasor['init_object'][0] -
                                   sasor['init_gripper'][0])
                    x_entry.append(sasor['init_object'][1] -
                                   sasor['init_gripper'][1])
                    x.append(x_entry)
                    x_index.append(sasor['index'])
                    for p_ in predictions:
                        p = int(p_)
                        y[p].append(get_prediction_value(sasor, p))
                        y_default = get_default_value(sasor, p)
                        y_c[p].append(is_correct(p, y[p][-1], y_default))

            print(len(x))
            ans[action] = {}

            for p_ in predictions:
                p = int(p_)
                if learned_model is not None:
                    l_reg[p] = learned_model[action][p]
                else:
                    if classifier_type == 'ridge':
                        l_reg[p] = linear_model.Ridge(alpha=0.5,
                                                      normalize=True)
                    elif classifier_type == 'SVR':
                        l_reg[p] = SVR(epsilon=0.2)
                    elif classifier_type in ['DTR', 'DTRM']:
                        l_reg[p] = DecisionTreeRegressor()
                    elif classifier_type == 'DTC':
                        l_reg[p] = DecisionTreeClassifier()
                    elif classifier_type == 'Earth':
                        l_reg[p] = Earth()
                    elif classifier_type == 'AdaLinear':
                        l_reg[p] = AdaBoostRegressor(
                            linear_model.LinearRegression())
                    else:
                        l_reg[p] = linear_model.LinearRegression()
                    # 'DTRM' fits all targets at once (multi-output tree),
                    # 'DTC' fits the is_correct labels, the rest target p.
                    if classifier_type == 'DTRM':
                        l_reg[p].fit(x, np.transpose(np.array(y)))
                    elif classifier_type == 'DTC':
                        l_reg[p].fit(x, y_c[p])
                    else:
                        l_reg[p].fit(x, y[p])
                    joblib.dump(
                        l_reg[p], output_dir + '/' + classifier_type + "-" +
                        repr(action) + "-" + repr(p) + '.pkl')
                ans[action][p] = l_reg[p]

                if classifier_type == 'DTRM':
                    model_score = l_reg[p].score(x, np.transpose(np.array(y)))
                elif classifier_type == 'DTC':
                    model_score = l_reg[p].score(x, y_c[p])
                else:
                    model_score = l_reg[p].score(x, y[p])
                print(repr(action) + " " + repr(p) + " " + repr(model_score))
                print(l_reg[p].get_params())
                if classifier_type not in [
                        'SVR', 'DTR', 'DTRM', 'AdaLinear', 'DTC'
                ]:
                    print(l_reg[p].coef_)
                if classifier_type not in [
                        'DTR', 'DTRM', 'AdaLinear', 'DTC', 'Earth'
                ]:
                    print(l_reg[p].intercept_)
                if classifier_type in ['Earth']:
                    for j in range(0, len(x)):
                        predict_earth(l_reg[p], x[j])
                    print(l_reg[p].summary())
                if learned_model is None:
                    if classifier_type in ['DTR', 'DTRM', 'AdaLinear', 'DTC']:

                        print(l_reg[p].feature_importances_)

                        feature_names = ['j1', 'j2']
                        feature_names = feature_names + [
                            'gx', 'gy', 'gz', 'gxx', 'gyy', 'gzz', 'gw'
                        ][0:3]
                        feature_names = feature_names + [
                            'ox', 'oy', 'oz', 'oxx', 'oyy', 'ozz', 'ow'
                        ][0:3]
                        feature_names = feature_names + ['xrel', 'yrel']
                        if have_graphviz:
                            dot_data = tree.export_graphviz(
                                l_reg[p],
                                out_file=None,
                                feature_names=feature_names,
                                filled=True)
                            graph = graphviz.Source(dot_data)
                            graph.render(output_dir + '/' + classifier_type +
                                         "-" + repr(action) + "-" + repr(p))
                        yaml_out = {}
                        yaml_out['max_depth'] = l_reg[p].tree_.max_depth
                        yaml_out["values"] = l_reg[p].tree_.value.tolist()
                        yaml_out['n_nodes'] = l_reg[p].tree_.node_count
                        yaml_out['children_left'] = l_reg[
                            p].tree_.children_left.tolist()
                        yaml_out['children_right'] = l_reg[
                            p].tree_.children_right.tolist()
                        yaml_out['feature'] = l_reg[p].tree_.feature.tolist()
                        yaml_out['threshold'] = l_reg[
                            p].tree_.threshold.tolist()
                        write_config_in_file(
                            output_dir + '/' + classifier_type + "-" +
                            repr(action) + "-" + repr(p) + ".yaml", yaml_out)
                    if classifier_type in ['Earth']:
                        yaml_out = get_yaml_earth(l_reg[p])
                        write_config_in_file(
                            output_dir + '/' + classifier_type + "-" +
                            repr(action) + "-" + repr(p) + ".yaml", yaml_out)

                if classifier_type == 'DTRM':
                    # The multi-output tree covers every target at once, so
                    # show one sample prediction and stop iterating over p.
                    i = 0
                    y_bar = l_reg[p].predict([x[i]])
                    print(x_index[i])
                    print(x[i])
                    y_t = np.transpose(np.array(y))
                    print(repr(y_t[i]) + ' Prediction ' + repr(y_bar))
                    break
                if debug:
                    # Dump every sample the model gets wrong.
                    for i in range(0, len(x)):
                        y_bar = l_reg[p].predict([x[i]])
                        if classifier_type == 'DTC':
                            if y_bar != y_c[p][i]:
                                print(x_index[i])
                                print(x[i])
                                print(y_c[p][i])
                                print(y[p][i])
                                print(l_reg[p].predict_proba([x[i]]))
                        else:
                            if is_correct(p, y_bar, y[p][i]) == 0:
                                print(x_index[i])
                                print(x[i])
                                print(repr(
                                    y[p][i]) + ' Prediction ' + repr(y_bar))

    return ans