Example #1
def bpnn(X_train, Y_train, X_test, Y_test, params):
    start = time.time()

    mlp = MLPClassifier(hidden_layer_sizes=params['hidden_layer_sizes'],
                        activation=params['activation'],
                        learning_rate_init=params['learning_rate_init'],
                        solver=params['solver'],
                        max_iter=100000)
    mlp.fit(X_train, Y_train)
    Y_pred = mlp.predict(X_test)

    end = time.time()

    precision, recall, fscore, train_support = score(Y_test,
                                                     Y_pred,
                                                     pos_label='1',
                                                     average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))

    print("Execution time: " + str(end - start))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    # plot_roc_curve was removed in scikit-learn 1.2; RocCurveDisplay is the current equivalent
    sklearn.metrics.RocCurveDisplay.from_estimator(mlp, X_test, Y_test)
    plt.title("ROC Curve")
    plt.show()
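Like most snippets on this page, the function above is lifted from a larger file and omits its imports and data loading. A hypothetical setup block (the module choices and the breast-cancer dataset are illustrative assumptions, not part of the original) could look like this:

# Illustrative setup only -- the original file defines its own imports and data.
import time
import pandas as pd
import seaborn as sns
import sklearn.metrics
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X, Y = data.data, data.target.astype(str)  # string labels so pos_label='1' applies
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
params = {'hidden_layer_sizes': (100,), 'activation': 'relu',
          'learning_rate_init': 0.001, 'solver': 'adam'}
# then: bpnn(X_train, Y_train, X_test, Y_test, params)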
Example #2
def svm(X_train, Y_train, X_test, Y_test):
    start = time.time()

    svclassifier = SVC()
    svclassifier.fit(X_train, Y_train)
    Y_pred = svclassifier.predict(X_test)

    end = time.time()

    precision, recall, fscore, train_support = score(Y_test,
                                                     Y_pred,
                                                     pos_label='1',
                                                     average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))
    print("Execution Time: " + str(end - start))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    # plot_roc_curve was removed in scikit-learn 1.2; RocCurveDisplay is the current equivalent
    sklearn.metrics.RocCurveDisplay.from_estimator(svclassifier, X_test, Y_test)
    plt.title("ROC Curve")
    plt.show()
Example #3
def kmeans(X_train, Y_train, X_test, Y_test, params):
    start = time.time()

    km = KMeans(n_clusters=params['n_clusters'], max_iter=100000)
    km.fit(X_train, Y_train)  # KMeans is unsupervised; Y_train is accepted but ignored
    Y_pred = km.predict(X_test)

    end = time.time()

    # typecast to string to match Y_test format
    Y_pred = [str(x) for x in Y_pred]

    precision, recall, fscore, train_support = score(Y_test,
                                                     Y_pred,
                                                     pos_label='1',
                                                     average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))

    print("Execution Time: " + str(end - start))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()
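One caveat with this example: KMeans cluster indices have no inherent relationship to the class labels, so comparing km.predict(X_test) directly against Y_test only gives meaningful scores when the clusters happen to line up with the classes. A minimal sketch (not from the original) that maps each cluster to its majority training label before scoring:

import numpy as np
from collections import Counter
from sklearn.cluster import KMeans

def kmeans_with_label_mapping(X_train, Y_train, X_test, n_clusters=2):
    # Not part of the original example: fit the clustering (labels are not used here).
    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    train_clusters = km.fit_predict(X_train)

    # Map each cluster index to the majority true label of its training points.
    cluster_to_label = {}
    for c in range(n_clusters):
        labels_in_cluster = np.asarray(Y_train)[train_clusters == c]
        cluster_to_label[c] = Counter(labels_in_cluster).most_common(1)[0][0]

    # Predict clusters for the test set, then translate them into class labels.
    return [cluster_to_label[c] for c in km.predict(X_test)]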
Example #4
    def print_metrics(self, predicted_output):
        """
        Print some MVP metrics. sklearn is used to calculate all of the
        metric values: the confusion matrix cells (true positive, false
        negative, false positive and true negative), precision, recall,
        f1-score and accuracy. The classification report contains a few
        other metrics, but they are skipped here.

        We need the actual labels and the predicted labels to calculate the
        metrics. The actual labels come from the class variable, and the
        predicted labels are passed in as a parameter after running each
        algorithm.

        :param predicted_output: Predicted labels

        """

        # For labels [0, 1], sklearn's confusion matrix is [[TN, FP], [FN, TP]]
        # (rows are true labels, columns are predicted labels).
        res = cm(self.y_test, predicted_output)
        tn = res[0][0]
        fp = res[0][1]
        fn = res[1][0]
        tp = res[1][1]
        print("Accuracy: ", acs(self.y_test, predicted_output))
        print("TP: ", tp, ", FN: ", fn, ", FP: ", fp, "TN: ", tn)
        print(cr(self.y_test, predicted_output))
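If the cell ordering is ever in doubt, a quick illustrative check (not part of the original) confirms how sklearn lays out the matrix:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1]
y_pred = [0, 1, 1, 1]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn, fp, fn, tp)  # 1 1 0 2 -> rows are true labels, columns are predictions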
Example #5
seq.compile(loss='binary_crossentropy',
            metrics=['accuracy'],
            optimizer='rmsprop')

seq.fit(xtrain,
        ztrain,
        validation_data=(xtest, ztest),
        epochs=2000,
        batch_size=16)

# Crosschecking the final model against validation data for checking the overall accuracy

# In[ ]:

from sklearn.metrics import accuracy_score as acs
# Sequential.predict_classes was removed in TF 2.6; threshold the sigmoid output instead
print(acs(ztest, (seq.predict(xtest) > 0.5).astype("int32").ravel()))

# Predict survivors from test dataset

# In[ ]:

a = (seq.predict(test) > 0.5).astype("int32")  # equivalent of the removed predict_classes

# preparing result file that has to be submitted to competition

# In[ ]:

o = pd.DataFrame(a, columns=['Survived'])
o.index = pd.read_csv('../input/test.csv')['PassengerId']
o.index.name = 'PassengerId'
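The snippet stops after building the submission frame; presumably the last step, as in Example #7 below, would be writing it out:

# Hypothetical final step (mirroring Example #7): write the submission file.
o.to_csv('result.csv')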
Example #6
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
iris = load_iris()
x = iris.data
y = iris.target

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=10,
                                    criterion='entropy',
                                    random_state=0)
classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

from sklearn.metrics import accuracy_score as acs
print(acs(y_pred, y_test))
Example #7
model = grc(RandomForestClassifier(), rfC)
model.fit(xtrain, ztrain)

# **Summarizing my findings**

# In[ ]:

print(model.best_params_, model.best_score_)

# **Checking the accuracy of my model using sklearn.metrics**

# In[ ]:

from sklearn.metrics import accuracy_score as acs

print(acs(ztest, model.predict(xtest)))

# **Preparing result as csv file**

# In[ ]:

a = pd.DataFrame(model.predict(test[train.columns]))
a.index = pd.read_csv('../input/test.csv')['PassengerId']
a.columns = ['Survived']
a.index.name = 'PassengerId'

# In[ ]:

a.to_csv('result.csv')

# In[ ]:
Example #8
        while tree not in range(100):  ## still a dict, so this sample is not yet classified
            comparison = next(iter(tree))  ## take the first split condition
            tree = tree[comparison]  ## sub-dict for that condition
            attribute_num = int(comparison.split()[0])  ## attribute index
            attribute_value = float(comparison.split()[1])  ## attribute threshold
            if X[index][attribute_num] <= attribute_value:
                tree = tree['<=']
            else:
                tree = tree['>']
        res.append(tree)
    return res


if __name__ == '__main__':
    iris = datasets.load_wine()  ## only about 87% accuracy when used for handwritten digit recognition
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = tts(X,
                                           y,
                                           test_size=0.2,
                                           random_state=19901120,
                                           stratify=y)

    epsilon = 1e-2
    DT = CreateDecisionTree(X_train, y_train, epsilon)
    print(DT)
    print(acs(Classifier(DT, X_test), y_test))

    clf = dtc(criterion='entropy').fit(X_train, y_train)
    print(clf.score(X_test, y_test))
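The traversal in Classifier implies a tree stored as nested dicts whose keys encode "<attribute index> <threshold>", whose children sit under '<=' and '>', and whose leaves are integer class labels. A small hand-built illustration (the exact format is inferred from the code above, not shown in the original):

# Illustrative only: the nested-dict shape the traversal above expects.
toy_tree = {
    "2 2.45": {            # split on feature 2 at threshold 2.45
        "<=": 0,           # left branch -> class 0
        ">": {
            "3 1.75": {"<=": 1, ">": 2},
        },
    }
}

def classify_one(tree, x):
    # Walk the dict until an integer leaf label is reached.
    while isinstance(tree, dict):
        condition = next(iter(tree))
        attribute_num = int(condition.split()[0])
        threshold = float(condition.split()[1])
        branch = "<=" if x[attribute_num] <= threshold else ">"
        tree = tree[condition][branch]
    return tree

print(classify_one(toy_tree, [5.1, 3.5, 1.4, 0.2]))  # -> 0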
Example #9
def main():

    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)

    X_train = X_train.toarray()
    X_test = X_test.toarray()

    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # the training and testing datasets should have the same dimension
    _, nftrain = X_train.shape
    _, nftest = X_test.shape
    assert nftrain == nftest

    # ask the user to input which discriminant function to use
    prompt = '''
    Type of discriminant functions supported assuming Gaussian pdf:
    1 - minimum Euclidean distance classifier
    2 - minimum Mahalanobis distance classifier
    3 - quadratic classifier
    '''
    print(prompt)
    choice = input('Please input 1, 2, or 3: ')  # avoid shadowing the built-in str
    cases = int(choice)

    # ask the user to input prior probability that needs to sum to 1
    prop_str = input(
        "Please input prior probabilities in float numbers, separated by space, and they must add to 1: \n"
    )
    numbers = prop_str.split()
    P = np.zeros(len(numbers))
    Psum = 0
    for i in range(len(numbers)):
        P[i] = float(numbers[i])
        Psum += P[i]
    if abs(Psum - 1) > 1e-6:  # allow for floating-point rounding
        print("Prior probabilities do not add up to 1. Please check!")
        sys.exit(1)

    # derive the decision rule from the training set and apply on the test set
    t0 = time.time()  # start time
    Y_pred = mpp(X_train, Y_train, X_test, cases, P)
    t1 = time.time()  # ending time

    print(Y_pred)
    Y_pred = Y_pred.astype("int")
    Y_pred = Y_pred.astype("str")
    # calculate accuracy
    precision, recall, fscore, train_support = score(Y_test,
                                                     Y_pred,
                                                     pos_label='1',
                                                     average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    print(f'The learning process takes {t1 - t0} seconds.')
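The mpp function itself is not shown here. As a rough sketch of the simplest rule listed in the prompt (case 1, the minimum Euclidean distance classifier), the decision could look like the following; the real mpp implementation may differ:

import numpy as np

def euclidean_discriminant(X_train, Y_train, X_test, priors=None):
    """Hedged sketch of case 1: each class has covariance sigma^2 * I."""
    X_train, Y_train = np.asarray(X_train), np.asarray(Y_train)
    classes = np.unique(Y_train)
    if priors is None:
        priors = np.full(len(classes), 1.0 / len(classes))
    means = np.array([X_train[Y_train == c].mean(axis=0) for c in classes])
    sigma2 = X_train.var()  # single pooled variance estimate

    preds = []
    for x in np.asarray(X_test):
        # g_i(x) = -||x - mu_i||^2 / (2 sigma^2) + ln P(omega_i)
        g = [-np.sum((x - mu) ** 2) / (2 * sigma2) + np.log(p)
             for mu, p in zip(means, priors)]
        preds.append(classes[int(np.argmax(g))])
    return np.array(preds)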
    from sklearn.ensemble import RandomForestRegressor
    
    reg = RandomForestRegressor(n_estimators=200, random_state=0)
    reg.fit(X_train, y_train)
    results.append(ac(y_test, reg.predict(X_test)))

else :
    # this is for classification
    
    # Logisitic Classification
    
    from sklearn.linear_model import LogisticRegression
    
    cla = LogisticRegression()
    cla.fit(X_train_Scaler, y_train)
    results.append(acs(y_test, cla.predict(X_test_Scaler)))
    
    # K_Nearnest
    
    from sklearn.neighbors import KNeighborsClassifier
    
    cla = KNeighborsClassifier(n_neighbors=10)
    cla.fit(X_train_Scaler, y_train)
    results.append(acs(y_test, cla.predict(X_test_Scaler)))
    
    # SVM
    
    from sklearn.svm import SVC
    
    cla = SVC(kernel='linear', random_state=0)
    cla.fit(X_train_Scaler, y_train)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score as acs
import matplotlib.pyplot as plt
import seaborn as sns


# In[7]:


rf = RandomForestClassifier(n_estimators=150, max_depth=None, n_jobs=-1)

rf_model = rf.fit(X_train_vect, y_train)

y_pred = rf_model.predict(X_test_vect)

precision, recall, fscore, train_support = score(y_test, y_pred, pos_label='male', average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore,3), round(acs(y_test,y_pred), 3)))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
class_label = ["1- yes", "0 - no"]
df_cm = pd.DataFrame(cm, index=class_label,columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
Example #12
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score as acs
import matplotlib.pyplot as plt
import seaborn as sns

rf = RandomForestClassifier(n_estimators=150, max_depth=None, n_jobs=-1)

rf_model = rf.fit(X_train_vect, y_train)

y_pred = rf_model.predict(X_test_vect)

precision, recall, fscore, train_support = score(y_test,
                                                 y_pred,
                                                 pos_label='1',
                                                 average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore, 3),
    round(acs(y_test, y_pred), 3)))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
class_label = ["ham", "spam"]
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
Example #13
import matplotlib.pyplot as plt
plt.plot(PA_score1, label='Passive Aggressive Classifier')
plt.title('ROCAUC-Passive Aggressive Classifier-Training')
plt.xlabel('Sample Size')
plt.ylabel('ROCAUC')
plt.legend()
plt.show()

precision, recall, fscore, train_support = score(y_l1,
                                                 y_pred_l1,
                                                 pos_label=1,
                                                 average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore, 3),
    round(acs(y_l1, y_pred_l1), 3)))
import seaborn as sns
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_l1, y_pred_l1)
class_label = [1, 0]
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix-Passive Aggressive Classifier-Trainig")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# save
with open('PA_model1.pkl', 'wb') as f:
    pickle.dump(PA_model, f)
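The saved classifier can later be restored with the matching load call (not shown in the original):

import pickle

# Reload the model pickled above.
with open('PA_model1.pkl', 'rb') as f:
    PA_model = pickle.load(f)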
models.append(('RF', RandomForestClassifier()))
models.append(('MNB', MultinomialNB()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('SVM', SVC(gamma='auto', max_iter=3000, probability=True)))
# evaluate each model in turn
results = []
names = []
print('Testing Data')
for name, model in models:
    model = model.fit(X_train, Y_train)
    y_pred = model.predict(X_validation)
    precision, recall, fscore, train_support = score(Y_validation,
                                                     y_pred,
                                                     pos_label='Yes',
                                                     average='binary')
    accuracy = acs(Y_validation, y_pred)
    results.append(recall)
    results.append(fscore)
    results.append(accuracy)
    names.append(name)
    msg = "%s: %f %f %f" % (name, recall.mean(), fscore.mean(),
                            accuracy.mean())
    print(msg)

print('\n')

print('Training Data')
for name, model in models:
    model = model.fit(X_train, Y_train)
    trainpred = model.predict(X_train)
    precision, recall, fscore, train_support = score(Y_train,

#---------------------------------  Validation of models

# 1. Random Forest Classifier

# higher estimator, the better the accuracy so far. 20 vs. 200
class_rnf_val = RandomForestClassifier(n_estimators=200, max_depth=None, n_jobs=-1)

rnf_val_model = class_rnf_val.fit(tfidf_train, y_train)

y_val_rnf_pred = rnf_val_model.predict(tfidf_validate)

precision, recall, fscore, train_support = score(y_validate, y_val_rnf_pred, pos_label='5', average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore,3), round(acs(y_validate, y_val_rnf_pred), 3)))


# Making the Confusion Matrix
cm = confusion_matrix(y_validate, y_val_rnf_pred)
class_label = ['1', '5']
df_cm = pd.DataFrame(cm, index=class_label,columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Star')
plt.ylabel('Actual Star')
plt.show()


# 2. Logistic Regression 
X = Total_employees.iloc[:, -10:-1]
Y = Total_employees.iloc[:, -1]

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X['dept'] = le.fit_transform(X['dept'])
X['salary'] = le.fit_transform(X['salary'])

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.3,
                                                    random_state=0)

#RandomFscore = []
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=71, random_state=0)
classifier.fit(X_train, y_train)
X_pred = classifier.predict(X_train)
y_pred = classifier.predict(X_test)

#Testing the model accuracy (Training)
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import confusion_matrix as cm
score = acs(y_train, X_pred)
matrix = cm(y_train, X_pred)

#Testing the Model Validity
Tscore = acs(y_test, y_pred)
Tmatrix = cm(y_test, y_pred)
Example #17
		predictions = []
		for row in x_test:
			label = self.closest(row)
			predictions.append(label)
		return predictions

	def closest(self, row):
		best_dist = eu(row, self.x_train[0])
		best_index = 0
		for i in range (1, len(self.x_train)):
			dist = eu(row, self.x_train[i])
			if dist < best_dist:
				best_dist = dist
				best_index = i
		return self.y_train[best_index]

from sklearn.datasets import load_iris
iris = load_iris()
x = iris.data
y = iris.target

from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer versions
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

clf = KNN()
clf.fit(x_train, y_train)
prediction = clf.predict(x_test)

from sklearn.metrics import accuracy_score as acs
print(acs(prediction, y_test))
"""
#Decision Tree Algorithm of flower
import pandas as pd
data = pd.read_csv('Iris.csv', index_col=0)
y = data.iloc[:, [-1]].values
x = data.iloc[:, :-1]
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()  # renamed from X to avoid clashing with the feature matrix
y = le.fit_transform(y.ravel())

from sklearn.model_selection import train_test_split as tts
X_train, X_test, y_train, y_test = tts(x, y, test_size=0.3)

from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
X_pred = classifier.predict(X_train)

#Predicting the Value of y
y_pred = classifier.predict(X_test)

#Displaying the result
print(y_pred)

#Testing the accuracy of the model
from sklearn.metrics import accuracy_score as acs, confusion_matrix as cm
y_pred_cm = cm(y_test, y_pred)
print(y_pred_cm)
#The accuracy score of the model
y_pred_acs = acs(y_test, y_pred)
print('The Model accuracy score is ', y_pred_acs)