Ejemplo n.º 1
0
def get_model_visualiation(X_sample, true_label, modelpath):
    """Predict classes for ``X_sample`` and project the samples to 2-D with t-SNE.

    NOTE(review): the visible body stops right after the t-SNE projection, so
    whatever plots or returns the result (and any use of ``true_label``) is
    not shown here.

    :param X_sample: samples handed to both ``model.predict_classes`` and
        ``TSNE.fit_transform``
    :param true_label: not used in the visible part of the function
    :param modelpath: path passed to ``model.load_weights``
    """
    # NOTE(review): the weights are loaded into a ``Sequential`` that has no
    # layers yet -- presumably the architecture must be rebuilt first (or a
    # full saved model loaded instead); confirm against the training script.
    model = Sequential()
    model.load_weights(modelpath)

    y_predict = model.predict_classes(X_sample)
    # A fixed random_state is passed to TSNE so repeated runs use the same seed.
    tsne = TSNE(n_components=2, random_state=0)
    X_2d = tsne.fit_transform(X_sample)
Ejemplo n.º 2
0
def RNN():
    """Train a small SimpleRNN binary classifier and report its test accuracy.

    The data comes from ``get_data()``, which must return
    ``(x_train, y_train), (x_test, y_test)`` with the ``x`` arrays holding
    integer token ids of shape ``(samples, sequence_length)``.

    :return: a one-element list containing the accuracy on the test split
    """
    (x_train, y_train), (x_test, y_test) = get_data()

    # Embedding's ``input_dim`` is the vocabulary size (largest token id + 1),
    # not the number of training rows. Using ``x_train.shape[0]`` breaks as
    # soon as a token id is >= the number of samples.
    vocab_size = int(max(x_train.max(), x_test.max())) + 1

    model = Sequential()
    model.add(
        Embedding(output_dim=32,
                  input_dim=vocab_size,
                  input_length=x_train.shape[1]))
    model.add(Dropout(0.25))
    model.add(SimpleRNN(units=32))
    model.add(Dense(units=256, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train,
              batch_size=30,
              epochs=1,
              verbose=2,
              validation_split=0.2)

    y_pred = model.predict_classes(x_test)
    result = [accuracy_score(y_test, y_pred)]

    print('RNN done')
    return result
def evaluate_model(trainX, trainy, testX, testy):
    """Fit an LSTM classifier on the training split and score it on the test split.

    :param trainX: training inputs; axes 1 and 2 give the LSTM input shape
    :param trainy: training targets; axis 1 gives the number of output units
    :param testX: test inputs
    :param testy: test targets
    :return: ``(accuracy, y_pred)`` -- the accuracy from ``model.evaluate`` and
        the result of ``model.predict_classes`` on ``testX``
    """
    fit_verbosity = 0
    n_epochs = 100
    batch = 25

    timesteps = trainX.shape[1]
    features = trainX.shape[2]
    outputs = trainy.shape[1]

    model = Sequential([
        LSTM(100, input_shape=(timesteps, features)),
        Dropout(0.5),
        Dense(100, activation='relu'),
        Dense(outputs, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Train silently, then score on the held-out data.
    model.fit(trainX, trainy,
              epochs=n_epochs, batch_size=batch, verbose=fit_verbosity)
    scores = model.evaluate(testX, testy, batch_size=batch, verbose=0)
    accuracy = scores[1]

    y_pred = model.predict_classes(testX, verbose=0)
    return accuracy, y_pred
def train_and_evaluate_model(xtrain, ytrain, xval, yval):
    """Train a one-hidden-layer binary classifier and print validation metrics.

    The network expects 22 input features (``input_dim=22``).

    :param xtrain: training features
    :param ytrain: binary training labels
    :param xval: validation features
    :param yval: binary validation labels
    :return: ``(accuracy, precision, recall, f1_score)`` on the validation data
    """
    model = Sequential()
    model.add(Dense(3, input_dim=22, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    model.fit(xtrain,
              ytrain,
              validation_data=(xval, yval),
              batch_size=32,
              epochs=10,
              verbose=1)

    # One forward pass gives both the scores and the hard labels. Thresholding
    # at 0.5 is what ``predict_classes`` does for a single sigmoid output.
    y_predicted_proba = model.predict(xval)
    y_predicted_classes = (y_predicted_proba > 0.5).astype('int32')

    conf_matrix = skm.confusion_matrix(yval, y_predicted_classes)
    print("Confusion matrix: \n" + str(conf_matrix))
    score = model.evaluate(xval, yval, verbose=0)
    print("Accuracy:: %.2f%%" % (score[1] * 100))
    precision = skm.precision_score(yval, y_predicted_classes)
    print("Precision: %.2f%%" % (precision * 100))
    recall = skm.recall_score(yval, y_predicted_classes)
    print("Recall: %.2f%%" % (recall * 100))
    f1_score = skm.f1_score(yval, y_predicted_classes)
    print("F1-score: %.4f" % f1_score)
    # AUROC is a ranking metric: it needs the predicted probabilities. Feeding
    # it hard 0/1 labels collapses the ROC curve to a single operating point.
    auroc = skm.roc_auc_score(yval, y_predicted_proba.ravel())
    print("AUROC: %.4f" % auroc)

    return score[1], precision, recall, f1_score
Ejemplo n.º 5
0
def neural_network(x_tr, y_tr, x_te, y_te, dum=False, min_max=False):
    """Train a small dense 5-class classifier and evaluate it on the test data.

    :param x_tr: training features
    :param y_tr: training labels (converted with ``data_propr``)
    :param x_te: test features
    :param y_te: test labels (converted with ``data_propr`` for evaluation)
    :param dum: when true, both feature sets are passed through ``data_std``
    :param min_max: NOTE(review): accepted but never forwarded --
        ``data_std`` is always called with ``min_max=False``; confirm intent
    :return: ``(classification report, confusion matrix, predicted classes,
        predicted probabilities, fitted model)``
    """
    # ``time.clock`` was removed in Python 3.8; ``perf_counter`` is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    if dum:
        x_tr = data_std(x_tr, min_max=False)
        x_te = data_std(x_te, min_max=False)
    y_tr_dm = data_propr(y_tr, name=False)
    y_te_dm = data_propr(y_te, name=False)
    init = initializers.glorot_uniform(seed=1)
    simple_adam = optimizers.Adam()
    model = Sequential()
    model.add(
        Dense(units=5,
              input_dim=x_te.shape[1],
              kernel_initializer=init,
              activation='relu'))
    # NOTE(review): a dropout rate of 1 is either a no-op or drops every
    # unit, depending on the Keras version -- confirm the intended rate.
    model.add(Dropout(1))
    model.add(Dense(units=6, kernel_initializer=init, activation='sigmoid'))
    model.add(Dropout(1))
    model.add(Dense(units=5, kernel_initializer=init, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=simple_adam,
                  metrics=['accuracy'])
    model.fit(x_tr,
              y_tr_dm,
              verbose=0,
              class_weight={
                  0: 0.05,
                  1: 0.05,
                  2: 0.49,
                  3: 0.499,
                  4: 0.01
              })

    # Predict once and reuse the result instead of running inference three
    # times on the same data.
    NN_class_pred = model.predict_classes(x_te)
    NN_class_pred_prob = model.predict_proba(x_te)
    # NOTE(review): scikit-learn expects (y_true, y_pred); the arguments are
    # passed the other way round here, which swaps precision/recall and
    # transposes the matrix. Left as is because callers may rely on it.
    NN_class_repot = classification_report(NN_class_pred, y_te)
    NN_class_con = confusion_matrix(NN_class_pred, y_te)

    # Print the evaluation result (loss and accuracy) on the test data.
    print('神经网络耗时:', end='--')
    print(model.evaluate(x_te, y_te_dm))
    # Draw a simple ROC curve for the model.
    poc_plt(y_te_dm, NN_class_pred_prob)
    end = time.perf_counter()
    # Report how long the whole run took.
    print('神经网络耗时:%f' % (end - start))
    return NN_class_repot, NN_class_con, NN_class_pred, NN_class_pred_prob, model
Ejemplo n.º 6
0
def nn(X, y, X_test, y_test):
    """Fit a one-hidden-layer Keras classifier and report train/test results.

    The predicted classes for both splits are handed to the ``eval`` helper
    together with the true labels.

    :param X: X training data
    :param y: y training data
    :param X_test: X test data
    :param y_test: Y test data
    :return: none
    """
    print("Neural Net")
    n_features = X.shape[1]

    hidden = Dense(
        100,
        input_dim=n_features,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(10),
        activation="sigmoid",
    )
    output = Dense(1, kernel_initializer="normal", activation="sigmoid")
    model = Sequential([hidden, output])
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    # Train quietly, holding back 10% of the training data for validation.
    model.fit(X, y,
              batch_size=100,
              epochs=30,
              verbose=0,
              validation_split=0.1)

    # Report on the training split first, then on the test split.
    print("Training accuracy")
    eval(model.predict_classes(X), y)
    print("Test accuracy")
    eval(model.predict_classes(X_test), y_test)
Ejemplo n.º 7
0
def deep_and_wide_network(np_full_array, testing, truth_dictionary, key,
                          logger):
    """Train a dense binary classifier on ``np_full_array`` and log its quality.

    5% of the rows are held out as a test set (fixed ``random_state=42``); the
    confusion matrix and classification report for that set are written to
    ``logger``.

    :param np_full_array: 2-D feature matrix, one row per sample
    :param testing: when true, train for a single epoch instead of 100
    :param truth_dictionary: mapping from ``key`` to the label vector
    :param key: which label vector of ``truth_dictionary`` to learn
    :param logger: logger receiving the evaluation output
    :return: the fitted Keras model
    """
    number_of_epochs = 1 if testing else 100

    full_x_train, full_x_test, y_train, y_test = train_test_split(
        np_full_array, truth_dictionary[key], test_size=0.05, random_state=42)

    sparse_model = Sequential()
    sparse_model.add(Dense(128, input_shape=(np_full_array.shape[1], )))
    sparse_model.add(Dropout(0.2))
    sparse_model.add(Dense(100))
    sparse_model.add(Dropout(0.2))
    sparse_model.add(Dense(50))
    sparse_model.add(Dropout(0.2))
    sparse_model.add(Dense(10))
    sparse_model.add(Dropout(0.2))
    sparse_model.add(Dense(1, activation='sigmoid'))
    sparse_model.compile(optimizer='rmsprop',
                         loss='binary_crossentropy',
                         metrics=['accuracy'])
    early_stop_callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                        patience=4,
                                                        verbose=0,
                                                        mode='auto')

    # ``epochs`` replaces the Keras 1 spelling ``nb_epoch``. A validation
    # split is required for ``val_loss`` to exist at all; without it the
    # early-stopping callback can never trigger. The test set is kept out of
    # training so the report below stays unbiased.
    sparse_model.fit(full_x_train,
                     y_train,
                     batch_size=BATCH_SIZE,
                     epochs=number_of_epochs,
                     validation_split=0.1,
                     callbacks=[
                         early_stop_callback,
                     ])
    sparse_model.evaluate(full_x_test, y_test)
    validation = sparse_model.predict_classes(full_x_test)
    logger.info('\nConfusion matrix\n %s',
                confusion_matrix(y_test, validation))
    logger.info('classification report\n %s',
                classification_report(y_test, validation))
    return sparse_model
Ejemplo n.º 8
0
    def mnistTest(self):
        """Train a two-layer LSTM on MNIST and print the first test prediction.

        Each image is treated as a sequence of rows. The first test image is
        shown with matplotlib before its predicted class is printed.
        """
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

        # Scale the pixel values into [0, 1].
        x_train = x_train / 255.0
        x_test = x_test / 255.0

        # Shape of the whole training set, then of a single image.
        print(x_train.shape)
        print(x_train[0].shape)

        # Everything after the sample axis is the per-sample input shape.
        sample_shape = x_train.shape[1:]
        layers = [
            # The first LSTM returns the full sequence so a second one can follow.
            LSTM(128, input_shape=(sample_shape), activation='relu', return_sequences=True),
            Dropout(0.2),
            LSTM(128, activation='relu'),
            Dropout(0.2),
            # Dense head on top of the recurrent features.
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(10, activation='softmax'),
        ]
        model = Sequential()
        for layer in layers:
            model.add(layer)

        optimizer = Adam(lr=1e-3, decay=1e-5)
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Five passes over the training data, validated on the test set.
        model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))
        #model.save('model_backup\\mnist_model_v1.h5')

        plt.imshow(x_test[0], cmap = plt.cm.binary)
        plt.show()
        # predict_classes returns class indices directly (alternative: np.argmax
        # over the softmax output).
        prediction = model.predict_classes(x_test)
        print(prediction[0])
def train_model():
    """Train a dense 157-class classifier on the first 80000 training rows.

    Reads ``../data/train_x.npy`` / ``../data/train_y.npy``, holds out 20% for
    validation, trains with early stopping on ``val_loss``, saves the model to
    ``../models/80000NN.h5py`` and prints the accuracy on the test files.
    """
    num = 80000
    num_classes = 157

    X_train = np.load('../data/train_x.npy')[0:num]
    Y_train = np.load('../data/train_y.npy')[0:num]
    x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    y_train = pd.DataFrame(y_train)[0]
    y_val = pd.DataFrame(y_val)[0]

    # The encoder is fitted on the full label range so that classes missing
    # from this sample still get a stable index.
    le = preprocessing.LabelEncoder()
    le.fit(list(range(num_classes)))
    # Encode whole columns at once; calling ``le.transform`` once per label
    # is needlessly slow on tens of thousands of rows.
    y_train = to_categorical(le.transform(y_train), num_classes)
    y_val = to_categorical(le.transform(y_val), num_classes)

    model = Sequential()
    model.add(Dense(1024, input_shape=(x_train.shape[1],), activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
    model.fit(x_train, y_train,
              batch_size=500,
              epochs=50,
              validation_data=(x_val, y_val),
              callbacks=[monitor])
    model.save("../models/80000NN.h5py")

    X_test = np.load('../data/test_x.npy')
    Y_test = np.load('../data/test_y.npy')
    # NOTE(review): the test labels are read from column 1 while the training
    # labels come from column 0 of the label array -- confirm both files
    # really use different layouts.
    Y_test = Y_test[:, 1]
    score = accuracy_score(model.predict_classes(X_test), Y_test)

    print(score)
Ejemplo n.º 10
0

# Prepare the data.
# 1. Flatten every image so the network receives one feature vector per
#    sample (the product of all non-sample axes, e.g. 28*28 = 784).
dimData = np.prod(train_images.shape[1:])

# 2. Reshape, convert to float and scale the values into [0, 1].
train_data = train_images.reshape(train_images.shape[0], dimData).astype('float') / 255.0
test_data = test_images.reshape(test_images.shape[0], dimData).astype('float') / 255.0

# 3. Turn the integer labels into one-hot vectors.
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

# Build the network: two hidden ReLU layers and a 10-way softmax.
model = Sequential([
    Dense(512, activation='relu', input_shape=(dimData,)),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    train_data,
    train_labels_one_hot,
    batch_size=256,
    epochs=5,
    verbose=1,
    validation_data=(test_data, test_labels_one_hot),
)

test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
# Predicted class of the first test sample.
print(model.predict_classes(test_data[[0], :]))
Ejemplo n.º 11
0
    Conv2D(20, kernel_size1, padding=padding, activation='relu'),
    Dropout(.5),
    Conv2D(10, kernel_size1, padding=padding, activation='relu'),
    Dropout(.5),
    Flatten(),
    Dense(10, activation='softmax')
])

# Compile model6: Adam with learning rate 0.001; integer class labels are
# used directly (sparse categorical cross-entropy).
model6.compile(Adam(.001),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

# Fit model6, holding back 10% of the training data for validation.
# NOTE(review): `es` is defined outside this fragment; Keras expects
# `callbacks` to be a list -- confirm `es` is one and not a bare callback.
history = model6.fit(X_train,
                     y_train,
                     validation_split=.1,
                     epochs=100,
                     callbacks=es,
                     verbose=2)

## Going with the simplest best model.

## Predicting with model2 (not the model6 trained just above). It had the best
## results with one of the simplest architectures.
predictions = model2.predict_classes(X_test)

# Accuracy 90.52%
# (bare expression: the value is only displayed in a notebook/REPL session)
accuracy_score(y_test, predictions)

# Saving out best model
model2.save("Fashion-MNIST/data/simple_cnn.h5")
Ejemplo n.º 12
0
# Hyper-parameters; the hidden layer width scales with the outer variable `i`.
batch_size = 16
lr = 0.1
epochs = 50
hidden_num = input_dim * i
hidden_num_2 = output_dim * i

# One-hot encode the training labels. The encoder needs a 2-D column;
# a plain ndarray reshape is used instead of np.matrix, which recent
# scikit-learn releases reject as input.
ohe = OneHotEncoder()
y_train = ohe.fit_transform(y_train.values.reshape(-1, 1)).toarray()

model = Sequential()
model.add(Dense(input_dim=input_dim, units=hidden_num))
model.add(Activation('relu'))
# Only the first layer of a Sequential model takes an input size; the output
# layer's input is whatever the previous layer produces (hidden_num units),
# so the former `input_dim=hidden_num_2` was dropped as misleading.
model.add(Dense(units=output_dim))
optimizer = SGD(lr=lr)
model.compile(loss='mean_squared_error', optimizer=optimizer)
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)

y_pre = model.predict_classes(x_test, batch_size=batch_size)
y_fit_pre = model.predict_classes(x_train, batch_size=batch_size)

# Predicted class indices start at 0; they are shifted by one before being
# compared with y_test.
y_true = y_test
y_pre = y_pre + 1
y_fit_pre = y_fit_pre + 1

# In[22]:

clf_name = 'NN'
clf_names.append(clf_name)
output(pipline_fit_score, pipline_test_score, pipline_fit_test_score, kappas_,
       y_true, y_pre, y_fit_pre, clf_name)
ouput2(pipline_fit_score, pipline_test_score, pipline_fit_test_score, kappas_,
       clf_names)
Ejemplo n.º 13
0
    new_model.add(layer)

# Stack the classifier head on top of the layers added before this point.
new_model.add(top_model)  # now this works

# Freeze the first 15 layers of new_model: setting trainable to False
# means their weights will not be updated during fit().
for layer in new_model.layers[:15]:
    layer.trainable = False

print('Model loaded.')

# summary() prints the table itself; the surrounding print() additionally
# outputs its return value.
print(new_model.summary())

# model_aug.load_weights('k64 binary 25percent stride8/fine_tuned_model_resnet_64_adam_weights.h5')
# Compile the model with an SGD/momentum optimizer and a very low learning
# rate (1e-4).
new_model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])

new_model.fit(x_train, y_train, epochs=100, batch_size=10)

# NOTE(review): the evaluation result is neither stored nor printed.
new_model.evaluate(x_test, y_test, verbose=0)

y_pred = new_model.predict_classes(x_test)

# Save the full model, then print the confusion matrix for the test set.
new_model.save(filepath='casia2_model.h5')
# new_model.save_weights('bottleneck_fc_model.h5')
plt.clf()
print(confusion_matrix(y_test, y_pred))
Ejemplo n.º 14
0
'''
classifier.add(Dense(25, activation='relu', kernel_initializer='random_normal'))
#Output Layer
classifier.add(Dense(10, activation='relu', kernel_initializer='random_normal'))

classifier.add(Dense(4, activation='relu', kernel_initializer='random_normal'))
'''
# Output layer: one sigmoid unit for the binary decision.
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))

classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Stop early once the training accuracy has not improved for two epochs.
stop_on_plateau = EarlyStopping(monitor='accuracy', patience=2)
classifier.fit(X_train, y_train, batch_size=100, epochs=10, callbacks=[stop_on_plateau])

eval_model = classifier.evaluate(X_test, y_test)
print(eval_model)

# Count by hand how many test predictions match the expected label.
predictions = classifier.predict_classes(X_test)
correct = 0
wrong = 0
for i in range(X_test.shape[0]):
    if predictions[i] == y_test[i]:
        correct += 1
    else:
        wrong += 1

print(correct)
print(wrong)
print(correct / y_test.shape[0])

    
Ejemplo n.º 15
0
[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

# Loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


# Accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Show one test image together with the model's prediction for it.

predict_test = model.predict_classes(test_data[[30], :])
print("The prediction of the 30th in the test dataset is: ", predict_test)

plt.imshow(test_images[30,:,:],cmap='gray')
plt.show()

# Combined curves. The x axis is derived from the recorded history so the
# plot keeps working when the number of training epochs is not exactly 10
# (a hard-coded np.arange(0, 10) makes plt.plot fail on a length mismatch).
epoch_axis = np.arange(len(history.history["loss"]))
plt.plot(epoch_axis, history.history["val_accuracy"], label="validation accuracy")
plt.plot(epoch_axis, history.history["loss"], label="training Loss")
plt.plot(epoch_axis, history.history["accuracy"], label="training accuracy")
plt.title("the loss and accuracy for both training data and validation data")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="best")
plt.show()

# 2 Single test image

print("Rendering test image...")
test_img_seven = test_images[27]
test_data_seven = test_data[[27], :]
plt.imshow(test_img_seven, cmap=plt.get_cmap('gray'))
plt.title("Model Prediction: {}".format(model.predict_classes(test_data_seven)[0]))
plt.show()

# 3 Change number of hidden layers and activation

print("Training a model with 2 more relu hidden layers...")
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(dimData,)))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
print()
# Keep the history of the retrained model: the figures below are meant to
# describe this model, not the one trained earlier.
history = model.fit(train_data, train_labels_one_hot, batch_size=256, epochs=10, verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

# summarize history for accuracy (previously the legend was drawn on an
# empty figure because nothing had been plotted)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# get prediction of selected image data
# NOTE(review): this feeds the raw image from test_images, whereas the other
# predictions in this script use rows of test_data -- confirm that no
# scaling step is being skipped here.
img = test_images[400]  # get image at location
test_img = img.reshape((1, 784))  # reshape to fit
img_class = model.predict_classes(test_img)  # predicted data from selected image
prediction = img_class[0]
classname = img_class[0]
print("Class: ", classname)

# setup image for output
img = img.reshape((28, 28))
plt.imshow(img)
plt.title(classname)
plt.show()

# Loss = 0.9
# Accuracy = 0.983
# Accuracy = 0.983
Ejemplo n.º 18
0
def _read_token_lines(path):
    """Return the whitespace-separated tokens of every line in ``path``."""
    with open(path, encoding="utf-8") as f:
        # ``split()`` already discards the trailing newline.
        return [line.split() for line in f]


def _encode_sentences(sentences, word_to_ix):
    """Map each sentence to its vocabulary ids, skipping unknown words."""
    return [[word_to_ix[word] for word in sentence if word in word_to_ix]
            for sentence in sentences]


def main():
    """Train an embedding + LSTM sentence classifier and predict the dev set.

    Reads tokenised sentences from ``data2/train_data.txt`` and one label per
    line from ``data2/train_label.txt``, trains for 50 epochs and prints the
    predicted classes for ``data2/dev_1.txt``.
    """
    training_data = _read_token_lines('data2/train_data.txt')
    print(training_data[:3])

    training_label = _read_token_lines('data2/train_label.txt')
    print(training_label[:5])

    # Build the vocabulary; id 0 is reserved for padding.
    word_to_ix = {"pad": 0}
    for sentence in training_data:
        for word in sentence:
            if word not in word_to_ix:
                word_to_ix[word] = len(word_to_ix)

    # NOTE(review): this tag table is never used below; the labels are taken
    # from the label file as they are -- confirm it holds class ids.
    tag_to_ix = {'未知': 0, '竞争': 1, '隶属': 2, '上下级': 3, '同级': 4, '夫妻': 5, '亲属': 6, 'pad': 7}

    vec_data = _encode_sentences(training_data, word_to_ix)

    # Pad every sentence (with id 0, at the end) to the longest training one.
    max_length = max(len(sentence) for sentence in training_data)
    matrix = keras.preprocessing.sequence.pad_sequences(vec_data, maxlen=max_length, padding='post', value=0)
    print(matrix[:3])
    m = np.array(matrix)
    print(m.shape)

    # Build the model.
    model = Sequential()

    # Embedding layer (randomly initialised); id 0 is masked as padding.
    model.add(Embedding(input_dim=len(word_to_ix), output_dim=100, mask_zero=True))

    # ``units`` is the Keras 2 name of the former ``output_dim`` argument.
    model.add(LSTM(units=100, activation='tanh'))
    model.add(Dense(7, activation='softmax'))

    model.summary()

    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # One label per sentence, as a column vector. The row count follows the
    # label file instead of being hard-coded to 50.
    matrix_2 = np.reshape(training_label, (len(training_label), 1))

    model.fit(matrix, matrix_2, batch_size=32, epochs=50)

    # Test.
    test_data = _read_token_lines('data2/dev_1.txt')
    print(test_data[:])

    # The test sentences differ in length, so they must be padded exactly
    # like the training data before they can be fed to the model as one batch.
    test_vec = keras.preprocessing.sequence.pad_sequences(
        _encode_sentences(test_data, word_to_ix),
        maxlen=max_length, padding='post', value=0)

    ans = model.predict_classes(test_vec)
    print(ans)
Ejemplo n.º 19
0
class myModel(object):
    """Small convolutional network with two sigmoid outputs for 100x100 RGB images."""

    def __init__(self):
        """Build the (uncompiled) network: four conv/pool stages and a dense head."""
        self.model = Sequential()
        self.model.add(Conv2D(32, (3, 3), input_shape=(100, 100, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(32, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Flatten())
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.85))
        self.model.add(Dense(2))
        self.model.add(Activation('sigmoid'))

    def train(self, dataset):
        """Compile the model and fit it on the generators provided by ``dataset``.

        ``dataset`` must expose ``batch_size``, ``nb_epoch``, ``total_train``,
        ``total_val``, ``train_data_generate()`` and ``val_data_generate()``.
        """
        batch_size = dataset.batch_size
        nb_epoch = dataset.nb_epoch
        self.model.compile(loss='binary_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])
        self.model.fit_generator(
            dataset.train_data_generate(),
            steps_per_epoch=dataset.total_train // batch_size,
            epochs=nb_epoch,
            validation_data=dataset.val_data_generate(),
            validation_steps=dataset.total_val // batch_size)

    def save(self, file_path="model.h5"):
        """Write the model weights to ``file_path``."""
        self.model.save_weights(file_path)
        # Report success only after the weights were actually written.
        print('Model Saved.')

    def load(self, file_path="model.h5"):
        """Load the model weights from ``file_path``."""
        self.model.load_weights(file_path)
        # Report success only after the weights were actually read.
        print('Model Loaded.')

    def predict(self, image):
        """Classify one image and return the predicted class index.

        :param image: array holding one ``IMAGE_SIZE`` x ``IMAGE_SIZE`` RGB
            image; it is left unmodified
        :return: the predicted class (first entry of ``predict_classes``)
        :raises ValueError: if ``image`` cannot be reshaped to a single
            ``(1, IMAGE_SIZE, IMAGE_SIZE, 3)`` batch
        """
        # ``reshape`` returns the batch view; the former in-place
        # ``ndarray.resize`` returned None and silently altered the caller's
        # array (padding/truncating it when the size did not match).
        img = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3)).astype('float32')
        # Normalise pixel values to [0, 1].
        img /= 255

        # Raw output values.
        result = self.model.predict(img)
        print(result)
        # Class index (0/1).
        result = self.model.predict_classes(img)
        print(result)

        return result[0]

    def evaluate(self, dataset):
        """Print the accuracy on two batches drawn from ``dataset.valid``."""
        # NOTE(review): ``train`` uses ``dataset.val_data_generate()`` while
        # this reads ``dataset.valid`` -- confirm both exist on the dataset.
        score = self.model.evaluate_generator(dataset.valid, steps=2)
        print("样本准确率%s: %.2f%%" %
              (self.model.metrics_names[1], score[1] * 100))
def _add_payment_method_flags(frame):
    """Add the 0/1 columns TR, FA, SP and VA derived from ``MethodOfPayment``."""
    method = frame["MethodOfPayment"].str.lower()
    for code in ("tr", "fa", "sp", "va"):
        # Stored as floats, like the values the former row-wise assignment
        # produced, so the exported CSV keeps the same format.
        frame[code.upper()] = method.str.contains(code, regex=False).astype(float)


def _add_fa_shortage(frame):
    """Add ``FA_Shortage``: total fees minus the accepted financial aid."""
    frame["FA_Shortage"] = frame["TotalFees"] - frame["FA_Accepted"]


def _add_late_on_payment(frame, payment_threshold):
    """Add ``LateOnPayment``: 1 for pure self-payers below the paid threshold."""
    method = frame["MethodOfPayment"].str.lower()
    self_pay_only = (method.str.contains("sp", regex=False)
                     & ~method.str.contains("tr", regex=False)
                     & ~method.str.contains("fa", regex=False))
    # Unparseable percentages become NaN and therefore never count as late.
    percent_paid = pd.to_numeric(frame["PercentPaid"], errors="coerce")
    frame["LateOnPayment"] = (self_pay_only
                              & (percent_paid < payment_threshold)).astype(float)


def main():
    """Train the "action required" classifier and score an unseen data set.

    Steps: derive payment features for the training CSV and export them to
    ``NewReport.csv``; train a dense binary classifier on ``predictiveVars``;
    print a confusion-matrix summary for a 30% hold-out; finally predict on
    ``FinalOutputData - All Merged Data.csv`` and write the result to
    ``AI Predictions2.csv``.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    from keras import Sequential
    from keras.layers import Dense, Dropout
    from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard

    # Training data
    trainingData = pd.read_csv("Action Required data.csv", encoding = "ISO-8859-1")

    # ModifiedData is the same object as trainingData, so the derived columns
    # are available on trainingData as well.
    ModifiedData = trainingData

    # Convert to strings so the substring tests work for empty cells too
    ModifiedData["MethodOfPayment"] = ModifiedData["MethodOfPayment"].astype(str)

    # One-hot columns for the payment methods, then the financial-aid shortage
    _add_payment_method_flags(ModifiedData)
    _add_fa_shortage(ModifiedData)

    # Threshold for figuring out who's late on payments
    PaymentThreshold = calculate_payment_percentage()
    _add_late_on_payment(ModifiedData, PaymentThreshold)

    ModifiedData.to_csv("NewReport.csv")

    # BUILD THE NEURAL NETWORK
    predictiveData = trainingData[predictiveVars]

    # Remove extraneous data (not in the predictive list) from the training data
    trainingData = trainingData[:][["ZNumber", "ActionRequired"]]
    trainingData = pd.concat([trainingData, predictiveData], axis = 1)

    ##BALANCE THE DATA
    #Section not used. Although predictions were accurate, the model was flagging more accounts than necessary.

    #Problem = pd.DataFrame(trainingData[trainingData[:]["ActionRequired"] == 1])
    #NoProblem = pd.DataFrame(trainingData[trainingData[:]["ActionRequired"] == 0])
    #smallestSizedDf = min(len(Problem),len(NoProblem))
    #Problem = Problem[:smallestSizedDf]
    #NoProblem = NoProblem[:smallestSizedDf]
    ##Recombine the data. It is now balanced with 50% problem items and 50% good items.
    #trainingData = pd.concat([Problem, NoProblem], axis = 0)

    X = trainingData[:][predictiveVars]
    X['ZNumber'] = trainingData["ZNumber"]
    X.set_index("ZNumber", inplace = True)

    y = pd.DataFrame(trainingData[:]["ActionRequired"])
    y['ZNumber'] = trainingData["ZNumber"]
    y.set_index("ZNumber", inplace = True)

    # Standardize the input features; this scaler is reused for the unseen data
    sc = StandardScaler()
    X = pd.DataFrame(sc.fit_transform(X))

    # Replace na's with 0
    X.fillna(value=0, inplace= True)

    # Split into training and test data; the test set is 30% of the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    classifier = Sequential()
    # First hidden layer: one input per predictive variable
    classifier.add(Dense(8, activation='relu', kernel_initializer='random_normal', input_dim=len(predictiveVars)))
    # Further hidden layers of growing width
    classifier.add(Dense(16, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dense(64, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dense(128, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    classifier.add(Dense(256, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    classifier.add(Dense(512, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    classifier.add(Dense(1024, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    classifier.add(Dense(2048, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    classifier.add(Dense(4096, activation='relu', kernel_initializer='random_normal'))
    classifier.add(Dropout(0.2))
    # Output layer
    classifier.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))

    # Compiling the neural network
    classifier.compile(optimizer ='adam',loss='binary_crossentropy', metrics =['accuracy'])

    # Callbacks
    NAME = f"Model {CurrentDate.month}-{CurrentDate.day}-{CurrentDate.year} {CurrentDate.hour}-{CurrentDate.minute}"
    # Log directory named after the model run defined above
    tensorboard = TensorBoard(log_dir = f"C:/Users/mfrangos2016/Desktop/R/Leap Ahead Data Merger/logs/{NAME}")
    filepath = "CallBack Model-{epoch:02d}-{val_acc:.3f}"
    # The options belong to ModelCheckpoint. They used to sit inside the
    # str.format() call, where they were silently ignored, so the checkpoint
    # ran with its defaults.
    # NOTE(review): newer Keras releases log this metric as "val_accuracy";
    # confirm the name for the installed version.
    checkpoint = ModelCheckpoint("models/{}.model".format(filepath),
                                 monitor = "val_acc",
                                 verbose = 1,
                                 save_best_only = True,
                                 mode = "max")

    ##########################################
    #Fitting the data to the training dataset
    ##########################################
    classifier.fit(X_train,y_train, batch_size=8, epochs=1000, shuffle=True, validation_data=(X_test,y_test), callbacks = [tensorboard,checkpoint])

    eval_model=classifier.evaluate(X_train, y_train)
    print("Loss = ", eval_model[0],"||||||||||||||", "Accuracy = ", eval_model[1])

    # Predict the test set; a probability above 0.5 counts as class 1
    y_pred=classifier.predict(X_test)
    y_pred =(y_pred>0.5)

    # Check the accuracy on the test dataset
    cm = confusion_matrix(y_test, y_pred)

    print("True Positive (no problems)", cm[0][0], "False Positive", cm[0][1],"\n",
          "False Negative", cm[1][0], "True Negative (Problems Detected)", cm[1][1])

    print("Negative Accuracy = ", cm[1][1]/(cm[1][1]+cm[1][0]))
    print("Positive Accuracy = ", cm[0][0]/(cm[0][0]+cm[0][1]))
    print("Total Accuracy = ", (cm[0][0]+cm[1][1])/(cm[0][0]+cm[0][1]+cm[1][0]+cm[1][1]))

    # LETS MAKE PREDICTIONS ON AN UNSEEN DATASET

    # Load Data
    SecondValidationSet = pd.read_csv("FinalOutputData - All Merged Data.csv")

    #Data for testing bugs
    #SecondValidationSet = pd.read_csv("aCTION REquired data.csv")

    SecondValidationSet["MethodOfPayment"] = SecondValidationSet["MethodOfPayment"].astype(str)

    # Same derived columns as for the training data (same column order as
    # before: flags, late marker, shortage)
    _add_payment_method_flags(SecondValidationSet)
    _add_late_on_payment(SecondValidationSet, PaymentThreshold)
    _add_fa_shortage(SecondValidationSet)

    X2 = SecondValidationSet[:][predictiveVars]

    # Scale with the statistics learned from the training data. Fitting a new
    # scaler here would feed the network values on a different scale than the
    # one it was trained on.
    X2 = pd.DataFrame(sc.transform(X2))

    # Replace na's with 0
    X2.fillna(value=0, inplace= True)

    X2["ZNumber"] = SecondValidationSet["ZNumber"]
    X2.set_index("ZNumber", inplace = True)

    Xnew = X2
    # Make predictions using the classifier model
    ynew = classifier.predict_classes(Xnew)

    # Assign the predictions to the students
    SecondValidationSet["IsThereAProblem?"] = ynew

    SecondValidationSet.to_csv("AI Predictions2.csv")
Ejemplo n.º 21
0
from keras.callbacks import TensorBoard

# Convert the test images to float32 and scale both image sets by 1/255.
testImages = testImages.astype(np.float32) / 255
trainImages /= 255

NUM_DIGITS = 10
print(train_labels[:10])
# One-hot encode the integer labels.
trainLabels = keras.utils.to_categorical(train_labels, NUM_DIGITS)
testLabels = keras.utils.to_categorical(test_labels, NUM_DIGITS)

# One hidden ReLU layer followed by a 10-way softmax.
model = Sequential([
    Dense(128, activation=tf.nn.relu, input_shape=(FLATTEN_DIM,)),
    Dense(10, activation=tf.nn.softmax),
])
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
print(model.summary())

# Write graph and image summaries to the "logs" directory while training.
tbCallbacks = TensorBoard(log_dir='logs',
                          histogram_freq=0,
                          write_graph=True,
                          write_images=True)
model.fit(trainImages, trainLabels, epochs=20, callbacks=[tbCallbacks])

predictLabels = model.predict_classes(testImages)
print("result=", predictLabels[:10])

loss, accuracy = model.evaluate(testImages, testLabels)
print("loss={}, accuracy={}".format(loss, accuracy))
                             monitor='val_loss',
                             save_best_only=True)

# Train with 10% of the training data held out for validation.
# early_stopping and save_model are callbacks defined above this excerpt.
history = model.fit(x=X_train,
                    y=y_train1,
                    verbose=3,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.1,
                    callbacks=[early_stopping, save_model])
# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()
print(model.get_weights())
# Metrics recorded for the last epoch that ran.
print(history.history['accuracy'][-1])
print(history.history['val_accuracy'][-1])
kutils.plot_loss(history)

digit_test = pd.read_csv(os.path.join(path, "test.csv"))
# A bare `digit_test.shape` expression shows nothing outside a notebook,
# so print it explicitly.
print(digit_test.shape)
digit_test.info()

X_test = digit_test / 255.0
# Sequential.predict_classes() was removed in TensorFlow 2.6. The output
# layer is not visible in this excerpt, so reproduce its behaviour for both
# cases: arg-max for multi-unit outputs, 0.5 threshold for a single unit.
test_probs = model.predict(X_test)
if test_probs.shape[-1] > 1:
    pred = test_probs.argmax(axis=-1)
else:
    pred = (test_probs > 0.5).astype("int32")
# ImageId is 1-based, one row per test sample.
submissions = pd.DataFrame({
    "ImageId": list(range(1,
                          len(pred) + 1)),
    "Label": pred
})
submissions.to_csv(os.path.join(path, "submission.csv"),
                   index=False,
                   header=True)
Ejemplo n.º 23
0
# Build the network: four sigmoid hidden layers feeding a 10-way softmax.
model = Sequential()
model.add(Dense(500, activation='sigmoid', input_shape=(dimData,)))
model.add(Dense(446, activation='sigmoid'))
model.add(Dense(446, activation='sigmoid'))
model.add(Dense(300, activation='sigmoid'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels_one_hot, batch_size=256, epochs=20, verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

# Training vs. validation loss, one point per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
# The history lists are indexed by epoch, so that is what the x-axis shows.
plt.xlabel('epoch')
plt.legend(['train', 'validation'])
plt.show()

# Classify the first test image and display it with the predicted class.
# NOTE(review): test_images is fed to the model without any rescaling visible
# here; if test_data was normalised for training, apply the same
# preprocessing to this sample -- confirm.
single_image = test_images[0, :, :]
single_test = single_image.reshape(1, dimData)
# Sequential.predict_classes() was removed in TensorFlow 2.6; the arg-max of
# the softmax output gives the same class index.
predicted = model.predict(single_test).argmax(axis=-1)
print(predicted)
# `predicted` is a 1-D array of class indices, not image data, so show the
# input image and report the class in the title instead of imshow(predicted).
plt.imshow(single_image)
plt.title('predicted: {}'.format(predicted[0]))
plt.show()
          input_dim=8))  # First hidden layer
ann.add(Dense(4, activation='relu',
              kernel_initializer='random_normal'))  # Second hidden layer
ann.add(Dense(1, activation='sigmoid',
              kernel_initializer='random_normal'))  # Output layer

# Optimize neural network with Adam (Adaptive moment estimation), combination of RMSProp and Momentum.
# Momentum takes past gradients into account in order to smooth out the gradient descent.
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

ann.fit(X_train_norm, y_train, batch_size=10, epochs=100, verbose=0)

# In[16]:

# NOTE(review): loss/accuracy are computed on the training split, while the
# classification report below uses the test split -- confirm this is intended.
ann_eval = ann.evaluate(X_train_norm, y_train, verbose=0)
# Sequential.predict_classes() was removed in TensorFlow 2.6. For the
# single-unit sigmoid output above, thresholding the predicted probability
# at 0.5 gives the same class labels.
y_prob = ann.predict(X_test_norm, batch_size=10, verbose=0)
y_pred = (y_prob > 0.5).astype("int32").flatten()
ann_class_report = classification_report(y_test.astype(bool),
                                         y_pred.astype(bool))

print('Loss and accuracy:')
print(ann_eval)
print()
print(ann_class_report)

# ## ROC Plot

# In[18]:

from sklearn import metrics

plt.figure()
Ejemplo n.º 25
0
model.add(Dropout(0.2))
model.add(Dense(units=757, activation="relu"))
model.add(Dense(units=10, activation="softmax"))
# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
trained_model = model.fit(x_train, y_train, verbose=1, epochs=3)
print(trained_model.history.keys())
# Newer Matplotlib releases renamed the bundled seaborn styles to
# "seaborn-v0_8*"; fall back to that name when the old one is unavailable.
plt.style.use("seaborn" if "seaborn" in plt.style.available else "seaborn-v0_8")
# Training loss and accuracy per epoch on a shared axis.
fig, ax = plt.subplots(figsize=(15, 9))
ax.plot(trained_model.history["loss"])
ax.plot(trained_model.history["accuracy"])
ax.legend(["Loss", "Accuracy"])
ax.set(title="LOSS VS ACCURACY GRAPH",
       xlabel="EPOCHS",
       ylabel="LOSS & ACCURACY")
plt.show()
# Sequential.predict_classes() was removed in TensorFlow 2.6; the arg-max of
# the softmax output gives the same class indices.
y_pred = model.predict(x_test).argmax(axis=-1)
cf = classification_report(y_test, y_pred)
print(cf)
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(15, 9))
sns.heatmap(cm, annot=True)
plt.show()
# evaluate() returns [loss, accuracy]; report the accuracy as a percentage.
acs = model.evaluate(x_test, y_test)[1]
acs = acs * 100
print(acs)
# Show the *test* sample that the title's label and prediction refer to
# (the image previously came from x_train while the title used y_test/y_pred).
# The reshape keeps only the first two axes -- assumes a single trailing
# channel axis; TODO confirm.
sample = x_test[5]
plt.imshow(sample.reshape(sample.shape[0], sample.shape[1]))
plt.title("Actual = {} Prediction = {}".format(y_test[5], y_pred[5]))
plt.show()
Ejemplo n.º 26
0
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1.]]
"""
print(seq2Y)

# Stateful LSTM fed one step at a time (batch of 1, 1 timestep, 5 features).
model = Sequential()
model.add(LSTM(20, batch_input_shape=(1, 1, 5), stateful=True))
model.add(Dense(5, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')

# Alternate single-epoch passes over the two sequences, resetting the LSTM
# state after each pass so state from one sequence does not carry into the next.
for _ in range(650):
    model.fit(seq1X, seq1Y, epochs=1, batch_size=1, verbose=1, shuffle=False)
    model.reset_states()
    model.fit(seq2X, seq2Y, epochs=1, batch_size=1, verbose=0, shuffle=False)
    model.reset_states()

n_batch = 1

# Test the LSTM's predictions on sequence 1, then on sequence 2.
for heading, seq, seqX in (('Sequence 1', seq1, seq1X),
                           ('Sequence 2', seq2, seq2X)):
    print(heading)
    # Sequential.predict_classes() was removed in TensorFlow 2.6; the arg-max
    # over the 5 output units gives the same class indices.
    result = model.predict(seqX, batch_size=n_batch, verbose=0).argmax(axis=-1)
    model.reset_states()
    # Print each input value, the value that follows it, and the prediction.
    for i, yhat in enumerate(result):
        print('X=%.1f y=%.1f, yhat=%.1f' % (seq[i], seq[i + 1], yhat))
Ejemplo n.º 27
0
# One-hot encode the labels to match the categorical_crossentropy loss.
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

# creating network
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(dimData,)))
model.add(Dense(512, activation='tanh'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels_one_hot, batch_size=256, epochs=20, verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

# Classify the first test sample (the list index keeps it 2-D).
# Sequential.predict_classes() was removed in TensorFlow 2.6; the arg-max of
# the softmax output gives the same class index.
predict1 = model.predict(test_data[[0], :]).argmax(axis=-1)
print(predict1)

[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))
print("======================")
print(history.history.keys())

# question 1
# Depending on the Keras version the metric is recorded as 'accuracy' or as
# 'acc'; pick whichever key is present instead of hard-coding one.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_data,
                    train_labels_one_hot,
                    batch_size=256,
                    epochs=3,
                    verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

# Classify and display the first four test samples, one figure each.
for img in test_data[:4]:
    test_img = img.reshape((1, 784))
    # Sequential.predict_classes() was removed in TensorFlow 2.6; with the
    # categorical_crossentropy setup above, the arg-max over the class scores
    # gives the same index.
    img_class = model.predict(test_img).argmax(axis=-1)
    prediction = classname = img_class[0]
    print("Class: ", classname)
    # Restore the 28x28 layout for display.
    img = img.reshape((28, 28))
    plt.imshow(img)
    plt.title("Predicted Value: " + str(classname))
    plt.show()

# NOTE(review): this prints the History object itself; history.history holds
# the per-epoch metrics if those were what was meant -- confirm.
print(history)
# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
        

        #Compiling the neural network
        classifier.compile(optimizer ='adam', loss='binary_crossentropy', metrics =['accuracy'])

        #class_weight = {0 : 1., 1: 4.5}
        class_weight = {0 : 1., 1: 4}

        history = classifier.fit(X, y_train, batch_size=30, epochs=15, class_weight=class_weight, validation_data=(X_test, y_test))

        
            # predict probabilities for test set
        yhat_probs = classifier.predict(X_test, verbose=0)
            # predict crisp classes for test set
        yhat_classes = classifier.predict_classes(X_test, verbose=0)
            # reduce to 1d array
        yhat_probs = yhat_probs[:, 0]
        yhat_classes = yhat_classes[:, 0]

        accuracy = accuracy_score(y_test, yhat_classes)
        precision = precision_score(y_test, yhat_classes)
        recall = recall_score(y_test, yhat_classes)
        f1 = f1_score(y_test, yhat_classes)
        roc_auc = roc_auc_score(y_test, yhat_probs)
        balanced_accuracy = balanced_accuracy_score(y_test, yhat_classes)
        average_precision = average_precision_score(y_test, yhat_probs)


        if counter == 1:
            accuracy_1.append(accuracy)
Ejemplo n.º 30
0
[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

# Model Accuracy, then Model Loss: one figure per metric, each with the
# training curve and its 'val_'-prefixed counterpart.
for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('Model ' + label)
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Testing'], loc='best')
    plt.show()

# Plot the test image
plt.imshow(test_data[150].reshape(28, 28), cmap='gray')
plt.title('Ground Truth : {}'.format(test_labels[150]))
plt.show()

# Predict test image number
# Sequential.predict_classes() was removed in TensorFlow 2.6; use the arg-max
# of the predicted class scores instead. The model definition is outside this
# excerpt -- the one-hot test labels suggest a multi-class output; TODO confirm.
predict = model.predict(test_data[150].reshape(1, 784)).argmax(axis=-1)
print('Predicted number: ' + str(predict))