Example #1
import numpy as np


def run_experiment(look_forward, hidden_size, batch_size, epochs, dropout,
                   dataset):
    x, y = get_sentiments_prices(dataset['twitter_sentiments'],
                                 dataset['reddit_sentiments'],
                                 dataset['coin_price'], look_forward)

    for i in range(x.shape[1]):
        x[:, i] = normalize_array(x[:, i])

    # split into train and test sets
    train_x, test_x = split(x)
    train_y, test_y = split(y)

    train_x = np.reshape(train_x,
                         (train_x.shape[0], look_forward, train_x.shape[1]))
    test_x = np.reshape(test_x,
                        (test_x.shape[0], look_forward, test_x.shape[1]))

    model = create_model(hidden_size=hidden_size,
                         look_forward=look_forward,
                         dropout=dropout)
    model = train(model,
                  train_x,
                  train_y,
                  batch_size=batch_size,
                  epochs=epochs)
    y_pred = test(model, test_x)
    score = evaluate(test_y, y_pred)
    print('Test Score: %.2f RMSE' % score)
    return score
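The helpers used above (get_sentiments_prices, normalize_array, split, create_model, train, test, evaluate) live elsewhere in the project. A minimal sketch of the two preprocessing helpers, assuming min-max scaling and a contiguous time-series split; the names match the calls above but the bodies are guesses:

import numpy as np

def normalize_array(a):
    # assumption: min-max scale a 1-D array into [0, 1]
    lo, hi = a.min(), a.max()
    return (a - lo) / (hi - lo) if hi > lo else np.zeros_like(a)

def split(data, train_ratio=0.8):
    # assumption: contiguous split that preserves time order
    cut = int(len(data) * train_ratio)
    return data[:cut], data[cut:]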
Example #2
from time import time

import util
from dataset import data_validation
from model import model

start_time = time()

model.load_weights('data/model.h5')
data = data_validation()

print('Validating model...')
result = model.evaluate(data[0], data[1])
print()
print('Done')
print('Result:', result)
print('Time:', util.time_delta(time() - start_time))
Example #3
X_train, X_test, Y_train, Y_test = train_test_split(FEATURE,
                                                    LABEL,
                                                    test_size=SPLIT,
                                                    random_state=SEED)
X_train = np.array([i for i in X_train])
X_test = np.array([i for i in X_test])
Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)
logger.debug('training shape = %s', X_train.shape)

# the network has four parallel inputs; feed the same features to each branch
DATA = [X_train, X_train, X_train, X_train]

FIT_HISTORY = model.fit(DATA, Y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCH,
                        verbose=1,
                        validation_split=0.4,
                        # validation_data=(X_val, Y_val),
                        callbacks=[EARLY_STOP,])
history_logging(FIT_HISTORY)
loss, acc = model.evaluate([X_test, X_test, X_test, X_test],
                           Y_test,
                           verbose=1,
                           batch_size=32)
logger.info('Testing Acc: %s', acc)
logger.info('Testing Loss: %s', loss)

fname = 'DNN_{}.h5'.format(time.strftime('%Y%m%d-%H%M'))
model.save(fname)
logger.debug('Saving model as: %s', fname)
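Feeding DATA = [X_train, X_train, X_train, X_train] implies the imported model has four parallel inputs that each receive the same features. The architecture itself is not shown; a functional-API sketch of a network with that input signature (layer sizes, feature shape, and class count are assumptions):

from keras.layers import Dense, Input, concatenate
from keras.models import Model

def build_four_input_model(n_features, n_classes):
    # four branches, each consuming the same feature vector
    inputs = [Input(shape=(n_features,)) for _ in range(4)]
    branches = [Dense(64, activation='relu')(x) for x in inputs]
    merged = concatenate(branches)
    outputs = Dense(n_classes, activation='softmax')(merged)
    m = Model(inputs=inputs, outputs=outputs)
    m.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return m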
""" From numerical label to categorical label """
y_train = to_categorical(y_train, classes)
y_test = to_categorical(y_test, classes)

""" Building the Model """
model = model()
model.summary()
""" End of the Model """

""" Compilation """
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

""" Training """
history = model.fit(x_train, y_train, batch_size=256, epochs=120, verbose=2, shuffle=True,
                    validation_data=(x_test, y_test))

""" Evaluation of the model """
score = model.evaluate(x_test, y_test, verbose=1)
print('Test set loss: {0:.2f} and Accuracy: {1:.2f}%'.format(score[0], score[1] * 100))

""" Saving the weight file """
model.save('MODEL DATA/cifar-10.h5')

""" Saving the history in a text file """
with open('history.txt', 'a+') as f:
    print(history.history, file=f)
    print(score, file=f)

print('All Done!')
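Because model.save writes the architecture, weights, and optimizer state to a single HDF5 file, the model saved above can later be restored without rebuilding it:

from keras.models import load_model

restored = load_model('MODEL DATA/cifar-10.h5')
restored.summary()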
Example #5
import matplotlib.pyplot as plt

from model import history, model
from preprocess import x_test, one_hot_test_labels

acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.title('Training accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.title('Training loss')
plt.legend()
plt.show()

print(history.history.keys())
print(model.evaluate(x_test, one_hot_test_labels))
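history.history holds one list per logged metric, as the keys printed above show. If the model was trained with validation data, the plot can be extended with the validation curve; the 'val_accuracy' key below assumes the 'accuracy' metric used earlier:

val_acc = history.history.get('val_accuracy')
if val_acc is not None:
    plt.figure()
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()
    plt.show()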
Example #6
    testSents, _ = Util.loadConll('test.conll')
    genDictionaries(trainSents, trainTrees)

    embedding_filename = 'word2vec.model'

    embedding_array = load_embeddings(embedding_filename, wordDict, posDict, labelDict)

    labelInfo = []
    for idx in np.argsort(list(labelDict.values())):
        labelInfo.append(list(labelDict.keys())[idx])
    parsing_system = ParsingSystem(labelInfo[1:])
    print(parsing_system.rootLabel)

    print("Generating Training Examples")
    trainFeats, trainLabels = genTrainExamples(trainSents, trainTrees)

    print("Done.")

    # Build the graph model
    graph = tf.Graph()
    model = DependencyParserModel(graph, embedding_array, Config)

    num_steps = constants.max_iter
    with tf.Session(graph=graph) as sess:

        model.train(sess, num_steps)

        model.evaluate(sess, testSents)
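
load_embeddings is defined elsewhere; a plausible sketch, assuming it builds one embedding row per dictionary entry, copying pretrained vectors from the word2vec model where available and initializing the rest randomly. The dimension default is an assumption:

import numpy as np
from gensim.models import Word2Vec

def load_embeddings(filename, wordDict, posDict, labelDict, dim=50):
    w2v = Word2Vec.load(filename)
    n_tokens = len(wordDict) + len(posDict) + len(labelDict)
    # random init covers POS tags, labels, and out-of-vocabulary words
    embedding_array = np.random.uniform(-0.01, 0.01, (n_tokens, dim))
    for word, idx in wordDict.items():
        if word in w2v.wv:
            embedding_array[idx] = w2v.wv[word]
    return embedding_array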


Example #7
from keras.datasets import mnist
from model import model
from keras.utils import to_categorical

#Load / Process the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

#Reshape for the CNN
x_train = x_train.reshape(60000, 28, 28, 1)
x_test = x_test.reshape(10000, 28, 28, 1)

#one-hot encoding
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

#generate / compile the model
image_size = x_train[0].shape
model = model(image_size)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

#train the model
model.fit(x_train, y_train, epochs=3, validation_split=0.2, batch_size=32)

#test the model
loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)
print('Testing Results:', loss_and_metrics)
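Note that the images are fed in with raw 0-255 pixel values. A common refinement, not in the original snippet, is to scale them to [0, 1] right after the reshape step, which usually speeds up convergence:

x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0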
Example #8
# Test set:
_, (test_images, test_labels) = mnist.load_data()

test_images = test_images.reshape(*test_images.shape, 1)
test_images = test_images / 255.0

test_labels = to_categorical(test_labels, num_classes=10)

# Trained model:
model.load_weights(SAVED_MODEL)

model.evaluate(test_images, test_labels)

## Confusion matrix:
Y_pred = model.predict(test_images)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(test_labels, axis=1)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
plot_confusion_matrix(confusion_mtx, classes=range(10))

## Top 6 errors:
# errors:
errors = (Y_pred_classes - Y_true != 0)
Y_pred_classes_errors = Y_pred_classes[errors]
Y_pred_errors = Y_pred[errors]
Y_true_errors = Y_true[errors]
X_true_errors = test_images[errors]
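plot_confusion_matrix is not defined in this excerpt; a minimal matplotlib version with the same call signature might look like this:

import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes):
    classes = list(classes)
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix')
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes)
    plt.yticks(ticks, classes)
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()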
Example #9
history = AccuracyHistory()  # custom Keras callback defined elsewhere

# Add distortion to our images (data augmentation)
datagen = keras.preprocessing.image.ImageDataGenerator(
  rotation_range=30,
  width_shift_range=0.25,
  height_shift_range=0.25,
  shear_range=0.25,
  zoom_range=0.2
)

# the batch size belongs to the generator rather than to fit()
train_generator = datagen.flow(dl.train_image_data, dl.train_label_data,
                               batch_size=const.BATCH_SIZE)

# To visualize the images:
#visualize_data(dl.train_image_data, dl.train_label_data)
#exit(0)
model.fit(train_generator,
          epochs=const.EPOCHS,
          verbose=1,
          validation_data=(dl.test_image_data, dl.test_label_data),
          callbacks=[history])
test_loss, test_acc = model.evaluate(dl.test_image_data, dl.test_label_data)

# save in h5 format
model.save('mnist_shift.h5')

# conversion to the web (TensorFlow.js) format
tfjs.converters.save_keras_model(model, 'models3')

# conversion for Android:
# use the convert-model-anroid.py script
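The Android conversion script itself is not included. Assuming it targets TensorFlow Lite, its core would look roughly like this sketch (not the actual convert-model-anroid.py):

import tensorflow as tf

model = tf.keras.models.load_model('mnist_shift.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
with open('mnist_shift.tflite', 'wb') as f:
    f.write(converter.convert())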

Example #10
def test_model(sample_size):
    train = pd.read_table("data/train.tsv")
    evaluate(train.sample(sample_size))
Example #11
    tb = TensorBoard(log_dir='./logs')
    update_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.8,
                                  patience=5,
                                  min_lr=1e-6,
                                  verbose=1)
    checkpoint = ModelCheckpoint(weights_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True)

    # MODEL AND TRAIN

    model = model(img_height, img_width, num_classes, lr)
    model.fit(X_train,
              Y_train,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(X_val, Y_val),
              shuffle=True,
              callbacks=[checkpoint, update_lr, tb])

    # MODEL TEST

    model.load_weights(weights_path)
    preds = model.evaluate(X_test, Y_test)
    print("Loss = " + str(preds[0]))
    print("Test Accuracy = " + str(preds[1]))
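The model factory imported here takes (img_height, img_width, num_classes, lr). Its body is not shown; a minimal sketch consistent with that signature, with the architecture and the 3-channel input as assumptions:

from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import Adam

def model(img_height, img_width, num_classes, lr):
    m = Sequential([
        Conv2D(32, (3, 3), activation='relu',
               input_shape=(img_height, img_width, 3)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    m.compile(optimizer=Adam(learning_rate=lr),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return m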
Example #12
def main(args):
    print('start ..!')
    BATCH_SIZE = args.batch_size
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    TEXT = Field(
        sequential=True,  # text: sequential data
        tokenize=str.split,
        batch_first=True,
        fix_length=56,  # padding size: max length of data text
        lower=True)
    LABEL = LabelField(sequential=False, dtype=torch.float)

    w2v = KeyedVectors.load_word2vec_format(
        './model/GoogleNews-vectors-negative300.bin.gz', binary=True)

    data_dir = args.data_dir

    train_paths, val_paths = build_data(data_dir)

    N_EPOCHS = args.epochs
    EMBEDDING_DIM = args.embedding
    N_FILTERS = args.n_filters
    FILTER_SIZES = args.filter_sizes
    OUTPUT_DIM = 1
    DROPOUT = args.dropout
    test_acc_lists = []

    for kfold in range(10):
        # make datasets
        train_path = train_paths[kfold]
        val_path = val_paths[kfold]
        train_data = TabularDataset(path=train_path,
                                    skip_header=True,
                                    format='csv',
                                    fields=[('label', LABEL), ('text', TEXT)])
        test_data = TabularDataset(path=val_path,
                                   skip_header=True,
                                   format='csv',
                                   fields=[('label', LABEL), ('text', TEXT)])

        TEXT.build_vocab(train_data)
        LABEL.build_vocab(train_data)

        # for pretrained embedding vectors
        w2v_vectors = []
        for token, idx in TEXT.vocab.stoi.items():
            # torchtext's default specials put <unk> at 0 and <pad> at 1;
            # the pad token gets a zero vector
            if idx == 1:
                w2v_vectors.append(torch.zeros(EMBEDDING_DIM))
            # if word in word2vec vocab -> replace with pretrained word2vec
            elif token in w2v.wv.vocab:
                w2v_vectors.append(torch.FloatTensor(w2v[token]))
            # oov -> randomly initialized uniform distribution
            else:
                w2v_vectors.append(
                    torch.distributions.Uniform(-0.25, +0.25).sample(
                        (EMBEDDING_DIM, )))

        TEXT.vocab.set_vectors(TEXT.vocab.stoi, w2v_vectors, EMBEDDING_DIM)
        pretrained_embeddings = torch.FloatTensor(TEXT.vocab.vectors)

        # make iterators
        train_iterator, test_iterator = BucketIterator.splits(
            (train_data, test_data),
            batch_size=BATCH_SIZE,
            device=device,
            sort=False,
            shuffle=True)

        # define a model
        INPUT_DIM = len(TEXT.vocab)

        model = CNN1d(pretrained_embeddings, INPUT_DIM, EMBEDDING_DIM,
                      N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT)
        optimizer = optim.Adadelta(model.parameters(), rho=0.95)
        criterion = nn.BCEWithLogitsLoss()

        model = model.to(device)
        criterion = criterion.to(device)

        # train
        best_test_acc = -float('inf')
        model_name = './model/model' + str(kfold) + '.pt'
        print('kfold', kfold)
        for epoch in range(N_EPOCHS):

            start_time = time.time()

            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            test_loss, test_acc = evaluate(model, test_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if test_acc > best_test_acc:
                best_test_acc = test_acc
                torch.save(model.state_dict(), model_name)

            # print(f'\tEpoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            # print(f'\t\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
            # print(f'\t\tTest. Loss: {test_loss:.3f} |  Val. Acc: {test_acc * 100:.2f}%')
            logging.info(
                f'\tEpoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s'
            )
            logging.info(
                f'\t\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%'
            )
            logging.info(
                f'\t\tTest. Loss: {test_loss:.3f} |  Val. Acc: {test_acc * 100:.2f}%'
            )

        model.load_state_dict(torch.load(model_name))

        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        test_acc_lists.append(test_acc)
        logging.info(f'============== last test accuracy: {test_acc}')
        # print(f'============== last test accuracy: {test_acc}')
        print()
    return test_acc_lists
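train, evaluate, and epoch_time are imported from elsewhere; sketches of the two smaller helpers, assuming binary classification with BCEWithLogitsLoss as above:

import torch

def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    mins = int(elapsed / 60)
    return mins, int(elapsed - mins * 60)

def evaluate(model, iterator, criterion):
    model.eval()
    total_loss, total_acc = 0.0, 0.0
    with torch.no_grad():
        for batch in iterator:
            preds = model(batch.text).squeeze(1)
            total_loss += criterion(preds, batch.label).item()
            # round the sigmoid output to get hard 0/1 predictions
            correct = (torch.round(torch.sigmoid(preds)) == batch.label)
            total_acc += correct.float().mean().item()
    return total_loss / len(iterator), total_acc / len(iterator)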
Example #13
import tensorflow as tf
import pandas as pd
import os
from model import model, checkpoint_path
from DownData import x_test, y_test, selected_cols, selected_df_data, prepare_data

evaluate_result = model.evaluate(x=x_test, y=y_test)
print(evaluate_result)

# checkpoint_dir=os.path.dirname(checkpoint_path)
# latest = tf.train.latest_checkpoint(checkpoint_dir)
# model.load_weights(latest)

Jake_info = [0, 'Jake', 3, 'male', 23, 1, 0, 5.0000, 'S']
Rose_info = [1, 'Rose', 1, 'female', 20, 1, 0, 100.0000, 'S']
new_passenger_pd = pd.DataFrame([Jake_info, Rose_info], columns=selected_cols)
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
all_passenger_pd = pd.concat([selected_df_data, new_passenger_pd])
x_features, y_label = prepare_data(all_passenger_pd)
surv_probability = model.predict(x_features)
all_passenger_pd.insert(len(all_passenger_pd.columns), 'surv_probability',
                        surv_probability)
print(all_passenger_pd[-5:])
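prepare_data comes from DownData; a plausible sketch, assuming it drops the name, fills missing ages, one-hot encodes the categorical columns, and splits off the survival label. The column names are guesses based on the passenger records above:

import pandas as pd

def prepare_data(df):
    df = df.drop(columns=['name'])
    df['age'] = df['age'].fillna(df['age'].mean())
    df = pd.get_dummies(df, columns=['sex', 'embarked'])
    y = df.pop('survived').values
    return df.values.astype('float32'), y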
Example #14
from model import model
from logic import prepare_files
import numpy as np
from sklearn.model_selection import train_test_split

train_x, train_y = prepare_files('cifar-10-batches-py/data_batch_{}')
test_x, test_y = prepare_files('cifar-10-batches-py/test_batch')
print('\nData obtained')

x = np.concatenate((train_x, test_x))
y = np.concatenate((train_y, test_y))
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=.2)

model = model()
model.fit(train_x, train_y, batch_size=1000)
# model.load('cifar-10.model')
model.save('cifar-10.model')
accuracy = model.evaluate(test_x, test_y, batch_size=1000)
print('Accuracy: ', accuracy)