Example No. 1
def train_model(train_test_path):
    """
    Creates a model and performs training.
    """
    # Load train/test data
    train_test_data = np.load(train_test_path)
    x_train = train_test_data['X_train']
    y_train = train_test_data['y_train']

    print("x_train:", x_train.shape)
    print("y_train:", y_train.shape)

    del train_test_data

    # add a trailing channel axis so the data matches the Conv1D input shape
    x_train = np.expand_dims(x_train, axis=-1)

    # Create network
    model = Sequential()
    model.add(Conv1D(128, 5, input_shape=x_train.shape[1:], padding='same', activation='relu'))
    model.add(MaxPooling1D(5))
    model.add(Conv1D(128, 5, padding='same', activation='relu'))
    model.add(MaxPooling1D(5))
    model.add(Dropout(0.5))

    model.add(Flatten())

    model.add(Dense(1024, kernel_initializer='glorot_uniform', activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(512, kernel_initializer='glorot_uniform', activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, kernel_initializer='glorot_uniform', activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, kernel_initializer='glorot_uniform', activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(len(language_codes), kernel_initializer='glorot_uniform', activation='softmax'))

    model_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=model_optimizer, metrics=['accuracy'])

    # Train
    model.fit(x_train, y_train,
              epochs=10,
              validation_split=0.10,
              batch_size=64,
              verbose=2,
              shuffle=True)

    model.save(model_path)
Example No. 2
# convert data to float and scale values between 0 and 1
train_data = train_data.astype('float')
test_data = test_data.astype('float')
# scale data
train_data /= 255.0
test_data /= 255.0
# change the labels from integer to one-hot encoding
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

# creating network
model = Sequential()
model.add(Dense(500, activation='relu', input_shape=(dimData,)))
model.add(Dense(446, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels_one_hot, batch_size=256, epochs=20, verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

# DL_ICP2 - question2
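# predict_classes returns the index of the highest-probability class for each input sample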
predict_test = model.predict_classes(test_data[[30], :])
print("The prediction of the 30th in the test dataset is: ", predict_test)

plt.imshow(test_images[30, :, :], cmap='gray')
plt.title('Ground Truth : {}'.format(test_labels[30]))
plt.show()
Example No. 3
# print(series.shape)
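# split_sequence is defined elsewhere in the original script; a minimal sketch of the
# usual univariate split it appears to rely on (an assumption, for reference only;
# it assumes `array` is imported from numpy, as used further below):
def split_sequence(sequence, n_steps):
    # collect each window of n_steps values as X and the following value as y
    X, y = [], []
    for i in range(len(sequence) - n_steps):
        X.append(sequence[i:i + n_steps])
        y.append(sequence[i + n_steps])
    return array(X), array(y)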

# choose a number of time steps
n_steps = 3
X, y = split_sequence(raw_seq, n_steps)
print(X.shape, y.shape)

# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model = Sequential()
model.add(
    Conv1D(filters=64,
           kernel_size=2,
           activation='relu',
           input_shape=(n_steps, n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=1000)
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input)
print(yhat)
Example No. 4
    #Construct the model
    model = Sequential()
    model.add(Dense(64))
    model.add(Activation('softmax'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    #I set the learning rate extremely low because early on the model couldn't learn anything other than the baseline
    opt = Adam(lr=0.00001)

    #Compile the model
    model.compile(
        loss='binary_crossentropy',
        optimizer=opt,
        #optimizer='adam',
        metrics=['accuracy'])

    #Fit the model to the training and validation data, using early stopping to prevent overfitting
    model.fit(X_train,
              y_train,
              callbacks=[EarlyStopping(patience=5)],
              epochs=100,
              validation_data=(X_test, y_test))

    #Save the model to a file using the fold number and the value set at the start to create the filename
    name = 'fold' + str(i) + 'attempt' + str(attempt) + '.h5'
    i += 1
    model.save(name)
Example No. 5
#ST2.add( myDense_1(n_nodes) )
#ST2.add( Activation('relu') )

ST2.add(myDense_1(n_class))
ST2.add(Activation('softmax'))

#ST2.add( Dense(n_class, activation='softmax') )
ST2.compile(optimizer=Optimizer,
            loss=loss_func,
            metrics=Metric,
            weighted_metrics=['accuracy'])
ST2.fit(x=X_train,
        y=Y_train,
        sample_weight=W_train,
        epochs=n_epochs,
        verbose=vb,
        shuffle=True,
        batch_size=batch_s)
ST2.summary()
## Test custom layer###########################

STs = [ST2]

for STi in STs:
    ## Show learned weights:
    ii = 1
    for layer in STi.layers:
        g = layer.get_config()
        h = layer.get_weights()
        if ii == 4:
Example No. 6
clf.add(Dense(20, input_dim=11, activation='relu'))
# after the first layer, you don't need to specify
# the size of the input anymore
clf.add(Dense(16, activation='relu'))
clf.add(Dense(1, activation='sigmoid'))
# (softmax would be used instead for multiclass classification)
# an optimizer instance can be passed instead of a string (it supports hyperparameter tuning)
# the string form does not allow changing the optimizer's parameters
clf.compile(optimizer=keras.optimizers.Nadam(),
            metrics=['accuracy'],
            loss='binary_crossentropy')
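# To illustrate the note above (an added sketch, not part of the original script):
# passing a string uses the optimizer's default settings,
#     clf.compile(optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])
# while passing an instance lets you tune its hyperparameters, e.g. the learning rate:
#     clf.compile(optimizer=keras.optimizers.Nadam(lr=0.0005), loss='binary_crossentropy', metrics=['accuracy'])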

# Training the model
history = clf.fit(x=X_train,
                  y=y_train,
                  batch_size=850,
                  validation_split=.2,
                  epochs=1600)

score_history = history.history
y_pred_vanilla = clf.predict_classes(X_test)

plt.plot(range(1, 1601), score_history['loss'], label='Train Loss')
plt.plot(range(1, 1601), score_history['val_loss'], label='Validation Loss')
plt.legend()
plt.xlabel('No of Epochs')
plt.ylabel('Loss')
plt.show()
plt.plot(range(1, 1601), score_history['accuracy'], label='Train Accuracy')
plt.plot(range(1, 1601),
         score_history['val_accuracy'],
Example No. 7
cls = Sequential()

cls.add(
    Dense(2,
          input_dim=2,
          activation='relu',
          kernel_initializer='random_uniform'))
cls.add(Dense(30, activation='relu', kernel_initializer='random_uniform'))
cls.add(Dense(3, activation='softmax', kernel_initializer='random_uniform'))

opt = Adam(lr=INIT_LR)

cls.compile(loss="categorical_crossentropy",
            optimizer=opt,
            metrics=["accuracy"])

history = cls.fit(newX,
                  y,
                  epochs=EPOCHS,
                  steps_per_epoch=10,
                  validation_split=0.2,
                  validation_steps=50)

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('gmm rca model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
Example No. 8
    def fitting(self):

        timesteps = self.lags  # time steps
        features = 1  # features or channels (Volume)
        num_classes = 3  # 3 for categorical

        #data = np.random.random((1000, dim_row, dim_col))
        #clas = np.random.randint(3, size=(1000, 1))
        ##print(clas)
        #clas = to_categorical(clas)
        ##print(clas)
        data = self.X_train
        data_test = self.X_test
        print(data)

        data = data.values.reshape(len(data), timesteps, 1)
        data_test = data_test.values.reshape(len(data_test), timesteps, 1)
        print(data)

        clas = self.y_train
        clas_test = self.y_test
        clas = to_categorical(clas)
        clas_test = to_categorical(clas_test)

        cat0 = self.y_train.tolist().count(0)
        cat1 = self.y_train.tolist().count(1)
        cat2 = self.y_train.tolist().count(2)

        print("may: ", cat1, "  ", "menor: ", cat2, " ", "neutro: ", cat0)

        n_samples_0 = cat0
        n_samples_1 = (cat1 + cat2) / 2.0
        n_samples_2 = (cat1 + cat2) / 2.0

        class_weight = {
            0: 1.0,
            1: n_samples_0 / n_samples_1,
            2: n_samples_0 / n_samples_2
        }

        def class_1_accuracy(y_true, y_pred):
            # taken from: http://www.deepideas.net/unbalanced-classes-machine-learning/
            class_id_true = K.argmax(y_true, axis=-1)
            class_id_preds = K.argmax(y_pred, axis=-1)

            accuracy_mask = K.cast(K.equal(class_id_preds, 1), 'int32')
            class_acc_tensor = K.cast(K.equal(class_id_true, class_id_preds),
                                      'int32') * accuracy_mask

            class_acc = K.sum(class_acc_tensor) / K.maximum(
                K.sum(accuracy_mask), 1)
            return class_acc

        class SecondOpinion(Callback):
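            # Callback that, every N epochs, prints the precision of class-1 predictions on the test set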
            def __init__(self, model, x_test, y_test, N):
                self.model = model
                self.x_test = x_test
                self.y_test = y_test
                self.N = N
                self.epoch = 1

            def on_epoch_end(self, epoch, logs={}):
                if self.epoch % self.N == 0:
                    y_pred = self.model.predict(self.x_test)
                    pred_T = 0
                    pred_F = 0
                    for i in range(len(y_pred)):
                        if np.argmax(y_pred[i]) == 1 and np.argmax(
                                self.y_test[i]) == 1:
                            pred_T += 1
                        if np.argmax(y_pred[i]) == 1 and np.argmax(
                                self.y_test[i]) != 1:
                            pred_F += 1
                    if pred_T + pred_F > 0:
                        Pr_pos = pred_T / (pred_T + pred_F)
                        print("Yoe: epoch, Probabilidad pos: ", self.epoch,
                              Pr_pos)
                    else:
                        print("Yoe Probabilidad pos: 0")
                self.epoch += 1

#################################################################################################################

        model = Sequential()
        if self.nConv == 0:
            model.add(
                LSTM(units=self.lstm_nodes,
                     return_sequences=True,
                     activation='tanh',
                     input_shape=(timesteps, features)))
        for i in range(self.nLSTM - 2):
            model.add(
                LSTM(units=self.lstm_nodes,
                     return_sequences=True,
                     activation='tanh'))
        model.add(LSTM(units=self.lstm_nodes, activation='tanh'))
        model.add(Dropout(0.5))
        model.add(
            Dense(num_classes, activation='softmax')
        )  # the dimension of index one will be considered to be the temporal dimension
        #model.add(Activation('sigmoid'))  # for loss = 'binary_crossentropy'

        # using x[:, -1, :], the second dimension disappears, keeping only
        # the LAST elements (-1) of that dimension:
        # Try this to see:
        # data = np.random.random((5, 3, 4))
        # print(data)
        # print(data[:, -1, :])

        #        model.add(Lambda(lambda x: x[:, -1, :], output_shape = [output_dim]))
        print(model.summary())

        tensorboard_active = False
        val_loss = False
        second_opinion = True
        callbacks = []
        if tensorboard_active:
            callbacks.append(
                TensorBoard(log_dir=self.putmodel + "Tensor_board_data",
                            histogram_freq=0,
                            write_graph=True,
                            write_images=True))
        if val_loss:
            callbacks.append(EarlyStopping(monitor='val_loss', patience=5))
        if second_opinion:
            callbacks.append(SecondOpinion(model, data_test, clas_test, 10))
        #model.compile(loss = 'categorical_crossentropy', optimizer='Adam', metrics = ['categorical_accuracy'])
        #model.compile(loss = 'binary_crossentropy', optimizer=Adam(lr=self.learning), metrics = ['categorical_accuracy'])
        model.compile(loss='categorical_crossentropy',
                      optimizer='Adam',
                      metrics=[class_1_accuracy])

        model.fit(x=data,
                  y=clas,
                  batch_size=self.batch_size,
                  epochs=800,
                  verbose=2,
                  callbacks=callbacks,
                  class_weight=class_weight)
        #validation_data=(data_test, clas_test))

        #####################################################################################################################

        # serialize model to YAML
        model_yaml = model.to_yaml()
        with open("model.yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)
        # serialize weights to HDF5
        model.save_weights("model.h5")
        print("Saved model to disk")

        #        # load YAML and create model
        #        yaml_file = open('model.yaml', 'r')
        #        loaded_model_yaml = yaml_file.read()
        #        yaml_file.close()
        #        loaded_model = model_from_yaml(loaded_model_yaml)
        #        # load weights into new model
        #        loaded_model.load_weights("model.h5")
        #        print("Loaded model from disk")
        #        loaded_model.compile(loss = 'categorical_crossentropy', optimizer='Adam', metrics = [class_1_accuracy])
        #
        print("Computing prediction ...")
        y_pred = model.predict_proba(data_test)

        model.reset_states()
        print("Computing train evaluation ...")
        score_train = model.evaluate(data, clas, verbose=2)
        print('Train loss:', score_train[0])
        print('Train accuracy:', score_train[1])

        model.reset_states()
        #        score_train_loaded = loaded_model.evaluate(data, clas, verbose=2)
        #        loaded_model.reset_states()
        #        print('Train loss loaded:', score_train[0])
        #        print('Train accuracy loaded:', score_train[1])

        print("Computing test evaluation ...")
        score_test = model.evaluate(data_test, clas_test, verbose=2)
        print('Test loss:', score_test[0])
        print('Test accuracy:', score_test[1])

        model.reset_states()
        #        score_test_loaded = loaded_model.evaluate(data_test, clas_test, verbose=2)
        #        loaded_model.reset_states()
        #        print('Test loss loaded:', score_test[0])
        #        print('Test accuracy loaded:', score_test[1])

        pred_T = 0
        pred_F = 0
        for i in range(len(y_pred)):
            if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) == 1:
                pred_T += 1
#                print(y_pred[i])
            if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) != 1:
                pred_F += 1
        if pred_T + pred_F > 0:
            Pr_pos = pred_T / (pred_T + pred_F)
            print("Yoe Probabilidad pos: ", Pr_pos)
        else:
            print("Yoe Probabilidad pos: 0")

        history = DataFrame([[
            self.skip, self.nConv, self.nLSTM, self.learning, self.batch_size,
            self.conv_nodes, self.lstm_nodes, score_train[0], score_train[1],
            score_test[0], score_test[1]
        ]],
                            columns=('Skip', 'nConv', 'nLSTM', 'learning',
                                     'batch_size', 'conv_nodes', 'lstm_nodes',
                                     'loss_train', 'acc_train', 'loss_test',
                                     'acc_test'))
        self.history = self.history.append(history)
Example No. 9
model = Sequential()
# input_shape = (time_step, input_dim of each time step)
# The first argument of LSTM is the number of LSTM units; an LSTM can be thought of as a special fully connected layer that carries temporal information.
# model.add(LSTM(10, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(
    LSTM(4,
         input_shape=(train_X.shape[1], train_X.shape[2]),
         return_sequences=True))
model.add(LSTM(4, return_sequences=False))
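# the second LSTM returns only its final output, which feeds the Dense(1) regression head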
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# fit network
history = model.fit(train_X,
                    train_y,
                    epochs=20,
                    batch_size=1,
                    validation_data=(test_X, test_y),
                    verbose=2,
                    shuffle=False)
model.save("currentlstm.h5")
#plot history
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

plt.figure(figsize=(16, 8))
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
plt.plot(scaled[1:, 0], c='b')
plt.plot([x for x in train_predict], c='g')
Example No. 10
lstm_out = 200
#@
model_lstm = Sequential()
model_lstm.add(Embedding(max_features, embed_dim, input_length=X1.shape[1]))
model_lstm.add(SpatialDropout1D(0.2))
model_lstm.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model_lstm.add(Dense(15, activation='softmax'))
model_lstm.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
print(model_lstm.summary())

batch_size = 32
model_lstm.fit(X1_train,
               Y1_train,
               epochs=2,
               batch_size=batch_size,
               verbose=2,
               validation_data=(X1_test, Y1_test))

y_pred = model_lstm.predict(X1_test)
loss, acc = model_lstm.evaluate(X1_test,
                                Y1_test,
                                verbose=1,
                                batch_size=batch_size)

# Estimate of the loss (cost) function: the smaller the score, the lower the prediction error
#
print("Test val_loss: %.2f" % (loss))

# The percentage of correct class predictions on the test set
print("Test val_accuracy: %.2f" % (acc))
Example No. 11
from keras.utils import to_categorical

(features, labels), (test_features, test_labels) = mnist.load_data()

num_pixel = features.shape[1] * features.shape[2]
features = features.reshape(features.shape[0], num_pixel).astype('float32')

test_features = test_features.reshape(test_features.shape[0],
                                      num_pixel).astype('float32')

labels = to_categorical(labels)
test_labels = to_categorical(test_labels)
num_classes = test_labels.shape[1]

features = features / 255
test_features = test_features / 255

model = Sequential()

model.add(Dense(num_pixel, input_dim=num_pixel))
model.add(Dense(25, activation="relu"))
model.add(Dense(num_classes, activation="softmax"))

model.compile(optimizer="Adam", loss="categorical_crossentropy")
model.fit(features, labels, epochs=3)
'''model = SVC()
model.fit(features,labels)'''

print(model.evaluate(test_features, test_labels, batch_size=128))

joblib.dump(model, "keras_model", compress=3)
Example No. 12
    cls.add(
        Dense(7,
              input_dim=7,
              activation='relu',
              kernel_initializer='random_uniform'))
    cls.add(Dense(30, activation='relu', kernel_initializer='random_uniform'))
    cls.add(Dense(3, activation='softmax',
                  kernel_initializer='random_uniform'))

    opt = Adam(lr=INIT_LR)

    cls.compile(loss="categorical_crossentropy",
                optimizer=opt,
                metrics=["accuracy"])

    cls.fit(X_train, y_train, epochs=EPOCHS, steps_per_epoch=10, verbose=0)

    inferenceTest = cls.predict(X_test)
    gt = y_test

    correct = 0

    for t in range(len(gt)):
        if (get_highest(inferenceTest[t]) == gt[t]).all():
            correct += 1

    print(correct)
    score = correct / (len(gt) * 1.0)

    accTest = score
    if accTest > maxTstAcc:
Example No. 13
def create_model(_rows, data_dict, max_flow):
    def rmse(y_true, y_pred, axis=0):
        return np.sqrt(((y_pred - y_true)**2).mean(axis=axis))

    def create_dataset_from_dict(ddata, lookback=1, steps=1):
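        # Build supervised pairs from the time-indexed dict: each X sample stacks `lookback`
        # records (the current one and earlier ones spaced 5*steps minutes apart), and y is
        # the flow value `steps` intervals ahead.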
        dataX = []
        dataY = []
        for dt, data in ddata.items():
            timestep = []
            yval = ddata.get(dt + timedelta(minutes=5 * steps))
            # check the future value exists and is not an error
            if yval is not None:
                for j in range(lookback):
                    offset = dt - timedelta(minutes=5 * steps * j)
                    # make sure we have all previous values in the lookback
                    if ddata.get(offset) is not None:
                        timestep.append(ddata[offset])
                if len(timestep) == lookback:
                    dataX.append(timestep)
                    dataY.append(yval[0])
        fields = len(dataX[0][0])
        return np.array(dataX,
                        dtype=np.double), np.array(dataY,
                                                   dtype=np.double), fields

    def fit_to_batch(arr, b_size):
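        # Trim the array so its length is an exact multiple of the batch size
        # (required by the stateful LSTMs defined below).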
        lim = len(arr) - (len(arr) % b_size)
        return arr[:lim]

    class TerminateOnNaN(Callback):
        """Callback that terminates training when a NaN loss is encountered.
        """
        def __init__(self):
            super(TerminateOnNaN, self).__init__()
            self.terminated = False

        def on_batch_end(self, batch, logs=None):
            logs = logs or {}
            loss = logs.get('loss')
            if loss is not None:
                if np.isnan(loss) or np.isinf(loss):
                    print('Batch %d: Invalid loss, terminating training' %
                          (batch))
                    self.model.stop_training = True
                    self.terminated = True

    # input fields are:
    """
    Input is: 
    [
        flow
        dayOfWeek
        MinuteOfDay
        month
        week
        isWeekend
    ]
    for `lookback` records
    """
    lookback = int({{quniform(1, 40, 1)}})
    scaler = MinMaxScaler((0, 1))
    # rows = scaler.fit_transform(_rows)
    # dataX, dataY, fields = create_dataset(rows, lookback)
    scaled = scaler.fit_transform(list(data_dict.values()))
    scaled_data_dict = dict(zip(data_dict.keys(), scaled))
    dataX, dataY, fields = create_dataset_from_dict(scaled_data_dict, lookback)

    test_train_split = 0.60  ## 60% training 40% test
    split_idx = int(len(dataX) * test_train_split)
    train_x = dataX[:split_idx]
    train_y = dataY[:split_idx]
    test_x = dataX[split_idx:]
    test_y = dataY[split_idx:]
    batch_size = int({{quniform(1, 5, 1)}})

    train_x = fit_to_batch(train_x, batch_size)
    train_y = fit_to_batch(train_y, batch_size)
    test_x = fit_to_batch(test_x, batch_size)
    test_y = fit_to_batch(test_y, batch_size)

    nb_epoch = 1
    lstm_size_1 = {{quniform(96, 300, 4)}}
    lstm_size_2 = {{quniform(96, 300, 4)}}
    lstm_size_3 = {{quniform(69, 300, 4)}}
    optimizer = {{choice(['adam', 'rmsprop'])}}  # 'nadam', 'adamax', 'adadelta', 'adagrad'
    l1_dropout = {{uniform(0.001, 0.7)}}
    l2_dropout = {{uniform(0.001, 0.7)}}
    l3_dropout = {{uniform(0.001, 0.7)}}
    output_activation = {{choice(['relu', 'tanh', 'linear'])}}
    # reset_interval = int({{quniform(1, 100, 1)}})
    # layer_count = {{choice([1, 2, 3])}}
    l1_reg = {{uniform(0.0001, 0.1)}}
    l2_reg = {{uniform(0.0001, 0.1)}}
    params = {
        'batch_size': batch_size,
        'lookback': lookback,
        'lstm_size_1': lstm_size_1,
        'lstm_size_2': lstm_size_2,
        'lstm_size_3': lstm_size_3,
        'l1_dropout': l1_dropout,
        'l2_dropout': l2_dropout,
        'l3_dropout': l3_dropout,
        'l1_reg': l1_reg,
        'l2_reg': l2_reg,
        'optimizer': optimizer,
        'output_activation': output_activation,
        # 'state_reset': reset_interval,
        # 'layer_count': layer_count,
        # 'use_embedding': use_embedding
    }
    print("PARAMS=", json.dumps(params, indent=4))

    def krmse(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

    def geh(y_true, y_pred):
        return K.mean(K.sqrt(2 * K.pow(y_pred - y_true, 2) /
                             (y_pred + y_true)), axis=-1)

    reg = L1L2(l1_reg, l2_reg)
    start = datetime.now()
    model = Sequential()
    # if conditional(use_embedding):
    #     model.add(Embedding())
    model.add(
        LSTM(int(lstm_size_1),
             batch_input_shape=(batch_size, lookback, fields),
             return_sequences=True,
             stateful=True,
             activity_regularizer=reg,
             bias_initializer='ones'))
    model.add(Dropout(l1_dropout))
    model.add(Activation('relu'))
    model.add(
        LSTM(int(lstm_size_2),
             return_sequences=True,
             bias_initializer='ones',
             stateful=True,
             activity_regularizer=reg))
    model.add(Dropout(l2_dropout))
    model.add(Activation('relu'))
    model.add(
        LSTM(int(lstm_size_3),
             bias_initializer='ones',
             stateful=True,
             activity_regularizer=reg))
    model.add(Dropout(l3_dropout))
    model.add(Activation('relu'))
    model.add(Dense(1, activation='relu'))

    terminate_cb = TerminateOnNaN()
    model.compile(loss='mse', optimizer=optimizer)
    try:
        model.fit(
            train_x,
            train_y,
            epochs=1,
            verbose=1,
            batch_size=batch_size,
            shuffle=False,
            callbacks=[terminate_cb],
        )
    except Exception as e:
        print(e)
        return {'status': STATUS_FAIL, 'msg': e}
    if terminate_cb.terminated:
        return {'status': STATUS_FAIL, 'msg': "Invalid loss"}
    # have it continue learning during this phase
    # split the test_x,test_y
    preds = []

    def group(iterable, n):
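        # Yield the iterable in successive tuples of length n (the last tuple may be shorter)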
        it = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk

    test_y_it = iter(group(test_y, batch_size))
    test_batch_idx = 0
    prog = tqdm(range(len(test_y) // batch_size), desc='Train ')
    for batch in group(test_x, batch_size):

        batch = np.array(batch)
        test_y_batch = np.array(next(test_y_it))
        model.train_on_batch(batch, test_y_batch)
        batch_preds = model.predict_on_batch(batch)[:, 0]
        preds.extend(batch_preds)
        test_batch_idx += 1
        prog.update()
        # if test_batch_idx % reset_interval == 0:
        #     model.reset_states()
    preds = np.array(preds)
    finish = datetime.now()
    preds_pad = np.zeros((preds.shape[0], fields))
    preds_pad[:, 0] = preds.flatten()
    test_y_pad = np.zeros((preds.shape[0], fields))
    test_y_pad[:, 0] = test_y.flatten()
    unscaled_pred = scaler.inverse_transform(preds_pad)
    unscaled_test_y = scaler.inverse_transform(test_y_pad)
    rmse_result = rmse(unscaled_pred, unscaled_test_y)[0]

    plot_x = np.arange(test_x.shape[0])
    dpi = 80
    width = 1920 / dpi
    height = 1080 / dpi
    plt.figure(figsize=(width, height), dpi=dpi)
    plt.plot(plot_x, unscaled_test_y[:, 0], color='b', label='Actual')
    plt.plot(plot_x, unscaled_pred[:, 0], color='r', label='Predictions')
    plt.legend()

    plt.title("LSTM Discrete Predictions at 115, SI 2\nRMSE:{}".format(
        round(rmse_result, 3)))
    plt.xlabel('Time')
    plt.ylabel('Flow')
    fig_name = 'model_{}.png'.format(time())
    plt.savefig(fig_name)
    plt.show()
    with open(fig_name, 'rb') as img_file:
        fig_b64 = base64.b64encode(img_file.read()).decode('ascii')

    return {
        'loss': rmse_result,
        'status': STATUS_OK,
        'model': model._updated_config(),
        'metrics': {
            'rmse': rmse_result,
            # 'geh': geh(unscaled_pred, unscaled_test_y)[0],
            'duration': (finish - start).total_seconds()
        },
        'figure': fig_b64,
        'params': params
    }
Example No. 14
    for k in range(f1, mid):
        f2m[i - 1, k] = (k - floor1[i - 1]) / (floor1[i] - floor1[i - 1])
    for k in range(mid, f2):
        f2m[i - 1, k] = (floor1[i + 1] - k) / (floor1[i + 1] - floor1[i])
print(np.shape(f2m))
print(f2m)

datatotrain = np.dot(np.abs(data_rfft), f2m.T)
print(np.shape(datatotrain))
print(np.count_nonzero((datatotrain)))
print(np.amax((datatotrain)))
xtrain = datatotrain / np.amax(datatotrain)
print(np.amin(datatotrain))
y_train = np.asarray(labels_list)

from keras.layers import Input, Dense
from keras.models import Model
from keras import optimizers
from keras.callbacks import TensorBoard
from time import time
from keras import Sequential
from keras.utils import np_utils
labels_train = np_utils.to_categorical(y_train, 46)
model = Sequential()
model.add(Dense(128, input_dim=40, activation='relu'))
model.add(Dense(46, activation='softmax'))  # softmax output so categorical_crossentropy receives a probability distribution
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(xtrain, labels_train, epochs=150, batch_size=10, verbose=1)
Example No. 15
train_data = train_data.drop(['REPORT_ID', 'ID_CARD', 'LOAN_DATE'], 1)
train_data = train_data.dropna()
# print(train_data.info())
X = train_data.drop(['Y'], 1).as_matrix()  # 7 features
y = train_data['Y'].as_matrix()  # 1 target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

model = Sequential()
model.add(Dense(14, input_shape=(7,)))
model.add(Activation('relu'))
model.add(Dropout(0.3))  # dropout on the hidden layer rather than after the output
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

model.fit(X_train,y_train,epochs=10000,batch_size=16)
t=model.predict(X_test)

rate=0

for i in range(len(t)):
    if t[i]==y_test[i]:
        rate+=1
    else:
        pass
rate=1.0*rate/len(t)

print(rate)


# test_data=pd.read_csv('D:\sufe\A\contest_basic_test.tsv',sep='\t')
Example No. 16
class NeuralNetwork(object):
    def __init__(self, input_nodes, hidden_nodes, output_nodes, lr=None):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.lr = lr
        self.scales_x = []
        self.scales_y = []

        input_kernel_range = np.sqrt(6) / (np.sqrt(input_nodes) + np.sqrt(hidden_nodes))
        input_kernel_initializer = RandomUniform(minval=-input_kernel_range, maxval=input_kernel_range)
        input_layer = Dense(input_nodes,
                            kernel_initializer=input_kernel_initializer,
                            name='input')

        hidden_kernel_range = np.sqrt(6) / (np.sqrt(hidden_nodes) + np.sqrt(output_nodes))
        hidden_kernel_initializer = RandomUniform(minval=-hidden_kernel_range, maxval=hidden_kernel_range)
        hidden_layer = Dense(hidden_nodes,
                             kernel_initializer=hidden_kernel_initializer,
                             name='hidden')

        output_layer = Dense(output_nodes,
                             name='output')

        self.model = Sequential()
        self.model.add(input_layer)
        self.model.add(hidden_layer)
        self.model.add(output_layer)

    def train(self, x_train, y_train):
        self.set_normalize_scales(x_train, y_train)
        x_train = self.normalize(x_train, self.scales_x)
        y_train = self.normalize(y_train, self.scales_y)

        optimizer = SGD(lr=self.lr)
        self.model.compile(loss='mse', optimizer=optimizer)
        self.model.fit(x_train, y_train, batch_size=20, epochs=500)

    def evaluate(self, x_test, y_test):
        x_test = self.normalize(x_test, self.scales_x)
        y_test = self.normalize(y_test, self.scales_y)
        return self.model.evaluate(x_test, y_test)

    def predict(self, x):
        x = self.normalize(x, self.scales_x)
        y = self.model.predict(x)
        return self.unnormalize(y, self.scales_y)

    def set_normalize_scales(self, x, y):
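        # Record the per-column mean and std of the training data so inputs and targets are normalized consistently later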
        for i in range(x.shape[1]):
            mean, std = x[:, i].mean(), x[:, i].std()
            self.scales_x.append([mean, std])
        for i in range(y.shape[1]):
            mean, std = y[:, i].mean(), y[:, i].std()
            self.scales_y.append([mean, std])

    @staticmethod
    def normalize(data, scales):
        for i in range(0, len(scales)):
            mean, std = scales[i]
            data[:, i] = (data[:, i] - mean) / std
        return data

    @staticmethod
    def unnormalize(data, scales):
        for i in range(0, len(scales)):
            mean, std = scales[i]
            data[:, i] = data[:, i] * std + mean
        return data
Example No. 17
from keras.datasets import imdb
from keras.layers import Flatten, Dense, Embedding, SimpleRNN
from keras.preprocessing import sequence
from keras import Sequential

max_features = 1000
max_len = 20

(xtrain, ytrain), (xtest, ytest) = imdb.load_data(num_words=max_features)
xtrain = sequence.pad_sequences(xtrain, maxlen=max_len)
xtest = sequence.pad_sequences(xtest, maxlen=max_len)

model = Sequential([
    Embedding(10000, 8, input_length=max_len),
    Flatten(),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()
history = model.fit(xtrain,
                    ytrain,
                    epochs=10,
                    batch_size=32,
                    validation_split=0.2)
Example No. 18
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        label = 'seq_len:{}, units:{}'.format(seq_lenth, units)

        with open(file, 'a') as outfile:
            outfile.write('+++++++++++++++++++++++++++++++++++++++\n')
            outfile.write(label)
            outfile.write('\n+++++++++++++++++++++++++++++++++++++++\n')

        loss = []
        #Iterate through each training duration
        for ind, epochs in enumerate(epochs_list):
            #Fit the model for the current number of epochs
            history = model.fit(X, y, epochs=epochs, batch_size=batch_size)
            loss.extend(history.history['loss'])
            # print(history.history['loss'])

            #Extract the weights from the training model to set the weights for the
            #character generation model
            weights = model.get_weights()
            trained_model.set_weights(weights)

            #Select a sample from X to be the seed
            seed = np.array(X[100])

            #Generate the results and output to file
            with open(file, 'a') as outfile:
                #Write the header line with number of epochs, seed, etc
                outfile.write(
Example No. 19
class SemiSupLabeler():
    """
    @_init_: initialises the model
    - data_lab:     labelled data
    - data_unlab:   unlabelled data
    - data_submit:  the submit version of the data
    """
    def __init__(self, data_lab, data_unlab, data_submit):
        ###########################Default parameters#####################
        #NB: if some mandatory parameters are missing from the json, default values will be used
        #list of all potential parameters
        """
      @params_nn: parameters of neural network 
        - loss :             loss used for the NN, cf the dictionnary above
        - optimizer:         Adam, SGD, etc
        - learning rate:     speaks for itself
        - metrics            accuracy, we wont change it normally
        - decay:             decay of the learning rate, generally of the order 1e-5
        - momentum:          momentum of the lr
        - patience:          number of epochs you wait if you use earlystopmode for the validation accuracy to increase again
        - layers:            shape of the network
      """
        self.params_nn = [
            'loss', 'optimizer', 'learning rate', 'metrics', 'decay',
            'momentum', 'batch_size', 'number of epochs', 'layers', 'patience'
        ]
        """
      @params_ss: parameters of label spreading
        - manyfit:           since the ss accuracy has some variance but doesnt take much to be computed, manyfit designs 
                             how many independant times we run it before averaging it in order to obtain a 
                             better estimation of the accuracy in question
        - ss_model:          'LabSpr' or 'LabProp'. So far, only LabSpr has converged
        - ss_kernel:         'knn' or 'rbf. So far only knn converges. ***WATCH OUT***: when using rbf, 
                             euler will complain that you use too much memory!!
        - gamma              parameter for the rbf
        - neighbor           parameter for knn
        - alpha              parameter for knn and rbf: tells at which point you will take the 
                             information of your neighbors into account
      """
        self.params_ss = [
            'UsingSS', 'manyfit', 'ss_model', 'ss_kernel', 'gamma', 'neighbor',
            'alpha'
        ]
        """
      @param_list: list of all parameters
        - Ratio:              ratio represented by the training set
        - pca:                number of principal components to use. if not present, no pca will be done
        - UsingNN:            if set to false, the NN is not used.
        - data_state:         'save' or 'load'. If you want to train the NN only without having to run the 
                              ss algo again, do one run with data_state to true, 
                              and use data_state= 'load for the next ones.
        - scaler:             'normal' or 'standard' describes the preprocessing before applying the pca
        - paramsout:          designates which parameters will be present in the output name 
                              ==> put the one you're playing with in order to easily see the difference
      """
        self.param_list = [
            'Ratio', 'pca', 'UsingNN', 'paramsout', 'data_state', 'scaler'
        ] + self.params_nn + self.params_ss
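        # A hypothetical example of such a JSON parameter file (illustrative only;
        # the values mirror the defaults set further below, the keys follow param_list above):
        # {
        #     "Ratio": 0.9,
        #     "pca": 50,
        #     "UsingNN": true,
        #     "UsingSS": false,
        #     "paramsout": ["Ratio", "pca", "optimizer", "layers"],
        #     "data_state": "save",
        #     "scaler": "Standard",
        #     "loss": "sparse_categorical_crossentropy",
        #     "optimizer": "SGD",
        #     "learning rate": 0.001,
        #     "metrics": "accuracy",
        #     "decay": 0,
        #     "momentum": 0,
        #     "batch_size": 32,
        #     "number of epochs": 5,
        #     "layers": [["relu", 206], ["dropout", 0.33]],
        #     "manyfit": 1,
        #     "ss_model": "LabSpr",
        #     "ss_kernel": "knn",
        #     "gamma": 20,
        #     "neighbor": 7,
        #     "alpha": 0.2
        # }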

        self.param_out = ['Ratio', 'pca', 'optimizer', 'layers']

        self.data_lab = data_lab

        self.data_unlab = data_unlab

        self.data_submit = data_submit

        #--------------------- DATA IF NO JSON PROVIDED --------------------------

        #Training:
        self.RATIO = 0.9
        self.INPUT_DIM = 139

        #PCA:
        self.scaler = 'Standard'
        self.PCA_MODE = True
        self.pca = 50

        #Early stopping:
        self.EARLY_STOP_MODE = False
        self.patience = 50

        #NEURAL NETWORK:
        self.USING_NN = True

        self.USING_SS = False

        assert (self.USING_NN or self.USING_SS)

        self.loss = "sparse_categorical_crossentropy"

        self.opt = "SGD"

        self.lr = 0.001

        self.metric = "accuracy"

        self.decay = 0

        self.momentum = 0

        self.batch_size = 32

        self.epochs = 5

        self.lay_node = [("relu", 206), ('dropout', 0.33)]

        #Semi Supervised learning:
        self.datastate = 'save'

        self.ss_mod = 'LabSpr'

        self.ss_kern = 'knn'

        self.gamma = 20

        self.neighbors = 7

        self.alpha = 0.2

        self.manyfit = 1

        #-----------------------  JSON AS ARGUMENT: -------------------------

        #Checks whether the provided JSON is well formed:
        def check(inner, outer):
            for i in inner:
                if not (i in outer):
                    print('unknown parameter. abort.', i)

                    exit()

        self.JSON_MODE = (len(sys.argv) > 1)

        #In case a JSON was provided for the parameters:
        if (self.JSON_MODE):

            fn = sys.argv[1]

            if os.path.isfile(fn):

                print("successfully read the json file." + sys.argv[1])

                self.json_dict = json.load(open(fn))

                assert ('UsingNN' in self.json_dict and 'paramsout' in self.json_dict)

                self.USING_NN = self.json_dict['UsingNN']

                self.USING_SS = self.json_dict['UsingSS']

                check(self.json_dict, self.param_list)

                check(self.json_dict['paramsout'], self.param_list)

                #iterate over the printed parameters and ensure they exist:
                self.param_out = self.json_dict['paramsout']

                self.RATIO = self.json_dict['Ratio']

                self.ss_mod = self.json_dict['ss_model']

                self.ss_kern = self.json_dict['ss_kernel']

                self.gamma = self.json_dict['gamma']

                self.neighbors = self.json_dict['neighbor']

                self.alpha = self.json_dict['alpha']

                self.datastate = self.json_dict['data_state']

                self.scaler = self.json_dict['scaler']

                if ('manyfit' in self.json_dict):

                    self.manyfit = self.json_dict['manyfit']

                if (self.USING_NN):
                    self.loss = self.json_dict['loss']

                    self.opt = self.json_dict['optimizer']

                    self.lr = self.json_dict['learning rate']

                    self.metric = self.json_dict['metrics']

                    self.decay = self.json_dict['decay']

                    self.momentum = self.json_dict['momentum']

                    self.batch_size = self.json_dict['batch_size']

                    self.epochs = self.json_dict['number of epochs']

                    self.lay_node = self.json_dict['layers']

                self.PCA_MODE = ('pca' in self.json_dict)

                if (self.PCA_MODE):
                    self.pca = self.json_dict['pca']

                    self.INPUT_DIM = self.pca

                self.EARLY_STOP_MODE = ('patience' in self.json_dict)

                if (self.EARLY_STOP_MODE):
                    self.patience = self.json_dict['patience']

            else:
                print("uncorrect path. abort.")

                print(sys.argv[1])

                exit()

        #if no JSON is provided, the values are taken from the code:
        else:
            print("taking the values of the code because no JSON was given.")

            #Dictionary of all the parameter values used:
            self.json_dict = {
                'Ratio': self.RATIO,
                'UsingNN': self.USING_NN,
                'UsingSS': self.USING_SS,
                'ss_model': self.ss_mod,
                'ss_kernel': self.ss_kern,
                'loss': self.loss,
                'optimizer': self.opt,
                'learning rate': self.lr,
                'metrics': self.metric,
                'decay': self.decay,
                'momentum': self.momentum,
                'batch_size': self.batch_size,
                'number of epochs': self.epochs,
                'gamma': self.gamma,
                'neighbor': self.neighbors,
                'alpha': self.alpha,
                'layers': self.lay_node,
                'manyfit': self.manyfit,
                'scaler': self.scaler
            }

            if (self.PCA_MODE):
                self.json_dict['pca'] = self.pca

                self.INPUT_DIM = self.pca

            if (self.EARLY_STOP_MODE):
                self.json_dict['patience'] = self.patience

        self.build_output_name()

        #Tensorboard/log part:
        self.logs_base_dir = "./logs"

        os.makedirs(self.logs_base_dir, exist_ok=True)

        self.log_spec = os.path.join(self.logs_base_dir, self.output_name)

        os.makedirs(self.log_spec, exist_ok=True)

        self.init_variables()

    """
    @label_spr: performs label spreading
    """

    def label_spr(self):

        RESULT_ACC_SS = 0

        for i in range(self.manyfit):

            #Initialisinig of variables:
            self.init_variables()

            #PCA preprocessing:
            if (self.PCA_MODE): self.pca_preprocess(self.pca)

            #Semi supervised algo
            if (self.ss_mod == 'LabSpr' and self.ss_kern == 'knn'):
                self.label_prop_model = LabelSpreading(
                    kernel='knn',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    alpha=self.alpha)

            elif (self.ss_mod == 'LabProp' and self.ss_kern == 'rbf'):
                self.label_prop_model = LabelPropagation(
                    kernel='rbf',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    alpha=self.alpha,
                    max_iter=10)
            else:
                self.label_prop_model = LabelPropagation(
                    kernel=self.ss_kern,
                    gamma=self.gamma,
                    n_neighbors=self.neighbors)

            print('Starting to fit. Run for shelter!')

            self.label_prop_model.fit(self.X_tot, self.y_tot)

            temp_acc = self.label_prop_model.score(self.X_valid_lab,
                                                   self.y_valid)

            print('{} / {} :accuracy = {}'.format(i, self.manyfit, temp_acc))

            RESULT_ACC_SS += temp_acc

        self.y_tot = self.label_prop_model.transduction_

        self.y_submit = self.label_prop_model.predict(self.X_submit)

        if (self.datastate == "save"):
            self.save_to_csv(self.X_tot, self.y_tot, self.X_valid_lab,
                             self.y_valid)

        RESULT_ACC_SS /= self.manyfit

        self.json_dict['ss_accuracy'] = RESULT_ACC_SS

        print('accuracy obtained on the test set of the ss algo:',
              RESULT_ACC_SS)

    """
       @labelspr_predict: returns the prediction of the label spreading
    """

    def labelspr_predict(self, X):
        return self.label_prop_model.predict(X)

    """
       @init_variables : transforms the input data so that it is usable 
    """

    def init_variables(self):
        X_submit = self.data_submit.to_numpy()

        X_big_lab = (self.data_lab.to_numpy())[:, 1:]

        y_big = ((self.data_lab.to_numpy())[:, 0]).astype(int)

        X_train_lab, X_valid_lab, self.y_train, self.y_valid = train_test_split(
            X_big_lab, y_big, test_size=(1 - self.RATIO), random_state=14)

        X_unlab = self.data_unlab.to_numpy()

        X_tot = np.concatenate((X_train_lab, X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, np.full(len(X_unlab), -1)))

        if (self.scaler == 'Standard'):
            scaler = StandardScaler()

        elif (self.scaler == 'Normal'):
            scaler = Normalizer()

        else:
            scaler = StandardScaler()

        self.X_tot = scaler.fit_transform(X_tot)

        self.X_train_lab = scaler.transform(X_train_lab)

        self.X_unlab = scaler.transform(X_unlab)

        self.X_valid_lab = scaler.transform(X_valid_lab)

        self.X_submit = scaler.transform(X_submit)

    """@pca_preprocess: performs the preprocessing before the PCA
    """

    def pca_preprocess(self, number):
        pca_mod = PCA(n_components=number)

        self.X_tot = pca_mod.fit_transform(self.X_tot)

        self.X_train_lab = pca_mod.transform(self.X_train_lab)

        self.X_unlab = pca_mod.transform(self.X_unlab)

        self.X_valid_lab = pca_mod.transform(self.X_valid_lab)

        self.X_submit = pca_mod.transform(self.X_submit)

        self.INPUT_DIM = number

    """@build_model: creates the model of the neural network 
    """

    def build_model(self):
        self.model = Sequential()

        for counter, (name, num) in enumerate(self.lay_node):
            if (counter == 0):
                self.model.add(
                    Dense(num, activation='relu', input_dim=self.INPUT_DIM))

            elif (name == 'dropout'):
                self.model.add(Dropout(rate=num))

            elif (name == 'relu'):
                self.model.add(Dense(num, activation=tf.nn.relu))

            elif (name == 'relu_bn'):
                self.model.add(Dense(num))

                self.model.add(BatchNormalization())

                self.model.add(Activation('relu'))

            else:
                print('incorrect layer name. exit.')
                exit()

        #Last layer of neural network:
        self.model.add(Dense(10, activation='softmax'))

        #optimizer
        if (self.opt == 'SGD'):
            optimiz = SGD(lr=self.lr, decay=self.decay, momentum=self.momentum)

        elif (self.opt == 'Adam'):
            optimiz = Adam(lr=self.lr, decay=self.decay)

        else:
            print('incorrect optimizer name. exit.')

            exit()

        self.model.compile(optimizer=optimiz,
                           loss=self.loss,
                           metrics=[self.metric])

    """
    @fit_lab: trains the neural network on labeled data
    """

    def fit_lab(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)

        self.json_dict["small_lab_dataset_nn_acc"] = temp

    """
    @fit_tot: trains the neural network on the total data
    """

    def fit_tot(self):
        temp = self.nn_fit(self.X_tot, self.y_tot)

        self.json_dict["big_dataset_nn_acc"] = temp

    def fit_tot_mesh(self):
        tableau = []
        tabl = []
        number_it = 10

        temp = self.nn_fit(self.X_tot, self.y_tot)

        for i in range(number_it):
            probas_values = self.model.predict(self.X_submit)
            tableau.append(probas_values)

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]
        self.y_submit = np.array([np.argmax(i) for i in tabl])

    """
    @nn_fit: fits the neural network to input data X and y provided. 
    """

    def nn_fit(self, X, y):
        call_back_list = []

        #call_back_list.append(keras.callbacks.TensorBoard(self.log_spec,histogram_freq=1,write_grads=True))

        if (self.EARLY_STOP_MODE):
            call_back_list.append(
                EarlyStopping(patience=self.patience,
                              verbose=1,
                              mode='min',
                              restore_best_weights=True))
        self.model.fit(x=X,
                       y=y,
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       validation_data=(self.X_valid_lab, self.y_valid))

        test_loss, aut_acc = self.model.evaluate(self.X_valid_lab,
                                                 self.y_valid)

        y_temp = self.model.predict(self.X_submit)

        self.y_submit = np.array([np.argmax(i) for i in y_temp])

        return aut_acc

    """
    @complete_unlab: completes the unlabeled data by predicting labels for it
    """

    def complete_unlab(self):
        y_missing = self.model.predict(self.X_unlab)

        y_missing = np.array([np.argmax(i) for i in y_missing])

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def probas_values(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)

        probas_val = self.model.predict_proba(self.X_unlab)

        return probas_val

    def mesh(self):
        tableau = []

        number_it = 20
        tabl = []

        for i in range(number_it):
            tableau.append(self.probas_values())

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]

        #print((tabl[0]))
        predict = []
        """"
      for x in tabl:
        for j in range(len(x)):
          if max(x) == x[j]:
            predict.append(j)
      """

        #print(predict[0])
        predict = [np.argmax(i) for i in tabl]
        y_missing = predict

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def filtered_mesh(self):
        tableau = []

        number_it = 10

        tabl = []

        for i in range(number_it):
            tableau.append(self.probas_values())

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]

        THRESHOLD_PROBAS = 0.7

        #If the maximum probability is below the threshold:
        truncated_tabl = [i for i in tabl if max(i) > THRESHOLD_PROBAS]
        print(len(truncated_tabl))

        #Find the indices of those points in order to remove them from the unlabeled set.
        indices = []

        for i in range(len(tabl)):
            if max(tabl[i]) <= THRESHOLD_PROBAS: indices.append(i)

        self.X_unlab_truncated = np.delete(self.X_unlab, indices, axis=0)
        print(len(self.X_unlab_truncated))

        #print((tabl[0]))
        #print(predict[0])

        #Only make predictions for the points above the threshold:
        predict = [np.argmax(i) for i in truncated_tabl]

        y_missing = predict

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab_truncated),
                                    axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    """ @build_output_name: provides the name of the output with all the parameters
    """

    def build_output_name(self):
        self.output_name = (datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

        if (self.JSON_MODE):
            if ('origin' in os.path.basename(os.path.normpath(sys.argv[1]))):
                self.output_name += '_OR_'

            nn_string = 'NN:'

            ss_string = 'SS:'

            for i in self.param_out:
                temp = (i + '=' + str(self.json_dict[i]))

                if (i in self.params_nn):
                    nn_string += temp

                elif (i in self.params_ss):
                    ss_string += temp

                else:
                    self.output_name += temp

            self.output_name += ss_string

            if (self.USING_NN):
                self.output_name += nn_string

    """
    @submission_formed: writes the submission file in the proper format
    - predicted_y : the predicted values
    - name: name containing all parameters
    """

    def submission_formed(self, predicted_y, name):
        result_dir = "./results"

        os.makedirs(result_dir, exist_ok=True)

        out = pd.DataFrame(predicted_y)

        out.insert(0, 'Id', range(30000, len(out) + 30000))

        out.rename(columns={"Id": "Id", 0: "y"}, inplace=True)

        path = 'results/' + name + '.csv'

        out.to_csv(os.path.join(path), index=False)

    """
    @save_to_csv: useful when self.datastate is set to 'save': saves the data obtained after the ss algorithm
    """

    def save_to_csv(self, X_tot, y_tot, X_valid, y_valid):
        out_x = pd.DataFrame(X_tot)

        out_y = pd.DataFrame(y_tot)

        out_xv = pd.DataFrame(X_valid)

        out_yv = pd.DataFrame(y_valid)

        os.makedirs('./saved_datas', exist_ok=True)

        path_x = 'saved_datas/X_tot.csv'

        path_y = 'saved_datas/y_tot.csv'

        path_xv = 'saved_datas/X_valid.csv'

        path_yv = 'saved_datas/y_valid.csv'

        out_x.to_csv(os.path.join(path_x), index=False)

        out_y.to_csv(os.path.join(path_y), index=False)

        out_xv.to_csv(os.path.join(path_xv), index=False)

        out_yv.to_csv(os.path.join(path_yv), index=False)

    """
    @load_xy: when self.datastate is set to 'load', loads data from saved data
    """

    def load_xy(self):
        print('Loading the X and y...')

        self.X_valid_lab = (pd.read_csv('saved_datas/X_valid.csv')).to_numpy()

        self.y_valid = (pd.read_csv('saved_datas/y_valid.csv')).to_numpy()

        self.X_tot = (pd.read_csv('saved_datas/X_tot.csv')).to_numpy()

        self.y_tot = (pd.read_csv('saved_datas/y_tot.csv')).to_numpy()

    """@out: final output of the programm 
    """

    def out(self):
        self.submission_formed(self.y_submit, self.output_name)

        with open(self.log_spec + '/recap.json', 'w') as fp:
            json.dump(self.json_dict, fp, indent=1)

        print(
            '########################################DONE##################################'
        )

        print("\n")
Example No. 20
 #First Hidden Layer
 classifier.add(
     Dense(6,
           activation='tanh',
           kernel_initializer='random_normal',
           input_dim=13))
 #Second  Hidden Layer
 classifier.add(
     Dense(7, activation='tanh', kernel_initializer='random_normal'))
 #Output Layer
 classifier.add(
     Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
 classifier.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])
 history = classifier.fit(x_train, y_train, batch_size=20000, epochs=40)
 #end for
 eval_model = classifier.evaluate(x_train, y_train)
 eval_model
 filename2 = "11-03304.txt"
 f22 = open(filename2, "r")
 f2 = f22.readlines()
 ar2 = []
 i = 0
 for x2 in f2:
     ar2.append(x2.split())
 i = 0
 j = 0
 t2 = []
 for i in range(0, len(ar2)):
     for j in range(int(float(ar2[i][0]) * 100),
Ejemplo n.º 21
0
    # train/dev set division
    x_train, x_test, y_train, y_test = train_test_split(flattened_images,
                                                        categorical_labels,
                                                        test_size=0.1,
                                                        random_state=42)

    # noise addition
    x_train_noisy = x_train + noise_factor * np.random.normal(
        loc=0.0, scale=1.0, size=x_train.shape)
    x_test_noisy = x_test + noise_factor * np.random.normal(
        loc=0.0, scale=1.0, size=x_test.shape)
    x_train_noisy = np.clip(x_train_noisy, 0., 1.)
    x_test_noisy = np.clip(x_test_noisy, 0., 1.)

    # model building
    model = Sequential()
    model.add(Dense(90, activation='relu',
                    input_dim=flattened_images.shape[1]))
    model.add(Dense(10, activation='sigmoid'))

    sgd = optimizers.SGD(lr=0.1)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train_noisy, y_train, epochs=10, verbose=0)

    # model evaluation
    score = model.evaluate(x_test_noisy, y_test, verbose=0)
    print('Test set loss : ', score[0])
    print('Test set accuracy : ', score[1])
    print("#" * 50)
Ejemplo n.º 22
0
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

optimizer = Adam(lr=0.001)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

import time
start_time = time.time()

hist = model.fit(x_train,
                 y_train,
                 validation_data=(x_val, y_val),
                 batch_size=256,
                 epochs=30,
                 verbose=1)
training_time = time.time() - start_time

print(hist.history)
model.evaluate(test_images, test_labels)
model.save('my_2cnn3fc_model.h5')

#training time
mm = training_time // 60
ss = training_time % 60
print('Training {} epochs in {}:{}'.format(30, int(mm), round(ss, 1)))

#plot loss and accuracy
loss = hist.history['loss']
Ejemplo n.º 23
0
#model = Sequential
#model_age = load_model('./models/age.h5')

model = Sequential()

model.add(
    TimeDistributed(Conv2D(36, kernel_size=(5, 5)), input_shape=input_shape))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

model.add(TimeDistributed(Flatten()))

model.add(CuDNNLSTM(128))

model.add(Dense(90))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(10))
model.add(Activation('softmax'))

model.summary()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x=x_train, y=y_train, epochs=5, batch_size=256)

acc = model.evaluate(x_test, y_test)
print('acc test = ', acc[1] * 100, "%")
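# Portability sketch: CuDNNLSTM only runs on a GPU. In tf.keras 2.x the plain
# LSTM layer dispatches to the same cuDNN kernel automatically when its default
# arguments are kept, so an equivalent, portable version of the architecture
# above can be written as follows. The input_shape here is a hypothetical
# placeholder (10 frames of 28x28 grayscale images), not from the original code.
from tensorflow.keras.layers import (Conv2D, Dense, Flatten, LSTM,
                                     MaxPooling2D, TimeDistributed)
from tensorflow.keras.models import Sequential

portable_model = Sequential()
portable_model.add(TimeDistributed(Conv2D(36, kernel_size=(5, 5)),
                                   input_shape=(10, 28, 28, 1)))  # hypothetical shape
portable_model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
portable_model.add(TimeDistributed(Flatten()))
portable_model.add(LSTM(128))  # cuDNN-backed on GPU, still runs on CPU
portable_model.add(Dense(10, activation='softmax'))
portable_model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])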
Ejemplo n.º 24
0
X_train = Train_data[:, 0]
Y_train = Train_data[:, 1]

X_test = Test_data[:, 0]
Y_test = Test_data[:, 1]

model = Sequential()

model.add(Dense(units=100, input_shape=(1, ), activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=1))  # single linear output for the regression target

model.compile(Adam(lr=0.001), loss='mean_squared_error', metrics=['mae'])
model.fit(X_train, Y_train, epochs=100, batch_size=16, verbose=2)

Y_predict = model.predict(X_test)
'''
print(Y_predict)
print(Y_test)

plt.plot(Y_predict, color = 'red')
plt.plot(Y_test, color = 'green')
plt.show()
'''

New_predicted_data = []
for i in range(len(Y_predict)):
    New_predicted_data.append([X_test[i], Y_predict[i]])
Ejemplo n.º 25
0
def train_A(Clean_trainA):

    #load tidy data
    data = pd.read_csv(Clean_trainA)
    #keep only the label and tidy tweet columns
    data = data[['Sentiment', 'tidy_tweet']]
    #create dataset and tokenize
    tk = TweetTokenizer(reduce_len=True)
    global X, Y
    X = []
    Y = []
    tidy_token_list = []
    for x, y in zip(data['tidy_tweet'], data['Sentiment']):
        x = json.dumps(x)
        X.append(tk.tokenize(x))
        Y.append(y)
        tidy_token_list.append((tk.tokenize(x), y))

    #print(tidy_token_list)
    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs(
        'model\\glove.6B.100d.txt')

    max_len = 30
    print('max_len:', max_len)

    X = np.zeros((len(tidy_token_list), max_len))
    Y = np.zeros((len(tidy_token_list), ))

    for i, tk_lb in enumerate(tidy_token_list):
        tokens, label = tk_lb
        sentence_to_indices(tokens, word_to_index, max_len, i)
        Y[i] = label

    Y = to_categorical(Y, 3)

    model = Sequential()

    model.add(
        pretrained_embedding_layer(word_to_vec_map, word_to_index, max_len))

    #model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))
    model.add(Conv1D(256, 2, padding='same', activation='relu', strides=1))
    model.add(Dropout(0.4))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(256, 3, padding='same', activation='relu', strides=1))
    model.add(Dropout(0.4))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(256, 4, padding='same', activation='relu', strides=1))
    model.add(Dropout(0.4))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Bidirectional(LSTM(units=256, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(300))

    model.add(Dense(3, activation='softmax'))

    model.summary()

    # categorical_crossentropy matches the 3-class one-hot labels and softmax output
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    H = model.fit(X,
                  Y,
                  validation_split=0.2,
                  epochs=9,
                  batch_size=128,
                  shuffle=True)
    model.save('A\\modelA.hdf5')
    plt.plot(H.history['acc'])
    plt.plot(H.history['val_acc'])
    plt.title('Model A accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.savefig("Accuracy_A.png")  # save before show(), which clears the current figure
    plt.show()
    plt.figure()  # new figure so the loss curves are not drawn over the accuracy plot
    plt.plot(H.history['loss'])
    plt.plot(H.history['val_loss'])
    plt.title('Model A loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.savefig("Loss_A.png")
    plt.show()

    return H.history['acc'][-1], H.history['val_acc'][-1]
Ejemplo n.º 26
0
    model = Sequential()
    model.add(layers.Embedding(vocab_size, embedding_dim, input_length=maxlen))
    model.add(layers.Conv1D(128, 5, activation='relu'))
    model.add(layers.GlobalMaxPooling1D())
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    #train the model
    history = model.fit(X_train,
                        y_train,
                        epochs=2,
                        verbose=True,
                        validation_data=(X_test, y_test),
                        batch_size=10)
    loss, accuracy = model.evaluate(X_train, y_train, verbose=False)
    print("Training Accuracy: {:.4f}".format(accuracy))
    loss, accuracy = model.evaluate(X_test, y_test, verbose=False)
    print("Testing Accuracy:  {:.4f}".format(accuracy))
    plot_history(history)

    #test examples
    ex_sent = "conservative feeling"
    X_ex_sent = tokenizer.texts_to_sequences([ex_sent])
    X_ex_sent = pad_sequences(X_ex_sent, padding='post', maxlen=maxlen)
    print(model.predict(X_ex_sent))

    #save the model
Ejemplo n.º 27
0
# define model
model = Sequential()
model.add(
    LSTM(n_steps_in,
         activation='relu',
         return_sequences=True,
         input_shape=(n_steps_in, n_features)))
model.add(LSTM(n_steps_in, activation='relu', return_sequences=True))
model.add(Dense(n_features))
model.compile(optimizer='adam', loss='mse')
model.summary()
print("done!")

print("training model...")
# fit model
history = model.fit(X, y, epochs=E, verbose=1, callbacks=[es], batch_size=B)
model_name = M
model.save(model_name)
print("done!")

loss = history.history['loss']

outputfilename = T

output_file = open(outputfilename, "w")

print("Opened txt file")

output_file.write("loss\n")
for i in range(len(loss)):
    output_file.write(str(loss[i]) + "\n")
output_file.close()
# To summarize, our model is a simple RNN model with one embedding, one LSTM and one dense layer. 213,301 parameters in total need to be trained.

# ### Train and evaluate our model
#
# We first need to compile our model by specifying the loss function and optimizer we want to use while training, as well as any evaluation metrics we'd like to measure. Specify the appropriate parameters, including at least the 'accuracy' metric.

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Once compiled, we can kick off the training process. There are two important training parameters that we have to specify - batch size and number of training epochs, which together with our model architecture determine the total training time.
#
# Training may take a while, so grab a cup of coffee, or better, go for a run!

batch_size = 64
num_epochs = 3

X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]

model.fit(X_train2,
          y_train2,
          validation_data=(X_valid, y_valid),
          batch_size=batch_size,
          epochs=num_epochs)

# scores[1] will correspond to accuracy if we pass metrics=['accuracy']

scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])
Ejemplo n.º 29
0
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print('model summary: ')
model.summary()
# save model summary in model folder so we can reference it later when comparing models
with open(MODEL_PATH + '/summary.txt', 'w') as handle:
    model.summary(print_fn=lambda x: handle.write(x + '\n'))

# make sure we only keep the weights from the best epoch rather than the last set of weights
# (with no explicit monitor, save_best_only tracks 'val_loss' by default)
checkpointer = ModelCheckpoint(filepath=MODEL_PATH + '/model.h5',
                               verbose=1,
                               save_best_only=True)
history_checkpointer = util.SaveHistoryCheckpoint(model_path=MODEL_PATH)

util.print_memory()

history = model.fit(x_train,
                    y_train,
                    validation_data=(x_val, y_val),
                    epochs=NUM_EPOCHS,
                    batch_size=BATCH_SIZE,
                    callbacks=[checkpointer, history_checkpointer])

util.copy_model_to_latest(BASE_PATH, MODEL_PATH, MODEL_NAME)

print('total time: %ds' % round(util.total_time()))
util.print_memory()
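# If the intent is to checkpoint on validation accuracy rather than the default
# validation loss, ModelCheckpoint takes an explicit monitor; a minimal sketch
# reusing MODEL_PATH from above. 'val_acc' matches metrics=['acc'] as compiled
# here (it would be 'val_accuracy' with metrics=['accuracy']).
checkpointer_acc = ModelCheckpoint(filepath=MODEL_PATH + '/model.h5',
                                   monitor='val_acc',
                                   mode='max',
                                   verbose=1,
                                   save_best_only=True)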
Ejemplo n.º 30
0
    classifier.add(
        Dense(4,
              activation='relu',
              kernel_initializer='random_normal',
              input_dim=5))
    #Second  Hidden Layer
    classifier.add(
        Dense(4, activation='relu', kernel_initializer='random_normal'))
    #Output Layer
    classifier.add(
        Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
    #Compiling the neural network
    classifier.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    classifier.fit(X_train, y_train, batch_size=10, epochs=100)

    #Dumping model in a file
    file1 = open('model_neural.pkl', 'wb')
    pickle.dump(classifier, file1)
    '''df=pd.read_csv('data.csv')


  #getting splitted data from function
  train,test = data_split(df, 0.3)

  # splitting features and output for prediction
  x_train = train[['fever','bodypain','age','runnynose','diffbreath']].to_numpy()
  x_test = test[['fever','bodypain','age','runnynose','diffbreath']].to_numpy()

  y_train = train[['infected']].to_numpy().reshape(train.shape[0],)
Ejemplo n.º 31
0
sequences_tr = sequence.pad_sequences(X_train, maxlen=max_len)
sequences_te = sequence.pad_sequences(X_test, maxlen=max_len)
# The Embedding layer expects integer token indices, so the padded sequences are used as-is.
# One-hot encoding them here would feed the Embedding layer 3D input and break the LSTM below:
# sequences_tr = to_categorical(sequences_tr)
# sequences_te = to_categorical(sequences_te)

print(sequences_tr)
#print(type(sequences_hot.shape))
#print(sequences_hot.shape)
#sequences_hot = list(sequences_hot)
#print(sequences_hot)

#sequences_hot = pd.DataFrame(sequences_hot)
#sequences_hot.to_csv("seq.csv")
#print(sequences_hot)

model = Sequential()
model.add(Embedding(256, len(sequences_tr[0])))  # number of distinct tokens & embedding size per input
model.add(LSTM(len(sequences_tr[0])))
model.add(Dense(3, activation='softmax'))  # number of categories

model.summary()

# the model must be compiled before fit; categorical_crossentropy assumes one-hot labels
# (use 'sparse_categorical_crossentropy' if Y_train holds integer class indices)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

class_weight = {0: 1., 1: 7.}  # every class present in Y_train needs an entry here

hist = model.fit(sequences_tr,
                 Y_train,
                 batch_size=128,
                 epochs=10,
                 validation_split=0.2,
                 class_weight=class_weight)
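# Instead of hand-picking class_weight values, the weights can be derived from
# the label frequencies with scikit-learn; a small sketch where `y_int` is a
# hypothetical array of integer class labels standing in for the real targets.
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y_int = np.array([0, 0, 0, 1, 0, 1, 0])  # placeholder labels
weights = compute_class_weight(class_weight='balanced',
                               classes=np.unique(y_int),
                               y=y_int)
balanced_class_weight = dict(enumerate(weights))  # e.g. {0: 0.7, 1: 1.75}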
Ejemplo n.º 32
0
import numpy as np
from keras import Sequential
from keras.layers import Dense

data = np.random.random((1000, 32))
label = np.random.random((1000, 10))

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(32, )))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile('adam', 'categorical_crossentropy')

model.fit(data, label, epochs=100)

model.save('my_model.h5')

Ejemplo n.º 33
0
model.add(Activation('sigmoid'))
# add another fully connected (Dense) layer; no input shape needed, it matches the previous layer's output
model.add(Dense(500))
model.add(Activation('sigmoid'))
model.add(Dense(10))
model.add(Activation('softmax'))

# evaluate how well the model performs
# configuration (optimizer)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# larger batches train faster but the results degrade
# smaller batches train slower and the GPU parallel speedup is less noticeable
model.fit(data_train, target_train, batch_size=100, epochs=20)

score = model.evaluate(data_test, target_test)

im = Image.fromarray(data_test[4].reshape(28, 28))
plt.imshow(im)

predict = model.predict(data_test)[4]
np.where(predict == np.amax(predict))

# read handwritten image
data = Image.open(r'C:\Users\yt335\Desktop\handwriting number.png').convert(
    'L')
data_np = np.array(data).reshape(1, 784)
data_np = data_np / 255
predict = model.predict(data_np)
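# To read off the predicted digit for the handwritten image, the index of the
# largest softmax probability can be taken directly (a small follow-up sketch,
# not part of the original snippet).
predicted_digit = int(np.argmax(predict))
print('predicted digit:', predicted_digit)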
Ejemplo n.º 34
0
print('Minimum review length: {}'.format(len(min((X_train + X_test), key=len))))


from keras.preprocessing import sequence
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
embedding_size=32
model=Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())


model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

batch_size = 64
num_epochs = 3
X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]
model.fit(X_train2, y_train2, validation_data=(X_valid, y_valid), batch_size=batch_size, epochs=num_epochs)

scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])
Ejemplo n.º 35
0
model.add(layers.Dense(label_size, activation="softmax"))
model = utils.multi_gpu_model(model, gpus=4)
model.compile(metrics=['acc'], loss='categorical_crossentropy', optimizer='adam')
"""

mirrored_strategy = tf.distribute.MirroredStrategy()

with mirrored_strategy.scope():
    model = Sequential()
    model.add(
        efn.EfficientNetB3(weights="imagenet",
                           include_top=False,
                           pooling='avg'))
    model.add(layers.Dense(label_size, activation="softmax"))
    model.compile(metrics=['acc'],
                  loss='categorical_crossentropy',
                  optimizer='adam')
# """

checkpointer = ModelCheckpoint(filepath='best.hdf5',
                               verbose=1,
                               save_best_only=True)  # Save best weight file
csv_logger = CSVLogger('history.log')

history = model.fit(train_generator,
                    validation_data=validation_generator,
                    epochs=150,
                    callbacks=[csv_logger, checkpointer])

model.save('last.hdf5')  # Save last weight file