Example #1
def model(X_train, Y_train, X_test, Y_test):

    nb_dim = 20
    img_rows, img_cols = 32, 32
    img_channels = 3

    dense_layer_size = {{choice([256, 512, 1024])}}
    optimizer = {{choice(['rmsprop', 'adam', 'sgd'])}}
    batch_size = {{choice([32, 64, 128])}}
    num_conv1 = int({{quniform(24, 64, 1)}})
    num_conv2 = int({{quniform(32, 96, 1)}})
    params = {'dense_layer_size':dense_layer_size,
              'optimizer':optimizer,
              'batch_size':batch_size,
              'num_conv1':num_conv1,
              'num_conv2':num_conv2,
             }


    model = Sequential()

    # Keras 1.x API: Convolution2D(filters, rows, cols) with border_mode;
    # in Keras 2 this became Conv2D(filters, (rows, cols), padding=...)
    model.add(Convolution2D(num_conv1, 3, 3, border_mode='same',
                            input_shape=(img_channels, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(Convolution2D(num_conv1, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(num_conv2, 3, 3, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(num_conv2, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(dense_layer_size))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_dim))
    model.add(Activation('softmax'))
    
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)

    model.fit(X_train, Y_train,
              batch_size=batch_size,
              nb_epoch=30,
              verbose=2,
              validation_data=(X_test, Y_test))

    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', acc)
    #return {'loss': -acc, 'status': STATUS_OK, 'model':model}
    return {'loss': -acc, 'status': STATUS_OK, 'params':params}
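
For context: these hyperas model functions are not called directly. A driver hands them to hyperas' optim.minimize, which parses the {{...}} templates into a hyperopt search space and runs the function once per trial. A minimal driver sketch, assuming a hypothetical data() loader whose return values match the model function's arguments (CIFAR-10 matches the 32x32x3 input shape used above):

from hyperopt import Trials, tpe
from hyperas import optim

def data():
    # hypothetical loader; must return exactly the arguments model() expects
    from keras.datasets import cifar10
    from keras.utils import np_utils
    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
    return X_train, np_utils.to_categorical(Y_train), X_test, np_utils.to_categorical(Y_test)

best_run, best_model = optim.minimize(model=model, data=data,
                                      algo=tpe.suggest, max_evals=10,
                                      trials=Trials())
print(best_run)  # best hyperparameter assignment; note best_model requires a 'model' key in the return dict
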
Example #2
def create_model(x_train, y_train, x_test, y_test):
    """
    Create your model...
    """
    layer_1_size = {{quniform(12, 256, 4)}}
    l1_dropout = {{uniform(0.001, 0.7)}}
    params = {
        'l1_size': layer_1_size,
        'l1_dropout': l1_dropout
    }
    num_classes = 10
    model = Sequential()
    model.add(Dense(int(layer_1_size), activation='relu'))
    model.add(Dropout(l1_dropout))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, verbose=0)
    out = {
        'loss': -acc,
        'score': score,
        'status': STATUS_OK,
        'model_params': params,
    }
    # optionally store a dump of your model here so you can get it from the database later
    # write the model to a unique temporary .h5 file (avoiding the private
    # tempfile._get_candidate_names() helper), then read it back as bytes
    with tempfile.NamedTemporaryFile(suffix='.h5', delete=False) as tmp:
        temp_name = tmp.name
    model.save(temp_name)
    with open(temp_name, 'rb') as infile:
        model_bytes = infile.read()
    out['model_serial'] = model_bytes
    return out
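
The model_serial bytes stored above can be turned back into a live model later by writing them to a temporary .h5 file. A minimal restore sketch, assuming out is the result dict fetched back from the trials database:

import tempfile
from keras.models import load_model

def restore_model(out):
    # round-trip of the 'model_serial' blob produced by create_model above
    with tempfile.NamedTemporaryFile(suffix='.h5', delete=False) as tmp:
        tmp.write(out['model_serial'])
        path = tmp.name
    return load_model(path)
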
Example #3
def model(x_train, y_train, x_test, y_test):
    model = Sequential()  # create an object of the Sequential model of keras
    model.add(
        Dense(13, input_dim=13, kernel_initializer='normal', activation='relu')
    )  # add the first layer with 13 neurons receiving a 13-dimensional input
    model.add(
        Dense(int({{quniform(1, 10, 1)}}),
              kernel_initializer='normal',
              activation='relu')
    )  # add a second layer whose width is tuned over a quantized uniform space from 1 to 10
    model.add(
        Dense(3, kernel_initializer='normal', activation='softmax')
    )  # make the classification layer with number of nodes equal to the number of classes
    model.compile(
        loss='categorical_crossentropy',
        optimizer={{choice(['adam', 'sgd'])}},
        metrics=['accuracy']
    )  # compile with categorical cross-entropy loss, let the tuner choose 'adam' or 'sgd', and track accuracy
    model.fit(x_train, y_train,
              verbose=10)  # fit the model with the training set
    loss, acc = model.evaluate(
        x_test, y_test
    )  # evaluate the model with the test dataset and find the loss and accuracy of the model.
    return {
        'loss': -acc,
        'status': STATUS_OK,
        'model': model
    }  # return the loss to minimize (negative accuracy, since hyperopt minimizes), an OK status, and the model
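
A note on the templates used throughout these examples: {{quniform(a, b, q)}} expands to hyperopt's hp.quniform, which samples round(uniform(a, b) / q) * q and returns it as a float, which is why the snippets wrap it in int(). A quick check with plain hyperopt:

from hyperopt import hp
from hyperopt.pyll.stochastic import sample

space = hp.quniform('n_units', 1, 10, 1)
draws = [sample(space) for _ in range(5)]
print(draws)                    # floats on a grid, e.g. [3.0, 7.0, ...]
print([int(d) for d in draws])  # the int() cast used in the examples
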
Example #4
def create_model(x_train, y_train, x_val, y_val, x_test, y_test):

    if sys.argv[1] == 'german':
        input_n = 24
    elif sys.argv[1] == 'australian':
        input_n = 15
    else:
        raise ValueError("unknown dataset %r; expected 'german' or 'australian'" % sys.argv[1])

    batch_size = 32
    epochs = 500
    inits = [
        'Zeros', 'Ones', 'RandomNormal', 'RandomUniform', 'TruncatedNormal',
        'Orthogonal', 'lecun_uniform', 'lecun_normal', 'he_uniform',
        'he_normal', 'glorot_uniform', 'glorot_normal'
    ]
    acts = [
        'tanh', 'softsign', 'sigmoid', 'hard_sigmoid', 'relu', 'softplus',
        'LeakyReLU', 'PReLU', 'elu', 'selu'
    ]
    init = inits[int({{quniform(0, 11, 1)}})]
    act = acts[9]

    neurons = int({{quniform(9, 180, 9)}})
    layers = {{choice([1, 2, 4, 8])}}
    norm = {{choice(['no', 'l1', 'l2'])}}
    dropout = {{choice([0, 1])}}
    earlystop = {{choice([0, 1])}}
    k1 = None
    k2 = None
    p = None

    if norm == 'no':
        reg = None
    elif norm == 'l1':
        k1 = {{loguniform(-9.2, -2.3)}}
        reg = regularizers.l1(k1)
    elif norm == 'l2':
        k2 = {{loguniform(-9.2, -2.3)}}
        reg = regularizers.l2(k2)

    X_input = Input((input_n, ))
    X = X_input

    for _ in range(layers):
        X = Dense(
            neurons,
            kernel_initializer=init,
            kernel_regularizer=reg,
        )(X)

        if act == 'LeakyReLU':
            X = LeakyReLU()(X)
        elif act == 'PReLU':
            X = PReLU()(X)
        else:
            X = Activation(act)(X)

        if dropout == 1:
            p = {{uniform(0, 1)}}
            X = Dropout(p)(X)

    X = Dense(1, kernel_initializer=init, kernel_regularizer=reg)(X)
    X_outputs = Activation('sigmoid')(X)

    model = Model(inputs=X_input, outputs=X_outputs)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    patience = int({{quniform(1, 500, 1)}})
    es = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        verbose=0,
        mode='auto',
    )
    if earlystop == 1:
        model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            verbose=0,
            epochs=epochs,
            validation_data=(x_val, y_val),
            callbacks=[es],
        )
    else:
        model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            verbose=0,
            epochs=epochs,
            validation_data=(x_val, y_val),
        )

    loss_t, score_t = model.evaluate(x_train, y_train, verbose=0)
    loss_v, score_v = model.evaluate(x_val, y_val, verbose=0)
    loss_te, score_te = model.evaluate(x_test, y_test, verbose=0)

    print(init + '\t' + act + '\t' + str(neurons) + '\t' + str(layers) + '\t' +
          str(norm) + '\t' + str(dropout) + '\t' + str(earlystop) +
          '%-24s%-24s%-24s%s' % (str(k1), str(k2), str(p), str(patience)) +
          '  ' + str(score_v) + '  ' + str(loss_v) + '  ' + str(score_te) +
          '  ' + str(loss_te))
    return {'loss': loss_v, 'status': STATUS_OK, 'model': model}
Example #5
def create_model(rows, _max_flow):
    def rmse(y_true, y_pred, axis=0):
        return np.sqrt(((y_pred - y_true)**2).mean(axis=axis))

    def geh(y_true, y_pred, axis=0):
        return np.sqrt(2 * np.power(y_pred - y_true, 2) /
                       (y_pred + y_true)).mean(axis=axis)

    # JSON-style aliases so the params dict below can keep its JSON spelling
    null = None
    true = True
    false = False
    columnCount = {{choice([2048, 1024, 512])}}
    max_flow = _max_flow
    max_cycle_time = 210
    flow_buckets = {{quniform(10, 50, 1)}}
    cycle_time_buckets = {{quniform(10, 50, 1)}}
    synPermConnected = {{uniform(0.05, 0.25)}}
    activeColumns = {{quniform(20, 64, 1)}}
    synPermInactiveDec = {{uniform(0.0003, 0.1)}}
    synPermActiveInc = {{uniform(0.001, 0.1)}}
    potentialPct = {{uniform(0.2, 0.85)}}
    activationThreshold = {{quniform(5, 20, 1)}}
    pamLength = {{quniform(1, 10, 1)}}
    cellsPerColumn = {{quniform(8, 32, 2)}}
    minThreshold = {{quniform(4, 32, 1)}}
    alpha = {{uniform(0.0001, 0.2)}}
    boost = {{uniform(0.0, 0.1)}}
    tmPermanenceInc = {{uniform(0.05, 0.2)}}
    maxSynapsesPerSegment = {{quniform(28, 72, 2)}}
    newSynapseRatio = {{uniform(0.4, 0.8)}}
    newSynapseCount = maxSynapsesPerSegment * newSynapseRatio
    initialPerm = {{uniform(0.1, 0.33)}}
    maxSegmentsPerCell = {{quniform(32, 66, 2)}}
    permanenceDec = {{uniform(0.01, 0.2)}}
    # the DateEncoder width arguments below must always be odd, hence int(1 + even sample)
    weekend_radius = {{quniform(0, 90, 2)}}
    weekend_radius = int(1 + weekend_radius)
    timeOfDay_width = {{quniform(16, 101, 2)}}
    timeOfDay_width = int(1 + timeOfDay_width)
    dayOfWeek_width = {{quniform(20, 90, 2)}}
    dayOfWeek_width = int(1 + dayOfWeek_width)
    dayOfWeek_radius = {{uniform(7, 13)}}
    timeOfDay_radius = {{uniform(7, 13)}}
    params = {
        "aggregationInfo": {
            "hours": 0,
            "microseconds": 0,
            "seconds": 0,
            "fields": [],
            "weeks": 0,
            "months": 0,
            "minutes": 0,
            "days": 0,
            "milliseconds": 0,
            "years": 0
        },
        "model": "HTMPrediction",
        "version": 1,
        "predictAheadTime": null,
        "modelParams": {
            "sensorParams": {
                "verbosity": 0,
                "encoders": {
                    "datetime_weekend": {
                        'fieldname': 'datetime',
                        'name': 'datetime_weekend',
                        'weekend': weekend_radius,
                        'type': 'DateEncoder'
                    },
                    "datetime_timeOfDay": {
                        'fieldname': 'datetime',
                        'name': 'datetime_timeOfDay',
                        'type': 'DateEncoder',
                        'timeOfDay': (timeOfDay_width, timeOfDay_radius)
                    },
                    "datetime_dayOfWeek": {
                        'fieldname': 'datetime',
                        'name': 'datetime_dayOfWeek',
                        'type': 'DateEncoder',
                        'dayOfWeek': (dayOfWeek_width, dayOfWeek_radius)
                    },
                    "measured_flow": {
                        'fieldname': "measured_flow",
                        'name': 'measured_flow',
                        'type': 'RandomDistributedScalarEncoder',
                        'resolution': max(0.001, (max_flow - 1) / flow_buckets)
                    },
                    'phase_time': {
                        'fieldname': 'phase_time',
                        'name': 'phase_time',
                        'type': 'RandomDistributedScalarEncoder',
                        'resolution': max(0.001, (max_cycle_time - 1) / cycle_time_buckets)
                    }
                },
                "sensorAutoReset": null
            },
            "anomalyParams": {
                "anomalyCacheRecords": null,
                "autoDetectThreshold": null,
                "autoDetectWaitRecords": null
            },
            "spParams": {
                "columnCount": columnCount,
                "spVerbosity": 0,
                "spatialImp": "cpp",
                "synPermConnected": synPermConnected,
                "seed": 1956,
                "numActiveColumnsPerInhArea": int(activeColumns),
                "globalInhibition": 1,
                "inputWidth": 0,
                "synPermInactiveDec": synPermInactiveDec,
                "synPermActiveInc": synPermActiveInc,
                "potentialPct": potentialPct,
                "boostStrength": boost
            },
            "spEnable": true,
            "clEnable": true,
            "clParams": {
                "alpha": alpha,
                "verbosity": 0,
                "steps": "1",
                "regionName": "SDRClassifierRegion"
            },
            "inferenceType": "TemporalMultiStep",
            "trainSPNetOnlyIfRequested": false,
            "tmParams": {
                "columnCount": columnCount,
                "activationThreshold": int(activationThreshold),
                "pamLength": int(pamLength),
                "cellsPerColumn": int(cellsPerColumn),
                "permanenceInc": tmPermanenceInc,
                "minThreshold": int(minThreshold),
                "verbosity": 0,
                "maxSynapsesPerSegment": int(maxSynapsesPerSegment),
                "outputType": "normal",
                "initialPerm": initialPerm,
                "globalDecay": 0.0,
                "maxAge": 0,
                "permanenceDec": permanenceDec,
                "seed": 1960,
                "newSynapseCount": int(newSynapseCount),
                "maxSegmentsPerCell": int(maxSegmentsPerCell),
                "temporalImp": "cpp",
                "inputWidth": columnCount
            },
            "tmEnable": true
        }
    }
    #    print(json.dumps(params, indent=4))
    start = datetime.now()
    model = ModelFactory.create(params)
    model.enableInference({'predictedField': 'measured_flow'})
    model.enableLearning()

    actualDict = {}
    predictionsDict = {}
    for row in tqdm(rows, desc='HTM '):
        actualDict[row['datetime']] = row['measured_flow']
        future_time = row['datetime'] + timedelta(minutes=5)
        predictionsDict[future_time] = model.run(
            row).inferences['multiStepBestPredictions'][1]

    actual_x = []
    actual_y = []
    pred_y = []
    pred_x = []
    error_scores = []
    error_scores_x = []
    # now make a good set to evaluate

    for x, y in sorted(actualDict.items()):
        if x in predictionsDict:
            actual_y.append(y)
            actual_x.append(x)
            pred_x.append(x)
            pred_y.append(predictionsDict[x])

    # make a plot
    def npa(l):
        return np.array(l)

    split_idx = int(len(actual_y) * 0.6)
    npactual_y = npa(actual_y[split_idx:])
    nppred_y = npa(pred_y[split_idx:])

    # calculate a running error score on the last 500 predictions
    # lb = 500
    # for idx, i in enumerate(actual_y):
    #     run_actual = np.array(actual_y[max(0, idx-lb):idx+1])
    #     run_pred = np.array(pred_y[max(0, idx-lb):idx+1])
    #     error_scores.append(rmse(run_actual, run_pred))
    #     error_scores_x.append(actual_x[idx])
    rmse_result = rmse(npactual_y, nppred_y)
    metric_results = {
        'rmse': rmse_result,
        'mgeh': geh(npactual_y, nppred_y),
        'duration': (datetime.now() - start).total_seconds()
    }
    dpi = 80
    width = 1920 / dpi
    height = 1080 / dpi
    plt.figure(figsize=(width, height), dpi=dpi)
    plt.plot(actual_x, actual_y, color='b', label='Actual')
    plt.plot(pred_x, pred_y, color='r', label='Predictions')
    # the running-error trace is disabled (its computation above is commented out)
    # plt.plot(error_scores_x, error_scores, color='g', label='Error')
    plt.legend()
    plt.title("HTM Predictions at 115, SI 2: {}".format(
        metric_results['rmse']))
    plt.xlabel('Time')
    plt.ylabel('Flow')
    fig_name = 'model_{}_{}.png'.format(time(), rmse_result)
    plt.savefig(fig_name)
    print("Save image to", fig_name)
    bytes_out = BytesIO()
    plt.savefig(bytes_out, format='png')
    pkl_out = BytesIO()
    pickle.dump(
        {
            'true_x': actual_x,
            'true_y': actual_y,
            'pred_x': pred_x,
            'pred_y': pred_y
        }, pkl_out)
    print("RMSE: {} in {}s".format(*metric_results.values()))

    return {
        'loss': metric_results['rmse'],
        'status': STATUS_OK,
        'model': params,
        'metrics': metric_results,
        'figure': Binary(bytes_out.getvalue()),
        'pred_data': Binary(pkl_out.getvalue())
    }
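
The Binary wrapper in the return dict above comes from bson (it subclasses bytes) and suggests the trial results are persisted to MongoDB. A minimal sketch of recovering the stored artifacts from a finished trial's result dict:

import pickle

def recover_artifacts(result, fig_path='recovered_figure.png'):
    """result: the dict returned by create_model above, e.g. read back from
    the trials database; bson.Binary values behave like plain bytes."""
    with open(fig_path, 'wb') as f:
        f.write(result['figure'])
    # dict with 'true_x', 'true_y', 'pred_x', 'pred_y', as pickled above
    return pickle.loads(result['pred_data'])
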
Example #6
def model(x_train, y_train, x_test, y_test):
    """
    Model providing function.
    Wrap the parameters you want to optimize into double curly brackets and choose 
    a distribution over which to run the algorithm.
    """
    keras.backend.clear_session()
    # define the number of outputs and hit features
    num_outputs = 3  # the first three of mcx,mcy,mcz,mcu,mcv,mcw,mct
    num_features = 5  # pmtX,pmtY,pmtZ,pmtT,pmtQ

    model = Sequential()
    # num_hits (PMT hits per event) must be defined in the enclosing scope
    model.add(Masking(mask_value=0, input_shape=(num_features, num_hits)))
    model.add(Flatten())
    model.add(
        Dense(int({{quniform(1, 512, 1)}}),
              kernel_initializer={{
                  choice(['normal', 'he_normal', 'uniform', 'he_uniform'])
              }},
              activation={{choice(['softmax', 'relu', 'linear'])}}))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(
        Dense(int({{quniform(1, 512, 1)}}),
              kernel_initializer={{
                  choice(['normal', 'he_normal', 'uniform', 'he_uniform'])
              }},
              activation={{choice(['softmax', 'relu', 'linear'])}}))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(
        Dense(int({{quniform(1, 512, 1)}}),
              kernel_initializer={{
                  choice(['normal', 'he_normal', 'uniform', 'he_uniform'])
              }},
              activation={{choice(['softmax', 'relu', 'linear'])}}))
    model.add(Dropout({{uniform(0, 1)}}))
    # If we choose 'five', add an additional layer
    num_layers = {{choice(['four', 'five'])}}
    if num_layers == 'five':
        model.add(
            Dense(int({{quniform(1, 512, 1)}}),
                  kernel_initializer={{
                      choice(['uniform', 'normal', 'he_normal', 'he_uniform'])
                  }},
                  activation={{choice(['softmax', 'relu', 'linear'])}}))
        model.add(Dropout({{uniform(0, 1)}}))
    model.add(
        Dense(num_outputs,
              kernel_initializer={{
                  choice(['uniform', 'normal', 'he_normal', 'he_uniform'])
              }},
              activation={{choice(['linear', 'relu'])}}))  # final output layer with num_outputs dimensions
    # Print model summary
    model.summary()
    # Compile model
    # mean_squared_error for metrics (potentially more informative than accuracy)
    model.compile(loss='mean_squared_error',
                  optimizer='Adamax',
                  metrics=['accuracy'])
    #{{choice(['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax','Nadam'])}}
    print(model.metrics_names)
    #    tbCallBack = keras.callbacks.TensorBoard(log_dir='./logs/fit', histogram_freq=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    checkpointer = ModelCheckpoint(filepath='keras_weights_optimisation.hdf5',
                                   verbose=1,
                                   save_best_only=True)

    result = model.fit(x_train,
                       y_train,
                       batch_size=int({{quniform(2, 256, 1)}}),
                       epochs=int({{quniform(2, 64, 1)}}),
                       verbose=2,
                       validation_data=(x_test, y_test),
                       callbacks=[early_stopping, checkpointer])

    mse, acc = model.evaluate(x_test, y_test, verbose=0)

    print('mse,acc:', mse, acc)
    return {'loss': mse, 'status': STATUS_OK, 'model': model}
Example #7
def create_model(x_train, y_train, x_val, y_val):

    batch_size = 64
    epochs = 500
    init = 'lecun_normal'
    act = 'tanh'

    neurons = int({{quniform(9, 180, 9)}})
    layers = {{choice([1, 2, 4, 8])}}
    norm = {{choice(['no', 'l1', 'l2'])}}
    dropout = {{choice([0, 1])}}
    earlystop = {{choice([0, 1])}}
    k = None
    p = None
    patience = None

    if norm == 'no':
        reg = None
    elif norm == 'l1':
        k = {{loguniform(-9.2, -2.3)}}
        reg = regularizers.l1(k)
    elif norm == 'l2':
        k = {{loguniform(-9.2, -2.3)}}
        reg = regularizers.l2(k)

    X_input = Input((24, ))
    X = Reshape((-1, ))(X_input)

    for _ in range(layers):
        X = Dense(neurons, kernel_initializer=init, kernel_regularizer=reg)(X)
        X = Activation(act)(X)

        if dropout == 1:
            p = {{uniform(0, 1)}}
            X = Dropout(p)(X)

    X = Dense(1, kernel_initializer=init, kernel_regularizer=reg)(X)
    X_outputs = Activation('sigmoid')(X)

    model = Model(inputs=X_input, outputs=X_outputs)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    if earlystop == 0:
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  verbose=0,
                  epochs=epochs,
                  validation_data=(x_val, y_val))
    elif earlystop == 1:
        patience = int({{quniform(1, 500, 1)}})
        es = EarlyStopping(monitor='val_loss',
                           patience=patience,
                           verbose=0,
                           mode='auto')
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  verbose=0,
                  epochs=epochs,
                  validation_data=(x_val, y_val),
                  callbacks=[es])

    loss_t, score_t = model.evaluate(x_train, y_train, verbose=0)
    loss_v, score_v = model.evaluate(x_val, y_val, verbose=0)

    print(
        str(neurons) + '\t' + str(layers) + '\t' + str(norm) + '\t' +
        str(dropout) + '\t' + str(earlystop) + '\t\t' + '%-24s%-24s%s' %
        (str(k), str(p), str(patience)))
    return {'loss': loss_v, 'status': STATUS_OK, 'model': model}
Example #8
def do_model(all_data):
    _steps, tts_factor, num_epochs = get_steps_extra()
    # features = all_data[:-_steps]
    # labels = all_data[_steps:, 4:]
    # tts = train_test_split(features, labels, test_size=0.4)
    # X_train = tts[0]
    # X_test = tts[1]
    # Y_train = tts[2].astype(np.float64)
    # Y_test = tts[3].astype(np.float64)
    split_pos = int(len(all_data) * tts_factor)
    train_data, test_data = all_data[:split_pos], all_data[split_pos:]
    dataX, dataY, fields = create_dataset(test_data, 1, _steps)

    optimiser = {{choice(['adam', 'rmsprop'])}}
    hidden_neurons = int({{quniform(16, 256, 4)}})
    loss_function = 'mse'
    batch_size = int({{quniform(1, 10, 1)}})
    dropout = {{uniform(0, 0.5)}}
    dropout_dense = {{uniform(0, 0.5)}}
    hidden_inner_factor = {{uniform(0.1, 1.9)}}
    inner_hidden_neurons = int(hidden_inner_factor * hidden_neurons)
    dropout_inner = {{uniform(0, 0.5)}}

    dataX = fit_to_batch(dataX, batch_size)
    dataY = fit_to_batch(dataY, batch_size)

    extra_layer = {{choice([True, False])}}
    if not extra_layer:
        dropout_inner = 0

    # X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    # X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    # print("X train shape:\t", X_train.shape)
    # print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    print("Steps:\t", _steps)
    print("Extra layer:\t", extra_layer)
    print("Batch size:\t", batch_size)

    # in_neurons = X_train.shape[2]

    out_neurons = 1

    model = Sequential()
    best_weight = BestWeight()
    model.add(
        LSTM(units=hidden_neurons,
             batch_input_shape=(batch_size, 1, fields),
             return_sequences=extra_layer,
             stateful=True,
             dropout=dropout))
    model.add(Activation('relu'))

    if extra_layer:
        dense_input = inner_hidden_neurons
        model.add(
            LSTM(
                units=dense_input,
                # input_shape=hidden_neurons,
                stateful=True,
                return_sequences=False,
                dropout=dropout_inner))
        model.add(Activation('relu'))

    model.add(Dense(units=out_neurons, activation='relu'))
    model.add(Dropout(dropout_dense))
    model.compile(loss=loss_function, optimizer=optimiser)

    history = model.fit(dataX,
                        dataY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        validation_split=0.3,
                        shuffle=False,
                        callbacks=[best_weight])

    model.set_weights(best_weight.get_best())
    X_test, Y_test, _fields = create_dataset(test_data, 1, _steps)
    X_test, Y_test = fit_to_batch(X_test, batch_size), fit_to_batch(
        Y_test, batch_size)
    predicted = model.predict(X_test, batch_size=batch_size) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer', extra_layer),
        ('extra_layer_dropout', dropout_inner),
        ('dropout_dense', dropout_dense),
        ('extra_layer_neurons', inner_hidden_neurons),
        ('loss function', loss_function)
        # 'history': history.history
    ])
    print(metrics)
    return {'loss': rmse_val, 'status': STATUS_OK, 'metrics': metrics}  # minimize RMSE directly; negating it would reward worse models
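
BestWeight is not defined in this snippet; judging from the model.set_weights(best_weight.get_best()) call, it is a custom callback that remembers the weights of the epoch with the best validation loss. A minimal sketch of such a callback (an assumption, not the original implementation):

from keras.callbacks import Callback

class BestWeight(Callback):
    """Remember the weights of the epoch with the lowest monitored metric (sketch)."""
    def __init__(self, monitor='val_loss'):
        super(BestWeight, self).__init__()
        self.monitor = monitor
        self.best = float('inf')
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        current = (logs or {}).get(self.monitor)
        if current is not None and current < self.best:
            self.best = current
            self.best_weights = self.model.get_weights()

    def get_best(self):
        return self.best_weights
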
Example #9
def create_model(_rows, data_dict, max_flow):
    def rmse(y_true, y_pred, axis=0):
        return np.sqrt(((y_pred - y_true)**2).mean(axis=axis))

    def create_dataset_from_dict(ddata, lookback=1, steps=1):
        dataX = []
        dataY = []
        for dt, data in ddata.items():
            timestep = []
            yval = ddata.get(dt + timedelta(minutes=5 * steps))
            # check the future value exists and is not an error
            if yval is not None:
                for j in range(lookback):
                    offset = dt - timedelta(minutes=5 * steps * j)
                    # make sure we have all previous values in the lookback
                    if ddata.get(offset) is not None:
                        timestep.append(ddata[offset])
                if len(timestep) == lookback:
                    dataX.append(timestep)
                    dataY.append(yval[0])
        fields = len(dataX[0][0])
        return np.array(dataX,
                        dtype=np.double), np.array(dataY,
                                                   dtype=np.double), fields

    def fit_to_batch(arr, b_size):
        lim = len(arr) - (len(arr) % b_size)
        return arr[:lim]

    class TerminateOnNaN(Callback):
        """Callback that terminates training when a NaN loss is encountered.
        """
        def __init__(self):
            super(TerminateOnNaN, self).__init__()
            self.terminated = False

        def on_batch_end(self, batch, logs=None):
            logs = logs or {}
            loss = logs.get('loss')
            if loss is not None:
                if np.isnan(loss) or np.isinf(loss):
                    print('Batch %d: Invalid loss, terminating training' %
                          (batch))
                    self.model.stop_training = True
                    self.terminated = True

    # input fields are:
    """
    Input is: 
    [
        flow
        dayOfWeek
        MinuteOfDay
        month
        week
        isWeekend
    ]
    for `lookback` records
    """
    lookback = int({{quniform(1, 40, 1)}})
    scaler = MinMaxScaler((0, 1))
    # rows = scaler.fit_transform(_rows)
    # dataX, dataY, fields = create_dataset(rows, lookback)
    scaled = scaler.fit_transform(list(data_dict.values()))
    scaled_data_dict = dict(zip(data_dict.keys(), scaled))
    dataX, dataY, fields = create_dataset_from_dict(scaled_data_dict, lookback)

    test_train_split = 0.60  ## 60% training 40% test
    split_idx = int(len(dataX) * test_train_split)
    train_x = dataX[:split_idx]
    train_y = dataY[:split_idx]
    test_x = dataX[split_idx:]
    test_y = dataY[split_idx:]
    batch_size = int({{quniform(1, 5, 1)}})

    train_x = fit_to_batch(train_x, batch_size)
    train_y = fit_to_batch(train_y, batch_size)
    test_x = fit_to_batch(test_x, batch_size)
    test_y = fit_to_batch(test_y, batch_size)

    nb_epoch = 1
    lstm_size_1 = {{quniform(96, 300, 4)}}
    lstm_size_2 = {{quniform(96, 300, 4)}}
    lstm_size_3 = {{quniform(69, 300, 4)}}
    optimizer = {{choice(['adam', 'rmsprop'])}}  # also considered: 'nadam', 'adamax', 'adadelta', 'adagrad'
    l1_dropout = {{uniform(0.001, 0.7)}}
    l2_dropout = {{uniform(0.001, 0.7)}}
    l3_dropout = {{uniform(0.001, 0.7)}}
    output_activation = {{choice(['relu', 'tanh', 'linear'])}}
    # reset_interval = int({{quniform(1, 100, 1)}})
    # layer_count = {{choice([1, 2, 3])}}
    l1_reg = {{uniform(0.0001, 0.1)}}
    l2_reg = {{uniform(0.0001, 0.1)}}
    params = {
        'batch_size': batch_size,
        'lookback': lookback,
        'lstm_size_1': lstm_size_1,
        'lstm_size_2': lstm_size_2,
        'lstm_size_3': lstm_size_3,
        'l1_dropout': l1_dropout,
        'l2_dropout': l2_dropout,
        'l3_dropout': l3_dropout,
        'l1_reg': l1_reg,
        'l2_reg': l2_reg,
        'optimizer': optimizer,
        'output_activation': output_activation,
        # 'state_reset': reset_interval,
        # 'layer_count': layer_count,
        # 'use_embedding': use_embedding
    }
    print("PARAMS=", json.dumps(params, indent=4))

    def krmse(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

    def geh(y_true, y_pred):
        return K.mean(K.sqrt(2 * K.pow(y_pred - y_true, 2) /
                             (y_pred + y_true)), axis=-1)

    reg = L1L2(l1_reg, l2_reg)
    start = datetime.now()
    model = Sequential()
    # if conditional(use_embedding):
    #     model.add(Embedding())
    model.add(
        LSTM(int(lstm_size_1),
             batch_input_shape=(batch_size, lookback, fields),
             return_sequences=True,
             stateful=True,
             activity_regularizer=reg,
             bias_initializer='ones'))
    model.add(Dropout(l1_dropout))
    model.add(Activation('relu'))
    model.add(
        LSTM(int(lstm_size_2),
             return_sequences=True,
             bias_initializer='ones',
             stateful=True,
             activity_regularizer=reg))
    model.add(Dropout(l2_dropout))
    model.add(Activation('relu'))
    model.add(
        LSTM(int(lstm_size_3),
             bias_initializer='ones',
             stateful=True,
             activity_regularizer=reg))
    model.add(Dropout(l3_dropout))
    model.add(Activation('relu'))
    model.add(Dense(1, activation='relu'))

    terminate_cb = TerminateOnNaN()
    model.compile(loss='mse', optimizer=optimizer)
    try:
        model.fit(
            train_x,
            train_y,
            epochs=1,
            verbose=1,
            batch_size=batch_size,
            shuffle=False,
            callbacks=[terminate_cb],
        )
    except Exception as e:
        print(e)
        return {'status': STATUS_FAIL, 'msg': e}
    if terminate_cb.terminated:
        return {'status': STATUS_FAIL, 'msg': "Invalid loss"}
    # have it continue learning during this phase
    # split the test_x,test_y
    preds = []

    def group(iterable, n):
        it = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk

    test_y_it = iter(group(test_y, batch_size))
    test_batch_idx = 0
    prog = tqdm(range(len(test_y) // batch_size), desc='Train ')
    for batch in group(test_x, batch_size):

        batch = np.array(batch)
        test_y_batch = np.array(next(test_y_it))
        model.train_on_batch(batch, test_y_batch)
        batch_preds = model.predict_on_batch(batch)[:, 0]
        preds.extend(batch_preds)
        test_batch_idx += 1
        prog.update()
        # if test_batch_idx % reset_interval == 0:
        #     model.reset_states()
    preds = np.array(preds)
    finish = datetime.now()
    preds_pad = np.zeros((preds.shape[0], fields))
    preds_pad[:, 0] = preds.flatten()
    test_y_pad = np.zeros((preds.shape[0], fields))
    test_y_pad[:, 0] = test_y.flatten()
    unscaled_pred = scaler.inverse_transform(preds_pad)
    unscaled_test_y = scaler.inverse_transform(test_y_pad)
    rmse_result = rmse(unscaled_pred, unscaled_test_y)[0]

    plot_x = np.arange(test_x.shape[0])
    dpi = 80
    width = 1920 / dpi
    height = 1080 / dpi
    plt.figure(figsize=(width, height), dpi=dpi)
    plt.plot(plot_x, unscaled_test_y[:, 0], color='b', label='Actual')
    plt.plot(plot_x, unscaled_pred[:, 0], color='r', label='Predictions')
    plt.legend()

    plt.title("LSTM Discrete Predictions at 115, SI 2\nRMSE:{}".format(
        round(rmse_result, 3)))
    plt.xlabel('Time')
    plt.ylabel('Flow')
    fig_name = 'model_{}.png'.format(time())
    plt.savefig(fig_name)
    plt.show()
    with open(fig_name, 'rb') as img_file:
        fig_b64 = base64.b64encode(img_file.read()).decode('ascii')

    return {
        'loss': rmse_result,
        'status': STATUS_OK,
        'model': model._updated_config(),
        'metrics': {
            'rmse': rmse_result,
            # 'geh': geh(unscaled_pred, unscaled_test_y)[0],
            'duration': (finish - start).total_seconds()
        },
        'figure': fig_b64,
        'params': params
    }
Example #10
def model(X_train, Y_train, X_test, Y_test):
    img_rows, img_cols = 32, 32
    img_channels = 3
    nb_dim = 50
    nb_epoch = 15  # alternatives tried: 35, 30

    dense_layer_size = {{choice([256, 512, 1024])}}
    objective = 'mse'
    #optimizer = {{choice(['rmsprop', 'adam', 'sgd'])}}
    optimizer = {{choice(['rmsprop', 'sgd'])}}
    batch_size = {{choice([32, 64, 128])}}
    num_conv1 = int({{quniform(24, 64, 1)}})
    num_conv2 = int({{quniform(32, 96, 1)}})
    size_conv1 = int({{quniform(2, 5, 1)}})
    size_conv2 = int({{quniform(2, 5, 1)}})
    early_dropout = {{uniform(0,.75)}}
    late_dropout = {{uniform(0,.75)}}
    data_augmentation = {{choice([True, False])}}  # booleans: a string like 'False' would be truthy in the checks below
    final_activation = {{choice(['none','linear'])}}
    params = {'dense_layer_size':dense_layer_size,
              'optimizer':optimizer,
              'batch_size':batch_size,
              'num_conv1':num_conv1,
              'num_conv2':num_conv2,
              'size_conv1':size_conv1,
              'size_conv2':size_conv2,
              'final_activation':final_activation,
              'early_dropout':early_dropout,
              'late_dropout':late_dropout
             }
    if optimizer == 'sgd':
        learning_rate = {{loguniform(np.log(0.001),np.log(0.999))}}
        params['learning_rate'] = learning_rate

    if data_augmentation:
        more_augmentation = {{choice([True, False])}}  # booleans, not strings (see above)
        params['more_augmentation'] = more_augmentation

    model = Sequential()


    model.add(Convolution2D(num_conv1, size_conv1, size_conv1, border_mode='same',
                            input_shape=(img_channels, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(Convolution2D(num_conv1, size_conv1, size_conv1))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(early_dropout))

    model.add(Convolution2D(num_conv2, size_conv2, size_conv2, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(num_conv2, size_conv2, size_conv2))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(early_dropout))

    model.add(Flatten())
    model.add(Dense(dense_layer_size))
    model.add(Activation('relu'))
    model.add(Dropout(late_dropout))
    model.add(Dense(nb_dim))

    if final_activation != 'none':
        model.add(Activation(final_activation))

    if optimizer == 'sgd':
        # let's train the model using SGD + momentum (how original).
        sgd = SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss=objective, optimizer=sgd)
    elif optimizer == 'rmsprop':
        model.compile(loss=objective, optimizer='rmsprop')
    else:
        model.compile(loss=objective, optimizer=optimizer)

    print(params)

    if not data_augmentation:
        print('Not using data augmentation.')
        history = model.fit(X_train, Y_train, batch_size=batch_size,
                  nb_epoch=nb_epoch, show_accuracy=True,
                  validation_data=(X_test, Y_test), shuffle=True)
    else:
        print('Using real-time data augmentation.')
        if more_augmentation:
            # this will do preprocessing and realtime data augmentation
            datagen = ImageDataGenerator(
                featurewise_center=True,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=True,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=True,  # randomly flip images
                vertical_flip=False)  # randomly flip images
        else:
            # this will do preprocessing and realtime data augmentation
            datagen = ImageDataGenerator(
                featurewise_center=False,  # set input mean to 0 over the dataset
                samplewise_center=False,  # set each sample mean to 0
                featurewise_std_normalization=False,  # divide inputs by std of the dataset
                samplewise_std_normalization=False,  # divide each input by its std
                zca_whitening=False,  # apply ZCA whitening
                rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
                width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=True,  # randomly flip images
                vertical_flip=False)  # randomly flip images

        # compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied)
        datagen.fit(X_train)

        # fit the model on the batches generated by datagen.flow()
        history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                            samples_per_epoch=X_train.shape[0],
                            nb_epoch=nb_epoch, show_accuracy=True,
                            validation_data=(X_test, Y_test),
                            nb_worker=1)

    #score, acc = model.evaluate(X_test, Y_test, verbose=0)
    loss = model.evaluate(X_test, Y_test, verbose=0)
    print('Test loss:', loss)

    return {'loss': loss, 'status': STATUS_OK, 'params':params}
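
A note on the learning-rate template above: {{loguniform(np.log(0.001), np.log(0.999))}} maps to hp.loguniform, which returns exp(uniform(low, high)), so the sampled rate stays within [0.001, 0.999] and is skewed toward small values. A quick check with plain hyperopt:

import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

lr_space = hp.loguniform('lr', np.log(0.001), np.log(0.999))
samples = np.array([sample(lr_space) for _ in range(1000)])
print(samples.min(), samples.max())  # within [0.001, 0.999)
print(np.median(samples))            # well below the arithmetic midpoint
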
Example #11
def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    ## NUMBER OF GPUS
    # GET NUMBER OF AVAILABLE GPUS
    CUDA_VISIBLE_DEVICES = getenv('CUDA_VISIBLE_DEVICES')
    if CUDA_VISIBLE_DEVICES is None:
        N_GPUS = 1
    else:
        N_GPUS = len(CUDA_VISIBLE_DEVICES.split(","))

    ## PARAMETERS
    params = utils.AttnParams()
    params["latent_dim"] = 60
    params["bottleneck"] = "average1"
    params["kl_pretrain_epochs"] = 1
    params["kl_anneal_epochs"] = 1
    params["batch_size"] = 50
    params["epochs"] = 4

    # model params to change
    d_model = {{quniform(60, 150, 4)}}
    d_inner_hid = {{quniform(128, 2048, 4)}}
    d_k = {{quniform(4, 30, 2)}}
    layers = {{quniform(1, 5, 1)}}

    params["d_model"] = int(d_model)
    params["d_inner_hid"] = int(d_inner_hid)
    params["d_k"] = int(d_k)
    params["layers"] = int(layers)
    params["pp_weight"] = 1.25
    # Automatically set params from above
    params["d_v"] = params["d_k"]
    params["heads"] = int(np.ceil(d_model / d_k))
    params.setIDparams()
    # GET TOKENS
    _, _, _, _, tokens = utils.load_dataset('data/zinc_100k.h5', "TRANSFORMER",
                                            True)

    model, result = trainTransformer(params,
                                     tokens=tokens,
                                     data_train=x_train,
                                     data_test=x_test,
                                     callbacks=["var_anneal"])

    # get the highest validation accuracy of the training epochs
    validation_acc = np.amax(result.history['val_acc'])
    output = PD(x_train[0],
                x_train[-1],
                num_seeds=200,
                num_decodings=3,
                model=model,
                beam_width=5)

    frac_valid = output["num_valid"] / output["num_mols"]
    print("With params:")
    params.dump()
    print("Validation acc:", validation_acc, "Fraction valid:", frac_valid)

    return {
        'loss': -frac_valid,
        'status': STATUS_OK,
        'model': model.autoencoder
    }