Example #1
def NB(X, y, X_ind, y_ind):
    """Cross Validation and independent set test for Naive Bayes.

    Arguments:
        X (ndarray): Feature data of training and validation set for cross-validation.
                     m X n matrix, m is the No. of samples, n is the No. of fetures
        y (ndarray): Label data of training and validation set for cross-validation.
                     m-D vector, and m is the No. of samples.
        X_ind (ndarray): Feature data of independent test set for independent test.
                         It has the similar data structure as X.
        y_ind (ndarray): Feature data of independent set for for independent test.
                         It has the similar data structure as y
        out (str): The file path for saving the result data.

    Returns:
         cvs (ndarray): cross-validation results. The shape is (m, ), m is the No. of samples.
         inds (ndarray): independent test results. It has similar data structure as cvs.
    """
    folds = StratifiedKFold(5).split(X, y)
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    for i, (trained, valided) in enumerate(folds):
        model = GaussianNB()
        model.fit(X[trained], y[trained])
        cvs[valided] = model.predict_proba(X[valided])[:, 1]
        inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
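The same cross-validation pattern (out-of-fold probabilities for the training set, fold-averaged probabilities for the independent set) is reused by the SVM and RF helpers further below. A minimal sketch of calling NB on toy data follows; the imports, the synthetic data, and the 80/20 split are assumptions for illustration, not part of the original snippet.

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB

# Toy binary-classification data: 100 samples, 5 features.
rng = np.random.RandomState(0)
X_all = rng.rand(100, 5)
y_all = rng.randint(0, 2, size=100)

# First 80 samples for cross-validation, last 20 as the independent set.
cvs, inds = NB(X_all[:80], y_all[:80], X_all[80:], y_all[80:])
print(cvs.shape, inds.shape)  # (80,) (20,)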
def randomLine():
    seed(1)
    alpha = 0.000003
    numOfSteps = 1000
    numOfLoops = 250
    model = LinearRegression()
    #generating feature x
    x = np.random.uniform(low=-4, high=6, size=(500, ))
    #generating coefficient and constant
    a = float(np.random.uniform(low=-5, high=10, size=(1, )))
    b = float(np.random.uniform(low=-5, high=5, size=(1, )))
    #adding Gaussian noise
    noise = np.random.normal(0, 1, 500)
    # labels
    y = []
    for i in range(len(x)):
        t = a * x[i] + b + noise[i]
        y.append(t)

    for i in range(len(x)):
        model.addSample(x[i], y[i])
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        model.fit(alpha, numOfSteps)
        plt.show()
def main(args):
    model_id = build_model_id(args)

    model_path = build_model_path(args, model_id)

    setup_model_dir(args, model_path)

    if 'background' in args.mode:
        callback_logger = logging.info
        sys.stdout, sys.stderr = setup_logging(
            os.path.join(model_path, 'model.log'))
        verbose = 0
    else:
        callback_logger = callable_print
        verbose = 1

    json_cfg = load_model_json(args)
    json_cfg['model_path'] = model_path
    json_cfg['stdout'] = sys.stdout
    json_cfg['stderr'] = sys.stderr
    json_cfg['logger'] = callback_logger
    json_cfg['verbose'] = verbose

    config = ModelConfig(**json_cfg)

    if 'persistent' in args.mode:
        save_model_info(config, model_path)

    sys.path.append(args.model_dir)
    import model
    from model import fit

    model.fit(config)
Example #4
def run(args, meta, model, callbacks, exp, id_=100, data=None):
    train_ds, val_ds, train_len, validation_len = prerun(args, meta, data)

    init_weights_path = Path(args["run_dir"], 'initial_model_weights.h5')
    if init_weights_path.exists():
        model.load_weights(str(init_weights_path))
    else:
        hist = model.fit(train_ds, epochs=1, steps_per_epoch=1)
        model.save_weights(str(init_weights_path))

    for i, cb in enumerate(callbacks):
        if type(cb) == my_callbacks.ValidationMonitor:
            cb.set(val_ds, validation_len, id_, exp)
        if type(cb) == my_callbacks.ImageLogger:
            cb.set_dataset(train_ds, len(args["channels"]))

    hist = model.fit(
        train_ds,
        epochs=args["epochs"],
        steps_per_epoch=int(np.ceil(train_len / args["batch_size"])),
        callbacks=callbacks,
        validation_data=val_ds,
        validation_steps=int(np.ceil(validation_len / args["batch_size"])))

    return hist
Example #5
def train(model, training_data, callback=True, batch_size=256, epochs=10):

    (x_train, y_train), (x_test, y_test), mapping, nb_classes = training_data

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    if callback:
        # Callback for analysis in TensorBoard
        tbCallBack = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[tbCallBack] if callback else None)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    # Offload model to file
    model_yaml = model.to_yaml()
    with open("bin/model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)
    save_model(model, 'bin/model.h5')
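This snippet relies on names imported elsewhere in the original file. A sketch of what those imports might look like for the Keras 2.x API used here (an assumption, since the original imports are not shown):

import keras
from keras.utils import np_utils     # provides np_utils.to_categorical
from keras.models import save_model  # writes architecture + weights to bin/model.h5

Note that the bin/ directory has to exist before the YAML and HDF5 files can be written.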
Example #6
def train_model(model, X_train, y_train, name, config, data):
    """train
    train a single model.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), Input data for train.
        y_train: ndarray(number, ), result data for train.
        name: String, name of model.
        config: Dict, parameter for train.
    """

    if name in ['lstm', 'gru', 'saes', 'cnn_lstm', 'en_1', 'en_2', 'en_3']:
        #model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        model.compile(loss="mse", optimizer="adam", metrics=['mse'])
        es = EarlyStopping(monitor='val_loss',
                           patience=10,
                           verbose=0,
                           mode='min')
        if data == "pems":
            mc = ModelCheckpoint('model_pems/' + name + '.h5',
                                 monitor='val_loss',
                                 mode='auto',
                                 verbose=1,
                                 save_best_only=True)
        elif data == "nyc":
            mc = ModelCheckpoint('model_nyc/' + name + '.h5',
                                 monitor='val_loss',
                                 mode='auto',
                                 verbose=1,
                                 save_best_only=True)

        hist = model.fit(X_train,
                         y_train,
                         batch_size=config["batch"],
                         epochs=config["epochs"],
                         validation_split=0.05,
                         callbacks=[es, mc])

        #model.save('model/' + name + '.h5')
        df = pd.DataFrame.from_dict(hist.history)
        if data == "pems":
            df.to_csv('model_pems/' + name + ' loss.csv',
                      encoding='utf-8',
                      index=False)
        elif data == "nyc":
            df.to_csv('model_nyc/' + name + ' loss.csv',
                      encoding='utf-8',
                      index=False)

    elif name == 'rf':
        model.fit(X_train, y_train)

        if data == "pems":
            with open('model_pems/' + name + '.h5', 'wb') as f:
                cPickle.dump(model, f)
        elif data == "nyc":
            with open('model_nyc/' + name + '.h5', 'wb') as f:
                cPickle.dump(model, f)
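A hypothetical driver for train_model, based only on the config keys and directory names referenced above (the values, the model object, and the training arrays are assumptions):

config = {"batch": 64, "epochs": 100}            # keys read by train_model above
# model = build_lstm(...)                        # hypothetical model constructor
# train_model(model, X_train, y_train, 'lstm', config, data='pems')
# -> saves model_pems/lstm.h5 and writes model_pems/lstm loss.csv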
Example #7
def train(args):


  # create model
  model = get_model(n_dim, r, from_ckpt=False, train=True)

  # train model
  model.fit(X_train, Y_train, X_val, Y_val, n_epoch=args.epochs)
Example #8
def train_model(model, train_input, train_target, mini_batch_size):
    criterion = MSELoss(model)
    opt = SGD(model, lr=0.05)
    model.fit(train_input,
              train_target,
              opt,
              criterion,
              mini_batch_size,
              epoch=250)
Example #9
    def fit(self, model, x_train, y_train):

        if len(x_train) > 0:
            x_train = np.array(x_train)
            y_train = np.array(y_train)

            tensorBoard = self.k.callbacks.TensorBoard()

            learning_rate_reduction = self.k.callbacks.ReduceLROnPlateau(
                monitor='loss',
                patience=5,
                verbose=1,
                factor=0.5,
                min_lr=1e-09)

            datagen = self.k.preprocessing.image.ImageDataGenerator(
                rotation_range=1,
                # width_shift_range=0.01,
                # height_shift_range=0.01,
                # shear_range=0.01,
                # zoom_range=0.01,
                # horizontal_flip=True,
                fill_mode='nearest')

            print("x_train", x_train.shape)
            print("y_train", y_train.shape)

            if y_train.shape[0] == 0:
                print("Bad dataset")
                exit(0)

            # datagen.fit(x_train)

            # for i in range(self.c.epochs):
            # print("Epoch " + str(i+1) + '/' + str(self.c.epochs))
            # model.fit_generator(datagen.flow(x_train, y_train, batch_size=10),
            model.fit(
                x_train,
                y_train,
                batch_size=32,
                # workers=8,
                # steps_per_epoch=50,
                epochs=30,
                # validation_data=(x_train, y_train),
                # validation_data=(x_test, y_test),
                shuffle=True,
                verbose=1,
                callbacks=[learning_rate_reduction, tensorBoard]
                # callbacks=[tensorBoard]
            )

            tfHelper.save_model(model, "model")
        return model
Example #10
def combined_model(data):
    '''
    Uses ARIMA and regression.
    '''
    cols = list(data.columns.values)
    a, b = data.shape
    Y = data.pop("Global_active_power")
    X = data

    model = LinearRegression()
    data1 = data['Global_reactive_power'].values
    data2 = data['Voltage'].values
    data3 = data['Global_intensity'].values
    data4 = data['Sub_metering_1'].values
    data5 = data['Sub_metering_2'].values
    data6 = data['Sub_metering_3'].values
    look_back = 1440
    full_forecast = pd.DataFrame()
    dataframes = [data1, data2, data3, data4, data5, data6]

    try:
        for series in dataframes:
            data = series
            print(data)
            # collect the one-step forecasts for this series only
            indi_forecast = []
            for _ in range(0, 100):
                try:
                    print('arima')
                    arima_model = ARIMA(data, order=(2, 1, 2))
                    arima_fit = arima_model.fit(disp=0)
                    print('forecast')
                    output = arima_fit.forecast()
                    yhat = output[0]
                    indi_forecast.append(yhat)
                except Exception:
                    break
            fore = pd.DataFrame(indi_forecast)
            full_forecast = pd.concat([full_forecast, fore], axis=1)

        full_forecast = full_forecast.dropna()
        model.fit(X, Y)
        z = model.predict(full_forecast)
        print('mse', mse(data[1:], z))

    except Exception:
        print('unable to predict long sequence')
Example #11
def main(args):
    model_id = build_model_id(args)

    model_path = build_model_path(args, model_id)

    setup_model_dir(args, model_path)

    json_cfg = load_model_json(args, x_train=None, n_classes=None)
    config = ModelConfig(**json_cfg)

    sys.path.append(args.model_dir)
    import model
    from model import fit

    model.fit(config)
Example #12
def simple_models(data):
    '''
    Uses ARIMA only on 'Global_active_power'.
    '''
    data = data['Global_active_power']
    data = scale(data.values)

    #####
    # rolling mean and exponentially weighted moving average
    # (pd.rolling_mean / pd.ewma were removed from pandas; the Series methods are used instead)
    ma = pd.Series(data).rolling(12).mean().values
    ew_avg = pd.Series(data).ewm(halflife=12).mean().values
    # ARIMA
    model = ARIMA(data, order=(2, 1, 2))
    result = model.fit(disp=-1)
    plt.plot(data)
    plt.plot(result.fittedvalues, color='red')
    plt.show()
    result = result.predict()
    print('mean squared error', mse(data[1:], result))
    print('rolling mean:', ma[10:20])
    print('exponential weighted moving average', ew_avg[10:20])
    print('ARIMA', result[10:20])

    pd.DataFrame(ma, columns=['Forecast']).to_csv('../Output/rolling_mean',
                                                  index=False)
    pd.DataFrame(ew_avg, columns=['Forecast']).to_csv(
        '../Output/exponential_weighted_moving_average', index=False)
    pd.DataFrame(result, columns=['Forecast']).to_csv('../Output/ARIMA',
                                                      index=False)
Example #13
def randomLine():
    seed(1)
    alpha = 0.003
    numOfSteps = 1000
    numOfLoops = 100
    noise = 20
    noOfFeatures = 2
    model = LinearRegression()
    x = []
    # generating the values for the variables given in the assignment
    x1 = np.random.uniform(low=0, high=1, size=(5000, ))
    x2 = np.random.uniform(low=0, high=1, size=(5000, ))
    # generating the values for the coefficients given in the assignment
    a = float(np.random.uniform(low=-100, high=100, size=(1, )))
    b = float(np.random.uniform(low=-100, high=100, size=(1, )))
    c = float(np.random.uniform(low=-20, high=20, size=(1, )))
    # generating the value for the noise given in the assignment
    delta = np.random.uniform(low=-100, high=100, size=(5000, ))
    # calculating the label
    y = [(a * x1[i] + b * x2[i] + c + delta[i]) for i in range(len(x1))]
    # array of features as (x1, x2)
    features = list(zip(x1, x2))

    for i in range(len(y)):
        model.addSample(features[i], y[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ",  cost = ",
              "{0:.4f}".format(cost))
Example #14
def stateless_fit(model, X, y, Xtest, ytest, params):
    """
    Train the model passed as the 1st argument, and return the trained model.
    X and y training values are passed, along with a parameters dictionary.
    The model is fitted one epoch at a time so its recurrent states can be
    reset between epochs.
    """
    for i in range(params['lstm_num_epochs']):
        model.fit(
            X, y,
            epochs=1,
            validation_data=(Xtest, ytest),
            verbose=params['keras_verbose_level'],
            shuffle=False,
            batch_size=params['lstm_batch_size'])
        model.reset_states()
    return model
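Despite its name, this helper follows the usual stateful-RNN recipe: fit one epoch at a time with shuffle=False and reset the recurrent states in between. A sketch of the parameters dictionary it expects, using only the keys referenced above (the values are illustrative assumptions):

params = {
    'lstm_num_epochs': 5,       # outer epochs; each inner fit runs exactly one
    'lstm_batch_size': 32,
    'keras_verbose_level': 0,
}
# model = stateless_fit(model, X, y, Xtest, ytest, params)  # model: any Keras model with reset_states()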
def setPlane():

    alpha = 0.000000003
    numOfSteps = 1000
    numOfLoops = 10
    model = LinearRegression()
    noOfFeatures = 2
    x = list(range(0, 1000))
    # generating the points ((x, 2x), 5x) and ((2x, x), 4x)
    x1, y1 = x, [i * 2 for i in x]
    x2, y2 = [i * 2 for i in x], x
    x = x1 + x2
    y = y1 + y2
    #calculating z = x + 2y
    z = [(x[i] + 2 * y[i]) for i in range(len(x))]
    # array of features as (x, y)
    features = list(zip(x, y))

    for i in range(len(z)):
        model.addSample(features[i], z[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ",  cost = ",
              "{0:.4f}".format(cost))
Example #16
def SVM(X, y, X_ind, y_ind, is_reg=False):
    """Cross Validation and independent set test for Support Vector Machine (SVM)

    Arguments:
        X (ndarray): Feature data of training and validation set for cross-validation.
                     m X n matrix, m is the No. of samples, n is the No. of fetures
        y (ndarray): Label data of training and validation set for cross-validation.
                     m-D vector, and m is the No. of samples.
        X_ind (ndarray): Feature data of independent test set for independent test.
                         It has the similar data structure as X.
        y_ind (ndarray): Feature data of independent set for for independent test.
                         It has the similar data structure as y
        out (str): The file path for saving the result data.
        is_reg (bool, optional): define the model for regression (True) or classification (False) (Default: False)

    Returns:
         cvs (ndarray): cross-validation results. The shape is (m, ), m is the No. of samples.
         inds (ndarray): independent test results. It has similar data structure as cvs.
    """
    if is_reg:
        folds = KFold(5).split(X)
        model = SVR()
    else:
        folds = StratifiedKFold(5).split(X, y)
        model = SVC(probability=True)
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    gs = GridSearchCV(model, {
        'C': 2.0**np.array([-5, 15]),
        'gamma': 2.0**np.array([-15, 5])
    },
                      n_jobs=5)
    gs.fit(X, y)
    params = gs.best_params_
    print(params)
    for i, (trained, valided) in enumerate(folds):
        if is_reg:
            model = SVR(C=params['C'], gamma=params['gamma'])
        else:
            model = SVC(probability=True, C=params['C'], gamma=params['gamma'])
        model.fit(X[trained], y[trained])
        if is_reg:
            cvs[valided] = model.predict(X[valided])
            inds += model.predict(X_ind)
        else:
            cvs[valided] = model.predict_proba(X[valided])[:, 1]
            inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
Example #17
def standard():
    payload = request.json
    if payload is None:
        return "invalid payload", 400

    user_request = codec.Request.fromdict(payload)
    fitted_model = model.fit(user_request)

    return jsonify(response(user_request, fitted_model).todict())
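standard() is written as a Flask view: it reads request.json, fits the project's model on the decoded request, and returns a JSON response. A minimal sketch of how such a view might be registered, assuming Flask (the route path is an assumption; the codec, model, and response helpers are project-specific and not shown here):

from flask import Flask, request, jsonify

app = Flask(__name__)
# Route path and HTTP method are assumptions for illustration.
app.add_url_rule("/standard", view_func=standard, methods=["POST"])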
def setLine():

    alpha = 0.000000003
    numOfSteps = 100
    numOfLoops = 50
    model = LinearRegression()
    #feature
    x = list(range(0, 1000))
    #label
    y = x
    for i in x:
        y = i
        model.addSample(i, y)
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        model.fit(alpha, numOfSteps)
        plt.show()
Example #19
def main(train_track, prediction_track, instruments_num, test_track=None):
    """
    :param train_track: path to an audio file with a known number of instruments in the composition
    :param prediction_track: path to an audio file to run the prediction on
    :param instruments_num: number of instruments (number of centroids) used to tag the training frames
    :param test_track: optional path to an audio file with consistent instruments to test the prediction
    """
    _, train_mfccs = audio_to_mfcc(train_track)
    labels = tag_frames(train_mfccs, instruments_num)
    # model = build_model(64, train_mfccs.shape[1], labels.shape[1])
    model = build_cnn_model()  #train_mfccs.shape[1])
    fit(model, train_mfccs, labels)
    if test_track:
        _, test_mfccs = audio_to_mfcc(test_track)
        print(model.evaluate(test_mfccs, labels, batch_size=32))
    duration, predict_mfccs = audio_to_mfcc(prediction_track)
    prediction = model.predict(predict_mfccs, batch_size=32)
    consistent_samples = make_consistent_samples(prediction, duration, 0.2)
    print(consistent_samples)
Example #20
def train_model(X_train, X_test, y_train, y_test, model):
    X_train = X_train.reshape(X_train.shape[0], 300, 300, 3)
    X_test = X_test.reshape(X_test.shape[0], 300, 300, 3)

    print("X_train.shape=", X_train.shape)
    print("y_train.shape", y_train.shape)

    print("X_test.shape=", X_test.shape)
    print("y_test.shape", y_test.shape)

    # print(y_train[0])
    '''
    softmax layer -> output = 10 nodes, each representing one of the digit classes 0 through 9.

    To do this, the y values are converted to a one-hot encoding:
    0: 1,0,0,0,0,0,0,0,0,0
    1: 0,1,0,0,0,0,0,0,0,0
    ...
    5: 0,0,0,0,0,1,0,0,0,0
    '''
    # reformat via one-hot encoding
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    # print(y_train[0])

    # categorical_crossentropy is used for multi-class classification
    # metrics: evaluation metrics reported during training
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # batch_size: number of samples seen before each gradient-descent update
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        batch_size=16,
                        epochs=30,
                        verbose=1)

    plot_loss_curve(history.history)

    # print(history.history)
    print("train loss=", history.history['loss'][-1])
    print("validation loss=", history.history['val_loss'][-1])

    # save model in file
    # offering in KERAS
    model.save('model-201611263.model')

    history_df = pd.DataFrame(history.history)
    with open("history_data.csv", mode='w') as file:
        history_df.to_csv(file)

    return model
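The comment block above describes one-hot encoding for the ten digit classes; a quick check with Keras' to_categorical reproduces exactly those rows (the tensorflow.keras import path is an assumption):

from tensorflow.keras.utils import to_categorical

print(to_categorical([0, 1, 5], num_classes=10))
# [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]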
Example #21
def run(train_data, valid_data, len_size, scale, EPOCHS, root_path='./', load_model_dir=None, saved_model_dir=None, log_dir=None, summary=False):
    if log_dir is None:
        log_dir = os.path.join(root_path, 'our_model', 'logs', 'model')
    logging.info(train_data)
    logging.info(valid_data)
    # get generator model and discriminator model
    Gen = model.make_generator_model(len_high_size=len_size, scale=scale)
    Dis = model.make_discriminator_model(len_high_size=len_size, scale=scale)
    if load_model_dir is not None:
        # load_model_dir = os.path.join(root_path, 'our_model', 'saved_model')
        file_path = os.path.join(load_model_dir, 'gen_model_'+str(len_size), 'gen_weights')
        if os.path.exists(file_path):
            Gen.load_weights(file_path)
        else:
            logging.info("generator doesn't exist. create a new one.")
        file_path = os.path.join(load_model_dir, 'dis_model_'+str(len_size), 'dis_weights')
        if os.path.exists(file_path):
            Dis.load_weights(file_path)
        else:
            logging.info("discriminator model doesn't exist. create a new one")

    if summary:
        logging.info(Gen.summary())
        tf.keras.utils.plot_model(Gen, to_file='G.png', show_shapes=True)
        logging.info(Dis.summary())
        tf.keras.utils.plot_model(Dis, to_file='D.png', show_shapes=True)

    if saved_model_dir is None:
        saved_model_dir = os.path.join(root_path, 'our_model', 'saved_model')

    model.fit(Gen, Dis, train_data, EPOCHS, len_size, scale,
                valid_data, log_dir=log_dir, saved_model_dir=saved_model_dir)

    file_path = os.path.join(
        saved_model_dir, 'gen_model_'+str(len_size), 'gen_weights')
    Gen.save_weights(file_path)

    file_path = os.path.join(
        saved_model_dir, 'dis_model_'+str(len_size), 'dis_weights')
    Dis.save_weights(file_path)
Example #22
    def _test_train(self):
        d = model.MyDriver()
        (X_train, y_train) = d.load_example_training_data(
            examples=10, width=8,
            height=8)  # //d.load_traing_data("../data/driving_log.csv"))
        model = d.simple_network(8, 8)
        model.fit(X_train,
                  y_train,
                  validation_split=0.0,
                  shuffle=True,
                  nb_epoch=100,
                  verbose=1)
        if 0:
            model.save('test_train.md5')
            model = keras.models.load_model('test_train.md5')
        y = model.predict(X_train)
        y_index = np.argmax(y, 1)
        print(y_index)

        for i in range(0, 10):
            assert (y_index[i] == i)
        return
def train_model(model, X_train, y_train, name, config):
    model.compile(loss="mse", optimizer="adadelta", metrics=['mape'])
    # early = EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto')
    hist = model.fit(X_train,
                     y_train,
                     batch_size=config["batch"],
                     epochs=config["epochs"],
                     validation_split=0.1)  # training in progress

    model.save('models/' + name + '.h5')
    # df = pd.DataFrame.from_dict(hist.history)
    # df.to_csv('models/' + name + ' loss.csv', encoding='utf-8', index=False)
    return model
Example #24
def train_model(model,
                x_train,
                y_train,
                out_dir,
                validation_data,
                n_epochs,
                batch_size,
                learning_rate,
                loss="binary_crossentropy",
                early_stopping=True,
                save_checkpoint=True,
                verbose=1,
                ckpt_name_prefix=""):
    print("Model summary:")
    print(model.model.summary())
    callbacks = []
    if save_checkpoint:
        # Save the model at every epoch; 'val_loss' is the monitored quantity.
        # If save_best_only=True, the model with the best monitored quantity is not overwritten.
        # If save_weights_only=True, only the model weights are saved, by calling model.save_weights.
        checkpoint = ModelCheckpoint(os.path.join(
            out_dir, ckpt_name_prefix + ".{epoch:02d}-{val_loss:.3f}.hdf5"),
                                     verbose=verbose,
                                     monitor='val_loss',
                                     save_weights_only=True,
                                     save_best_only=True)
        callbacks.append(checkpoint)
    if early_stopping:
        # Training stops when the monitored quantity (val_loss) stops improving.
        # patience is the number of epochs with no improvement after which training is stopped.
        stopping = EarlyStopping(monitor="val_loss",
                                 min_delta=0,
                                 patience=6,
                                 verbose=verbose,
                                 mode='auto')
        callbacks.append(stopping)
    optimizer = Adagrad(lr=learning_rate, epsilon=1e-08, decay=0.0, clipnorm=1.)
    model.compile(metrics=[], optimizer=optimizer, loss=loss)
    print("Training of model '%s' started." % model.model_name)
    start_time = time.time()
    history = model.fit(x_train,
                        y_train,
                        validation_data=validation_data,
                        n_epochs=n_epochs,
                        batch_size=batch_size,
                        callbacks=callbacks,
                        verbose=verbose)
    print("Training of model '%s' finished in %s." %
          (model.model_name,
           time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
    return history
Example #25
def fit(model, X, y, Xtest, ytest, params):
    """
    Train the model passed as the 1st argument, and return the Keras History
    object produced by fit(). X and y training values are passed.
    A parameters dictionary is also necessary.
    """
    train_loss = model.fit(
        X, y,
        validation_data=(Xtest, ytest),
        verbose=params['keras_verbose_level'],
        shuffle=params['lstm_shuffle'],
        batch_size=params['lstm_batch_size'],
        epochs=params['lstm_num_epochs'])
    return train_loss
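Keras' model.fit returns a History object whose .history dict holds the per-epoch metrics, so the result can be inspected or plotted directly. A small usage sketch (the matplotlib import is an assumption; model, data arrays, and params are taken to exist as above):

import matplotlib.pyplot as plt

hist = fit(model, X, y, Xtest, ytest, params)
plt.plot(hist.history['loss'], label='train')
plt.plot(hist.history['val_loss'], label='validation')
plt.legend()
plt.show()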
Example #26
def test():

    # some data
    X_train, X_test, y_train, y_test, index_train, index_test = dutil.load_titanic()

    X_train = X_train.astype(numpy.float64)
    y_train = y_train.reshape(1, y_train.shape[0])[0].astype(numpy.int32)
    X_test = X_test.astype(numpy.float64)
    y_test = y_test.reshape(1, y_test.shape[0])[0].astype(numpy.int32)

    # train
    model = Classification()
    model.add(dense.DenseLayer(7, 20, name="hiddenLayer"))
    model.add(dense.DenseLayer(20, 2, name="outputLayer", W_init=defa, activation=softmax, learning_rate=0.001))
    model.fit(X_train, X_test, y_train, y_test)
Example #27
def RF(X, y, X_ind, y_ind, is_reg=False):
    """Cross Validation and independent set test for Random Forest model

    Arguments:
        X (ndarray): Feature data of training and validation set for cross-validation.
                     m X n matrix, m is the No. of samples, n is the No. of fetures
        y (ndarray): Label data of training and validation set for cross-validation.
                     m-D vector, and m is the No. of samples.
        X_ind (ndarray): Feature data of independent test set for independent test.
                         It has the similar data structure as X.
        y_ind (ndarray): Feature data of independent set for for independent test.
                         It has the similar data structure as y
        out (str): The file path for saving the result data.
        is_reg (bool, optional): define the model for regression (True) or classification (False) (Default: False)

    Returns:
         cvs (ndarray): cross-validation results. The shape is (m, ), m is the No. of samples.
         inds (ndarray): independent test results. It has similar data structure as cvs.
        """
    if is_reg:
        folds = KFold(5).split(X)
        alg = RandomForestRegressor
    else:
        folds = StratifiedKFold(5).split(X, y)
        alg = RandomForestClassifier
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    for i, (trained, valided) in enumerate(folds):
        model = alg(n_estimators=500, n_jobs=1)
        model.fit(X[trained], y[trained])
        if is_reg:
            cvs[valided] = model.predict(X[valided])
            inds += model.predict(X_ind)
        else:
            cvs[valided] = model.predict_proba(X[valided])[:, 1]
            inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
def randomDimension(noOfFeatures):

    seed(1)
    alpha = 0.00003
    numOfSteps = 1000
    numOfLoops = 100
    noise = 20
    model = LinearRegression()
    examples = 5000
    # array of coefficients
    t = []
    # array of all features for each of the 5000 examples
    x = []
    # array of labels r
    r = []
    # array of the product of coefficient and x for each dimension
    tx = []
    # array of features as (x1, x2, ..., xn)
    features = []
    # generating a random value for coefficient t0 in the assignment
    t0 = float(np.random.uniform(low=-100, high=100, size=(1, )))
    for i in range(1, noOfFeatures + 1):
        # generating a random value for each coefficient t in the assignment
        a = float(np.random.uniform(low=-100, high=100, size=(1, )))
        t.append(a)
        # generating random values for each feature x in the assignment
        x1 = np.random.uniform(low=0, high=1, size=(5000, ))
        x.append(list(x1))
        # product of coefficient and x
        p = list(a * x1)
        tx.append(p)

    for j in range(examples):
        # summation of the products of coefficient and x for each sample
        z = sum(i[j] for i in tx)
        # adding noise and t0 to the summation to get r
        y = z + t0 + noise
        r.append(y)
        g = [i[j] for i in x]
        features.append(g)

    for i in range(len(r)):
        model.addSample(features[i], r[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ",  cost = ",
              "{0:.4f}".format(cost))
Example #29
def main():
    model = M.Network()

    training_msgs = np.array([[0, 0, 0], [1, 1, 1], [0, 1, 0], [1, 0, 1],
                              [1, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 0]])
    X = one_hot_encoding(training_msgs.copy())
    y = encode_seqs(training_msgs.copy())

    validation_msgs = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                [1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0],
                                [0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
                                [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]])
    Xv = one_hot_encoding(validation_msgs.copy())
    yv = encode_seqs(validation_msgs.copy())

    history = M.fit(model, X, y, validation_data=(Xv, yv), epochs=300)
Example #30
def run_holts(train,
              validate,
              target_variable,
              exponential,
              smoothing_level=.1,
              smoothing_slope=.1):
    # Create model object
    model = Holt(train[target_variable], exponential=exponential)

    # Fit model
    model = model.fit(smoothing_level=smoothing_level,
                      smoothing_slope=smoothing_slope,
                      optimized=False)

    # Create predictions
    y_pred = model.predict(start=validate.index[0], end=validate.index[-1])

    return model, y_pred
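A minimal sketch of calling run_holts on a toy series, assuming the Holt class used above comes from statsmodels (the column name, values, and train/validate split are illustrative assumptions):

import pandas as pd

df = pd.DataFrame({"sales": range(1, 31)})
train, validate = df.iloc[:24], df.iloc[24:]

model, y_pred = run_holts(train, validate, "sales", exponential=False,
                          smoothing_level=.1, smoothing_slope=.1)
print(y_pred.head())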
Example #31
def setLine():

    alpha = 0.000000003
    numOfSteps = 100
    numOfLoops = 50
    model = LinearRegression()
    #feature
    x = list(range(0, 1000))
    #label
    y = x
    for i in x:
        y = i
        model.addSample(i, y)
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ",  cost = ",
              "{0:.4f}".format(cost))
Example #32
def sydney():
    payload = request.json
    if payload is None:
        return "invalid payload", 400


#    with open("/tmp/request.json", "w") as file:
#        json.dump(payload, file)

    user_request = codec.Request.fromdict(payload)

    # TODO: merge this once we have enough Loop data.

    with open("sydney2019-11-20.json") as file:
        canned_payload = json.load(file)
        canned_request = codec.Request.fromdict(canned_payload)
        user_request.timeseries = canned_request.timeseries

    fitted_model = model.fit(user_request)

    return jsonify(response(user_request, fitted_model).todict())
Example #33
def exp(regressor, pca_n_components=None, **regressor_param):
	df = preprocess()
	df = df.iloc[:, 1:]
	#print(df.head())
	sample_num = df.shape[0]
	val_pred = []
	
	for row in range(sample_num):
		val = df.iloc[[row]]
		train = df.drop([row])
		if pca_n_components is not None:
			train, val = apply_pca(train, val, n_components=pca_n_components)
		model = fit(regressor, train, **regressor_param)
		pred = predict(model, val)
		val_pred.append(pred[0])
			
	real_label = df.iloc[:, 0].values
	val_pred = np.array(val_pred)
	r2 = pearson_r_square(val_pred, real_label)
	print("pearson r square: {}".format(r2))
	roc_data = calculate_SSA(val_pred, real_label, 5, 2, 10, 0.1)
	auc = np.trapz(roc_data[::-1, 1], roc_data[::-1, 2])
	print("AUC: {}".format(auc))
	plot_corr2(val_pred, real_label, r2, auc, roc_data)
Example #34
    print(str(err))  # will print something like "option -a not recognized"
    usage(name)
    sys.exit(2)
ranking = False
for o,a in optlist:
    if o == '--ranking':
        ranking = True
    else:
        assert False, "unhandled option"
if len(args) < 3:
    usage(name)
    sys.exit(2)
ml_type = args[0]
ml_param_str = args[1]
model_path = args[2]

print >> sys.stderr, "Reading the data..."
in_data = VowpalWabbitData(ranking=ranking)
(X_all, Y, tags) = in_data.read(sys.stdin)

X = []
for x in X_all:
    X += x

print >> sys.stderr, "Building the model..."
h = HTMLParser.HTMLParser()
params = h.unescape(ml_param_str)
model = model.Model(ml_type,params)
model.fit(X,Y)
model.save(model_path)
Example #35
	# 						   word_embedding_matrix, NB_FILTER)
	# model = model.buildLstmPool(nb_words, word_embedding_matrix ,MAX_SEQUENCE_LENGTH)
	model = model.LSTM3(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH)
	# model = model.BiLSTM(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH)
	# model = model.BiLstmPool(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH, POOL_LENGTH)

	model.compile(loss='categorical_crossentropy', optimizer='adagrad',  # adam
				  metrics=['accuracy'])
	model.summary()  # print a summary of the model
	callbacks = [ModelCheckpoint(MODEL_WEIGHTS_FILE,
								 monitor='val_acc', save_best_only=True)]

	t0 = time.time()
	history = model.fit(X_train, train_label,
						batch_size=BATCH_SIZE,
						verbose=1,
						validation_split=VALIDATION_SPLIT, # (X_test, test_label)
						callbacks=callbacks,
						nb_epoch=NB_EPOCHS)
	t1 = time.time()
	print("Minutes elapsed: %f" % ((t1 - t0) / 60.))

	# save the model and weights to the specified path
	model.save(model_path)
	# load the weights into the current model
	# model = load_model(model_path)

	# Print best validation accuracy and epoch in valid_set
	max_val_acc, idx = max((val, idx) for (idx, val) in enumerate(history.history['val_acc']))
	print('Maximum accuracy at epoch', '{:d}'.format(idx + 1), '=', '{:.4f}'.format(max_val_acc))

	# plot the result
Example #36
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file,
            args.data_name, args.target_name)
    x_validation, y_validation = load_model_data(
            args.validation_file,
            args.data_name, args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train)+1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

    if len(class_weight) == 0:
        n_samples = len(y_train)
        print('n_samples', n_samples)
        print('classes', range(n_classes))
        print('weights', n_samples / (n_classes * np.bincount(y_train)))
        class_weight = dict(zip(range(n_classes),
            n_samples / (n_classes * np.bincount(y_train))))
    print('class_weight', class_weight)

    logging.debug("n_classes {0} min {1} max {2}".format(
        n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)
    logging.debug("min vocab index {0} max vocab index {1}".format(
        min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    #######################################################################      
    # Subsetting
    #######################################################################      
    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
        return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(
            subsetter, args.train_file,
            x_train, y_train, y_train_one_hot,
            n=args.n_train)

    x_validation, y_validation, y_validation_one_hot = take_subset(
            subsetter, args.validation_file,
            x_validation, y_validation, y_validation_one_hot,
            n=args.n_validation)

    #######################################################################      
    # Preprocessing
    #######################################################################      
    if args.preprocessing_class:
        preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed)
    else:
        preprocessor = modeling.preprocess.NullPreprocessor()

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    logging.info("model_cfg " + str(model_cfg))
    model = build_model(model_cfg)
    setattr(model, 'stop_training', False)

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(model)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)

    if not args.no_save:
        if args.save_all_checkpoints:
            filepath = model_path + '/model-{epoch:04d}.h5'
        else:
            filepath = model_path + '/model.h5'
        callbacks.append(ModelCheckpoint(
            filepath=filepath,
            verbose=1,
            save_best_only=not args.save_every_epoch))

    callback_logger = logging.info if args.log else callable_print

    if args.n_epochs < sys.maxsize:
        # Number of epochs overrides patience.  If the number of epochs
        # is specified on the command line, the model is trained for
        # exactly that number; otherwise, the model is trained with
        # early stopping using the patience specified in the model 
        # configuration.
        callbacks.append(EarlyStopping(
            monitor='val_loss', patience=model_cfg.patience, verbose=1))

    if args.classification_report:
        cr = ClassificationReport(x_validation, y_validation,
                callback_logger,
                target_names=target_names)
        callbacks.append(cr)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(model)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
            x_train, y_train_one_hot = preprocessor.fit_transform(
                    x_train, y_train_one_hot)
            x_validation, y_validation_one_hot = preprocessor.transform(
                    x_validation, y_validation_one_hot)

            iteration = batch % len(args.extra_train_file)

            logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format(
                    epoch=epoch, iteration=iteration, train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]

            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(model, keras.models.Graph):
                    data = {
                            'input': x_train[batch_ids],
                            'output': y_train_one_hot[batch_ids]
                            }
                    train_loss = model.train_on_batch(data, class_weight=class_weight)
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = model.train_on_batch(
                            x_train[batch_ids], y_train_one_hot[batch_ids],
                            accuracy=True, class_weight=class_weight)

                batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy}

                avg_train_loss = (avg_train_loss * batch_index + train_loss)/(batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy)/(batch_index + 1)

                callbacks.on_batch_end(batch,
                        logs={'loss': train_loss, 'accuracy': train_accuracy})

            logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format(
                    epoch=epoch, iteration=iteration, loss=avg_train_loss, acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # occur in the same iteration as beginning of an epoch
            # (next if-block), so model.evaluate appears twice here.
            kwargs = { 'verbose': 0 if args.log else 1 }
            pargs = []
            validation_data = {}
            if isinstance(model, keras.models.Graph):
                validation_data = {
                        'input': x_validation,
                        'output': y_validation_one_hot
                        }
                pargs = [validation_data]
            else:
                pargs = [x_validation, y_validation_one_hot]
                kwargs['show_accuracy'] = True

            if (iteration + 1) % args.validation_freq == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if model.stop_training:
                logging.info("epoch {epoch} iteration {iteration} - done training".format(
                    epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train,
                    args.data_name, args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        x_train, y_train_one_hot = preprocessor.fit_transform(
                x_train, y_train_one_hot)
        x_validation, y_validation_one_hot = preprocessor.transform(
                x_validation, y_validation_one_hot)
        if isinstance(model, keras.models.Graph):
            data = {
                    'input': x_train,
                    'output': y_train_one_hot
                    }
            validation_data = {
                    'input': x_validation,
                    'output': y_validation_one_hot
                    }
            model.fit(data,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                validation_data=validation_data,
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
            y_hat = model.predict(validation_data)
            print('val_acc %.04f' % 
                    accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)))
        else:
            model.fit(x_train, y_train_one_hot,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                show_accuracy=True,
                validation_data=(x_validation, y_validation_one_hot),
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
Example #37
def main():
    init()
    fit()
    predict(isOffline=ISOFFLINE)