def NB(X, y, X_ind, y_ind):
    """Cross-validation and independent-set test for Naive Bayes.

    Arguments:
        X (ndarray): Feature data of the training and validation set for cross-validation.
            m x n matrix, where m is the No. of samples and n is the No. of features.
        y (ndarray): Label data of the training and validation set for cross-validation.
            m-D vector, where m is the No. of samples.
        X_ind (ndarray): Feature data of the independent test set. Same structure as X.
        y_ind (ndarray): Label data of the independent test set. Same structure as y.

    Returns:
        cvs (ndarray): Cross-validation results with shape (m,), where m is the No. of samples.
        inds (ndarray): Independent test results. Same structure as cvs.
    """
    folds = StratifiedKFold(5).split(X, y)
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    for i, (trained, valided) in enumerate(folds):
        model = GaussianNB()
        model.fit(X[trained], y[trained])
        cvs[valided] = model.predict_proba(X[valided])[:, 1]
        inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
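# A minimal usage sketch for NB() above, assuming the imports it relies on
# (numpy as np, StratifiedKFold, GaussianNB) are already in scope. The synthetic
# data and the roc_auc_score check are illustrative only; SVM() and RF() below
# follow the same calling convention.
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

X_all, y_all = make_classification(n_samples=500, n_features=20, random_state=0)
X_cv, y_cv = X_all[:400], y_all[:400]      # cross-validation split
X_ind, y_ind = X_all[400:], y_all[400:]    # held-out independent set

cvs, inds = NB(X_cv, y_cv, X_ind, y_ind)
print("CV AUC:", roc_auc_score(y_cv, cvs))
print("Independent AUC:", roc_auc_score(y_ind, inds))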
def randomLine():
    seed(1)
    alpha = 0.000003
    numOfSteps = 1000
    numOfLoops = 250
    model = LinearRegression()
    # generating feature x
    x = np.random.uniform(low=-4, high=6, size=(500, ))
    # generating coefficient and constant
    a = float(np.random.uniform(low=-5, high=10, size=(1, )))
    b = float(np.random.uniform(low=-5, high=5, size=(1, )))
    # adding gaussian noise
    noise = np.random.normal(0, 1, 500)
    # labels
    y = []
    for i in range(len(x)):
        t = a * x[i] + b + noise[i]
        y.append(t)
    for i in range(len(x)):
        model.addSample(x[i], y[i])
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        model.fit(alpha, numOfSteps)
    plt.show()
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)

    if 'background' in args.mode:
        callback_logger = logging.info
        sys.stdout, sys.stderr = setup_logging(
            os.path.join(model_path, 'model.log'))
        verbose = 0
    else:
        callback_logger = callable_print
        verbose = 1

    json_cfg = load_model_json(args)
    json_cfg['model_path'] = model_path
    json_cfg['stdout'] = sys.stdout
    json_cfg['stderr'] = sys.stderr
    json_cfg['logger'] = callback_logger
    json_cfg['verbose'] = verbose
    config = ModelConfig(**json_cfg)

    if 'persistent' in args.mode:
        save_model_info(config, model_path)

    sys.path.append(args.model_dir)
    import model
    from model import fit
    model.fit(config)
def run(args, meta, model, callbacks, exp, id_=100, data=None):
    train_ds, val_ds, train_len, validation_len = prerun(args, meta, data)

    init_weights_path = Path(args["run_dir"], 'initial_model_weights.h5')
    if init_weights_path.exists():
        model.load_weights(str(init_weights_path))
    if not init_weights_path.exists():
        hist = model.fit(train_ds, epochs=1, steps_per_epoch=1)
        model.save_weights(str(init_weights_path))

    for i, cb in enumerate(callbacks):
        if type(cb) == my_callbacks.ValidationMonitor:
            cb.set(val_ds, validation_len, id_, exp)
        if type(cb) == my_callbacks.ImageLogger:
            cb.set_dataset(train_ds, len(args["channels"]))

    hist = model.fit(
        train_ds,
        epochs=args["epochs"],
        steps_per_epoch=int(np.ceil(train_len / args["batch_size"])),
        callbacks=callbacks,
        validation_data=val_ds,
        validation_steps=int(np.ceil(validation_len / args["batch_size"])))
    return hist
def train(model, training_data, callback=True, batch_size=256, epochs=10):
    (x_train, y_train), (x_test, y_test), mapping, nb_classes = training_data

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    if callback == True:
        # Callback for analysis in TensorBoard
        tbCallBack = keras.callbacks.TensorBoard(log_dir='./Graph',
                                                 histogram_freq=0,
                                                 write_graph=True,
                                                 write_images=True)

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[tbCallBack] if callback else None)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    # Offload model to file
    model_yaml = model.to_yaml()
    with open("bin/model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)
    save_model(model, 'bin/model.h5')
def train_model(model, X_train, y_train, name, config, data):
    """Train a single model.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), input data for training.
        y_train: ndarray(number, ), target data for training.
        name: String, name of the model.
        config: Dict, training parameters.
        data: String, dataset name ('pems' or 'nyc'), used to pick the output directory.
    """
    if name in ['lstm', 'gru', 'saes', 'cnn_lstm', 'en_1', 'en_2', 'en_3']:
        # model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        model.compile(loss="mse", optimizer="adam", metrics=['mse'])
        es = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
        if data == "pems":
            mc = ModelCheckpoint('model_pems/' + name + '.h5', monitor='val_loss',
                                 mode='auto', verbose=1, save_best_only=True)
        elif data == "nyc":
            mc = ModelCheckpoint('model_nyc/' + name + '.h5', monitor='val_loss',
                                 mode='auto', verbose=1, save_best_only=True)
        hist = model.fit(
            X_train, y_train,
            batch_size=config["batch"],
            epochs=config["epochs"],
            validation_split=0.05,
            callbacks=[es, mc])
        # model.save('model/' + name + '.h5')
        df = pd.DataFrame.from_dict(hist.history)
        if data == "pems":
            df.to_csv('model_pems/' + name + ' loss.csv', encoding='utf-8', index=False)
        elif data == "nyc":
            df.to_csv('model_nyc/' + name + ' loss.csv', encoding='utf-8', index=False)
    elif name == 'rf':
        model.fit(X_train, y_train)
        if data == "pems":
            with open('model_pems/' + name + '.h5', 'wb') as f:
                cPickle.dump(model, f)
        elif data == "nyc":
            with open('model_nyc/' + name + '.h5', 'wb') as f:
                cPickle.dump(model, f)
def train(args):
    # create model; the first positional argument was missing in the original
    # call, `args` is assumed here
    model = get_model(args, n_dim, r, from_ckpt=False, train=True)
    # train model
    model.fit(X_train, Y_train, X_val, Y_val, n_epoch=args.epochs)
def train_model(model, train_input, train_target, mini_batch_size):
    criterion = MSELoss(model)
    opt = SGD(model, lr=0.05)
    model.fit(train_input, train_target, opt, criterion, mini_batch_size, epoch=250)
def fit(self, model, x_train, y_train):
    if len(x_train) > 0:
        x_train = np.array(x_train)
        y_train = np.array(y_train)

    tensorBoard = self.k.callbacks.TensorBoard()
    learning_rate_reduction = self.k.callbacks.ReduceLROnPlateau(
        monitor='loss', patience=5, verbose=1, factor=0.5, min_lr=1e-09)
    datagen = self.k.preprocessing.image.ImageDataGenerator(
        rotation_range=1,
        # width_shift_range=0.01,
        # height_shift_range=0.01,
        # shear_range=0.01,
        # zoom_range=0.01,
        # horizontal_flip=True,
        fill_mode='nearest')

    print("x_train", x_train.shape)
    print("y_train", y_train.shape)
    if y_train.shape[0] == 0:
        print("Bad dataset")
        exit(0)

    # datagen.fit(x_train)
    # for i in range(self.c.epochs):
    #     print("Epoch " + str(i+1) + '/' + str(self.c.epochs))
    #     model.fit_generator(datagen.flow(x_train, y_train, batch_size=10),
    model.fit(
        x_train, y_train,
        batch_size=32,
        # workers=8,
        # steps_per_epoch=50,
        epochs=30,
        # validation_data=(x_train, y_train),
        # validation_data=(x_test, y_test),
        shuffle=True,
        verbose=1,
        callbacks=[learning_rate_reduction, tensorBoard]
        # callbacks=[tensorBoard]
    )
    tfHelper.save_model(model, "model")
    return model
def combined_model(data):
    '''
    uses ARIMA and regression
    '''
    cols = list(data.columns.values)
    a, b = data.shape
    Y = data.pop("Global_active_power")
    X = data
    model = LinearRegression()
    data1 = data['Global_reactive_power'].values
    data2 = data['Voltage'].values
    data3 = data['Global_intensity'].values
    data4 = data['Sub_metering_1'].values
    data5 = data['Sub_metering_2'].values
    data6 = data['Sub_metering_3'].values
    look_back = 1440
    full_forecast = pd.DataFrame()
    indi_forecast = []
    dataframes = [data1, data2, data3, data4, data5, data6]
    i = 0
    try:
        for num in dataframes:
            data = num
            print(data)
            fore = pd.DataFrame()
            for num in range(0, 100):
                try:
                    print('arima')
                    # use a separate name so the outer LinearRegression is not overwritten
                    arima_model = ARIMA(data, order=(2, 1, 2))
                    model_fit = arima_model.fit(disp=0)
                    print('forecast')
                    output = model_fit.forecast()
                    yhat = output[0]
                    indi_forecast.append(yhat)
                except:
                    break
            i = i + 1
            fore = pd.DataFrame(indi_forecast)
            full_forecast = pd.concat([full_forecast, fore], axis=1)
        full_forecast = full_forecast.dropna()
        model.fit(X, Y)
        z = model.predict(full_forecast)
        print('mse', mse(data[1:], z))
    except:
        print('unable to predict long sequence')
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)

    json_cfg = load_model_json(args, x_train=None, n_classes=None)
    config = ModelConfig(**json_cfg)

    sys.path.append(args.model_dir)
    import model
    from model import fit
    model.fit(config)
def simple_models(data):
    '''
    uses ARIMA only on 'Global_active_power'
    '''
    data = data['Global_active_power']
    data = scale(data.values)

    # rolling mean and EWMA (legacy pandas API)
    ma = pd.rolling_mean(data, 12)
    ew_avg = pd.ewma(data, halflife=12)

    # ARIMA
    model = ARIMA(data, order=(2, 1, 2))
    result = model.fit(disp=-1)
    plt.plot(data)
    plt.plot(result.fittedvalues, color='red')
    plt.show()

    result = result.predict()
    print('mean squared error', mse(data[1:], result))
    print('rolling mean:', ma[10:20])
    print('exponential weighted moving average', ew_avg[10:20])
    print('ARIMA', result[10:20])

    pd.DataFrame(ma, columns=['Forecast']).to_csv('../Output/rolling_mean', index=False)
    pd.DataFrame(ew_avg, columns=['Forecast']).to_csv(
        '../Output/exponential_weighted_moving_average', index=False)
    pd.DataFrame(result, columns=['Forecast']).to_csv('../Output/ARIMA', index=False)
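# The rolling-mean / EWMA calls above use pandas functions that were removed
# long ago; a self-contained sketch of the modern equivalents, using a toy
# series as a stand-in for the scaled data above:
import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(100))      # stand-in for the scaled series
ma = s.rolling(window=12).mean()         # replaces pd.rolling_mean(data, 12)
ew_avg = s.ewm(halflife=12).mean()       # replaces pd.ewma(data, halflife=12)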
def rabndomLine():
    seed(1)
    alpha = 0.003
    numOfSteps = 1000
    numOfLoops = 100
    noise = 20
    noOfFeatures = 2
    model = LinearRegression()
    x = []
    # generating the values for the variables given in the assignment
    x1 = np.random.uniform(low=0, high=1, size=(5000, ))
    x2 = np.random.uniform(low=0, high=1, size=(5000, ))
    # generating the values for the coefficients given in the assignment
    a = float(np.random.uniform(low=-100, high=100, size=(1, )))
    b = float(np.random.uniform(low=-100, high=100, size=(1, )))
    c = float(np.random.uniform(low=-20, high=20, size=(1, )))
    # generating the value for the noise given in the assignment
    delta = np.random.uniform(low=-100, high=100, size=(5000, ))
    # calculating the label
    y = [(a * x1[i] + b * x2[i] + c + delta[i]) for i in range(len(x1))]
    # array of features as (x1, x2)
    features = list(zip(x1, x2))
    for i in range(len(y)):
        model.addSample(features[i], y[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ", cost = ",
              "{0:.4f}".format(cost))
def stateless_fit(model, X, y, Xtest, ytest, params):
    """
    Train the model passed as the 1st argument and return it.
    X and y training values are passed. The parameters dictionary is also necessary.
    The LSTM state is reset after every epoch.
    """
    for i in range(params['lstm_num_epochs']):
        model.fit(
            X, y,
            epochs=1,
            validation_data=(Xtest, ytest),
            verbose=params['keras_verbose_level'],
            shuffle=False,
            batch_size=params['lstm_batch_size'])
        model.reset_states()
    return model
def setPlane():
    alpha = 0.000000003
    numOfSteps = 1000
    numOfLoops = 10
    model = LinearRegression()
    noOfFeatures = 2
    x = list(range(0, 1000))
    # calculating the points ((x, 2x), 5x) and ((2x, x), 4x)
    x1, y1 = x, [i * 2 for i in x]
    x2, y2 = [i * 2 for i in x], x
    x = x1 + x2
    y = y1 + y2
    # calculating z = x + 2y
    z = [(x[i] + 2 * y[i]) for i in range(len(x))]
    # array of features as (x1, x2)
    features = list(zip(x, y))
    for i in range(len(z)):
        model.addSample(features[i], z[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ", cost = ",
              "{0:.4f}".format(cost))
def SVM(X, y, X_ind, y_ind, is_reg=False):
    """Cross-validation and independent-set test for a Support Vector Machine (SVM).

    Arguments:
        X (ndarray): Feature data of the training and validation set for cross-validation.
            m x n matrix, where m is the No. of samples and n is the No. of features.
        y (ndarray): Label data of the training and validation set for cross-validation.
            m-D vector, where m is the No. of samples.
        X_ind (ndarray): Feature data of the independent test set. Same structure as X.
        y_ind (ndarray): Label data of the independent test set. Same structure as y.
        is_reg (bool, optional): Build the model for regression (True) or
            classification (False). (Default: False)

    Returns:
        cvs (ndarray): Cross-validation results with shape (m,), where m is the No. of samples.
        inds (ndarray): Independent test results. Same structure as cvs.
    """
    if is_reg:
        folds = KFold(5).split(X)
        model = SVR()
    else:
        folds = StratifiedKFold(5).split(X, y)
        model = SVC(probability=True)
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    gs = GridSearchCV(model, {
        'C': 2.0**np.array([-5, 15]),
        'gamma': 2.0**np.array([-15, 5])
    }, n_jobs=5)
    gs.fit(X, y)
    params = gs.best_params_
    print(params)
    for i, (trained, valided) in enumerate(folds):
        # rebuild the estimator with the tuned hyper-parameters; use SVR for
        # regression instead of always falling back to SVC
        if is_reg:
            model = SVR(C=params['C'], gamma=params['gamma'])
        else:
            model = SVC(probability=True, C=params['C'], gamma=params['gamma'])
        model.fit(X[trained], y[trained])
        if is_reg:
            cvs[valided] = model.predict(X[valided])
            inds += model.predict(X_ind)
        else:
            cvs[valided] = model.predict_proba(X[valided])[:, 1]
            inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
def standard():
    payload = request.json
    if payload is None:
        return "invalid payload", 400
    user_request = codec.Request.fromdict(payload)
    fitted_model = model.fit(user_request)
    return jsonify(response(user_request, fitted_model).todict())
def setLine():
    alpha = 0.000000003
    numOfSteps = 100
    numOfLoops = 50
    model = LinearRegression()
    # feature
    x = list(range(0, 1000))
    # label: y = x
    y = x
    for i in x:
        y = i
        model.addSample(i, y)
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        model.fit(alpha, numOfSteps)
    plt.show()
def main(train_track, prediction_track, instruments_num, test_track=None):
    """
    :param train_track: path to an audio file with a known number of instruments in the composition
    :param prediction_track: path to the audio file to run prediction on
    :param instruments_num: number of instruments (centroids)
    :param test_track: path to an audio file with consistent instruments to test prediction
    """
    _, train_mfccs = audio_to_mfcc(train_track)
    labels = tag_frames(train_mfccs, instruments_num)
    # model = build_model(64, train_mfccs.shape[1], labels.shape[1])
    model = build_cnn_model()  # train_mfccs.shape[1]
    fit(model, train_mfccs, labels)

    if test_track:
        _, test_mfccs = audio_to_mfcc(test_track)
        print(model.evaluate(test_mfccs, labels, batch_size=32))

    duration, predict_mfccs = audio_to_mfcc(prediction_track)
    prediction = model.predict(predict_mfccs, batch_size=32)
    consistent_samples = make_consistent_samples(prediction, duration, 0.2)
    print(consistent_samples)
def train_model(X_train, X_test, y_train, y_test, model):
    X_train = X_train.reshape(X_train.shape[0], 300, 300, 3)
    X_test = X_test.reshape(X_test.shape[0], 300, 300, 3)

    print("X_train.shape=", X_train.shape)
    print("y_train.shape", y_train.shape)
    print("X_test.shape=", X_test.shape)
    print("y_test.shape", y_test.shape)
    # print(y_train[0])

    '''
    softmax layer -> output = 10 nodes, each representing one of the digits 0-9.
    For this, the y values are converted to a one-hot encoding:
    0: 1,0,0,0,0,0,0,0,0,0
    1: 0,1,0,0,0,0,0,0,0,0
    ...
    5: 0,0,0,0,0,1,0,0,0,0
    '''
    # reformat via one-hot encoding
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    # print(y_train[0])

    # categorical_crossentropy is used for multi-class classification;
    # metrics sets the reported output metric
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # batch_size: each gradient-descent step is computed from batch_size samples
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=16, epochs=30, verbose=1)

    plot_loss_curve(history.history)
    # print(history.history)
    print("train loss=", history.history['loss'][-1])
    print("validation loss=", history.history['val_loss'][-1])

    # save the model to a file (built into Keras)
    model.save('model-201611263.model')

    history_df = pd.DataFrame(history.history)
    with open("history_data.csv", mode='w') as file:
        history_df.to_csv(file)

    return model
def run(train_data, valid_data, len_size, scale, EPOCHS, root_path='./',
        load_model_dir=None, saved_model_dir=None, log_dir=None, summary=False):
    if log_dir is None:
        log_dir = os.path.join(root_path, 'our_model', 'logs', 'model')
    logging.info(train_data)
    logging.info(valid_data)

    # get generator model and discriminator model
    Gen = model.make_generator_model(len_high_size=len_size, scale=scale)
    Dis = model.make_discriminator_model(len_high_size=len_size, scale=scale)

    if load_model_dir is not None:
        # load_model_dir = os.path.join(root_path, 'our_model', 'saved_model')
        file_path = os.path.join(load_model_dir, 'gen_model_' + str(len_size), 'gen_weights')
        if os.path.exists(file_path):
            Gen.load_weights(file_path)
        else:
            logging.info("generator doesn't exist. create a new one.")
        file_path = os.path.join(load_model_dir, 'dis_model_' + str(len_size), 'dis_weights')
        if os.path.exists(file_path):
            Dis.load_weights(file_path)
        else:
            logging.info("discriminator model doesn't exist. create a new one")

    if summary:
        logging.info(Gen.summary())
        tf.keras.utils.plot_model(Gen, to_file='G.png', show_shapes=True)
        logging.info(Dis.summary())
        tf.keras.utils.plot_model(Dis, to_file='D.png', show_shapes=True)

    if saved_model_dir is None:
        saved_model_dir = os.path.join(root_path, 'our_model', 'saved_model')

    model.fit(Gen, Dis, train_data, EPOCHS, len_size, scale, valid_data,
              log_dir=log_dir, saved_model_dir=saved_model_dir)

    file_path = os.path.join(saved_model_dir, 'gen_model_' + str(len_size), 'gen_weights')
    Gen.save_weights(file_path)
    file_path = os.path.join(saved_model_dir, 'dis_model_' + str(len_size), 'dis_weights')
    Dis.save_weights(file_path)
def _test_train(self):
    d = model.MyDriver()
    (X_train, y_train) = d.load_example_training_data(examples=10, width=8, height=8)
    # d.load_traing_data("../data/driving_log.csv")
    model = d.simple_network(8, 8)
    model.fit(X_train, y_train, validation_split=0.0, shuffle=True,
              nb_epoch=100, verbose=1)

    if 0:
        model.save('test_train.md5')
        model = keras.models.load_model('test_train.md5')

    y = model.predict(X_train)
    y_index = np.argmax(y, 1)
    print(y_index)
    for i in range(0, 10):
        assert (y_index[i] == i)
    return
def train_model(model, X_train, y_train, name, config):
    model.compile(loss="mse", optimizer="adadelta", metrics=['mape'])
    # early = EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto')
    hist = model.fit(X_train, y_train,
                     batch_size=config["batch"],
                     epochs=config["epochs"],
                     validation_split=0.1)  # training
    model.save('models/' + name + '.h5')
    # df = pd.DataFrame.from_dict(hist.history)
    # df.to_csv('models/' + name + ' loss.csv', encoding='utf-8', index=False)
    return model
def train_model(model, x_train, y_train, out_dir, validation_data, n_epochs, batch_size,
                learning_rate, loss="binary_crossentropy", early_stopping=True,
                save_checkpoint=True, verbose=1, ckpt_name_prefix=""):
    print("Model summary:")
    print(model.model.summary())

    callbacks = []
    if save_checkpoint:
        # Save the model at every epoch; 'val_loss' is the monitored quantity.
        # If save_best_only=True, the model with the best monitored quantity is not overwritten.
        # If save_weights_only=True, only the weights are saved via model.save_weights.
        checkpoint = ModelCheckpoint(
            os.path.join(out_dir, ckpt_name_prefix + ".{epoch:02d}-{val_loss:.3f}.hdf5"),
            verbose=verbose, monitor='val_loss',
            save_weights_only=True, save_best_only=True)
        callbacks.append(checkpoint)

    if early_stopping:
        # Training stops when the monitored quantity (val_loss) stops improving.
        # patience is the number of epochs with no improvement after which training is stopped.
        stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=6,
                                 verbose=verbose, mode='auto')
        callbacks.append(stopping)

    # note: despite the original variable name ("adam"), the optimizer is Adagrad
    optimizer = Adagrad(lr=learning_rate, epsilon=1e-08, decay=0.0, clipnorm=1.)
    model.compile(metrics=[], optimizer=optimizer, loss=loss)

    print("Training of model '%s' started." % model.model_name)
    start_time = time.time()
    history = model.fit(x_train, y_train,
                        validation_data=validation_data,
                        n_epochs=n_epochs,
                        batch_size=batch_size,
                        callbacks=callbacks,
                        verbose=verbose)
    print("Training of model '%s' finished in %s." %
          (model.model_name,
           time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
    return history
def fit(model, X, y, Xtest, ytest, params):
    """
    Train the model passed as the 1st argument and return the Keras training history.
    X and y training values are passed. The parameters dictionary is also necessary.
    """
    train_loss = model.fit(
        X, y,
        validation_data=(Xtest, ytest),
        verbose=params['keras_verbose_level'],
        shuffle=params['lstm_shuffle'],
        batch_size=params['lstm_batch_size'],
        epochs=params['lstm_num_epochs'])
    return train_loss
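# A hypothetical params dictionary for fit() / stateless_fit() above: the key
# names are the ones the two helpers read, the values are illustrative only.
example_params = {
    'keras_verbose_level': 1,   # passed to Keras `verbose`
    'lstm_shuffle': False,      # shuffling between epochs (fit() only)
    'lstm_batch_size': 32,
    'lstm_num_epochs': 50,
}
# history = fit(model, X, y, Xtest, ytest, example_params)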
def test():
    # some data
    X_train, X_test, y_train, y_test, index_train, index_test = dutil.load_titanic()
    X_train = X_train.astype(numpy.float64)
    y_train = y_train.reshape(1, y_train.shape[0])[0].astype(numpy.int32)
    X_test = X_test.astype(numpy.float64)
    y_test = y_test.reshape(1, y_test.shape[0])[0].astype(numpy.int32)

    # train
    model = Classification()
    model.add(dense.DenseLayer(7, 20, name="hiddenLayer"))
    model.add(dense.DenseLayer(20, 2, name="outputLayer", W_init=defa,
                               activation=softmax, learning_rate=0.001))
    model.fit(X_train, X_test, y_train, y_test)
def RF(X, y, X_ind, y_ind, is_reg=False):
    """Cross-validation and independent-set test for a Random Forest model.

    Arguments:
        X (ndarray): Feature data of the training and validation set for cross-validation.
            m x n matrix, where m is the No. of samples and n is the No. of features.
        y (ndarray): Label data of the training and validation set for cross-validation.
            m-D vector, where m is the No. of samples.
        X_ind (ndarray): Feature data of the independent test set. Same structure as X.
        y_ind (ndarray): Label data of the independent test set. Same structure as y.
        is_reg (bool, optional): Build the model for regression (True) or
            classification (False). (Default: False)

    Returns:
        cvs (ndarray): Cross-validation results with shape (m,), where m is the No. of samples.
        inds (ndarray): Independent test results. Same structure as cvs.
    """
    if is_reg:
        folds = KFold(5).split(X)
        alg = RandomForestRegressor
    else:
        folds = StratifiedKFold(5).split(X, y)
        alg = RandomForestClassifier
    cvs = np.zeros(y.shape)
    inds = np.zeros(y_ind.shape)
    for i, (trained, valided) in enumerate(folds):
        model = alg(n_estimators=500, n_jobs=1)
        model.fit(X[trained], y[trained])
        if is_reg:
            cvs[valided] = model.predict(X[valided])
            inds += model.predict(X_ind)
        else:
            cvs[valided] = model.predict_proba(X[valided])[:, 1]
            inds += model.predict_proba(X_ind)[:, 1]
    return cvs, inds / 5
def randomDimension(noOfFeatures):
    seed(1)
    alpha = 0.00003
    numOfSteps = 1000
    numOfLoops = 100
    noise = 20
    model = LinearRegression()
    examples = 5000
    # array of coefficients
    t = []
    # array of all features of each of the 5000 examples
    x = []
    # array of r - label
    r = []
    # array of the product of coefficient and x for each dimension
    tx = []
    # array of features as (x1, x2, ..., xn)
    features = []
    # generating a random variable for the coefficient t0 in the assignment
    t0 = float(np.random.uniform(low=-100, high=100, size=(1, )))
    for i in range(1, noOfFeatures + 1):
        # generating a random variable for each coefficient t in the assignment
        a = float(np.random.uniform(low=-100, high=100, size=(1, )))
        t.append(a)
        # generating a random variable for each feature x in the assignment
        x1 = np.random.uniform(low=0, high=1, size=(5000, ))
        x.append(list(x1))
        # product of coefficient and x
        p = list(a * x1)
        tx.append(p)
    for j in range(examples):
        # summation of the products of coefficient and x for each sample
        z = sum(i[j] for i in tx)
        # adding noise and t0 to the summation to get the label r
        y = z + t0 + noise
        r.append(y)
        g = [i[j] for i in x]
        features.append(g)
    for i in range(len(r)):
        model.addSample(features[i], r[i])
    Samples = model.getSamples(noOfFeatures)
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ", cost = ",
              "{0:.4f}".format(cost))
def main():
    model = M.Network()

    training_msgs = np.array([[0, 0, 0], [1, 1, 1], [0, 1, 0], [1, 0, 1],
                              [1, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 0]])
    X = one_hot_encoding(training_msgs.copy())
    y = encode_seqs(training_msgs.copy())

    validation_msgs = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                [1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0],
                                [0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
                                [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]])
    Xv = one_hot_encoding(validation_msgs.copy())
    yv = encode_seqs(validation_msgs.copy())

    history = M.fit(model, X, y, validation_data=(Xv, yv), epochs=300)
def run_holts(train, validate, target_variable, exponential,
              smoothing_level=.1, smoothing_slope=.1):
    # Create model object
    model = Holt(train[target_variable], exponential=exponential)

    # Fit model
    model = model.fit(smoothing_level=smoothing_level,
                      smoothing_slope=smoothing_slope,
                      optimized=False)

    # Create predictions
    y_pred = model.predict(start=validate.index[0], end=validate.index[-1])

    return model, y_pred
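# A minimal usage sketch for run_holts(), assuming `Holt` comes from
# statsmodels.tsa.holtwinters (an older release in which fit() still accepts
# `smoothing_slope`). The toy DataFrame and the "sales" column are illustrative.
import pandas as pd

idx = pd.date_range("2020-01-01", periods=12, freq="D")
df = pd.DataFrame({"sales": range(10, 22)}, index=idx)
train_df, validate_df = df.iloc[:9], df.iloc[9:]

holt_model, y_pred = run_holts(train_df, validate_df, "sales", exponential=False)
print(y_pred)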
def setLine():
    alpha = 0.000000003
    numOfSteps = 100
    numOfLoops = 50
    model = LinearRegression()
    # feature
    x = list(range(0, 1000))
    # label: y = x
    y = x
    for i in x:
        y = i
        model.addSample(i, y)
    Samples = model.getSamples()
    Labels = model.getValues()
    for i in range(numOfLoops + 1):
        current_Hypothesis, cost = model.fit(alpha, numOfSteps)
        print("Current hypothesis: ", current_Hypothesis, ", cost = ",
              "{0:.4f}".format(cost))
def sydney():
    payload = request.json
    if payload is None:
        return "invalid payload", 400

    # with open("/tmp/request.json", "w") as file:
    #     json.dump(payload, file)

    user_request = codec.Request.fromdict(payload)

    # TODO: merge this once we have enough Loop data.
    with open("sydney2019-11-20.json") as file:
        canned_payload = json.load(file)
    canned_request = codec.Request.fromdict(canned_payload)
    user_request.timeseries = canned_request.timeseries

    fitted_model = model.fit(user_request)
    return jsonify(response(user_request, fitted_model).todict())
def exp(regressor, pca_n_components=None, **regressor_param):
    df = preprocess()
    df = df.iloc[:, 1:]
    # print(df.head())
    sample_num = df.shape[0]

    # leave-one-out cross-validation over the samples
    val_pred = []
    for row in range(sample_num):
        val = df.iloc[[row]]
        train = df.drop([row])
        if pca_n_components is not None:
            train, val = apply_pca(train, val, n_components=pca_n_components)
        model = fit(regressor, train, **regressor_param)
        pred = predict(model, val)
        val_pred.append(pred[0])

    real_label = df.iloc[:, 0].values
    val_pred = np.array(val_pred)
    r2 = pearson_r_square(val_pred, real_label)
    print("pearson r square: {}".format(r2))

    roc_data = calculate_SSA(val_pred, real_label, 5, 2, 10, 0.1)
    auc = np.trapz(roc_data[::-1, 1], roc_data[::-1, 2])
    print("AUC: {}".format(auc))
    plot_corr2(val_pred, real_label, r2, auc, roc_data)
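# apply_pca() is not shown above; the sketch below is one plausible
# implementation, assuming sklearn's PCA and that the first column of each
# frame is the label (as the rest of exp() implies). The real helper may differ.
from sklearn.decomposition import PCA
import pandas as pd

def apply_pca_sketch(train, val, n_components):
    # fit PCA on the training features only, then project both splits
    pca = PCA(n_components=n_components)
    train_feat = pca.fit_transform(train.iloc[:, 1:])
    val_feat = pca.transform(val.iloc[:, 1:])
    train_out = pd.DataFrame(train_feat, index=train.index)
    val_out = pd.DataFrame(val_feat, index=val.index)
    # keep the label as the first column, matching the caller's expectation
    train_out.insert(0, "label", train.iloc[:, 0].values)
    val_out.insert(0, "label", val.iloc[:, 0].values)
    return train_out, val_out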
# Python 2 fragment: option handling and model training on VowpalWabbit-format
# data; the lines below apparently follow an (omitted) getopt error handler.
print str(err)  # will print something like "option -a not recognized"
usage(name)
sys.exit(2)

ranking = False
for o, a in optlist:
    if o == '--ranking':
        ranking = True
    else:
        assert False, "unhandled option"

if len(args) < 3:
    usage(name)
    sys.exit(2)

ml_type = args[0]
ml_param_str = args[1]
model_path = args[2]

print >> sys.stderr, "Reading the data..."
in_data = VowpalWabbitData(ranking=ranking)
(X_all, Y, tags) = in_data.read(sys.stdin)
X = []
for x in X_all:
    X += x

print >> sys.stderr, "Building the model..."
h = HTMLParser.HTMLParser()
params = h.unescape(ml_param_str)
model = model.Model(ml_type, params)
model.fit(X, Y)
model.save(model_path)
# Fragment: build, train, and save an LSTM-based text model.
#                              word_embedding_matrix, NB_FILTER)
# model = model.buildLstmPool(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH)
model = model.LSTM3(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH)
# model = model.BiLSTM(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH)
# model = model.BiLstmPool(nb_words, word_embedding_matrix, MAX_SEQUENCE_LENGTH, POOL_LENGTH)
model.compile(loss='categorical_crossentropy',
              optimizer='adagrad',  # adam
              metrics=['accuracy'])
model.summary()  # print a summary of the model

callbacks = [ModelCheckpoint(MODEL_WEIGHTS_FILE, monitor='val_acc', save_best_only=True)]
t0 = time.time()
history = model.fit(X_train, train_label,
                    batch_size=BATCH_SIZE,
                    verbose=1,
                    validation_split=VALIDATION_SPLIT,  # (X_test, test_label)
                    callbacks=callbacks,
                    nb_epoch=NB_EPOCHS)
t1 = time.time()
print("Minutes elapsed: %f" % ((t1 - t0) / 60.))

# save the model and its weights to the given path
model.save(model_path)
# load the weights back into the current model
# model = load_model(model_path)

# Print the best validation accuracy and the epoch at which it occurred
max_val_acc, idx = max((val, idx) for (idx, val) in enumerate(history.history['val_acc']))
print('Maximum accuracy at epoch', '{:d}'.format(idx + 1), '=', '{:.4f}'.format(max_val_acc))

# plot the result
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file, args.data_name, args.target_name)
    x_validation, y_validation = load_model_data(
        args.validation_file, args.data_name, args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train) + 1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

    if len(class_weight) == 0:
        n_samples = len(y_train)
        print('n_samples', n_samples)
        print('classes', range(n_classes))
        print('weights', n_samples / (n_classes * np.bincount(y_train)))
        class_weight = dict(zip(range(n_classes),
                                n_samples / (n_classes * np.bincount(y_train))))
    print('class_weight', class_weight)

    logging.debug("n_classes {0} min {1} max {2}".format(
        n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)
    logging.debug("min vocab index {0} max vocab index {1}".format(
        min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    #######################################################################
    # Subsetting
    #######################################################################
    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
            return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(
        subsetter, args.train_file,
        x_train, y_train, y_train_one_hot,
        n=args.n_train)
    x_validation, y_validation, y_validation_one_hot = take_subset(
        subsetter, args.validation_file,
        x_validation, y_validation, y_validation_one_hot,
        n=args.n_validation)

    #######################################################################
    # Preprocessing
    #######################################################################
    if args.preprocessing_class:
        preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed)
    else:
        preprocessor = modeling.preprocess.NullPreprocessor()

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    logging.info("model_cfg " + str(model_cfg))
    model = build_model(model_cfg)
    setattr(model, 'stop_training', False)

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(model)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)

    if not args.no_save:
        if args.save_all_checkpoints:
            filepath = model_path + '/model-{epoch:04d}.h5'
        else:
            filepath = model_path + '/model.h5'
        callbacks.append(ModelCheckpoint(
            filepath=filepath,
            verbose=1,
            save_best_only=not args.save_every_epoch))

    callback_logger = logging.info if args.log else callable_print

    if args.n_epochs < sys.maxsize:
        # Number of epochs overrides patience.  If the number of epochs
        # is specified on the command line, the model is trained for
        # exactly that number; otherwise, the model is trained with
        # early stopping using the patience specified in the model
        # configuration.
        callbacks.append(EarlyStopping(
            monitor='val_loss', patience=model_cfg.patience, verbose=1))

    if args.classification_report:
        cr = ClassificationReport(x_validation, y_validation,
                                  callback_logger,
                                  target_names=target_names)
        callbacks.append(cr)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                     ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(model)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
            x_train, y_train_one_hot = preprocessor.fit_transform(
                x_train, y_train_one_hot)
            x_validation, y_validation_one_hot = preprocessor.transform(
                x_validation, y_validation_one_hot)

            iteration = batch % len(args.extra_train_file)

            logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format(
                epoch=epoch, iteration=iteration, train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]
            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format(
                epoch=epoch, iteration=iteration, n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(model, keras.models.Graph):
                    data = {
                        'input': x_train[batch_ids],
                        'output': y_train_one_hot[batch_ids]
                    }
                    train_loss = model.train_on_batch(data, class_weight=class_weight)
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = model.train_on_batch(
                        x_train[batch_ids], y_train_one_hot[batch_ids],
                        accuracy=True, class_weight=class_weight)

                batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy}

                avg_train_loss = (avg_train_loss * batch_index + train_loss) / (batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy) / (batch_index + 1)

                callbacks.on_batch_end(batch,
                                       logs={'loss': train_loss, 'accuracy': train_accuracy})

            logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format(
                epoch=epoch, iteration=iteration, n_batches=len(batches)))
            logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format(
                epoch=epoch, iteration=iteration,
                loss=avg_train_loss, acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # occur in the same iteration as beginning of an epoch
            # (next if-block), so model.evaluate appears twice here.
            kwargs = {'verbose': 0 if args.log else 1}
            pargs = []
            validation_data = {}
            if isinstance(model, keras.models.Graph):
                validation_data = {
                    'input': x_validation,
                    'output': y_validation_one_hot
                }
                pargs = [validation_data]
            else:
                pargs = [x_validation, y_validation_one_hot]
                kwargs['show_accuracy'] = True

            if (iteration + 1) % args.validation_freq == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(*pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                    epoch=epoch, iteration=iteration,
                    val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(*pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                    epoch=epoch, iteration=iteration,
                    val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if model.stop_training:
                logging.info("epoch {epoch} iteration {iteration} - done training".format(
                    epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train,
                                               args.data_name, args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        x_train, y_train_one_hot = preprocessor.fit_transform(
            x_train, y_train_one_hot)
        x_validation, y_validation_one_hot = preprocessor.transform(
            x_validation, y_validation_one_hot)

        if isinstance(model, keras.models.Graph):
            data = {
                'input': x_train,
                'output': y_train_one_hot
            }
            validation_data = {
                'input': x_validation,
                'output': y_validation_one_hot
            }
            model.fit(data,
                      shuffle=args.shuffle,
                      nb_epoch=args.n_epochs,
                      batch_size=model_cfg.batch_size,
                      validation_data=validation_data,
                      callbacks=callbacks,
                      class_weight=class_weight,
                      verbose=2 if args.log else 1)
            y_hat = model.predict(validation_data)
            print('val_acc %.04f' % accuracy_score(
                y_validation, np.argmax(y_hat['output'], axis=1)))
        else:
            model.fit(x_train, y_train_one_hot,
                      shuffle=args.shuffle,
                      nb_epoch=args.n_epochs,
                      batch_size=model_cfg.batch_size,
                      show_accuracy=True,
                      validation_data=(x_validation, y_validation_one_hot),
                      callbacks=callbacks,
                      class_weight=class_weight,
                      verbose=2 if args.log else 1)
def main():
    init()
    fit()
    predict(isOffline=ISOFFLINE)