def validate(self, X, Y, k, isLogisticRegression, learningRate):
    """Estimate model accuracy with k-fold cross-validation.

    The rows of ``X``/``Y`` are cut into ``k`` consecutive folds of
    ``rows // k`` rows each. Every fold is used once as the validation set
    while all remaining rows (including any leftover rows after the last
    fold) form the training set.

    Args:
        X: 2-D feature array; rows are samples.
        Y: Targets aligned with the rows of ``X``.
        k: Number of folds; must satisfy ``1 <= k <= number of rows``.
        isLogisticRegression: When truthy, train
            ``MLModel.LogisticRegressionModel``; otherwise train
            ``MLModel.GaussianNaiveBayes``.
        learningRate: Passed to ``fit`` of the logistic regression model;
            ignored for naive Bayes.

    Returns:
        The mean of the per-fold accuracies as a float.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of rows (which would produce empty validation folds).
    """
    row = X.shape[0]
    if k < 1 or k > row:
        raise ValueError(
            "k must be between 1 and the number of rows (%d), got %r"
            % (row, k))

    numOfSection = row // k
    totalAcc = 0.0
    for a in range(k):
        start = a * numOfSection
        stop = start + numOfSection

        # Slice ends are exclusive, so [start:stop] is exactly the fold and
        # [:start] / [stop:] are exactly the rest. The previous "- 1" on the
        # slice ends silently dropped one row per boundary.
        subX = X[start:stop]
        subY = Y[start:stop]
        Xconc = np.concatenate((X[:start], X[stop:]))
        Yconc = np.concatenate((Y[:start], Y[stop:]))

        if isLogisticRegression:
            print("Start training using logistic regression")
            model = MLModel.LogisticRegressionModel(Xconc, Yconc, subX, subY)
            model.fit(learningRate)
        else:
            print("Start training using gaussian naive bayes")
            model = MLModel.GaussianNaiveBayes(Xconc, Yconc, subX, subY)
            model.fit()

        targetY = model.predict(subX)
        totalAcc += model.evaluate_acc(subY, targetY)

    return totalAcc / float(k)
def getTimeOfEvaluation(self, X, Y):
    """Plot logistic-regression training time against the learning rate.

    Trains ``MLModel.LogisticRegressionModel`` eight times on ``(X, Y)``
    (the same data is passed as both training and validation set), using
    the learning rate ``0.1 ** (7 - i)`` for ``i`` in ``0..7``, measures the
    wall-clock time of each run and shows the result as a matplotlib plot.

    Args:
        X: Feature data handed to the model constructor.
        Y: Target data handed to the model constructor.
    """
    exponents = list(range(8))
    durations = []
    for step in exponents:
        began = time.time()
        print("Start training using logistic regression")
        classifier = MLModel.LogisticRegressionModel(X, Y, X, Y)
        classifier.fit(0.1**(7 - step))
        # Model construction and fitting are both part of the measurement.
        durations.append(time.time() - began)

    plt.plot(exponents, durations, c='r', label='logistic regression')
    plt.xlabel("i such that learningRate = 0.1^(7-i)")
    plt.ylabel("RunningTime(second)")
    plt.title("Performance of LogisticRegression based on learningRate")
    plt.show()
def test_model_weights():
    """Run the latest saved checkpoint on the test set and store the result.

    Loads ``q_test`` and ``qphys_test`` from ``train_test_data.npz``,
    rebuilds the model via ``mlm.get_model()``, restores the weights of the
    most recent checkpoint in ``chkpts_keras/``, predicts ``qphys`` from
    ``q_test`` and writes the de-normalised prediction, the de-normalised
    truth and the normalised truth to ``qqphys_predict_wts.npz`` in the
    current working directory.

    Raises:
        FileNotFoundError: If the checkpoint directory holds no checkpoint.
    """
    train_test_datadir = "/project/spice/radiation/ML/CRM/data/models/"
    # The context manager closes the archive's file handle; indexing an
    # NpzFile returns fully loaded arrays, so they remain usable afterwards.
    with np.load(train_test_datadir + 'train_test_data.npz') as dataset:
        q_test = dataset['q_test']
        qphys_test = dataset['qphys_test']

    checkpoint_dir = '/project/spice/radiation/ML/CRM/data/models/chkpts_keras/'
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is None:
        # Fail with a clear message instead of an obscure error from
        # load_weights(None).
        raise FileNotFoundError("No checkpoint found in " + checkpoint_dir)

    model = mlm.get_model()
    loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()
    # NOTE(review): Accuracy compares predictions for exact equality, which
    # looks uninformative for an MSE regression - confirm it is wanted.
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[tf.keras.metrics.Accuracy()])
    model.load_weights(latest)

    # The min-max normaliser is used to map model space back to data space.
    qphys_normaliser = joblib.load(
        '/project/spice/radiation/ML/CRM/data/models/normaliser/minmax_qphys.joblib'
    )
    qphys_predict = model.predict(q_test)
    qphys_predict_denorm = qphys_normaliser.inverse_transform(qphys_predict)
    qphys_test_denorm = qphys_normaliser.inverse_transform(qphys_test)
    np.savez('qqphys_predict_wts',
             qphys_predict=qphys_predict_denorm,
             qphys_test=qphys_test_denorm,
             qphys_test_norm=qphys_test)
def create_and_train(n_epoch=20, batch=100):
    """Train the model from ``mlm.get_model()`` on ``q`` -> ``qphys`` and save it.

    Reads the training and test arrays from ``train_test_data.npz``, fits
    the model with an MSE loss and the Adam optimizer, then writes three
    artefacts named after the epoch count: the full model (``.h5``), the
    weights (under ``chkpts_keras/``) and the pickled training history
    (``.history``).

    Args:
        n_epoch: Number of training epochs; also part of the output file
            names. Defaults to 20.
        batch: Mini-batch size. Defaults to 100.
    """
    model_dir = "/project/spice/radiation/ML/CRM/data/models/"
    # The context manager guarantees the archive is closed even if training
    # fails; the arrays are fully loaded on access and stay valid afterwards.
    with np.load(model_dir + 'train_test_data.npz') as dataset:
        q_train = dataset['q_train']
        qphys_train = dataset['qphys_train']
        qphys_test = dataset['qphys_test']
        q_test = dataset['q_test']

    model = mlm.get_model()
    loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()
    # NOTE(review): Accuracy compares predictions for exact equality, which
    # looks uninformative for an MSE regression - confirm it is wanted.
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[tf.keras.metrics.Accuracy()])

    history = model.fit(q_train,
                        qphys_train,
                        epochs=n_epoch,
                        batch_size=batch,
                        validation_data=(q_test, qphys_test))

    model_fname = "model_q_epoch_{0}".format(n_epoch)
    model.save(model_dir + model_fname + '.h5')
    model.save_weights(model_dir + 'chkpts_keras/' + model_fname)

    pickle_file = model_dir + model_fname + '.history'
    # Close the history file deterministically instead of leaking the handle.
    with open(pickle_file, 'wb') as history_file:
        pickle.dump(history.history, history_file)
X_2.append(np.concatenate((sample_spectrum, hit_spectrum))) Y_2.append(gt) indicies = [] for i in range(len(X_2)): indicies.append(i) X_train_2, X_test_2, y_train_2, y_test_2, indicies_train2, indicies_test2 = train_test_split( X_2, Y_2, indicies, test_size=0.20, random_state=100) for i in X_train_2: X.append(i) for i in y_train_2: Y.append(i) dnn = ml.DNNModel(args['-s']) X = np.array(X) Y = np.array(Y) i_class0 = np.where(Y == 0)[0] i_class2 = np.where(Y == 2)[0] n_class0 = len(i_class0) n_class2 = len(i_class2) if n_class0 > n_class2: i_class0_downsampled = np.random.choice(i_class0, size=n_class2, replace=False) y = np.concatenate((Y[i_class0_downsampled], Y[i_class2])) x_train = np.concatenate( (X[i_class0_downsampled], X[i_class2])) if n_class2 > n_class0:
def post(self):
    """Handle a prediction request and write the model's result.

    Reads the connection parameters and ``model_type`` from the request
    arguments (an empty ``model_type`` falls back to ``"AD_BR"``), reads the
    model-specific file ids, calls the matching ``*_predict`` function and
    writes its return value to the response.

    Supported ``model_type`` values: ``GRU``, ``TEXTCNN``, ``MLKNN``,
    ``AD_BR``, ``AD_CC``, ``AD_LP``. Any exception raised while reading the
    model-specific arguments or predicting is logged with its traceback and
    not re-raised; in that case nothing is written to the response.
    """
    starttime = datetime.datetime.now()

    def _arg(name, default=''):
        # A missing argument and an empty one are treated the same way.
        value = str(self.get_argument(name, ''))
        return default if value == '' else value

    item_id = _arg('item_id')  # project id
    model_id = _arg('model_id')
    ip = _arg('ip')
    up_url = _arg('up_url')
    down_url = _arg('down_url')
    access_url = _arg('access_url')
    access_key = _arg('access_key')
    _init_companyId = _arg('_init_companyId')
    model_type = _arg('model_type')  # model type
    data_id = _arg('data_id')  # data id

    # NOTE(review): access_key is written to the debug log in clear text;
    # confirm this is acceptable or mask it.
    Logger.log_DEBUG.info("==== 识别接口url获取参数打印 ====")
    for label, value in (("item_id", item_id),
                         ("model_id", model_id),
                         ("ip", ip),
                         ("up_url", up_url),
                         ("down_url", down_url),
                         ("access_url", access_url),
                         ("access_key", access_key),
                         ("_init_companyId", _init_companyId),
                         ("model_type", model_type),
                         ("data_id", data_id)):
        Logger.log_DEBUG.info("%s: %s" % (label, value))

    if model_type == "":
        model_type = "AD_BR"

    # Leading arguments shared by every *_predict function.
    common = (ip, up_url, down_url, access_url, access_key, _init_companyId,
              data_id)
    # MLModel predictors that all take (tfidf, mlb, model) file ids.
    ml_predictors = {
        'MLKNN': 'knn_predict',
        'AD_BR': 'br_predict',
        'AD_CC': 'cc_predict',
        'AD_LP': 'lp_predict',
    }

    try:
        if model_type in ('GRU', 'TEXTCNN'):
            mlb_file_id = _arg('mlb_file_id')  # mlb model id
            tokenizer_file_id = _arg('tokenizer_file_id')  # tokenizer model id
            model_file_id = _arg('model_file_id')  # model id
            max_sequence_length = _arg('max_sequence_length',
                                       "5000")  # max number of words
            batch_size = _arg('batch_size', "128")  # batch size
            print("Start %s Predict" % model_type)
            if model_type == 'GRU':
                predictor = GruModel.gru_predict
            else:
                predictor = TextcnnModel.textcnn_predict
            call_args = common + (mlb_file_id, tokenizer_file_id,
                                  model_file_id, max_sequence_length,
                                  batch_size)
            result = predictor(*call_args)
            print("End %s Predict" % model_type)
            self.write(result)
        elif model_type in ml_predictors:
            tfidf_file_id = _arg('tfidf_file_id')
            mlb_file_id = _arg('mlb_file_id')
            model_file_id = _arg('model_file_id')
            print("Start %s Predict" % model_type)
            predictor = getattr(MLModel, ml_predictors[model_type])
            call_args = common + (tfidf_file_id, mlb_file_id, model_file_id)
            result = predictor(*call_args)
            print("End %s Predict" % model_type)
            self.write(result)
        else:
            # Previously an unknown type produced no output and no log entry.
            Logger.log_ERROR.error("Unsupported model_type: %s" % model_type)
    except Exception as e:
        Logger.log_ERROR.error("请检查参数输入是否正确:" + str(e))
        # format_exc() returns the traceback text; print_exc() returns None
        # and therefore always logged "None" here.
        Logger.log_ERROR.error("错误详细信息:%s" % traceback.format_exc())

    endtime = datetime.datetime.now()
    time_diff = endtime - starttime
    print('耗时:', time_diff)
    Logger.log_DEBUG.info("==== use time: %s" % str(time_diff))
def post(self):
    """Handle a training request and write the trainer's result.

    Reads the connection parameters, ``model_type`` (an empty value falls
    back to ``"AD_BR"``) and ``train_data_id`` from the request arguments,
    reads the model-specific hyperparameters (empty values are replaced by
    the defaults listed below), calls the matching ``*_train`` function and
    writes its return value to the response. All hyperparameters are passed
    on as strings.

    Supported ``model_type`` values: ``GRU``, ``TEXTCNN``, ``MLKNN``,
    ``AD_BR``, ``AD_CC``, ``AD_LP``. Any exception raised while reading the
    hyperparameters or training is logged with its traceback and not
    re-raised; in that case nothing is written to the response.
    """
    starttime = datetime.datetime.now()

    def _arg(name, default=''):
        # A missing argument and an empty one are treated the same way.
        value = str(self.get_argument(name, ''))
        return default if value == '' else value

    item_id = _arg('item_id')  # project id
    model_id = _arg('model_id')
    ip = _arg('ip')
    up_url = _arg('up_url')
    down_url = _arg('down_url')
    access_url = _arg('access_url')
    access_key = _arg('access_key')
    _init_companyId = _arg('_init_companyId')
    model_type = _arg('model_type')  # model type

    # NOTE(review): access_key is written to the debug log in clear text;
    # confirm this is acceptable or mask it.
    Logger.log_DEBUG.info("==== 训练接口url获取参数打印 ====")
    for label, value in (("item_id", item_id),
                         ("model_id", model_id),
                         ("ip", ip),
                         ("up_url", up_url),
                         ("down_url", down_url),
                         ("access_url", access_url),
                         ("access_key", access_key),
                         ("_init_companyId", _init_companyId),
                         ("model_type", model_type)):
        Logger.log_DEBUG.info("%s: %s" % (label, value))

    if model_type == "":
        model_type = "AD_BR"
    train_data_id = _arg('train_data_id')  # data id
    Logger.log_DEBUG.info("train_data_id: %s" % train_data_id)

    # Leading arguments shared by every *_train function.
    common = (ip, up_url, down_url, access_url, access_key, _init_companyId,
              train_data_id)

    # (request argument, default) pairs, in the order the trainers expect.
    deep_params = (
        ('w2v_size', "300"),  # word-vector dimension
        ('w2v_window', "5"),
        ('w2v_min_count', "1"),
        ('w2v_negative', "5"),
        ('batch_size', "128"),  # batch size
        ('epochs', "40"),  # number of epochs
        ('max_sequence_length', "5000"),  # max number of words
        ('num_filter', "128"),  # number of filters
        ('drop_rate', "0.4"),  # original comment called this the decay rate
    )
    knn_params = (
        ('ngram_num', "3"),
        ('feature_num', "8000"),
        ('ml_k', "50"),
        ('ml_s', "1.0"),
    )
    tree_params = (
        ('ngram_num', "3"),
        ('feature_num', "8000"),
        ('samples_leaf', "1"),
        ('samples_split', "2"),
    )
    # MLModel trainers that all take the tree_params hyperparameters.
    tree_trainers = {
        'AD_BR': 'br_train',
        'AD_CC': 'cc_train',
        'AD_LP': 'lp_train',
    }

    try:
        if model_type in ('GRU', 'TEXTCNN'):
            values = tuple(_arg(name, default)
                           for name, default in deep_params)
            print("Start %s Training" % model_type)
            if model_type == 'GRU':
                trainer = GruModel.gru_train
            else:
                trainer = TextcnnModel.textcnn_train
            call_args = common + values
            result = trainer(*call_args)
            print("End %s Training" % model_type)
            self.write(result)
        elif model_type == 'MLKNN':
            values = tuple(_arg(name, default)
                           for name, default in knn_params)
            print("Start %s Training" % model_type)
            call_args = common + values
            result = MLModel.knn_train(*call_args)
            print("End %s Training" % model_type)
            self.write(result)
        elif model_type in tree_trainers:
            values = tuple(_arg(name, default)
                           for name, default in tree_params)
            if model_type == 'AD_BR':
                # Only AD_BR logged its resolved hyperparameters originally;
                # that behaviour is preserved.
                for (name, _default), value in zip(tree_params, values):
                    Logger.log_DEBUG.info("%s: %s" % (name, value))
            print("Start %s Training" % model_type)
            trainer = getattr(MLModel, tree_trainers[model_type])
            call_args = common + values
            result = trainer(*call_args)
            print("End %s Training" % model_type)
            self.write(result)
        else:
            # Previously an unknown type produced no output and no log entry.
            Logger.log_ERROR.error("Unsupported model_type: %s" % model_type)
    except Exception as e:
        Logger.log_ERROR.error("请检查参数输入是否正确:" + str(e))
        # format_exc() returns the traceback text; print_exc() returns None
        # and therefore always logged "None" here.
        Logger.log_ERROR.error("错误详细信息:%s" % traceback.format_exc())

    endtime = datetime.datetime.now()
    time_diff = endtime - starttime
    print('耗时:', time_diff)
    Logger.log_DEBUG.info("==== use time: %s" % str(time_diff))