Example #1
 # Performs k-fold cross-validation over X, Y. Assumes numpy is imported as
 # np and MLModel is a project module providing LogisticRegressionModel and
 # GaussianNaiveBayes. Any rows left over when row % k != 0 only ever land
 # in the training folds, never in the held-out fold.
 def validate(self, X, Y, k, isLogisticRegression, learningRate):
     (row, col) = X.shape
     numOfSection = int(row / k)
     totalAcc = 0.0
     for a in range(0, k):
         i = a * numOfSection
         Xbefore = []
         Ybefore = []
         if i == 0:
             Xbefore = X[0:0, 0:col]
             Ybefore = Y[0:0]
         else:
             # Slice ends are exclusive, so no "- 1": X[0:i] covers rows 0..i-1.
             Xbefore = X[0:i, 0:col]
             Ybefore = Y[0:i]
         subX = X[i:i + numOfSection, 0:col]
         subY = Y[i:i + numOfSection]
         Xafter = X[i + numOfSection:row, 0:col]
         Yafter = Y[i + numOfSection:row]
         if i == 0:
             # No rows precede the fold; the training data is simply Xafter.
             Xconc = Xafter
             Yconc = Yafter
         else:
             Xconc = np.concatenate((Xbefore, Xafter))
             Yconc = np.concatenate((Ybefore, Yafter))
         # Train on the remaining folds; the accuracy on the held-out fold
         # can be checked against scikit-learn's implementations.
         model = None
         if isLogisticRegression:
             print("Start training using logistic regression")
             model = MLModel.LogisticRegressionModel(
                 Xconc, Yconc, subX, subY)
             model.fit(learningRate)
         else:
             print("Start training using gaussian naive bayes")
             model = MLModel.GaussianNaiveBayes(Xconc, Yconc, subX, subY)
             model.fit()
         targetY = model.predict(subX)
         acc = model.evaluate_acc(subY, targetY)
         #print("Accuracy is " + str(acc))
         totalAcc = totalAcc + acc
     return (totalAcc / float(k))
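
A minimal usage sketch, assuming the method lives on some wrapper class (here called Experiment, a hypothetical name) and using toy data; everything below is illustrative.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))        # 100 samples, 4 features
Y = (X[:, 0] > 0).astype(int)        # toy binary labels

runner = Experiment()                # hypothetical class holding validate()
mean_acc = runner.validate(X, Y, 5, True, 0.01)  # 5-fold logistic regression
print("5-fold mean accuracy:", mean_acc)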
Example #2
 # Times LogisticRegressionModel training across learning rates 0.1**7 down
 # to 0.1**0. Assumes time, matplotlib.pyplot as plt, and MLModel are
 # imported; note the model is trained and validated on the same X, Y here.
 def getTimeOfEvaluation(self, X, Y):
     plotX = []
     plotY = []
     for i in range(8):
         plotX.append(i)
         startTime = time.time()
         print("Start training using logistic regression")
         model = MLModel.LogisticRegressionModel(X, Y, X, Y)
         model.fit(0.1**(7 - i))
         timeUsed = time.time() - startTime
         plotY.append(timeUsed)
     plt.plot(plotX, plotY, c='r', label='logistic regression')
     plt.xlabel("i such that learningRate = 0.1^(7-i)")
     plt.ylabel("Running time (seconds)")
     plt.title("Performance of LogisticRegression based on learningRate")
     plt.show()
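
For reference, the eight learning rates this sweep covers:

rates = [0.1 ** (7 - i) for i in range(8)]
print(rates)  # ~[1e-07, 1e-06, 1e-05, 1e-04, 1e-03, 0.01, 0.1, 1.0], up to float rounding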
Example #3
# Assumes: numpy as np, tensorflow as tf, joblib, and a project module
# imported as mlm that provides get_model().
def test_model_weights():

    ## dataset = di.driving_data()
    # q_train, q_test, qadv_train, qadv_test, qphys_train, qphys_test, t_train, t_test, tadv_train, tadv_test, tphys_train, tphys_test = dataset[0], dataset[1], dataset[2], dataset[3], dataset[4], dataset[5], dataset[6], dataset[7]
    # Load testing data
    train_test_datadir = "/project/spice/radiation/ML/CRM/data/models/"
    dataset = np.load(train_test_datadir + 'train_test_data.npz')
    qadv_test = dataset['qadv_test']
    q_test = dataset['q_test']
    qphys_test = dataset['qphys_test']

    checkpoint_dir = '/project/spice/radiation/ML/CRM/data/models/chkpts_keras/'
    latest = tf.train.latest_checkpoint(checkpoint_dir)

    # model = tf.keras.Sequential([
    #     keras.layers.Dense(128, activation='relu', input_shape=(70,)),
    #     keras.layers.Dense(256, activation='relu'),
    #     keras.layers.Dense(128, activation='relu'),
    #     keras.layers.Dense(70, activation='tanh')
    #     ])
    model = mlm.get_model()

    loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[tf.keras.metrics.Accuracy()])
    model.load_weights(latest)
    #qphys_normaliser = joblib.load('/project/spice/radiation/ML/CRM/data/models/normaliser/std_qphy.joblib')
    qphys_normaliser = joblib.load(
        '/project/spice/radiation/ML/CRM/data/models/normaliser/minmax_qphys.joblib'
    )
    qphys_predict = model.predict(q_test)
    qphys_predict_denorm = qphys_normaliser.inverse_transform(qphys_predict)
    qphys_test_denorm = qphys_normaliser.inverse_transform(qphys_test)
    np.savez('qqphys_predict_wts',
             qphys_predict=qphys_predict_denorm,
             qphys_test=qphys_test_denorm,
             qphys_test_norm=qphys_test)
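
A quick way to inspect the saved arrays (a sketch; np.savez appends .npz to the name, and the keys match the savez call above):

import numpy as np

data = np.load('qqphys_predict_wts.npz')
err = data['qphys_predict'] - data['qphys_test']
print("MSE (denormalised):", np.mean(err ** 2))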
Example #4
# Assumes: numpy as np, tensorflow as tf, pickle, and a project module
# imported as mlm that provides get_model().
def create_and_train():

    #q_train, q_test, qadv_train, qadv_test, qphys_train, qphys_test, t_train, t_test, tadv_train, tadv_test, tphys_train, tphys_test = dataset[0], dataset[1], dataset[2], dataset[3], dataset[4], dataset[5], dataset[6], dataset[7], dataset[8], dataset[9], dataset[10], dataset[11]
    train_test_datadir = "/project/spice/radiation/ML/CRM/data/models/"
    dataset = np.load(train_test_datadir + 'train_test_data.npz')
    qadv_train = dataset['qadv_train']
    q_train = dataset['q_train']
    qphys_train = dataset['qphys_train']
    qphys_test = dataset['qphys_test']
    q_test = dataset['q_test']
    # model = tf.keras.Sequential([
    #     keras.layers.Dense(128, activation='relu', input_shape=(70,)),
    #     keras.layers.Dense(256, activation='relu'),
    #     keras.layers.Dense(128, activation='relu'),
    #     keras.layers.Dense(70, activation='tanh')
    #     ])
    model = mlm.get_model()
    loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[tf.keras.metrics.Accuracy()])
    batch = 100
    n_epoch = 20
    # history = model.fit(qadv_train, qphys_train,epochs=n_epoch,batch_size=batch, validation_data=(qadv_test,qphys_test))
    history = model.fit(q_train,
                        qphys_train,
                        epochs=n_epoch,
                        batch_size=batch,
                        validation_data=(q_test, qphys_test))
    model_fname = "model_q_epoch_{0}".format(n_epoch)
    model.save('/project/spice/radiation/ML/CRM/data/models/' + model_fname +
               '.h5')
    model.save_weights(
        '/project/spice/radiation/ML/CRM/data/models/chkpts_keras/' +
        model_fname)
    pickle_file = '/project/spice/radiation/ML/CRM/data/models/' + model_fname + '.history'
    # Use a context manager so the history file is closed deterministically.
    with open(pickle_file, 'wb') as f:
        pickle.dump(history.history, f)
    dataset.close()
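
A minimal sketch for plotting the pickled history afterwards, assuming the Keras default history keys 'loss' and 'val_loss' for this compile/fit configuration; the file name follows from n_epoch = 20 above.

import pickle
import matplotlib.pyplot as plt

history_file = '/project/spice/radiation/ML/CRM/data/models/model_q_epoch_20.history'
with open(history_file, 'rb') as f:
    hist = pickle.load(f)

plt.plot(hist['loss'], label='train loss')
plt.plot(hist['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()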
Example #5
                    X_2.append(np.concatenate((sample_spectrum, hit_spectrum)))
                    Y_2.append(gt)

            indicies = list(range(len(X_2)))

            X_train_2, X_test_2, y_train_2, y_test_2, indicies_train2, indicies_test2 = train_test_split(
                X_2, Y_2, indicies, test_size=0.20, random_state=100)

            X.extend(X_train_2)
            Y.extend(y_train_2)

            dnn = ml.DNNModel(args['-s'])

            X = np.array(X)
            Y = np.array(Y)
            i_class0 = np.where(Y == 0)[0]
            i_class2 = np.where(Y == 2)[0]
            n_class0 = len(i_class0)
            n_class2 = len(i_class2)
            if n_class0 > n_class2:
                i_class0_downsampled = np.random.choice(i_class0,
                                                        size=n_class2,
                                                        replace=False)
                y = np.concatenate((Y[i_class0_downsampled], Y[i_class2]))
                x_train = np.concatenate(
                    (X[i_class0_downsampled], X[i_class2]))
            if n_class2 > n_class0:
                # Downsample class 2 to match class 0 (mirrors the branch above).
                i_class2_downsampled = np.random.choice(i_class2,
                                                        size=n_class0,
                                                        replace=False)
                y = np.concatenate((Y[i_class0], Y[i_class2_downsampled]))
                x_train = np.concatenate(
                    (X[i_class0], X[i_class2_downsampled]))
Example #6
    def post(self):
        starttime = datetime.datetime.now()

        item_id = str(self.get_argument('item_id', ''))  # project id
        model_id = str(self.get_argument('model_id', ''))

        ip = str(self.get_argument('ip', ''))
        up_url = str(self.get_argument('up_url', ''))
        down_url = str(self.get_argument('down_url', ''))
        access_url = str(self.get_argument('access_url', ''))
        access_key = str(self.get_argument('access_key', ''))
        _init_companyId = str(self.get_argument('_init_companyId', ''))

        model_type = str(self.get_argument('model_type', ''))  # model type
        data_id = str(self.get_argument('data_id', ''))  # data id

        Logger.log_DEBUG.info("==== 识别接口url获取参数打印 ====")
        Logger.log_DEBUG.info("item_id: %s" % item_id)
        Logger.log_DEBUG.info("model_id: %s" % model_id)

        Logger.log_DEBUG.info("ip: %s" % ip)
        Logger.log_DEBUG.info("up_url: %s" % up_url)
        Logger.log_DEBUG.info("down_url: %s" % down_url)
        Logger.log_DEBUG.info("access_url: %s" % access_url)
        Logger.log_DEBUG.info("access_key: %s" % access_key)
        Logger.log_DEBUG.info("_init_companyId: %s" % _init_companyId)
        Logger.log_DEBUG.info("model_type: %s" % model_type)
        Logger.log_DEBUG.info("data_id: %s" % data_id)

        if model_type == "":
            model_type = "AD_BR"

        try:
            if model_type == 'GRU':
                gru_mlb_file_id = str(self.get_argument('mlb_file_id',
                                                        ''))  # mlb model id
                gru_tokenizer_file_id = str(
                    self.get_argument('tokenizer_file_id',
                                      ''))  # tokenizer model id
                gru_model_file_id = str(self.get_argument('model_file_id',
                                                          ''))  # model id
                max_sequence_length = str(
                    self.get_argument('max_sequence_length', ''))  # max number of words
                batch_size = str(self.get_argument('batch_size',
                                                   ''))  # batch size

                if max_sequence_length == "":
                    max_sequence_length = "5000"
                if batch_size == "":
                    batch_size = "128"

                print("Start GRU Predict")
                gru_result = GruModel.gru_predict(
                    ip, up_url, down_url, access_url, access_key,
                    _init_companyId, data_id, gru_mlb_file_id,
                    gru_tokenizer_file_id, gru_model_file_id,
                    max_sequence_length, batch_size)
                print("End GRU Predict")
                self.write(gru_result)

            elif model_type == 'TEXTCNN':
                textcnn_mlb_file_id = str(self.get_argument('mlb_file_id',
                                                            ''))  # mlb model id
                textcnn_tokenizer_file_id = str(
                    self.get_argument('tokenizer_file_id',
                                      ''))  # tokenizer model id
                textcnn_model_file_id = str(
                    self.get_argument('model_file_id', ''))  # model id
                max_sequence_length = str(
                    self.get_argument('max_sequence_length', ''))  # max number of words
                batch_size = str(self.get_argument('batch_size',
                                                   ''))  # batch size

                if max_sequence_length == "":
                    max_sequence_length = "5000"
                if batch_size == "":
                    batch_size = "128"

                print("Start TEXTCNN Predict")
                textcnn_result = TextcnnModel.textcnn_predict(
                    ip, up_url, down_url, access_url, access_key,
                    _init_companyId, data_id, textcnn_mlb_file_id,
                    textcnn_tokenizer_file_id, textcnn_model_file_id,
                    max_sequence_length, batch_size)
                print("End TEXTCNN Predict")
                self.write(textcnn_result)

            elif model_type == 'MLKNN':
                knn_tfidf_file_id = str(self.get_argument('tfidf_file_id', ''))
                knn_mlb_file_id = str(self.get_argument('mlb_file_id', ''))
                knn_model_file_id = str(self.get_argument('model_file_id', ''))

                print('Start MLKNN Predict')
                ml_result = MLModel.knn_predict(ip, up_url, down_url,
                                                access_url, access_key,
                                                _init_companyId, data_id,
                                                knn_tfidf_file_id,
                                                knn_mlb_file_id,
                                                knn_model_file_id)
                print('End MLKNN Predict')
                self.write(ml_result)

            elif model_type == 'AD_BR':
                br_tfidf_file_id = str(self.get_argument('tfidf_file_id', ''))
                br_mlb_file_id = str(self.get_argument('mlb_file_id', ''))
                br_model_file_id = str(self.get_argument('model_file_id', ''))

                print('Start AD_BR Predict')
                br_result = MLModel.br_predict(ip, up_url, down_url,
                                               access_url, access_key,
                                               _init_companyId, data_id,
                                               br_tfidf_file_id,
                                               br_mlb_file_id,
                                               br_model_file_id)
                print('End AD_BR Predict')
                self.write(br_result)

            elif model_type == 'AD_CC':
                cc_tfidf_file_id = str(self.get_argument('tfidf_file_id', ''))
                cc_mlb_file_id = str(self.get_argument('mlb_file_id', ''))
                cc_model_file_id = str(self.get_argument('model_file_id', ''))

                print('Start AD_CC Predict')
                cc_result = MLModel.cc_predict(ip, up_url, down_url,
                                               access_url, access_key,
                                               _init_companyId, data_id,
                                               cc_tfidf_file_id,
                                               cc_mlb_file_id,
                                               cc_model_file_id)
                print('End AD_CC Predict')
                self.write(cc_result)

            elif model_type == 'AD_LP':
                lp_tfidf_file_id = str(self.get_argument('tfidf_file_id', ''))
                lp_mlb_file_id = str(self.get_argument('mlb_file_id', ''))
                lp_model_file_id = str(self.get_argument('model_file_id', ''))

                print('Start AD_LP Predict')
                lp_result = MLModel.lp_predict(ip, up_url, down_url,
                                               access_url, access_key,
                                               _init_companyId, data_id,
                                               lp_tfidf_file_id,
                                               lp_mlb_file_id,
                                               lp_model_file_id)
                print('End AD_LP Predict')
                self.write(lp_result)

        except Exception as e:
            Logger.log_ERROR.error("Please check the input parameters: " + str(e))
            # format_exc() returns the traceback as a string; print_exc()
            # returns None and would log "None" here.
            Logger.log_ERROR.error("Error details: %s" % traceback.format_exc())

        endtime = datetime.datetime.now()
        time_diff = endtime - starttime
        print('elapsed:', time_diff)
        Logger.log_DEBUG.info("==== use time: %s" % str(time_diff))
Example #7
    def post(self):
        starttime = datetime.datetime.now()

        item_id = str(self.get_argument('item_id', ''))  # project id
        model_id = str(self.get_argument('model_id', ''))

        ip = str(self.get_argument('ip', ''))
        up_url = str(self.get_argument('up_url', ''))
        down_url = str(self.get_argument('down_url', ''))
        access_url = str(self.get_argument('access_url', ''))
        access_key = str(self.get_argument('access_key', ''))
        _init_companyId = str(self.get_argument('_init_companyId', ''))

        model_type = str(self.get_argument('model_type', ''))  # model type

        Logger.log_DEBUG.info("==== 训练接口url获取参数打印 ====")
        Logger.log_DEBUG.info("item_id: %s" % item_id)
        Logger.log_DEBUG.info("model_id: %s" % model_id)

        Logger.log_DEBUG.info("ip: %s" % ip)
        Logger.log_DEBUG.info("up_url: %s" % up_url)
        Logger.log_DEBUG.info("down_url: %s" % down_url)
        Logger.log_DEBUG.info("access_url: %s" % access_url)
        Logger.log_DEBUG.info("access_key: %s" % access_key)
        Logger.log_DEBUG.info("_init_companyId: %s" % _init_companyId)
        Logger.log_DEBUG.info("model_type: %s" % model_type)

        if model_type == "":
            model_type = "AD_BR"
        train_data_id = str(self.get_argument('train_data_id', ''))  # data id

        Logger.log_DEBUG.info("train_data_id: %s" % train_data_id)

        try:
            if model_type == 'GRU':
                w2v_size = str(self.get_argument('w2v_size', ''))  # word-vector dimension
                w2v_window = str(self.get_argument('w2v_window', ''))
                w2v_min_count = str(self.get_argument('w2v_min_count', ''))
                w2v_negative = str(self.get_argument('w2v_negative', ''))
                batch_size = str(self.get_argument('batch_size',
                                                   ''))  # batch size
                epochs = str(self.get_argument('epochs', ''))  # number of epochs
                max_sequence_length = str(
                    self.get_argument('max_sequence_length', ''))  # max number of words
                num_filter = str(self.get_argument('num_filter', ''))  # number of filters
                drop_rate = str(self.get_argument('drop_rate', ''))  # dropout rate

                if w2v_size == "":
                    w2v_size = "300"
                if w2v_window == "":
                    w2v_window = "5"
                if w2v_min_count == "":
                    w2v_min_count = "1"
                if w2v_negative == "":
                    w2v_negative = "5"
                if batch_size == "":
                    batch_size = "128"
                if epochs == "":
                    epochs = "40"
                if max_sequence_length == "":
                    max_sequence_length = "5000"
                if num_filter == "":
                    num_filter = "128"
                if drop_rate == "":
                    drop_rate = "0.4"

                print("Start GRU Training")
                gru_result = GruModel.gru_train(
                    ip, up_url, down_url, access_url, access_key,
                    _init_companyId, train_data_id, w2v_size, w2v_window,
                    w2v_min_count, w2v_negative, batch_size, epochs,
                    max_sequence_length, num_filter, drop_rate)
                print("End GRU Training")
                self.write(gru_result)
                # self.write('mlb_id:', gru_mlb_id)
                # self.write('tokenizer_id:', gru_tokenizer_id)
                # self.write('model_id:', gru_model_id)

            elif model_type == 'TEXTCNN':
                w2v_size = str(self.get_argument('w2v_size', ''))  # word-vector dimension
                w2v_window = str(self.get_argument('w2v_window', ''))
                w2v_min_count = str(self.get_argument('w2v_min_count', ''))
                w2v_negative = str(self.get_argument('w2v_negative', ''))
                batch_size = str(self.get_argument('batch_size',
                                                   ''))  # batch size
                epochs = str(self.get_argument('epochs', ''))  # number of epochs
                max_sequence_length = str(
                    self.get_argument('max_sequence_length', ''))  # max number of words
                num_filter = str(self.get_argument('num_filter', ''))  # number of filters
                drop_rate = str(self.get_argument('drop_rate', ''))  # dropout rate

                if w2v_size == "":
                    w2v_size = "300"
                if w2v_window == "":
                    w2v_window = "5"
                if w2v_min_count == "":
                    w2v_min_count = "1"
                if w2v_negative == "":
                    w2v_negative = "5"
                if batch_size == "":
                    batch_size = "128"
                if epochs == "":
                    epochs = "40"
                if max_sequence_length == "":
                    max_sequence_length = "5000"
                if num_filter == "":
                    num_filter = "128"
                if drop_rate == "":
                    drop_rate = "0.4"

                print("Start TEXTCNN Training")
                textcnn_result = TextcnnModel.textcnn_train(
                    ip, up_url, down_url, access_url, access_key,
                    _init_companyId, train_data_id, w2v_size, w2v_window,
                    w2v_min_count, w2v_negative, batch_size, epochs,
                    max_sequence_length, num_filter, drop_rate)
                print("End TEXTCNN Training")
                self.write(textcnn_result)
                # self.write('mlb_id:', textcnn_mlb_id)
                # self.write('tokenizer_id:', textcnn_tokenizer_id)
                # self.write('model_id:', textcnn_model_id)

            elif model_type == 'MLKNN':
                ngram_num = str(self.get_argument('ngram_num', ''))
                feature_num = str(self.get_argument('feature_num', ''))
                ml_k = str(self.get_argument('ml_k', ''))
                ml_s = str(self.get_argument('ml_s', ''))

                if ngram_num == "":
                    ngram_num = "3"
                if feature_num == "":
                    feature_num = "8000"
                if ml_k == "":
                    ml_k = "50"
                if ml_s == "":
                    ml_s = "1.0"

                print("Start MLKNN Training")
                ml_result = MLModel.knn_train(ip, up_url, down_url, access_url,
                                              access_key, _init_companyId,
                                              train_data_id, ngram_num,
                                              feature_num, ml_k, ml_s)
                print("End MLKNN Training")
                self.write(ml_result)
                # self.write('tfidf_id:', knn_tfidf_id)
                # self.write('mlb_id:', knn_mlb_id)
                # self.write('model_id:', knn_classifier_id)

            elif model_type == 'AD_BR':
                ngram_num = str(self.get_argument('ngram_num', ''))
                feature_num = str(self.get_argument('feature_num', ''))
                samples_leaf = str(self.get_argument('samples_leaf', ''))
                samples_split = str(self.get_argument('samples_split', ''))

                if ngram_num == "":
                    ngram_num = "3"
                if feature_num == "":
                    feature_num = "8000"
                if samples_leaf == "":
                    samples_leaf = "1"
                if samples_split == "":
                    samples_split = "2"

                Logger.log_DEBUG.info("ngram_num: %s" % ngram_num)
                Logger.log_DEBUG.info("feature_num: %s" % feature_num)
                Logger.log_DEBUG.info("samples_leaf: %s" % samples_leaf)
                Logger.log_DEBUG.info("samples_split: %s" % samples_split)

                print("Start AD_BR Training")
                br_result = MLModel.br_train(ip, up_url, down_url, access_url,
                                             access_key, _init_companyId,
                                             train_data_id, ngram_num,
                                             feature_num, samples_leaf,
                                             samples_split)
                print("End AD_BR Training")
                self.write(br_result)
                # self.write('tfidf_id:', br_tfidf_id)
                # self.write('mlb_id:', br_mlb_id)
                # self.write('model_id:', br_classifier_id)

            elif model_type == 'AD_CC':
                ngram_num = str(self.get_argument('ngram_num', ''))
                feature_num = str(self.get_argument('feature_num', ''))
                samples_leaf = str(self.get_argument('samples_leaf', ''))
                samples_split = str(self.get_argument('samples_split', ''))

                if ngram_num == "":
                    ngram_num = "3"
                if feature_num == "":
                    feature_num = "8000"
                if samples_leaf == "":
                    samples_leaf = "1"
                if samples_split == "":
                    samples_split = "2"

                print("Start AD_CC Training")
                cc_result = MLModel.cc_train(ip, up_url, down_url, access_url,
                                             access_key, _init_companyId,
                                             train_data_id, ngram_num,
                                             feature_num, samples_leaf,
                                             samples_split)
                print("End AD_CC Training")
                self.write(cc_result)
                # self.write('tfidf_id:', cc_tfidf_id)
                # self.write('mlb_id:', cc_mlb_id)
                # self.write('model_id:', cc_classifier_id)

            elif model_type == 'AD_LP':
                ngram_num = str(self.get_argument('ngram_num', ''))
                feature_num = str(self.get_argument('feature_num', ''))
                samples_leaf = str(self.get_argument('samples_leaf', ''))
                samples_split = str(self.get_argument('samples_split', ''))

                if ngram_num == "":
                    ngram_num = "3"
                if feature_num == "":
                    feature_num = "8000"
                if samples_leaf == "":
                    samples_leaf = "1"
                if samples_split == "":
                    samples_split = "2"

                print("Start AD_LP Training")
                lp_result = MLModel.lp_train(ip, up_url, down_url, access_url,
                                             access_key, _init_companyId,
                                             train_data_id, ngram_num,
                                             feature_num, samples_leaf,
                                             samples_split)
                print("End AD_LP Training")
                self.write(lp_result)
                # self.write('tfidf_id:', lp_tfidf_id)
                # self.write('mlb_id:', lp_mlb_id)
                # self.write('model_id:', lp_classifier_id)

        except Exception as e:
            Logger.log_ERROR.error("Please check the input parameters: " + str(e))
            # format_exc() returns the traceback as a string; print_exc()
            # returns None and would log "None" here.
            Logger.log_ERROR.error("Error details: %s" % traceback.format_exc())

        endtime = datetime.datetime.now()
        time_diff = endtime - starttime
        print('elapsed:', time_diff)
        Logger.log_DEBUG.info("==== use time: %s" % str(time_diff))