import time
from sklearn.model_selection import train_test_split

# model, preprocessor, and requestHandler are module-level globals defined
# elsewhere in this service.
def init_model(workflow, client):
    global train_start_time
    global train_end_time
    initReq = workflow + "#" + client
    name, _, __ = requestHandler.parseReq(initReq, "fwf")
    print("workflow name : " + name)
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model
    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
    train_start_time = time.time()
    dataX, datay = preprocessor[name].getData(workflow + "_" + client)
    trainX, testX, trainy, testy = train_test_split(dataX, datay, test_size=0.2)
    model[name].train(trainX, trainy)
    train_end_time = time.time()
    score = model[name].score(testX, testy)
    print("test score : " + str(score))
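# `support_vector_machine` and `DataPreprocessor` are project classes not shown
# in this excerpt. A minimal sketch of the interface the code above assumes,
# delegating to scikit-learn; the "test R square" comment further below
# suggests a regressor, so SVR is used here. Only the class and method names
# are taken from the excerpt; the bodies are assumptions.

from sklearn.svm import SVR

class support_vector_machine:
    def __init__(self):
        self._svr = SVR(kernel="rbf")

    def train(self, X, y):
        # fit the underlying scikit-learn estimator
        self._svr.fit(X, y)

    def score(self, X, y):
        # SVR.score returns the R^2 of the predictions on (X, y)
        return self._svr.score(X, y)

class DataPreprocessor:
    def getData(self, client=None):
        # hypothetical: load the named client's dataset and return it
        # as a (features, targets) pair
        raise NotImplementedError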
from flask import request

def build_model():
    '''
    request body{
        name: model name,
        path: path to model
    }
    '''
    jsondata = request.get_json()
    name = jsondata["name"]
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model
    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
    # use the cached preprocessor rather than constructing a throwaway instance
    dataX, datay = preprocessor[name].getData()
    trainX, testX, trainy, testy = train_test_split(dataX, datay, test_size=0.2)
    model[name].train(trainX, trainy)
    score = model[name].score(testX, testy)
    # payload = {"test R square": str(score),
    #            "result": "success",
    #            }
    return str(score)
import time
from flask import request, jsonify

def build_model():
    '''
    request body{
        workflow: workflow name,
        client_name: client name
    }
    '''
    global model
    global preprocessor
    global requestHandler
    global train_start_time
    global train_end_time
    jsondata = request.get_json()
    app.logger.info("json data : " + str(jsondata))
    name, _, __ = requestHandler.parseReq(jsondata, "nwf")
    _client = jsondata["client_name"]
    _workflow = jsondata["workflow"]
    client = _workflow + "_" + _client
    if name not in model.keys():
        _model = support_vector_machine()
        model[name] = _model
        app.logger.info("created new model entry for " + name)
    if name not in preprocessor.keys():
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
        app.logger.info("created new preprocessor entry for " + name)
    train_start_time = time.time()
    app.logger.info("model start training")
    # use the cached preprocessor rather than constructing a throwaway instance
    dataX, datay = preprocessor[name].getData(client)
    trainX, testX, trainy, testy = train_test_split(dataX, datay, test_size=0.2)
    model[name].train(trainX, trainy)
    app.logger.info("training success")
    score = model[name].score(testX, testy)
    train_end_time = time.time()
    app.logger.info("test score : " + str(score))
    res = {"start_time": train_start_time, "end_time": train_end_time}
    return jsonify(res)
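# A hedged usage sketch: how a client might call the handler above if it is
# registered as a Flask POST route. The route path, host, and field values
# are assumptions; only the "workflow" / "client_name" keys come from the
# handler itself.

import requests

resp = requests.post(
    "http://localhost:5000/build_model",  # hypothetical host and route
    json={"workflow": "wf1", "client_name": "clientA"},  # hypothetical values
)
print(resp.json())  # e.g. {"start_time": ..., "end_time": ...}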
from sklearn.metrics import accuracy_score, confusion_matrix

# The first line below is the tail of the (elided) call that produced
# y_test_soft and usps_y_soft; svm, random_forest, and
# neural_network_classifier are project modules.
    mnist_training, mnist_validation, mnist_testing, USPS_mat, USPS_target)

softmax_score_mnist = accuracy_score(mnist_testing[1], y_test_soft)
cm = confusion_matrix(mnist_testing[1], y_test_soft)
print("Accuracy for MNIST Softmax: {}".format(softmax_score_mnist))
print("Confusion Matrix Softmax: \n{}".format(cm))

softmax_score_usps = accuracy_score(USPS_target, usps_y_soft)
cm = confusion_matrix(USPS_target, usps_y_soft)
print("Accuracy for USPS Softmax: {}".format(softmax_score_usps))
print("Confusion Matrix Softmax: \n{}".format(cm))
print("Done with Logistic")

print("Starting SVM")
y_mnist_svm, y_usps_svm = svm.support_vector_machine(mnist_training, mnist_testing,
                                                     USPS_mat, USPS_target)
# Uncomment to run SVM with gamma = 1
# print("\n\tSVM with Gamma value set to 1")
# y_mnist_svm_1, y_usps_svm_1 = svm.support_vector_machine(mnist_training, mnist_testing,
#                                                          USPS_mat, USPS_target, gamma=1)
print("Done with SVM")

print("Starting Random Forest")
y_mnist_rf, y_usps_rf = random_forest.random_forest_implementation(
    mnist_training, mnist_testing, USPS_mat, USPS_target)
print("Done with Random Forest")

print("Starting Neural Network Classifier")
y_pred_mnist_nn, y_pred_usps_nn = neural_network_classifier.neural_net_implementation(
    mnist_training, mnist_testing, USPS_mat, USPS_target)
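# A minimal sketch of what svm.support_vector_machine above might look like.
# The signature matches the call sites, and the (images, labels) tuple layout
# is inferred from mnist_testing[1] being used as labels; the body itself is
# an assumption.

from sklearn.svm import SVC

def support_vector_machine(mnist_training, mnist_testing, USPS_mat, USPS_target,
                           gamma="scale"):
    clf = SVC(kernel="rbf", gamma=gamma)
    clf.fit(mnist_training[0], mnist_training[1])
    # predict on both test sets so the MNIST and USPS results can be compared
    y_mnist = clf.predict(mnist_testing[0])
    y_usps = clf.predict(USPS_mat)
    return y_mnist, y_usps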
import os
import fileinput
import string

import nltk

def main():
    directory = "../Data/training_data"
    test_dir = "../Data/testing_data"
    test_features = []
    test_uni = []
    test_bi = []
    test_labels = []
    feature_vectors = []
    labels = []
    temp_features = []
    temp_labels = []
    unigrams = []
    bigrams = []
    feature_index = 0

    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            f = fileinput.input(os.path.join(directory, filename))
            for line in f:
                # Strip the newline and all punctuation except '#', so the
                # '#L' location markers survive. (str.replace with the whole
                # punctuation string was a no-op; str.translate removes each
                # character.)
                line = line.rstrip('\n')
                line = line.translate(
                    str.maketrans('', '', string.punctuation.replace('#', '')))
                line = " ".join(line.split())
                modified_line = line.replace('#L', '')
                tokens = modified_line.split()
                tag = nltk.pos_tag(tokens)
                for i in range(len(tag)):
                    if tag[i][0] not in pos_tag_dict:
                        modified_tag = pos_tag_label(tag[i][1])
                        pos_tag_dict[tag[i][0]] = modified_tag
                # Split on whitespace; stopwords are deliberately kept.
                for word in line.split():
                    if word == "" or word == "#L":
                        continue
                    unigrams.append(word)

    for filename in os.listdir(test_dir):
        if filename.endswith(".txt"):
            f = fileinput.input(os.path.join(test_dir, filename))
            for line in f:
                line = line.rstrip('\n')
                line = line.translate(
                    str.maketrans('', '', string.punctuation.replace('#', '')))
                modified_line = line.replace('#L', '')
                tokens = modified_line.split()
                tag = nltk.pos_tag(tokens)
                for i in range(len(tag)):
                    if tag[i][0] not in pos_tag_dict:
                        modified_tag = pos_tag_label(tag[i][1])
                        pos_tag_dict[tag[i][0]] = modified_tag
                for word in line.split():
                    if word == "" or word == "#L":
                        continue
                    test_uni.append(word)

    # Only form bigrams from unigrams that are not punctuation.
    for i in range(len(unigrams) - 1):
        bigrams.append(unigrams[i] + " " + unigrams[i + 1])
        mod_bigram = bigrams[-1].replace("#L", "")
        pos_tag_dict[mod_bigram] = 0

    all_words = unigrams + bigrams
    labels = gen_labels(all_words)
    features = generating_features(all_words, feature_index, feature_vectors,
                                   labels, prepositions, stop_words,
                                   location_tenders, substr, followed_by,
                                   nonlocation_nouns)

    # Same bigram construction for the test split.
    for i in range(len(test_uni) - 1):
        test_bi.append(test_uni[i] + " " + test_uni[i + 1])
        mod_bigram = test_bi[-1].replace("#L", "")
        pos_tag_dict[mod_bigram] = 0

    test_words = test_uni + test_bi
    test_labels = gen_labels(test_words)
    test_features = generating_features(test_words, feature_index, temp_features,
                                        test_labels, prepositions, stop_words,
                                        location_tenders, substr, followed_by,
                                        nonlocation_nouns)

    decision_tree_classifier(features, labels, test_features, test_labels)
    random_forest(features, labels, test_features, test_labels)
    support_vector_machine(features, labels, test_features, test_labels)
    logistic_regression(features, labels, test_features, test_labels)
    linear_regression(features, labels, test_features, test_labels)
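# The five classifier helpers called at the end of main() share one signature.
# A plausible sketch of support_vector_machine under that signature, using
# scikit-learn; the estimator and metric choices are assumptions.

from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

def support_vector_machine(features, labels, test_features, test_labels):
    clf = LinearSVC()
    clf.fit(features, labels)
    predictions = clf.predict(test_features)
    print("SVM accuracy: {}".format(accuracy_score(test_labels, predictions)))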
if lca_flag:
    print("Doing LCA")
    train = lca(tf_idf_combine, tf_idf_training_matrix)
    test = lca(tf_idf_combine, tf_idf_testing_matrix)
elif pca_flag:
    print("Doing PCA")
    train = pca_preprocess(tf_idf_combine, tf_idf_training_matrix)
    test = pca_preprocess(tf_idf_combine, tf_idf_testing_matrix)
else:
    print("Doing No Reduction")
    train = tf_idf_training_matrix
    test = tf_idf_testing_matrix

if svm_flag:
    print("Doing SVM")
    ans = svm.support_vector_machine(train, training_label, test, 0.01)
elif adaboo_flag:
    print("Doing Adaboost")
    ans = adaboost.adaboo(train, training_label, test)
elif rf_flag:
    print("Doing RF")
    # Note: this branch and the two below use the raw training/testing
    # matrices, so they bypass whatever reduction was selected above.
    ans = randomForest.r_forest(training_matrix, training_label, testing_matrix)
elif nb_flag:
    print("Doing Naive Bayes")
    ans = naiveBayes.nb(training_matrix, training_label, testing_matrix)
elif bagging_flag:
    print("Doing Bagging")
    ans = baggingClassifier.bagging(training_matrix, training_label, testing_matrix)
else:
    print("Invalid argument specified")
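# A hedged sketch of pca_preprocess: fit the projection on the combined
# TF-IDF matrix and project the requested split into the reduced space.
# The component count is an assumption, and if the TF-IDF matrices are
# scipy sparse, sklearn.decomposition.TruncatedSVD would be the drop-in
# replacement, since PCA needs dense input.

from sklearn.decomposition import PCA

def pca_preprocess(tf_idf_combine, tf_idf_matrix, n_components=100):
    pca = PCA(n_components=n_components)
    pca.fit(tf_idf_combine)      # learn components on the combined matrix
    return pca.transform(tf_idf_matrix)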