def predict():
    """Classify an uploaded image and return the result as JSON or HTML.

    On a POST request carrying a ``file`` upload, the image is opened with
    PIL, passed through ``load`` and fed to ``xray_model.predict``. The first
    two scores of the first prediction row are converted to percentages
    rounded to two decimals and stored under ``data["predictions"]``.

    Returns:
        ``flask.jsonify(data)`` when the request's Content-Type starts with
        ``application/json``; otherwise the rendered ``result.html``
        template. ``data["success"]`` stays ``False`` when nothing was
        classified.
    """
    # Payload handed back to the client / template.
    data = {"success": False}

    # Only classify when an image was actually uploaded to the endpoint.
    if flask.request.method == "POST" and flask.request.files.get("file"):
        upload = flask.request.files["file"]
        raw_bytes = upload.read()
        print(f"The image is: {upload}")
        image = Image.open(io.BytesIO(raw_bytes))

        # Preprocess the image and prepare it for classification.
        image_ = load(image)

        preds = xray_model.predict(image_)
        data["success"] = True

        # Keep the first prediction row and express its two scores as
        # percentages.
        preds_list = preds.tolist()[0]
        preds_list[0] = round(preds_list[0] * 100, 2)
        preds_list[1] = round(preds_list[1] * 100, 2)
        data["predictions"] = preds_list

    # content_type is None when the request has no Content-Type header
    # (e.g. a plain GET); treat that as "not JSON" instead of crashing.
    content_type = flask.request.content_type or ""
    if content_type.startswith("application/json"):
        return flask.jsonify(data)
    return render_template("result.html", data=data)
def main():
    """Train the candidate classifiers, pick the best one and report on it.

    Loads and encodes the data set described by ``Metadata``, holds out 20%
    of it for testing, fits the baseline, naive Bayes, k-NN and SVM models,
    selects the one with the highest ``best_score_`` and prints its name,
    test accuracy and classification report.
    """
    metadata = Metadata()
    data, labels = preprocessing.load(metadata)
    data = preprocessing.encode(data, metadata.COLUMNS)

    # Divide data into training and test sets. No random_state is passed,
    # so the split (and therefore the printed scores) varies between runs.
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2)

    # Run classifiers.
    clf_base = model.baseline_classifier(x_train, y_train)
    clf_nb = model.naive_bayes_classifier(x_train, y_train, metadata.COLUMNS)
    clf_knn = model.knn_classifier(x_train, y_train, metadata.COLUMNS)
    clf_svm = model.svm_classifier(x_train, y_train, metadata.COLUMNS)

    # Select the best classifier. max() returns the first maximal element,
    # so the listing order below is also the tie-break order.
    candidates = [
        ('base', clf_base),
        ('knn', clf_knn),
        ('svm', clf_svm),
        ('nb', clf_nb),
    ]
    name, clf = max(candidates, key=lambda pair: pair[1].best_score_)

    # Predict the test set and report.
    y_pred = clf.predict(x_test)
    print('Best classifier: %s' % name)
    print('\taccuracy: %0.3f\n' % accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))
def cv(k, c, extra=None):
    """Run k-fold cross-validation and return the fold-averaged metrics.

    Args:
        k: Number of folds handed to ``KFold``.
        c: Passed through unchanged to ``run`` for every fold.
        extra: Optional list passed through to ``run``. Defaults to a fresh
            empty list per call, so state cannot leak between calls.

    Returns:
        Tuple ``(p, r, f1, p_all, r_all, f1_all)``: the three scalar metrics
        and the three length-3 arrays returned by ``run``, each averaged
        over the folds (presumably precision / recall / F1 overall and per
        class; confirm against ``run``).

    Side effects:
        Appends the averaged ``f1`` to the module-level ``score_set``.
    """
    if extra is None:
        extra = []

    p = 0
    r = 0
    f1 = 0
    p_all = np.zeros(3)
    r_all = np.zeros(3)
    f1_all = np.zeros(3)

    data, data_raw = pp.load(True)
    kf = KFold(data.shape[0], n_folds=k)
    n_folds = len(kf)

    for train_index, test_index in kf:
        training_data = data.iloc[train_index, :]
        raw_training = data_raw.iloc[train_index, :]
        test_data = data.iloc[test_index, :]
        raw_test = data_raw.iloc[test_index, :]

        # The first value returned by run() is not needed here.
        _, p_, r_, f1_, p_all_, r_all_, f1_all_ = run(
            training_data, test_data, raw_training, raw_test, c, extra)

        p += p_
        r += r_
        f1 += f1_
        p_all += p_all_
        r_all += r_all_
        f1_all += f1_all_

    # Turn the per-fold sums into means; float() keeps true division even
    # if the accumulated scalars are integers.
    p /= float(n_folds)
    r /= float(n_folds)
    f1 /= float(n_folds)
    p_all /= n_folds
    r_all /= n_folds
    f1_all /= n_folds

    score_set.append(f1)
    return p, r, f1, p_all, r_all, f1_all
# Entry script: load the Steam reviews, annotate the parsed documents,
# split them by the user's recommendation and vectorise with TF-IDF.
from preprocessing import add_title_bad, load
from utilities import split, tfidf_transform

if __name__ == '__main__':
    # load() hands back three objects: the review table, `nlp` and the docs.
    steam_reviews, nlp, docs = load(r"steam_reviews.csv")
    add_title_bad(docs, steam_reviews)

    # Split on user_suggestion
    target = steam_reviews.user_suggestion
    X_train, X_test, y_train, y_test = split(docs, target)

    # TF-IDF features for both partitions (no LDA step is visible here).
    X_train_tfidf, X_test_tfidf, tfidf = tfidf_transform(X_train, X_test)
# Entry script: when the first CLI argument is '2', build the decision tree
# from the pickled data sets and print its size and accuracy.
import sys

# Import order matters: the explicit `load` import must come after the
# wildcard import so that preprocessing.load is the one in scope.
from training import *
from preprocessing import load

if __name__ == "__main__":
    if sys.argv[1] == '2':
        # Pickled inputs.
        validation_reviews = load('validationreviews.pkl')
        validation_list = load('validationlist.pkl')
        test_reviews = load('testreviews.pkl')
        test_list = load('testlist.pkl')
        random_list_pos = load('randompos.pkl')
        random_list_neg = load('randomneg.pkl')
        review_list = load('reviewlist.pkl')
        indexlist = load('indexlist.pkl')

        # Build the tree from the positive and negative samples.
        Tree = tree()
        Tree.treeconstruct(random_list_pos, random_list_neg, indexlist,
                           review_list)

        # Size of the freshly built tree.
        print("total nodes in decision tree initially : "
              + str(count_nodes(Tree.root)))
        print("total leaves in decision tree initially : "
              + str(count_leaves(Tree.root)))
        h = height(Tree.root)
        print("height of decision tree initially : " + str(h))

        # Accuracy on the samples the tree was built from, then on the
        # held-out test set.
        training_list = random_list_pos + random_list_neg
        print("accuracy on training set : "
              + str(accuracy(Tree, training_list, review_list)))
        print("accuracy on test set : "
              + str(accuracy(Tree, test_list, test_reviews)))

        # count_splitting_times is handed an empty dict to work with.
        indexcount = {}
        count_splitting_times(Tree.root, indexcount)
        count = 0
def main(self):
    """Train on every fold and write the combined prediction files.

    For each fold produced by ``common.stratified_kfold`` the method trains
    models, then writes ``fold<N>_validation.csv`` (out-of-fold predictions)
    and ``fold<N>_test.csv`` (test predictions) into ``self.output_dir``.
    Afterwards the validation files are concatenated into ``train.csv`` and
    the test files are averaged per ``id`` into ``test.csv``.

    If ``test.csv`` already exists in ``self.output_dir`` the run is skipped
    and nothing is recomputed.

    Side effects:
        Sets ``self.random_state`` and reseeds NumPy's global RNG from it.
    """
    t_start = datetime.now()
    logger.info(' {} / {} '.format(self.name, self.random_seed).center(62, '='))
    logger.info('Hyperparameters:\n{}'.format(pprint.pformat(self.params)))

    if os.path.isfile(os.path.join(self.output_dir, 'test.csv')):
        logger.info('Output already exists - skipping')
        # Actually skip: without this return the whole run was repeated
        # despite the log message.
        return

    # Initialize the random number generator
    self.random_state = RandomState(self.random_seed)
    np.random.seed(
        int.from_bytes(self.random_state.bytes(4), byteorder=sys.byteorder))

    preprocessed_data = preprocessing.load(self.params)
    vectorizer = self.build_vectorizer(preprocessed_data)

    # Attach the preprocessed text to both data frames, keyed by row id.
    train_df = common.load_data('train')
    train_df['comment_text'] = train_df['id'].map(preprocessed_data)
    test_df = common.load_data('test')
    test_df['comment_text'] = test_df['id'].map(preprocessed_data)

    def fold_path(fold_num, kind):
        # Path of one per-fold prediction file ('validation' or 'test').
        return os.path.join(self.output_dir, f'fold{fold_num}_{kind}.csv')

    # Fold numbers actually processed, so the combine steps below do not
    # depend on a hard-coded fold count.
    fold_nums = []

    folds = common.stratified_kfold(train_df, random_seed=self.random_seed)
    for fold_num, train_ids, val_ids in folds:
        logger.info(f'Fold #{fold_num}')

        fold_train_df = train_df[train_df['id'].isin(train_ids)]
        fold_val_df = train_df[train_df['id'].isin(val_ids)]
        models = self.train(fold_num, vectorizer, fold_train_df, fold_val_df)

        logger.info('Generating the out-of-fold predictions')
        self.predict(models, vectorizer, fold_val_df,
                     fold_path(fold_num, 'validation'))

        logger.info('Generating the test predictions')
        self.predict(models, vectorizer, test_df,
                     fold_path(fold_num, 'test'))

        fold_nums.append(fold_num)

    def read_fold_parts(kind):
        # Load the id + label columns of every per-fold file of one kind.
        return [
            pd.read_csv(fold_path(num, kind), usecols=['id'] + common.LABELS)
            for num in fold_nums
        ]

    logger.info('Combining the out-of-fold predictions')
    train_pred = pd.concat(read_fold_parts('validation'))
    train_pred.to_csv(os.path.join(self.output_dir, 'train.csv'), index=False)

    logger.info('Averaging the test predictions')
    test_pred = (pd.concat(read_fold_parts('test'))
                 .groupby('id', as_index=False)
                 .mean())
    test_pred.to_csv(os.path.join(self.output_dir, 'test.csv'), index=False)

    logger.info('Total elapsed time - {}'.format(datetime.now() - t_start))
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

import preprocessing as pre

# --- Balanced SVM taking the mean of the intervals, 10 at a time ---

data_num = 500
num_int = 2560

# Read all of the experiment data
X, y = pre.load('dataset/', data_num, num_int)

# Take the mean over a number of 10 intervals for each component
X = pre.med_intervalo(X, 10)

# Balance the data
X, y = pre.proc_balanceado(X, y, data_num)

# Split into training and test sets (drawn at random, also shuffling the
# dependent variables)
X_train, X_test, y_train, y_test = pre.split_data(X, y, 0.2, None)

# Standardize the data
X_train, X_test = pre.standardize_data(X_train, X_test)

# Set up the SVM; probability=True is what makes predict_proba available
classifier = SVC(gamma='auto', kernel='rbf', probability=True)

# Train the SVM
classifier.fit(X_train, y_train.ravel())

# Predict the test results
y_pred = classifier.predict(X_test)
svm_predict = classifier.predict_proba(X_test)

# Produce the confusion matrix of the SVM above
cm = confusion_matrix(y_test, y_pred)
print('\n\nConfusion Matrix: \n', cm)
import pandas

import preprocessing
from defines import Types, Metadata


def print_pivot_tables(data, metadata, numerical=False):
    """Print a value-count table for every column that declares categories.

    Args:
        data: Mapping/frame indexed by ``column.HEADER``.
        metadata: Object whose ``COLUMNS`` describe each column through
            ``HEADER``, ``TYPE`` and ``CATEGORIES``.
        numerical: When ``False`` (default), columns whose ``TYPE`` is
            ``Types.NUMERICAL`` are skipped.

    Columns whose ``CATEGORIES`` is ``None`` are always skipped.
    """
    for column in metadata.COLUMNS:
        # Same condition as before, with the and/or grouping made explicit.
        skip_numerical = not numerical and column.TYPE is Types.NUMERICAL
        if skip_numerical or column.CATEGORIES is None:
            continue

        df_column = pandas.DataFrame(data[column.HEADER])
        # One row per column, one output column per distinct value.
        count = df_column.apply(lambda series: series.value_counts()).T
        # reindex(columns=...) replaces reindex_axis(..., axis=1), which was
        # removed from pandas.
        count = count.reindex(columns=sorted(count.columns))
        print(count, '\n')


def print_statistics(data, metadata):
    """Print ``describe()`` for numerical columns that have no categories.

    Args:
        data: Mapping/frame indexed by ``column.HEADER``.
        metadata: Object whose ``COLUMNS`` describe each column.
    """
    for column in metadata.COLUMNS:
        if column.TYPE is not Types.NUMERICAL or column.CATEGORIES is not None:
            continue
        print(pandas.DataFrame(data[column.HEADER]).describe(), '\n')


if __name__ == '__main__':
    metadata = Metadata()
    data, _ = preprocessing.load(metadata)
    print_pivot_tables(data, metadata, numerical=True)
    print_statistics(data, metadata)
def generate_data(self, x_data, y_data):
    """Yield batches of augmented images and steering values forever.

    Each epoch shuffles the samples, then for every sample walks its three
    entries (index 0 "center", 1 "left", 2 "right"). For each entry it feeds
    the loaded image, its flip, and sheared + flipped-sheared variants to
    ``self.generate_feature``, yielding ``(x_gen, y_gen)`` whenever that
    call returns 0 and once more at the end of the epoch if the counter is
    non-zero.

    Args:
        x_data: Indexable samples; ``x_data[i][j]`` is passed to
            ``pre.load``.
        y_data: Indexable steering values; ``y_data[i][j]`` pairs with
            ``x_data[i][j]``.

    Yields:
        The tuple ``(x_gen, y_gen)``. The same two arrays are reused for
        every batch.
    """
    # Memory allocation: buffers are created once and reused.
    x_gen = np.zeros([
        self.batch_size, self.image_height, self.image_width, self.image_depth
    ])
    y_gen = np.zeros([self.batch_size])

    # Per entry: (index into the sample, shear ranges for random.uniform,
    # fourth argument given to shear_feature).
    plan = (
        (0, ((-25, -15), (15, 25)), 0.25),  # center
        (1, ((15, 25),), 0.5),              # left
        (2, ((-25, -15),), 0.5),            # right
    )

    def variants(image, steer, shear_ranges, shear_arg):
        # Lazily produce the original, its flip, then each shear and the
        # flip of that shear. Being lazy keeps every augmentation call in
        # the same position relative to the yields as the unrolled code.
        yield image, steer
        yield self.flip_feature(image, steer)
        for low, high in shear_ranges:
            sheared, sheared_steer = self.shear_feature(
                image, steer, random.uniform(low, high), shear_arg)
            yield sheared, sheared_steer
            yield self.flip_feature(sheared, sheared_steer)

    while True:
        # New epoch
        x_data, y_data = shuffle(x_data, y_data)
        count_gen = 0
        for i in range(len(x_data)):
            for column, shear_ranges, shear_arg in plan:
                source = pre.load(x_data[i][column])
                source_steer = y_data[i][column]
                for image, steering in variants(source, source_steer,
                                                shear_ranges, shear_arg):
                    count_gen = self.generate_feature(
                        image, steering, x_gen, y_gen, count_gen)
                    # A returned 0 is treated as "batch ready".
                    if count_gen == 0:
                        yield x_gen, y_gen

        # Yield remainder
        # NOTE(review): the buffers are yielded whole, so a partial final
        # batch presumably still carries rows from the previous batch;
        # confirm against generate_feature.
        if count_gen > 0:
            yield x_gen, y_gen
# This section describes the main algorithm for training the data
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.callbacks import EarlyStopping

import preprocessing
import model

# Preprocessing call: after this step, features and labels have been
# extracted out of the raw data.
features, labels = preprocessing.load()
print("Feature shape:", np.asarray(features).shape)
print("Label shape:", np.asarray(labels).shape)

# Earlier variants shuffled features/labels explicitly and sliced them by
# hand at 50% / 75%; both are disabled in favour of the seeded
# train_test_split calls below.

# Two-stage split: 25% is held out for testing, then 33% of the remainder
# becomes the validation set (roughly 50% train / 25% validation / 25% test).
np.random.seed(1)
X_rest, X_test, y_rest, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.33, random_state=1)

# One Hot Encoding labels
def load_preprocessed_data(self):
    """Return whatever ``preprocessing.load`` yields for ``self.params``."""
    return preprocessing.load(self.params)
from sklearn.metrics import confusion_matrix import matplotlib.pyplot as plt import time import pandas as pd from sklearn.naive_bayes import BernoulliNB from sklearn.decomposition import PCA import re import json from sklearn.discriminant_analysis import LinearDiscriminantAnalysis base_directory = "F:\\code\\python\\lvtn\\" score_set = [] # 0:NEG, 1:NEU, 2:POS training_data, test_data, raw_training, raw_test, raw = pp.load() socal = pd.read_csv(base_directory + "so-cal.csv") #x = socal[socal.socal<0]['socal'] - 1 #socal.loc[socal.socal<0, 'socal'] = x pca = LinearDiscriminantAnalysis() pca.fit(socal['socal'].reshape(-1, 1), socal['lab']) socal['y'] = pca.transform(socal['socal'].reshape(-1, 1)) def training_ngram(corpus, min_df=3): vectorizer = CountVectorizer( min_df=3, decode_error="ignore", analyzer="word", lowercase=True,