def _predict():
    """Collect the stringified guesses for every JPEG matched by ``input/*.jpg``.

    Each matched path is handed to ``predict``; every item that call yields
    is converted with ``str`` and appended to one flat list.

    Returns:
        list[str]: the guesses, grouped by file in the order ``glob.glob``
        returns the paths.
    """
    return [
        str(guess)
        for image_path in glob.glob('input/*.jpg')
        for guess in predict(image_path)
    ]
def classify(train_data, train_target, test_data, test_target,
             n_pca=49, n_neighbors=4, display=False):
    """Train a HOG -> PCA -> kNN pipeline and return its error on the test set.

    Both data sets go through the same preprocessing
    (``pp.standardized_augmentation`` followed by
    ``pp.histogram_of_oriented_gradients``).  PCA is fitted on the training
    features only and then applied to both sets before the nearest-neighbour
    model is built and evaluated.

    Args:
        train_data: training samples, passed to the ``pp`` preprocessing helpers.
        train_target: labels for ``train_data``.
        test_data: test samples; the unprocessed originals are also forwarded
            to ``five_Examples``.
        test_target: labels for ``test_data``.
        n_pca: number of PCA components to keep.
        n_neighbors: neighbour count handed to ``kNN.find_nearest_neighbor``.
        display: when true, save a figure of the first HOG-processed training
            sample to ``path.figure + 'pp/hog.pdf'``.

    Returns:
        Whatever ``calculate_error(prediction, test_target)`` returns.
    """
    # Keep the untouched test samples for the example display at the end.
    test_data_original = test_data

    train_data = pp.standardized_augmentation(train_data, display=False)
    test_data = pp.standardized_augmentation(test_data, display=False)

    # The test set must live in the same feature space the PCA is fitted on,
    # so HOG is applied to both sets (it was applied to the training set only).
    train_data = pp.histogram_of_oriented_gradients(train_data)
    test_data = pp.histogram_of_oriented_gradients(test_data)

    if display:
        plt.figure()
        # Was ``data[0]``: ``data`` is not defined anywhere in this function.
        image_helpers.show_image(train_data[0])
        plt.savefig(path.figure + 'pp/hog.pdf', format='pdf', dpi=1000)
        plt.draw()

    pca = PCA(n_components=n_pca)
    pca_model = pca.fit(train_data)
    pca_train = pca_model.transform(train_data)
    pca_test = pca_model.transform(test_data)

    knn_model = kNN.find_nearest_neighbor(pca_train, train_target,
                                          n_neighbors=n_neighbors)
    prediction = kNN.predict(pca_test, knn_model)

    error = calculate_error(prediction, test_target)
    five_Examples(test_data_original, prediction, test_target)
    return error
def get(self):
    """Parse the request arguments and return the prediction for ``query``.

    Returns:
        dict: ``{'user_query': <the query argument>, 'prediction': <result of
        predict(query)>}``.
    """
    # Pull the user's query out of the parsed arguments.
    user_query = parser.parse_args()['query']
    return {
        'user_query': user_query,
        'prediction': predict(user_query),
    }
def save_keypoints(keypoints, frame):
    """Crop a square patch around every keypoint and classify it.

    For each keypoint a window of roughly ``kp.size`` pixels per side,
    centred on ``kp.pt``, is sliced out of ``frame`` and passed to
    ``predict``.  Patches with a zero-length dimension are skipped.

    Args:
        keypoints: iterable of objects exposing ``pt`` (an ``(x, y)`` pair)
            and ``size``.
        frame: 2-D sliceable image with a ``shape`` attribute
            (presumably a NumPy image array -- confirm against the caller).

    Returns:
        list: one ``predict`` result per non-empty patch, in keypoint order.
    """
    decisions = []
    for kp in keypoints:
        x, y = kp.pt
        half = int(kp.size // 2)
        row, col = int(y), int(x)

        # Clamp the lower bounds: a negative slice start would count from the
        # opposite edge of the image instead of stopping at the border.
        top = max(row - half, 0)
        left = max(col - half, 0)

        # Crop into a separate name.  Rebinding ``frame`` here made every
        # later keypoint crop from the previous crop instead of the image.
        patch = frame[top:row + half, left:col + half]

        print(patch.shape)
        if all(patch.shape):
            decisions.append(predict(patch))

    print(decisions)
    return decisions
def upload_file():
    '''
    Routing for the upload request: validates the uploaded file, saves the
    image to the 'upload' folder, runs the reader and the classifier on it
    and renders the result.

    Returns the rendered 'index.html' template in every case:
      * without extra context for non-POST requests,
      * with ``warning_msg`` when no usable file was sent,
      * with ``number`` and ``img_path`` after a successful prediction.
    '''
    if request.method != 'POST':
        # Previously fell through and returned None, which is not a valid
        # response for a view function.
        return render_template('index.html')

    if 'file' not in request.files:
        flash('There is no file uploaded')
        return render_template('index.html',
                               warning_msg='There is no file uploaded')

    uploaded = request.files['file']

    # The filename is client-controlled: keep only its last path component so
    # a name such as '../../app.py' cannot escape the upload folder.
    # NOTE(review): werkzeug.utils.secure_filename is the stricter option if
    # the project is happy to import it here.
    raw_name = uploaded.filename or ''
    filename = os.path.basename(raw_name.replace('\\', '/'))
    if filename in ('', '.', '..'):
        flash('No selected file')
        return render_template('index.html', warning_msg='No selected file')

    img_filename = os.path.join('./upload', filename)
    output_filename = os.path.join('./upload', 'out' + filename)
    uploaded.save(img_filename)

    image = readImg.readImg(img_filename, output_filename)
    numbers = classification.predict(model, image)
    return render_template('index.html', number=numbers,
                           img_path=output_filename)
print( f"====\t####\t====\t####\t====\t####\t====\t####\t====\nC L A S S I F I C A T I O N\n====\t####\t====\t####\t====\t####\t====\t####\t====" ) # gets train and test sets columns = list(df.columns) X, y = classification.getXandy(df) X_train, X_test, y_train, y_test = classification.splitTrainAndTest( X, y, testSize=testSize) print( f"Predicting y_test ({y_test.shape[0]} movies) using ensemble methods..." ) sectionTimer.restartTimer() # predicts test set's labels y_pred_tuning = classification.predict(X_train, X_test, y_train, y_test, mode="ensemble", tuning=True) y_pred_noTuning = classification.predict(X_train, X_test, y_train, y_test, mode="ensemble", tuning=False) print(f"\t...done in {sectionTimer.getHumanReadableElapsedTime()}") print() ''' E V A L U A T I O N ''' print( f"====\t####\t====\t####\t====\t####\t====\t####\t====\nE V A L U A T I O N\n====\t####\t====\t####\t====\t####\t====\t####\t===="
import os
from pathlib import Path

from classification import predict

# Folder holding the sample images and the masks predicted for them.
data_dir = Path('../segmentation_model/sample_predictions')
img_dir = data_dir / 'images'
mask_dir = data_dir / 'mask_predicted'

# Both listings are sorted so the same index is used for image and mask
# (assumes the two folders use matching file names -- confirm).
img_list = sorted(os.listdir(img_dir))
mask_list = sorted(os.listdir(mask_dir))

# Index of the sample to classify.
i = 0
model_path = Path('./models/model.h5')

y_pred = predict(img_dir / img_list[i], mask_dir / mask_list[i], model_path)
print(y_pred)
def _lung_sound_cache_file(prefix, feature_type, clf_type, deep_model_num):
    """Return the cache pickle path for ``prefix``, or None for an unknown feature type."""
    if feature_type == 'sda' or feature_type == 'conv':
        model_suffix = '_M{}'.format(deep_model_num)
    elif feature_type == 'engineered':
        model_suffix = ''
    else:
        return None
    return ('cache/' + prefix + '_F' + feature_type + '_C' + clf_type
            + model_suffix + '.pkl')


def extract_algorithm_lung_sound_features(patient_ids, feature_type='engineered', deep_model_num = [], clf_type = 'rf', verbose=0):
    """Compute per-patient wheeze/crackle probability features from lung sounds.

    The wheeze and crackle classifiers are loaded from a pickle cache (or
    built through ``lung_sounds.get_lung_sound_classifiers`` and cached).
    For every patient whose sound folder exists and whose cached row is
    missing or incomplete, the recordings of areas 1..11 are scored with both
    classifiers; per-area mean/max probabilities and patient-level aggregates
    are written into a cached feature DataFrame.

    Args:
        patient_ids: indexable collection of IDs of the form
            ``'<prefix>/<sound id>'``; the part after the first ``'/'`` names
            the patient's folder.
        feature_type: ``'engineered'``, ``'sda'`` or ``'conv'``.
        deep_model_num: model identifier; forwarded to the ``lung_sounds``
            helpers and embedded in the cache file names for ``'sda'`` /
            ``'conv'``.  The list default is never mutated here.
        clf_type: classifier identifier, used in the cache file names and
            forwarded to ``lung_sounds.get_lung_sound_classifiers``.
        verbose: values above 0 enable progress output.

    Returns:
        The rows of ``patient_ids`` restricted to the four patient-level
        columns (wheeze/crackle ``ProbMean`` and ``ProbMax``), or ``None``
        when ``feature_type`` is not supported.
    """
    n_areas = 11

    # Generate and save model
    clf_file = _lung_sound_cache_file('lung_sound_classifiers', feature_type,
                                      clf_type, deep_model_num)
    if clf_file is None:
        print('Feature type not implemented')
        return

    # NOTE(security): pickle.load runs arbitrary code from the file; the
    # cache directory must only ever contain files written by this project.
    if not isfile(clf_file):
        wheeze_clf, crackle_clf, has_prob = lung_sounds.get_lung_sound_classifiers(
            feature_type=feature_type, deep_model_num=deep_model_num,
            clf_type=clf_type)
        with open(clf_file, 'wb') as save_file:
            pickle.dump(wheeze_clf, save_file)
            pickle.dump(crackle_clf, save_file)
            pickle.dump(has_prob, save_file)
    else:
        with open(clf_file, 'rb') as save_file:
            wheeze_clf = pickle.load(save_file)
            crackle_clf = pickle.load(save_file)
            has_prob = pickle.load(save_file)

    feature_file = _lung_sound_cache_file('lung_sounds_algorithm', feature_type,
                                          clf_type, deep_model_num)

    features_changed = False
    if not os.path.exists(feature_file):
        # Create a new feature dataframe
        x = pd.DataFrame()
        features_changed = True
    else:
        with open(feature_file, 'rb') as save_file:
            x = pickle.load(save_file)

    # Determine lung sound data folder
    study_loc = 'PhaseII_CRF'
    local_os_path = settings.load_lung_sound_path()

    n_patients = len(patient_ids)
    # Report/checkpoint about every 1% of the patients.  The previous
    # ``np.floor(n / 100)`` was 0 for fewer than 100 patients, which made the
    # modulo below undefined and silently disabled this branch.
    checkpoint_every = max(1, n_patients // 100)

    # Iterate through and process sound files
    for m in range(n_patients):
        if m % checkpoint_every == 0:
            if verbose > 0:
                print(m, ' of ', n_patients)
            if features_changed:
                with open(feature_file, 'wb') as save_file:
                    pickle.dump(x, save_file)
                features_changed = False

        diagnosis_patient_id = patient_ids[m]
        # The sound folder is named after the part following the first '/'.
        sound_patient_id = str.split(diagnosis_patient_id, '/')[1]
        patient_folder = join(local_os_path, study_loc, sound_patient_id)

        if not isdir(patient_folder):
            print('Missing lung sounds from patient ', diagnosis_patient_id)
            x.loc[diagnosis_patient_id] = np.nan
            continue

        patient_key = study_loc + '/' + sound_patient_id
        needs_update = (diagnosis_patient_id not in x.index
                        or np.any(x.loc[diagnosis_patient_id].isnull())
                        or len(x.columns) == 0)
        if not needs_update:
            continue

        features_changed = True
        sound_files = np.asarray(os.listdir(patient_folder))

        # Per-area results, aggregated to patient level after the loop.
        wheeze_means, wheeze_maxes = [], []
        crackle_means, crackle_maxes = [], []

        for area_num in range(1, n_areas + 1):
            if verbose > 0:
                print('Area num ', area_num, ' of ', 11)
            matching_file_locs = np.char.find(
                sound_files, 'Area {:02d}'.format(area_num)) >= 0
            if np.sum(matching_file_locs) < 13:
                print('Less than 13 sound files for patient {} area {}'.format(patient_key,area_num))
            matching_sound_files_key = [
                patient_key + '/' + f for f in sound_files[matching_file_locs]]
            area_sound_features_frame = lung_sounds.get_features(
                matching_sound_files_key, feature_type=feature_type,
                deep_model_num=deep_model_num)
            # ``.values`` replaces ``get_values()``, which pandas 1.0 removed.
            area_sound_features = area_sound_features_frame.values.astype(float)

            p = classification.predict(wheeze_clf, area_sound_features, has_prob=has_prob)
            wheeze_mean, wheeze_max = np.mean(p), np.max(p)
            x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Wheeze_A{:02d}_ProbMean'.format(area_num)] = wheeze_mean
            x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Wheeze_A{:02d}_ProbMax'.format(area_num)] = wheeze_max

            p = classification.predict(crackle_clf, area_sound_features, has_prob=has_prob)
            crackle_mean, crackle_max = np.mean(p), np.max(p)
            x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Crackle_A{:02d}_ProbMean'.format(area_num)] = crackle_mean
            x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Crackle_A{:02d}_ProbMax'.format(area_num)] = crackle_max

            wheeze_means.append(wheeze_mean)
            wheeze_maxes.append(wheeze_max)
            crackle_means.append(crackle_mean)
            crackle_maxes.append(crackle_max)

        # Patient-level aggregates.  The crackle mean used ``=+`` (plain
        # assignment) and therefore only reflected the last area; it is now
        # summed over all areas like the wheeze mean.
        x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Wheeze_ProbMean'] = sum(wheeze_means) / 11
        x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Crackle_ProbMean'] = sum(crackle_means) / 11
        x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Wheeze_ProbMax'] = max(wheeze_maxes)
        x.loc[diagnosis_patient_id, 'LungSoundAlgorithm_Crackle_ProbMax'] = max(crackle_maxes)

    if features_changed:
        with open(feature_file, 'wb') as save_file:
            pickle.dump(x, save_file)

    x_keys = x.loc[patient_ids, ['LungSoundAlgorithm_Wheeze_ProbMean',
                                 'LungSoundAlgorithm_Wheeze_ProbMax',
                                 'LungSoundAlgorithm_Crackle_ProbMean',
                                 'LungSoundAlgorithm_Crackle_ProbMax']]
    return x_keys
import random
from nltk.corpus import movie_reviews
from review_sentiment import ReviewSentiment
import classification


def main():
    """Train sentiment classifiers on a small movie-review sample and run them.

    Builds (raw text, first category) pairs for every ``movie_reviews`` file,
    shuffles them with a fixed seed, keeps the first 100, trains and evaluates
    through ``classification``, then predicts on the two data folders with the
    first classifier returned.
    """
    labeled_data = []
    for fileid in movie_reviews.fileids():
        text = movie_reviews.raw(fileids=fileid)
        label = movie_reviews.categories(fileid)[0]
        labeled_data.append((text, label))

    # Fixed seed so the 100-review sample is the same on every run.
    random.seed(1234)
    random.shuffle(labeled_data)
    labeled_data = labeled_data[:100]

    rs = ReviewSentiment(labeled_data, train_size=50)
    classifiers = classification.train(rs)
    classification.evaluate(rs, classifiers)

    classifier = classifiers[0][0]
    runs = (
        ("positive reviews prediction", "data/positive/"),
        ("negative reviews prediction", "data/negative/"),
    )
    for heading, folder in runs:
        print()
        print(heading)
        classification.predict(rs, folder, classifier, 0)


if __name__ == '__main__':
    main()
P R E D I C T I O N //// //////////////////////// ''' test_ids = parsing.parse_CSV_to_df(file_path=file_path_test, log=False)["SK_ID_CURR"] X_train, y_train = df_train.drop( columns=["TARGET"]).to_numpy(), df_train["TARGET"] X_test = df_test.to_numpy() features = list(df_test.columns) y_test_pred, proba = classification.predict( X_train=X_train, X_test=X_test, X_validate=X_train, y_train=y_train, y_validate=y_train, mode=classifier, tuning=hyperparameters_tuning, probabilities=predict_probabilities, k_fold_splits=k_fold_splits, log=log) df_submission = pd.DataFrame(columns=["SK_ID_CURR", "TARGET"]) df_submission["SK_ID_CURR"], df_submission[ "TARGET"] = test_ids, y_test_pred parsing.write_df_to_file(df=df_submission, file_path=submission_path, log=log) parsing.write_df_to_file(df=df_submission, file_path="../submission.csv", log=log)