def searchOverWindows(img, windows, clf, scaler, spatialParams, colorParams, hogParams):
    clfSize = spatialParams['clfSize']

    # A list to store all positive windows
    positives = []

    # Iterate over all windows in the input image
    for win in windows:
        # Extract pixels and resize
        winImg = cv2.resize(img[win[0][1]:win[1][1], win[0][0]:win[1][0]], clfSize)
        features = extract_features(winImg, spatialParams, colorParams, hogParams)

        # Have the scaler scale the features
        scFeatures = scaler.transform(np.concatenate(features).reshape(1, -1))

        # Have the classifier make the prediction
        #prediction = predictBinary(clf, scFeatures)
        prediction = predictWithMargin(clf, scFeatures, 0.7)
        if prediction:
            positives.append(win)

    return positives
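
# predictWithMargin is not defined in this snippet; a minimal sketch, assuming it
# thresholds the SVM's signed distance to the decision boundary (decision_function)
# instead of the hard predict() output. The 0.7 margin used above would then
# suppress low-confidence windows at the cost of some recall.
def predictWithMargin(clf, scFeatures, margin):
    # Positive only when the sample lies at least `margin` beyond the boundary
    return clf.decision_function(scFeatures)[0] > margin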
def evaluate():
    train.make_keras_picklable()
    with open(PKL_FILENAME, 'rb') as file:
        model = pickle.load(file)

    training_points = range(0, 101)
    data = np.array([train.extract_features(i) for i in training_points])
    labels = np.array([train.fizzbuzz(i) for i in training_points])

    score = model.evaluate(data, keras.utils.to_categorical(labels), batch_size=64)
    with open("accuracy.txt", 'w') as file:
        file.write(str(score[1]))
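
# train.extract_features and train.fizzbuzz live in the training module; as a
# rough illustration only (hypothetical, not the project's actual code), the
# label function could map an integer to one of the four classes fed to
# keras.utils.to_categorical above:
def fizzbuzz(i):
    if i % 15 == 0:
        return 3  # "fizzbuzz"
    if i % 5 == 0:
        return 2  # "buzz"
    if i % 3 == 0:
        return 1  # "fizz"
    return 0      # the number itself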
def predict(text):
    # preprocessing
    wordnet_lemmatizer = WordNetLemmatizer()
    stop = stopwords.words('english') + list(string.punctuation) + ["``", "''", '""']
    preprocessed = " ".join([
        wordnet_lemmatizer.lemmatize(w) for w in word_tokenize(text)
        if w not in stop
    ])

    # feature extraction
    clf, count_vectorizer, scaler = joblib.load("classifier.pkl")
    count_matrix = count_vectorizer.transform([preprocessed])
    engineered = extract_features([text], scaler)
    features = sparse.hstack((count_matrix, engineered.values))

    return "spam" if clf.predict(features) == [1] else "not spam"
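
# Example usage (assumes classifier.pkl sits next to this script and the NLTK
# punkt/wordnet/stopwords data has been downloaded; outputs depend on the
# trained model, so the labels shown are only indicative):
print(predict("Congratulations! You have won a free prize, reply now"))  # e.g. "spam"
print(predict("Are we still meeting for lunch tomorrow?"))               # e.g. "not spam"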
def predict_test_file(fname, input_dim, timesteps, nlabels, labels):
    print('loading data from file ', fname)
    df = pd.read_csv(fname, sep=' ', header=0)
    X = extract_features(df, timesteps, input_dim)
    y = extract_labels(df, timesteps, nlabels)
    print('X temporal reshape: ', X.shape)
    print('y temporal reshape: ', y.shape)
    print('#samples: ', len(X))
    print('#labels: ', len(y))

    # average the output probabilities over all models, then take the max downstream
    m_preds = np.zeros((X.shape[0], timesteps, nlabels))
    for model in models:
        m_preds = m_preds + model.predict(X)
    m_preds = m_preds / len(models)

    # just count and report and we are done
    counts, conf_matrix = conll_eval_counts(m_preds, y, labels)
    print('file: ', fname)
    ceval.report(counts)
    print_cm(conf_matrix, ordered_label_keys(labels))
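
# Hypothetical call (file name, dimensions and label map are illustrative; the
# exact structure of `labels` depends on conll_eval_counts and
# ordered_label_keys in this project):
label_map = {'O': 0, 'B-PER': 1, 'I-PER': 2}
predict_test_file('data/test.conll', input_dim=128, timesteps=30,
                  nlabels=len(label_map), labels=label_map)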
def classify(frame):
    global window, model, update_interval, last_draw_time, blinks_this_interval, blink_threshold

    # load next frame into the window queue
    eog_signals = extract_eog_signals_from_jins_frame(frame)
    eog_signals = eog_signals.reshape((4, 1)).T
    window = window[1:]
    window = np.append(window, eog_signals, axis=0)

    # extract features from frames
    features = np.array([train.extract_features(window)])

    # pass the features into the model
    prediction = model.predict(features)
    if prediction:
        blinks_this_interval += 1

    if time() - last_draw_time > update_interval:
        print('\tblink' if blinks_this_interval >= blink_threshold else 'open')
        # print(blinks_this_interval)
        last_draw_time = time()
        blinks_this_interval = 0
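
# The globals above are expected to be initialised before the streaming loop
# starts; a hypothetical setup (window length and threshold values are
# illustrative, not taken from the original project):
WINDOW_LENGTH = 100                      # EOG samples kept for feature extraction
window = np.zeros((WINDOW_LENGTH, 4))    # one column per EOG channel
update_interval = 1.0                    # seconds between console updates
last_draw_time = time()
blinks_this_interval = 0
blink_threshold = 3                      # detections per interval needed to report 'blink'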
def slide_window(self, image, y_start, x_start, y_end, x_end, scale,
                 x_overlap=0.5, y_overlap=0.5):
    # Downscale the frame so a fixed-size window covers larger objects
    image = cv2.resize(
        image, (int(image.shape[1] / scale), int(image.shape[0] / scale)))
    hog_image, scaled_img = extract_features([image], self.colorspace)[0]

    # Number of HOG blocks spanned by one training-size window
    height_blocks = (train.TRAINING_IMAGE_SIZE[0] // train.HOG_CELL_SIZE[0]
                     - train.HOG_CELLS_PER_BLOCK[0] + 1)
    width_blocks = (train.TRAINING_IMAGE_SIZE[1] // train.HOG_CELL_SIZE[1]
                    - train.HOG_CELLS_PER_BLOCK[1] + 1)

    # Convert the pixel search region into HOG-block coordinates
    y_block_start = int(y_start / scale) // train.HOG_CELL_SIZE[0]
    x_block_start = int(x_start / scale) // train.HOG_CELL_SIZE[1]
    y_block_end = int(y_end / scale) // train.HOG_CELL_SIZE[0] - 1
    x_block_end = int(x_end / scale) // train.HOG_CELL_SIZE[1] - 1

    # Overlap expressed as a stride in blocks
    y_block_step = height_blocks - int(height_blocks * y_overlap)
    x_block_step = width_blocks - int(width_blocks * x_overlap)

    hits = []
    for i in range((y_block_end - y_block_start - height_blocks) // y_block_step + 1):
        for j in range((x_block_end - x_block_start - width_blocks) // x_block_step + 1):
            # Window corners in block coordinates
            v1_x = j * x_block_step + x_block_start
            v1_y = i * y_block_step + y_block_start
            v2_x = v1_x + width_blocks
            v2_y = v1_y + height_blocks

            # Matching crop of the downscaled colour image for the spatial features
            v1_x_spatial = int(v1_x * train.HOG_CELL_SIZE[1] *
                               train.SPATIAL_FEATURE_SCALE)
            v1_y_spatial = int(v1_y * train.HOG_CELL_SIZE[1] *
                               train.SPATIAL_FEATURE_SCALE)
            spatial_features = scaled_img[
                v1_y_spatial:v1_y_spatial +
                int(train.TRAINING_IMAGE_SIZE[1] * train.SPATIAL_FEATURE_SCALE),
                v1_x_spatial:v1_x_spatial +
                int(train.TRAINING_IMAGE_SIZE[0] * train.SPATIAL_FEATURE_SCALE),
                ...]

            # Subsampled HOG blocks + spatial crop, in the same layout as training
            features = np.concatenate([
                hog_image[:, v1_y:v2_y, v1_x:v2_x, ...].ravel(),
                spatial_features.ravel()
            ])

            if self.predict([features]) >= CONFIDENCE_THRESHOLD:
                # Convert block coordinates back to pixels in the original image
                v1_x *= int(train.HOG_CELL_SIZE[1] * scale)
                v1_y *= int(train.HOG_CELL_SIZE[0] * scale)
                v2_x *= int(train.HOG_CELL_SIZE[1] * scale)
                v2_y *= int(train.HOG_CELL_SIZE[0] * scale)
                hits.append([[v1_x, v1_y], [v2_x, v2_y]])
    return hits
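
# Hypothetical usage (detector, frame and CONFIDENCE_THRESHOLD are assumed to be
# set up elsewhere): scan the lower half of a 1280x720 road frame at several
# scales and pool the resulting boxes for later heat-map de-duplication.
hits = []
for scale in (1.0, 1.5, 2.0):
    hits += detector.slide_window(frame, y_start=360, x_start=0,
                                  y_end=720, x_end=1280, scale=scale)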
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/annotated.txt'))
    parser.add_argument("-n",
                        dest="length",
                        help="Number of data points to use",
                        default=-1)
    parser.add_argument("-f",
                        dest="folds",
                        help="Number of folds to partition data into",
                        default=10)
    parser.add_argument("-r",
                        dest="random",
                        help="Random shuffling of input data.",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    length = int(args.length)
    num_folds = int(args.folds)

    # Get data from files
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # List of all data
    X = []
    Y = []
    for n in notes:
        # Data points
        x = [it for it in zip(n.sid_list(), n.text_list())]
        X += x

        # Labels
        y = [it for it in n.label_list()]
        Y += y

    # Limit length
    X = X[:length]
    Y = Y[:length]

    # Build confusion matrix
    confusion = [[0 for i in labels_map] for j in labels_map]

    # Instantiate feat obj once (it'd really slow down CV to rebuild every time)
    feat_obj = FeaturesWrapper()

    # Extract features once
    feats = train.extract_features(X, feat_obj)
    data = list(zip(feats, Y))

    # For each held-out test set
    i = 1
    for training, testing in cv_partitions(data[:length],
                                           num_folds=num_folds,
                                           shuffle=args.random):
        # Users like to see progress
        print('Fold: %d of %d' % (i, num_folds))
        i += 1

        # Train on non-heldout data
        X_train = [d[0] for d in training]
        Y_train = [d[1] for d in training]
        vec, clf = train.train_vectorized(X_train, Y_train,
                                          model_path=None, grid=False)

        # Predict on held out
        X_test = [d[0] for d in testing]
        Y_test = [d[1] for d in testing]
        labels = predict.predict_vectorized(X_test, clf, vec)

        # Compute confusion matrix for held_out data
        testing_confusion = evaluate.create_confusion(labels, Y_test)
        confusion = add_matrix(confusion, testing_confusion)

    # Evaluate
    evaluate.display_confusion(confusion)
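
# Hypothetical invocation (the script name and glob pattern are illustrative):
#   python cross_validate.py -t "data/annotated*.txt" -f 10 -r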
print("[STATUS] Creating the classifier..") clf_svm = LinearSVC(random_state=9) # fit the training data and labels print("[STATUS] Fitting data/label to model..") clf_svm.fit(train_features, train_labels) #test_path = "dataset/test" #for file in glob.glob(test_path + "/*.jpg"): # read the input image image = cv2.imread('b.jpg') # convert to grayscale gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # extract haralick texture from the image features = extract_features(gray) # evaluate the model and predict label prediction = clf_svm.predict(features.reshape(1, -1))[0] # show the label cv2.putText(image, prediction, (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3) print("Prediction - {}".format(prediction)) # display the output image cv2.imshow("Test_Image", image) cv2.waitKey(0)
import pickle

import pandas as pd

from train import extract_features
from utils import real_to_cdf

if __name__ == '__main__':
    metadata = pd.read_csv('data/metadata_validate.csv')
    features = extract_features(metadata).set_index('Id').sort_index()

    # Load the two pre-trained regressors (binary mode is required for pickle)
    diastole_model = pickle.load(open('diastole.pkl', 'rb'))
    systole_model = pickle.load(open('systole.pkl', 'rb'))

    diastole = diastole_model.predict(features)
    systole = systole_model.predict(features)

    # Turn the point predictions into 600-bin cumulative distributions
    systole_cdf = real_to_cdf(systole, sigma=1e-10)
    diastole_cdf = real_to_cdf(diastole, sigma=1e-10)

    # Two submission rows per study Id: one for Diastole, one for Systole
    submission = pd.DataFrame(columns=['Id'] + ['P%d' % i for i in range(600)])
    i = 0
    for id in range(features.shape[0]):
        diastole_id = '%d_Diastole' % features.index[id]
        systole_id = '%d_Systole' % features.index[id]
        submission.loc[i, :] = [diastole_id] + diastole_cdf[id, :].tolist()
        submission.loc[i + 1, :] = [systole_id] + systole_cdf[id, :].tolist()
        i += 2
    submission.to_csv('submission.csv', index=False)
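
# real_to_cdf is imported from utils above; a minimal sketch of the usual
# conversion of a point prediction into a 600-bin cumulative distribution
# (assumed implementation, shown only for context; with a tiny sigma it is
# effectively a step function at the predicted volume):
import numpy as np
from scipy.stats import norm

def real_to_cdf(y, sigma=1e-10):
    cdf = np.zeros((len(y), 600))
    for i in range(len(y)):
        cdf[i] = norm.cdf(np.arange(600), loc=y[i], scale=sigma)
    return cdf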