def searchOverWindows(img, windows, clf, scaler, spatialParams, colorParams,
                      hogParams):

    clfSize = spatialParams['clfSize']
    # A list to store all positive windows
    positives = []

    # Iterate over all windows in the input image
    for win in windows:
        # Extract pixels and resize
        winImg = cv2.resize(img[win[0][1]:win[1][1], win[0][0]:win[1][0]],
                            clfSize)
        features = extract_features(winImg, spatialParams, colorParams,
                                    hogParams)

        # Have the scaler scale the features
        scFeatures = scaler.transform(np.concatenate(features).reshape(1, -1))

        # Have the classifier make the prediction
        #prediction = predictBinary(clf, scFeatures)
        prediction = predictWithMargin(clf, scFeatures, 0.7)
        if prediction:
            positives.append(win)

    return positives
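
A minimal driver for the function above might look like the following; slideWindows is a hypothetical helper that yields ((x1, y1), (x2, y2)) pairs in the format the slicing expects, and clf, scaler, and the three parameter dicts are assumed to come from a prior training step.

# Hypothetical usage sketch; clf, scaler, and the parameter dicts are
# assumed to be produced by training.
img = cv2.imread('frame.jpg')
windows = slideWindows(img.shape, size=(64, 64), overlap=0.5)  # assumed helper
hits = searchOverWindows(img, windows, clf, scaler, spatialParams,
                         colorParams, hogParams)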
Example #2
def evaluate():
    # Restore pickling support for Keras models, then load the trained model
    train.make_keras_picklable()
    with open(PKL_FILENAME, 'rb') as file:
        model = pickle.load(file)

    # Rebuild features and one-hot labels for the integers 0..100
    training_points = range(0, 101)
    data = np.array([train.extract_features(i) for i in training_points])
    labels = np.array([train.fizzbuzz(i) for i in training_points])
    score = model.evaluate(data,
                           keras.utils.to_categorical(labels),
                           batch_size=64)

    # model.evaluate returns [loss, accuracy]; persist the accuracy
    with open("accuracy.txt", 'w') as file:
        file.write(str(score[1]))
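
For reference, keras.utils.to_categorical (used above) converts integer class labels into one-hot rows:

>>> keras.utils.to_categorical([0, 1, 2])
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)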
Example #3
def predict(text):
    # preprocessing
    wordnet_lemmatizer = WordNetLemmatizer()
    stop = (stopwords.words('english') + list(string.punctuation) +
            ["``", "''", '""'])
    preprocessed = " ".join([
        wordnet_lemmatizer.lemmatize(w) for w in word_tokenize(text)
        if w not in stop
    ])

    # feature extraction
    clf, count_vectorizer, scaler = joblib.load("classifier.pkl")
    count_matrix = count_vectorizer.transform([preprocessed])
    engineered = extract_features([text], scaler)
    features = sparse.hstack((count_matrix, engineered.values))

    return "spam" if clf.predict(features) == [1] else "not spam"
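
The tokenizer, stopword list, and lemmatizer above rely on NLTK corpora that must be fetched once per environment; a one-time setup:

import nltk
nltk.download('punkt')      # word_tokenize
nltk.download('stopwords')  # stopwords.words('english')
nltk.download('wordnet')    # WordNetLemmatizer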
Example #4
def predict_test_file(fname, input_dim, timesteps, nlabels, labels):
    print('loading data from file ', fname)
    df = pd.read_csv(fname, sep=' ', header=0)
    X = extract_features(df, timesteps, input_dim)
    y = extract_labels(df, timesteps, nlabels)

    print('X temporal reshape: ', X.shape)
    print('y temporal reshape: ', y.shape)
    print('#samples: ', len(X))
    print('#labels: ', len(y))

    # Average the output probabilities over all models, then take the argmax
    m_preds = np.zeros((X.shape[0], timesteps, nlabels))
    for model in models:
        m_preds = m_preds + model.predict(X)

    m_preds = m_preds / len(models)

    # just count and report and we are done
    counts, conf_matrix = conll_eval_counts(m_preds, y, labels)
    print('file: ', fname)
    ceval.report(counts)
    print_cm(conf_matrix, ordered_label_keys(labels))
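
The ensemble step above is just a mean over per-model probability tensors followed by an argmax over the label axis; a standalone sketch of the same idea, with hypothetical names:

import numpy as np

def average_predictions(models, X):
    # Mean of per-model probability tensors, then argmax over the label axis
    probs = np.mean([m.predict(X) for m in models], axis=0)
    return np.argmax(probs, axis=-1)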
Example #5
def classify(frame):
    global window, model, update_interval, last_draw_time, blinks_this_interval, blink_threshold

    # load next frame into the window queue
    eog_signals = extract_eog_signals_from_jins_frame(frame)
    eog_signals = eog_signals.reshape((4,1)).T
    window = window[1:]
    window = np.append(window, eog_signals, axis=0)

    # extract features from frames
    features = np.array([ train.extract_features(window) ])

    # pass the features into the model
    prediction = model.predict(features)

    if prediction:
        blinks_this_interval += 1

    if time() - last_draw_time > update_interval:
        print('\tblink' if blinks_this_interval >= blink_threshold else 'open')
        # print(blinks_this_interval)

        last_draw_time = time()
        blinks_this_interval = 0
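
classify relies on module-level state initialized elsewhere; a plausible setup, assuming a rolling buffer of 50 frames with 4 EOG channels and time() from the time module:

from time import time
import numpy as np

# Hypothetical initialization of the globals used by classify(); the buffer
# length, threshold, and interval values are illustrative assumptions.
WINDOW_LEN = 50
window = np.zeros((WINDOW_LEN, 4))  # 4 EOG channels per frame
blinks_this_interval = 0
blink_threshold = 2
update_interval = 1.0  # seconds
last_draw_time = time()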
Example #6
    def slide_window(self,
                     image,
                     y_start,
                     x_start,
                     y_end,
                     x_end,
                     scale,
                     x_overlap=0.5,
                     y_overlap=0.5):

        image = cv2.resize(
            image, (int(image.shape[1] / scale), int(image.shape[0] / scale)))

        hog_image, scaled_img = extract_features([image], self.colorspace)[0]

        height_blocks = (train.TRAINING_IMAGE_SIZE[0] // train.HOG_CELL_SIZE[0]
                         - train.HOG_CELLS_PER_BLOCK[0] + 1)
        width_blocks = (train.TRAINING_IMAGE_SIZE[1] // train.HOG_CELL_SIZE[1]
                        - train.HOG_CELLS_PER_BLOCK[1] + 1)
        y_block_start = int(y_start / scale) // train.HOG_CELL_SIZE[0]
        x_block_start = int(x_start / scale) // train.HOG_CELL_SIZE[1]
        y_block_end = int(y_end / scale) // train.HOG_CELL_SIZE[0] - 1
        x_block_end = int(x_end / scale) // train.HOG_CELL_SIZE[1] - 1
        y_block_step = height_blocks - int(height_blocks * y_overlap)
        x_block_step = width_blocks - int(width_blocks * x_overlap)

        hits = []
        for i in range((y_block_end - y_block_start - height_blocks) //
                       y_block_step + 1):
            for j in range((x_block_end - x_block_start - width_blocks) //
                           x_block_step + 1):
                v1_x = j * x_block_step + x_block_start
                v1_y = i * y_block_step + y_block_start
                v2_x = v1_x + width_blocks
                v2_y = v1_y + height_blocks

                v1_x_spatial = int(v1_x * train.HOG_CELL_SIZE[1] *
                                   train.SPATIAL_FEATURE_SCALE)
                v1_y_spatial = int(v1_y * train.HOG_CELL_SIZE[0] *
                                   train.SPATIAL_FEATURE_SCALE)

                spatial_features = scaled_img[
                    v1_y_spatial:v1_y_spatial +
                    int(train.TRAINING_IMAGE_SIZE[0] *
                        train.SPATIAL_FEATURE_SCALE),
                    v1_x_spatial:v1_x_spatial +
                    int(train.TRAINING_IMAGE_SIZE[1] *
                        train.SPATIAL_FEATURE_SCALE), ...]

                features = np.concatenate([
                    hog_image[:, v1_y:v2_y, v1_x:v2_x, ...].ravel(),
                    spatial_features.ravel()
                ])

                if self.predict([features]) >= CONFIDENCE_THRESHOLD:
                    v1_x *= int(train.HOG_CELL_SIZE[1] * scale)
                    v1_y *= int(train.HOG_CELL_SIZE[0] * scale)
                    v2_x *= int(train.HOG_CELL_SIZE[1] * scale)
                    v2_y *= int(train.HOG_CELL_SIZE[0] * scale)

                    hits.append([[v1_x, v1_y], [v2_x, v2_y]])

        return hits
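
To make the block arithmetic above concrete with illustrative values (not taken from train): a 64x64 training image, 8x8 HOG cells, and 2x2 cells per block give height_blocks = 64 // 8 - 2 + 1 = 7, so each candidate window spans 7x7 blocks, and with the default 0.5 overlap the step is 7 - int(7 * 0.5) = 4 blocks.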
Example #7
def main():

    parser = argparse.ArgumentParser()

    parser.add_argument("-t",
                        dest="txt",
                        help="The files that contain the training examples",
                        default=os.path.join(BASE_DIR, 'data/annotated.txt'))

    parser.add_argument("-n",
                        dest="length",
                        help="Number of data points to use",
                        default=-1)

    parser.add_argument("-f",
                        dest="folds",
                        help="Number of folds to partition data into",
                        default=10)

    parser.add_argument("-r",
                        dest="random",
                        help="Random shuffling of input data.",
                        action='store_true',
                        default=False)

    # Parse the command line arguments
    args = parser.parse_args()

    # Decode arguments
    txt_files = glob.glob(args.txt)
    length = int(args.length)
    num_folds = int(args.folds)

    # Get data from files
    if not txt_files:
        print('no training files :(')
        sys.exit(1)

    notes = []
    for txt in txt_files:
        note_tmp = Note()
        note_tmp.read(txt)
        notes.append(note_tmp)

    # List of all data
    X = []
    Y = []
    for n in notes:
        # Data points
        x = [it for it in zip(n.sid_list(), n.text_list())]
        X += x

        # Labels
        y = [it for it in n.label_list()]
        Y += y

    # Limit length
    X = X[:length]
    Y = Y[:length]

    # Build confusion matrix
    confusion = [[0 for i in labels_map] for j in labels_map]

    # Instantiate feat obj once (it'd really slow down CV to rebuild every time)
    feat_obj = FeaturesWrapper()

    # Extract features once
    feats = train.extract_features(X, feat_obj)
    data = list(zip(feats, Y))

    # For each held-out test set
    i = 1
    for training, testing in cv_partitions(data[:length],
                                           num_folds=num_folds,
                                           shuffle=args.random):

        # Users like to see progress
        print('Fold: %d of %d' % (i, num_folds))
        i += 1

        # Train on non-heldout data
        X_train = [d[0] for d in training]
        Y_train = [d[1] for d in training]
        vec, clf = train.train_vectorized(X_train,
                                          Y_train,
                                          model_path=None,
                                          grid=False)

        # Predict on held out
        X_test = [d[0] for d in testing]
        Y_test = [d[1] for d in testing]
        labels = predict.predict_vectorized(X_test, clf, vec)

        # Compute confusion matrix for held_out data
        testing_confusion = evaluate.create_confusion(labels, Y_test)
        confusion = add_matrix(confusion, testing_confusion)

    # Evaluate
    evaluate.display_confusion(confusion)
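
add_matrix is not defined in this snippet; from its use it presumably sums two confusion matrices element-wise. A minimal sketch under that assumption:

def add_matrix(a, b):
    # Element-wise sum of two equally sized confusion matrices (lists of lists)
    return [[x + y for x, y in zip(row_a, row_b)]
            for row_a, row_b in zip(a, b)]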
Example #8
print("[STATUS] Creating the classifier..")
clf_svm = LinearSVC(random_state=9)

# fit the training data and labels
print("[STATUS] Fitting data/label to model..")
clf_svm.fit(train_features, train_labels)

#test_path = "dataset/test"
#for file in glob.glob(test_path + "/*.jpg"):
# read the input image
image = cv2.imread('b.jpg')

# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# extract haralick texture from the image
features = extract_features(gray)

# evaluate the model and predict label
prediction = clf_svm.predict(features.reshape(1, -1))[0]

# show the label
cv2.putText(image, str(prediction), (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
            (0, 255, 255), 3)
print("Prediction - {}".format(prediction))

# display the output image
cv2.imshow("Test_Image", image)
cv2.waitKey(0)
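
extract_features is not shown in this snippet; for Haralick texture a common implementation uses mahotas, averaging the feature vectors of the four co-occurrence directions. A plausible sketch under that assumption:

import mahotas

def extract_features(gray):
    # 13 Haralick texture features, averaged over the 4 GLCM directions
    return mahotas.features.haralick(gray).mean(axis=0)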
Example #10
import pickle
import pandas as pd
from train import extract_features
from utils import real_to_cdf

if __name__ == '__main__':
    # Features indexed and sorted by study Id
    metadata = pd.read_csv('data/metadata_validate.csv')
    features = extract_features(metadata).set_index('Id').sort_index()

    # pickle needs binary mode in Python 3
    diastole_model = pickle.load(open('diastole.pkl', 'rb'))
    systole_model = pickle.load(open('systole.pkl', 'rb'))

    diastole = diastole_model.predict(features)
    systole = systole_model.predict(features)

    # Turn each point prediction into a (near-)step CDF over 600 volume bins
    systole_cdf = real_to_cdf(systole, sigma=1e-10)
    diastole_cdf = real_to_cdf(diastole, sigma=1e-10)

    submission = pd.DataFrame(columns=['Id'] + ['P%d' % i for i in range(600)])
    i = 0

    for id in range(features.shape[0]):
        diastole_id = '%d_Diastole' % features.index[id]
        systole_id = '%d_Systole' % features.index[id]
        submission.loc[i, :] = [diastole_id] + diastole_cdf[id, :].tolist()
        submission.loc[i+1, :] = [systole_id] + systole_cdf[id, :].tolist()
        i += 2

    submission.to_csv('submission.csv', index=False)
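
real_to_cdf comes from utils and is not shown; given sigma=1e-10, it presumably evaluates a Gaussian CDF at the 600 integer volumes, which degenerates to a step function at each predicted value. A sketch under that assumption:

import numpy as np
from scipy.stats import norm

def real_to_cdf(y, sigma=1e-10):
    # One row per prediction: CDF evaluated at volumes 0..599; the tiny sigma
    # makes each row an (almost) hard step at the predicted value
    return np.array([norm.cdf(np.arange(600), loc=v, scale=sigma) for v in y])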