Example 1
def main():
    print('Getting paths and labels for all train and test data')
    train_image_paths, test_image_paths, train_labels, test_labels = \
        get_image_paths(data_path, categories, num_train_per_cat)
    """ Step 1: Represent each image with the appropriate feature
    Each function to construct features should return and N x d matrix, where
    N is the number of paths passed to the function and d is the dimensionality
    of each image representation.
    """
    print('Using [%s] representation for images' % representation)

    if feature == 'HoG':
        if not os.path.exists('vocab_hog.npy'):
            print('No existing visual word vocabulary found. '
                  'Computing one from training images')

            vocab = build_vocabulary(train_image_paths, vocab_size, feature)
            np.save('vocab_hog', vocab)
    elif feature == 'SIFT':
        if not os.path.exists('vocab_sift.npy'):
            print('No existing visual word vocabulary found. '
                  'Computing one from training images')

            vocab = build_vocabulary(train_image_paths, vocab_size, feature)
            np.save('vocab_sift', vocab)

    pca_visualize(pca_out_dim, feature, vocab_size)

    if representation == 'bag of words':
        # The vocabulary was already built above and saved ('vocab_hog.npy'
        # or 'vocab_sift.npy') so it can be reused across runs.
        train_image_feats = get_bags_of_words(train_image_paths, feature)
        test_image_feats = get_bags_of_words(test_image_paths, feature)
    elif representation == 'spatial_pyramid_feats':
        train_image_feats = get_spatial_pyramid_feats(train_image_paths,
                                                      max_level, feature)
        test_image_feats = get_spatial_pyramid_feats(test_image_paths,
                                                     max_level, feature)
    else:
        raise KeyError('No such representation %s is defined' % representation)
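    # A quick sanity check on the Step 1 contract above (assumes the feature
    # functions return arrays or lists of length N):
    assert len(train_image_feats) == len(train_image_paths)
    assert len(test_image_feats) == len(test_image_paths)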
    """ Step 2: Classify each test image by training and using the appropriate classifier
    Each function to classify test features will return an N x 1 array, where
    N is the number of test cases and each entry is string indicating the predicted
    category for each test image. Each entry in 'predicted_categories' must be one of
    the 15 string in 'categories', 'train_labels', and 'test_labels'.
    """
    print('Using [%s] classifier to predict test set categories' % classifier)

    if classifier == 'SVM':
        predicted_categories = svm_classify(train_image_feats, train_labels,
                                            test_image_feats, kernel_type)
    else:
        raise KeyError('No such classifier %s is defined' % classifier)
    """ Step 3: Build a confusion matrix and score the recongnition system
    You do not need to code anything in this section.
    """
    create_results_webpage(train_image_paths, test_image_paths, train_labels,
                           test_labels, categories, abbr_categories,
                           predicted_categories)
Example 2
def main():
    print("Getting paths and labels for all train and test data")
    train_image_paths, test_image_paths, train_labels, test_labels = \
        get_image_paths(DATA_PATH, CATEGORIES, NUM_TRAIN_PER_CAT)

    if FEATURE == 'tiny_image':
        train_image_feats = get_tiny_images(train_image_paths)
        test_image_feats = get_tiny_images(test_image_paths)

    elif FEATURE == 'bag_of_sift':
        if not os.path.isfile('vocab.pkl'):
            print('No existing visual word vocabulary found. Computing one from training images\n')
            vocab_size = 400
            vocab = build_vocabulary(train_image_paths, vocab_size)
            with open('vocab.pkl', 'wb') as handle:
                pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)

        if not os.path.isfile('train_image_feats.pkl'):
            train_image_feats = get_bags_of_sifts(train_image_paths)
            with open('train_image_feats.pkl', 'wb') as handle:
                pickle.dump(train_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('train_image_feats.pkl', 'rb') as handle:
                train_image_feats = pickle.load(handle)

        if not os.path.isfile('test_image_feats.pkl'):
            test_image_feats = get_bags_of_sifts(test_image_paths)
            with open('test_image_feats.pkl', 'wb') as handle:
                pickle.dump(test_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('test_image_feats.pkl', 'rb') as handle:
                test_image_feats = pickle.load(handle)
    else:
        raise NameError('Unknown feature type')


    if CLASSIFIER == 'nearest_neighbor':
        predicted_categories = nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats)

    elif CLASSIFIER == 'support_vector_machine':
        predicted_categories = svm_classify(train_image_feats, train_labels, test_image_feats)

    else:
        raise NameError('Unknown classifier type')

    num_correct = len([1 for t, p in zip(test_labels, predicted_categories) if t == p])
    accuracy = float(num_correct) / float(len(test_labels))
    print("Accuracy = ", accuracy)
    test_labels_ids = [CATE2ID[x] for x in test_labels]
    predicted_categories_ids = [CATE2ID[x] for x in predicted_categories]
    train_labels_ids = [CATE2ID[x] for x in train_labels]

    build_confusion_mtx(test_labels_ids, predicted_categories_ids, ABBR_CATEGORIES)
    visualize(CATEGORIES, test_image_paths, test_labels_ids, predicted_categories_ids, train_image_paths, train_labels_ids)
Example 3
def main():
    # root directory of all data
    data_path = '../data'
    # directory of positive training examples. 36x36 head crops
    train_path_pos = os.path.join(data_path, 'caltech_faces/Caltech_CropFaces')
    # we can mine random or hard negatives from here
    non_face_scn_path = os.path.join(data_path, 'train_non_face_scenes')
    # CMU+MIT test scenes
    test_scn_path = os.path.join(data_path, 'test_scenes/test_jpg')
    # the ground truth face locations in the test set
    label_path = os.path.join(data_path, 'test_scenes/ground_truth_bboxes.txt')
    # directory for saving figure
    fig_path = 'visualizations'
    if not os.path.exists(fig_path):
        os.mkdir(fig_path)

    # The faces are 36x36 pixels, which works fine as a template size. You could
    # add other fields to this dict if you want to modify HoG default
    # parameters such as the number of orientations, but that does not help
    # performance in our limited tests.
    feature_params = {'template_size': 36, 'hog_cell_size': 6}
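    # A quick, hedged sanity check on the descriptor length these parameters
    # imply (assumes a Felzenszwalb/vlfeat-style HoG with 31 values per cell;
    # the actual feature code may differ):
    cells_per_side = feature_params['template_size'] // feature_params['hog_cell_size']
    expected_hog_dim = cells_per_side ** 2 * 31  # (36 // 6)**2 * 31 = 1116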

    ## Step 1. Load positive training crops and random negative examples
    # YOU CODE 'get_positive_features' and 'get_random_negative_features'
    features_pos = get_positive_features(train_path_pos, feature_params)
    # Higher values work strictly better, but you should start with 10000.
    num_negative_examples = 10000
    features_neg, neg_examples = get_random_negative_features(
        non_face_scn_path, feature_params, num_negative_examples)

    ## Step 2. Train classifier
    features_total = np.concatenate([features_pos, features_neg], axis=0)
    labels = np.concatenate([
        np.ones((features_pos.shape[0], 1)),
        -np.ones((features_neg.shape[0], 1)),
    ], axis=0)

    model = svm_classify(features_total, labels)

    ## Step 3. Examine learned classifier
    # You don't need to modify anything in this section. The section first
    # evaluates _training_ error, which isn't ultimately what we care about,
    # but it is a good sanity check. Your training error should be very low.
    print('Initial classifier performance on train data:')
    confidences = model.decision_function(features_total)
    label_vector = labels
    tp_rate, fp_rate, tn_rate, fn_rate = report_accuracy(
        confidences, label_vector)

    # Visualize how well separated the positive and negative examples are at
    # training time. Sometimes this can identify odd biases in your training
    # data, especially if you're using hard negative mining. This
    # visualization won't be very meaningful with the placeholder starter code.
    non_face_confs = confidences[label_vector.ravel() < 0]
    face_confs = confidences[label_vector.ravel() > 0]
    fig2 = plt.figure(2)
    # plt.hold() was removed in Matplotlib 3.0; overplotting on the same axes
    # is now the default, so no hold() call is needed.
    plt.plot(np.arange(non_face_confs.size),
             np.sort(non_face_confs),
             color='g')
    plt.plot(np.arange(face_confs.size), np.sort(face_confs), color='r')
    plt.plot([0, non_face_confs.size], [0, 0], color='b')

    ## Step 4. (optional) Mine hard negatives
    # Mining hard negatives is extra credit. You can get very good performance
    # by using random negatives, so hard negative mining is somewhat
    # unnecessary for face detection. If you implement hard negative mining,
    # you probably want to modify 'run_detector', run the detector on the
    # images in 'non_face_scn_path', and keep all of the features above some
    # confidence level.
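    # A minimal sketch of that loop (hedged -- 'extract_hog_at_boxes' is a
    # hypothetical helper, not part of the starter code):
    #
    #   hn_bboxes, hn_confs, hn_ids = run_detector(non_face_scn_path, model,
    #                                              feature_params)
    #   # Every detection in these scenes is a false positive; extract HoG
    #   # features at the detected boxes and append them to the negatives.
    #   hard_neg_feats = extract_hog_at_boxes(   # hypothetical helper
    #       hn_bboxes, hn_ids, non_face_scn_path, feature_params)
    #   features_neg = np.concatenate([features_neg, hard_neg_feats], axis=0)
    #   # ...then rebuild features_total/labels and retrain the SVM as above.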

    # hard negative mining

    # hard positive mining

    # training with hard examples

    # estimate again after hard example mining

    # visualize

    ## Step 5. Run detector on test set.
    # YOU CODE 'run_detector'. Make sure the outputs are properly structured!
    # They will be interpreted in Step 6 to evaluate and visualize your
    # results. See the run_detector starter code for more details.
    bboxes, confidences, image_ids = run_detector(test_scn_path, model,
                                                  feature_params)

    # run_detector will have (at least) two parameters which can heavily
    # influence performance -- how much to rescale each step of your multiscale
    # detector, and the threshold for a detection. If your recall rate is low
    # and your detector still has high precision at its highest recall point,
    # you can improve your average precision by reducing the threshold for a
    # positive detection.
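    # For reference, a common shape for those two knobs (assumed values, not
    # the starter defaults):
    #
    #   scale_factor = 0.9                 # rescale step between pyramid levels
    #   scales = [scale_factor ** i for i in range(30)]
    #   detection_threshold = -0.5         # lower -> more recall, less precision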

    ## Step 6. Evaluate and Visualize detections
    # These functions require ground truth annotations, and thus can only be
    # run on the CMU+MIT face test set. Use visualize_detections_by_image_no_gt
    # for testing on extra images (it is commented out below).

    # Don't modify anything in 'evaluate_detections'
    gt_ids, gt_bboxes, gt_isclaimed, tp, fp, duplicate_detections = \
        evaluate_detections(bboxes, confidences, image_ids, label_path, fig_path)

    visualize_detections_by_image(bboxes, confidences, image_ids, tp, fp,
                                  test_scn_path, label_path, fig_path)
Example 4
def main():
    # This function returns arrays containing the file path for each train
    # and test image, as well as arrays with the label of each train and
    # test image. By default all four of these arrays will have 1500 entries,
    # each a string.
    print("Getting paths and labels for all train and test data")
    train_image_paths, test_image_paths, train_labels, test_labels = \
        get_image_paths(DATA_PATH, CATEGORIES, NUM_TRAIN_PER_CAT)

    # TODO Step 1:
    # Represent each image with the appropriate feature
    # Each function to construct features should return an N x d matrix, where
    # N is the number of paths passed to the function and d is the
    # dimensionality of each image representation. See the starter code for
    # each function for more details.

    if FEATURE == 'tiny_image':
        # YOU CODE get_tiny_images.py
        train_image_feats = get_tiny_images(train_image_paths)
        test_image_feats = get_tiny_images(test_image_paths)

    elif FEATURE == 'bag_of_sift':
        # YOU CODE build_vocabulary.py
        if not os.path.isfile('vocab.pkl'):
            print(
                'No existing visual word vocabulary found. Computing one from training images\n'
            )
            vocab_size = 1000  # Vocab size is up to you. Larger values work better (to a point) but are slower to compute.
            vocab = build_vocabulary(train_image_paths, vocab_size)
            with open('vocab.pkl', 'wb') as handle:
                pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)

        if not os.path.isfile('train_image_feats.pkl'):
            # YOU CODE get_bags_of_sifts.py
            train_image_feats = get_bags_of_sifts(train_image_paths)
            with open('train_image_feats.pkl', 'wb') as handle:
                pickle.dump(train_image_feats,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('train_image_feats.pkl', 'rb') as handle:
                train_image_feats = pickle.load(handle)

        if not os.path.isfile('test_image_feats.pkl'):
            test_image_feats = get_bags_of_sifts(test_image_paths)
            with open('test_image_feats.pkl', 'wb') as handle:
                pickle.dump(test_image_feats,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('test_image_feats.pkl', 'rb') as handle:
                test_image_feats = pickle.load(handle)
    elif FEATURE == 'dumy_feature':
        train_image_feats = []
        test_image_feats = []
    else:
        raise NameError('Unknown feature type')

    # TODO Step 2:
    # Classify each test image by training and using the appropriate classifier
    # Each function to classify test features will return an N x 1 array,
    # where N is the number of test cases and each entry is a string indicating
    # the predicted category for each test image. Each entry in
    # 'predicted_categories' must be one of the 15 strings in 'categories',
    # 'train_labels', and 'test_labels'.

    if CLASSIFIER == 'nearest_neighbor':
        # YOU CODE nearest_neighbor_classify.py
        predicted_categories = nearest_neighbor_classify(
            train_image_feats, train_labels, test_image_feats)

    elif CLASSIFIER == 'support_vector_machine':
        # YOU CODE svm_classify.py
        predicted_categories = svm_classify(train_image_feats, train_labels,
                                            test_image_feats)

    elif CLASSIFIER == 'dumy_classifier':
        # The dummy classifier simply predicts a random category for
        # every test case
        predicted_categories = test_labels[:]
        shuffle(predicted_categories)
    else:
        raise NameError('Unknown classifier type')

    num_correct = len([1 for t, p in zip(test_labels, predicted_categories) if t == p])
    accuracy = float(num_correct) / float(len(test_labels))
    print("Accuracy = ", accuracy)
    test_labels_ids = [CATE2ID[x] for x in test_labels]
    predicted_categories_ids = [CATE2ID[x] for x in predicted_categories]
    train_labels_ids = [CATE2ID[x] for x in train_labels]

    # Step 3: Build a confusion matrix and score the recognition system
    # You do not need to code anything in this section.

    build_confusion_mtx(test_labels_ids, predicted_categories_ids,
                        ABBR_CATEGORIES)
    visualize(CATEGORIES, test_image_paths, test_labels_ids,
              predicted_categories_ids, train_image_paths, train_labels_ids)
def projSceneRecBoW(feature='placeholder', classifier='placeholder'):
    '''
    For this project, you will need to report performance for three
    combinations of features / classifiers. We recommend that you code them in
    this order:
        1) Tiny image features and nearest neighbor classifier
        2) Bag of word features and nearest neighbor classifier
        3) Bag of word features and linear SVM classifier
    The starter code is initialized to 'placeholder' just so that the starter
    code does not crash when run unmodified and you can get a preview of how
    results are presented.

    Interpreting your performance with 100 training examples per category:
     accuracy  =   0 -> Something is broken.
     accuracy ~= .07 -> Your performance is equal to chance.
                        Something is broken or you ran the starter code unchanged.
     accuracy ~= .20 -> Rough performance with tiny images and nearest
                        neighbor classifier. Performance goes up a few
                        percentage points with K-NN instead of 1-NN.
     accuracy ~= .20 -> Rough performance with tiny images and linear SVM
                        classifier. Although the accuracy is about the same as
                        nearest neighbor, the confusion matrix is very different.
     accuracy ~= .40 -> Rough performance with bag of word and nearest
                        neighbor classifier. Can reach .60 with K-NN and
                        different distance metrics.
     accuracy ~= .50 -> You've gotten things roughly correct with bag of
                        word and a linear SVM classifier.
     accuracy >= .70 -> You've also tuned your parameters well. E.g. number
                        of clusters, SVM regularization, number of patches
                        sampled when building vocabulary, size and step for
                        dense features.
     accuracy >= .80 -> You've added in spatial information somehow or you've
                        added additional, complementary image features. This
                        represents the state of the art in Lazebnik et al. 2006.
     accuracy >= .85 -> You've done extremely well. This is the state of the
                        art in the 2010 SUN database paper from fusing many
                        features. Don't trust this number unless you actually
                        measure many random splits.
     accuracy >= .90 -> You used modern deep features trained on much larger
                        image databases.
     accuracy >= .96 -> You can beat a human at this task. This isn't a
                        realistic number. Some accuracy calculation is broken
                        or your classifier is cheating and seeing the test
                        labels.
    '''

    # Step 0: Set up parameters, category list, and image paths.
    FEATURE = feature
    CLASSIFIER = classifier

    # This is the path the script will look at to load images from.
    data_path = './data/'

    # This is the list of categories / directories to use. The categories are
    # somewhat sorted by similarity so that the confusion matrix looks more
    # structured (indoor and then urban and then rural).
    categories = [
        'Kitchen', 'Store', 'Bedroom', 'LivingRoom', 'Office', 'Industrial',
        'Suburb', 'InsideCity', 'TallBuilding', 'Street', 'Highway',
        'OpenCountry', 'Coast', 'Mountain', 'Forest'
    ]

    # This list of shortened category names is used later for visualization.
    abbr_categories = [
        'Kit', 'Sto', 'Bed', 'Liv', 'Off', 'Ind', 'Sub', 'Cty', 'Bld', 'St',
        'HW', 'OC', 'Cst', 'Mnt', 'For'
    ]

    # Number of training examples per category to use. Max is 100. For
    # simplicity, we assume this is the number of test cases per category as
    # well.
    num_train_per_cat = 100

    # This function returns string arrays containing the file path for each train
    # and test image, as well as string arrays with the label of each train and
    # test image. By default all four of these arrays will be 1500x1 where each
    # entry is a string.
    print('Getting paths and labels for all train and test data.')
    train_image_paths, test_image_paths, train_labels, test_labels = \
        get_image_paths(data_path, categories, num_train_per_cat)
    #   train_image_paths  1500x1   list
    #   test_image_paths   1500x1   list
    #   train_labels       1500x1   list
    #   test_labels        1500x1   list

    ############################################################################
    ## Step 1: Represent each image with the appropriate feature
    # Each function to construct features should return an N x d matrix, where
    # N is the number of paths passed to the function and d is the
    # dimensionality of each image representation. See the starter code for
    # each function for more details.
    ############################################################################

    print('Using %s representation for images.' % FEATURE)

    if FEATURE.lower() == 'tiny_image':
        print('Loading tiny images...')
        # YOU CODE get_tiny_images (see student.py)
        train_image_feats = get_tiny_images(train_image_paths)
        test_image_feats = get_tiny_images(test_image_paths)
        print('Tiny images loaded.')

    elif FEATURE.lower() == 'bag_of_words':
        # Because building the vocabulary takes a long time, we save the generated
        # vocab to a file and re-load it each time to make testing faster. If
        # you need to re-generate the vocab (for example if you change its size
        # or the length of your feature vectors), simply delete the vocab.npy
        # file and re-run main.py
        if not os.path.isfile('vocab.npy'):
            print(
                'No existing visual word vocabulary found. Computing one from training images.'
            )

            #Larger values will work better (to a point), but are slower to compute
            vocab_size = 200

            # YOU CODE build_vocabulary (see student.py)
            vocab = build_vocabulary(train_image_paths, vocab_size)
            np.save('vocab.npy', vocab)
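
            # For orientation, a minimal build_vocabulary sketch (an
            # assumption -- the version you write in student.py may sample
            # descriptors differently; 'dense_features' is a hypothetical
            # helper returning one image's local descriptors):
            #
            #   from sklearn.cluster import MiniBatchKMeans
            #
            #   def build_vocabulary_sketch(image_paths, vocab_size):
            #       descs = np.vstack([dense_features(p) for p in image_paths])
            #       km = MiniBatchKMeans(n_clusters=vocab_size).fit(descs)
            #       return km.cluster_centers_  # vocab_size x d centers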

        # YOU CODE get_bags_of_words (see student.py)
        train_image_feats = get_bags_of_words(train_image_paths)
        # You may want to write out train_image_feats here as a *.npy file and
        # load it later if you want to test your classifiers without
        # re-computing features (a minimal sketch follows below).

        test_image_feats = get_bags_of_words(test_image_paths)
        # Same goes here for test image features.
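        # A minimal caching sketch (hypothetical file names, mirroring the
        # vocab caching above):
        #
        #   if os.path.isfile('train_image_feats.npy'):
        #       train_image_feats = np.load('train_image_feats.npy')
        #   else:
        #       train_image_feats = get_bags_of_words(train_image_paths)
        #       np.save('train_image_feats.npy', train_image_feats)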

    elif FEATURE.lower() == 'placeholder':
        train_image_feats = []
        test_image_feats = []

    else:
        raise ValueError('Unknown feature type!')

    ############################################################################
    ## Step 2: Classify each test image by training and using the appropriate classifier
    # Each function to classify test features will return an N x 1 string array,
    # where N is the number of test cases and each entry is a string indicating
    # the predicted category for each test image. Each entry in
    # 'predicted_categories' must be one of the 15 strings in 'categories',
    # 'train_labels', and 'test_labels'. See the starter code for each function
    # for more details.
    ############################################################################

    print('Using %s classifier to predict test set categories.' % CLASSIFIER)

    if CLASSIFIER.lower() == 'nearest_neighbor':
        # YOU CODE nearest_neighbor_classify (see student.py)
        predicted_categories = nearest_neighbor_classify(
            train_image_feats, train_labels, test_image_feats)

    elif CLASSIFIER.lower() == 'support_vector_machine':
        # YOU CODE svm_classify (see student.py)
        predicted_categories = svm_classify(train_image_feats, train_labels,
                                            test_image_feats)

    elif CLASSIFIER.lower() == 'placeholder':
        # The placeholder classifier simply predicts a random category for every test case.
        random_permutation = np.random.permutation(len(test_labels))
        predicted_categories = [test_labels[i] for i in random_permutation]

    else:
        raise ValueError('Unknown classifier type')

    ############################################################################
    ## Step 3: Build a confusion matrix and score the recognition system
    # You do not need to code anything in this section.

    # If we wanted to evaluate our recognition method properly we would train
    # and test on many random splits of the data. You are not required to do so
    # for this project.

    # This function will recreate results_webpage/index.html and various image
    # thumbnails each time it is called. View the webpage to help interpret
    # your classifier performance. Where is it making mistakes? Are the
    # confusions reasonable?
    ############################################################################

    create_results_webpage(train_image_paths,
                           test_image_paths,
                           train_labels,
                           test_labels,
                           categories,
                           abbr_categories,
                           predicted_categories)
Example 6
# http://www.mathworks.com/help/matlab/matlab_prog/run-sections-of-programs.html

## Step 2: Classify each test image by training and using the appropriate classifier
# Each function to classify test features will return a length-N list,
# where N is the number of test cases and each entry is a string indicating
# the predicted category for each test image. Each entry in
# 'predicted_categories' must be one of the 15 strings in 'categories',
# 'train_labels', and 'test_labels'. See the starter code for each function
# for more details.
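
# For orientation, a minimal 1-NN sketch (an assumption -- the real
# nearest_neighbor_classify may use another distance metric or k > 1):
#
#   import numpy as np
#   from scipy.spatial.distance import cdist
#
#   def nn_classify_sketch(train_feats, train_labels, test_feats):
#       dists = cdist(test_feats, train_feats)  # N_test x N_train distances
#       nearest = np.argmin(dists, axis=1)      # index of each test's 1-NN
#       return [train_labels[i] for i in nearest]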

print('Using %s classifier to predict test set categories\n' % CLASSIFIER)

if CLASSIFIER.lower() == "nearest neighbor":
    predicted_categories = nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats)
elif CLASSIFIER.lower() == "support vector machine":
    predicted_categories = svm_classify(train_image_feats, train_labels, test_image_feats)
elif CLASSIFIER.lower() == "placeholder":
    # random_permutation = np.random.permutation(len(test_labels))
    predicted_categories = copy.deepcopy(test_labels)
    random.shuffle(predicted_categories)
else:
    raise Exception("Unknown classifier type")

## Step 3: Build a confusion matrix and score the recognition system
# You do not need to code anything in this section.

# If we wanted to evaluate our recognition method properly we would train
# and test on many random splits of the data. You are not required to do so
# for this project.

# This function will recreate results_webpage/index.html and various image
# thumbnails each time it is called.
Example 7
def main():
    # This function returns arrays containing the file path for each train
    # and test image, as well as arrays with the label of each train and
    # test image. By default all four of these arrays will have 1500 entries,
    # each a string.
    print("Getting paths and labels for all train and test data")
    train_image_paths, test_image_paths, train_labels, test_labels = \
        get_image_paths(DATA_PATH, CATEGORIES, NUM_TRAIN_PER_CAT)

    # TODO Step 1:
    # Represent each image with the appropriate feature
    # Each function to construct features should return an N x d matrix, where
    # N is the number of paths passed to the function and d is the 
    # dimensionality of each image representation. See the starter code for
    # each function for more details.

    if FEATURE == 'tiny_image':
        # YOU CODE get_tiny_images.py
        # Cache the tiny-image features so repeated runs can skip recomputing
        # them. The train features go to 'tiny_train.pkl' and the test
        # features to 'tiny_test.pkl', matching the load branch below.
        if not os.path.isfile('tiny_test.pkl'):
            print('No cached tiny image features found. Computing them from images.')
            train_image_feats = get_tiny_images(train_image_paths)
            with open('tiny_train.pkl', 'wb') as handle:
                pickle.dump(train_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
            test_image_feats = get_tiny_images(test_image_paths)
            with open('tiny_test.pkl', 'wb') as handle:
                pickle.dump(test_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('tiny_train.pkl', 'rb') as handle:
                train_image_feats = pickle.load(handle)
            with open('tiny_test.pkl', 'rb') as handle:
                test_image_feats = pickle.load(handle)

    elif FEATURE == 'bag_of_sift':
        # YOU CODE build_vocabulary.py
        if not os.path.isfile('vocab.pkl'):
            print('No existing visual word vocabulary found. Computing one from training images\n')
            vocab_size = 400  # Vocab size is up to you. Larger values work better (to a point) but are slower to compute.
            vocab = build_vocabulary(train_image_paths, vocab_size)
            with open('vocab.pkl', 'wb') as handle:
                pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)

        if not os.path.isfile('train_image_feats.pkl'):
            # YOU CODE get_bags_of_sifts.py
            train_image_feats = get_bags_of_sifts(train_image_paths)
            with open('train_image_feats.pkl', 'wb') as handle:
                pickle.dump(train_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('train_image_feats.pkl', 'rb') as handle:
                train_image_feats = pickle.load(handle)

        if not os.path.isfile('test_image_feats.pkl'):
            test_image_feats = get_bags_of_sifts(test_image_paths)
            with open('test_image_feats.pkl', 'wb') as handle:
                pickle.dump(test_image_feats, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open('test_image_feats.pkl', 'rb') as handle:
                test_image_feats = pickle.load(handle)
    elif FEATURE == 'dumy_feature':
        train_image_feats = []
        test_image_feats = []
    else:
        raise NameError('Unknown feature type')
    # Initialize a uniform sampling probability over all training examples,
    # plus its running cumulative sum (used for weighted sampling).
    possibility = [1.0 / num_all] * num_all
    sum_p = [float(i) / num_all for i in range(1, num_all)]
    testing = []
    for i in range(num_trial):
        # Choose a training subset (100 examples) according to the current
        # sampling probabilities.
        training_id = choose_training_id(sum_p)
        subset_feats = [train_image_feats[idx] for idx in training_id]
        subset_labels = [train_labels[idx] for idx in training_id]
        # Train on the subset and predict on the full training set; these
        # predictions drive the reweighting below.
        predicted_categories = svm_classify(subset_feats, subset_labels,
                                            train_image_feats)
        # Also predict on the test set, keeping each round's predictions.
        testing.append(svm_classify(subset_feats, subset_labels,
                                    test_image_feats))
        # Update the sampling probabilities from the training predictions
        # (see update_possibility).
        possibility, sum_p = update_possibility(possibility,
                                                predicted_categories,
                                                train_labels)
    accuracy = cal_result(testing, test_labels)
    # TODO Step 2: 
    # Classify each test image by training and using the appropriate classifier
    # Each function to classify test features will return an N x 1 array,
    # where N is the number of test cases and each entry is a string indicating
    # the predicted category for each test image. Each entry in
    # 'predicted_categories' must be one of the 15 strings in 'categories',
    # 'train_labels', and 'test_labels'.

    # if CLASSIFIER == 'nearest_neighbor':
    #     # YOU CODE nearest_neighbor_classify.py
    #     predicted_categories = nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats)

    # elif CLASSIFIER == 'support_vector_machine':
    #     # YOU CODE svm_classify.py
    #     print('training')
    #     predicted_categories = svm_classify(train_image_feats, train_labels, test_image_feats)

    # elif CLASSIFIER == 'dumy_classifier':
    #     # The dummy classifier simply predicts a random category for
    #     # every test case
    #     predicted_categories = test_labels[:]
    #     shuffle(predicted_categories)
    # else:
    #     raise NameError('Unknown classifier type')

    # accuracy = float(len([x for x in zip(test_labels,predicted_categories) if x[0]== x[1]]))/float(len(test_labels))
    print("Accuracy = ", accuracy)