コード例 #1
0
def vectorize(data_path, no_of_images, indicator):
    """
    Build the feature matrix for the images under ``data_path`` and pass it
    through the PCA pipeline. Generally called from the Model.py file.

    Image ``i`` is looked up at ``<data_path>/<i>/<i>.jpg`` for every ``i`` in
    ``range(no_of_images)``. Indices whose file does not exist are skipped
    silently, so the result may have fewer rows than ``no_of_images``.

    Input:
        data_path: directory holding one numbered sub-folder per image. A
            trailing path separator is optional.
        no_of_images: number of candidate indices (folders, not images, since
            folders may be empty) to scan.
        indicator: print a progress line every ``indicator`` indices. A falsy
            value (0 or None) disables progress output instead of failing.
    Output:
        (data, index_dict): ``data`` is the value returned by
        ``data_pca_pipeline`` for the stacked features; ``index_dict`` maps
        the row position of each loaded image to its original index ``i``.

    Side effects: the pre-PCA matrix and the index dictionary are handed to
    ``pickle_this`` under the names "Final_Feature_Matrix_For_PCA.pkl" and
    "Image_model_Index_dict.pkl", and the matrix and its shape are printed.
    """
    # Some images may be missing, so rows of the matrix do not line up with
    # the original indices; index_dict records row -> original index.
    index_dict = {}
    predata = []

    # range (rather than xrange) keeps the loop valid on Python 2 and 3.
    for i in range(no_of_images):
        # os.path.join tolerates data_path with or without a trailing slash.
        image_name = os.path.join(data_path, str(i), "{0}.jpg".format(i))

        if os.path.exists(image_name):
            features = preprocess(image_name)
            # len(predata) is the row this image is about to occupy.
            index_dict[len(predata)] = i
            predata.append(features)

        # Guard against indicator == 0, which would raise ZeroDivisionError.
        if indicator and i % indicator == 0:
            print("%d images finished" % i)

    # Stack the per-image features into a single array.
    # NOTE(review): assumes preprocess returns equally-sized features for
    # every image -- confirm against preprocess.
    data = np.array(predata)
    print(data)
    print(">>>>>>>>>>>>>> %s" % (data.shape,))

    # Persist the raw matrix and the index mapping before PCA transforms it.
    pickle_this(data, "Final_Feature_Matrix_For_PCA.pkl")
    pickle_this(index_dict, "Image_model_Index_dict.pkl")

    # Perform PCA
    data = data_pca_pipeline(data)

    return data, index_dict