def process_files(path, feature_model, dimension_reduction, filtered_image_ids=None):
    files = os.listdir(path)
    ids, x = [], []
    for file in files:
        if not filtered_image_ids or file.replace(".jpg", "") in filtered_image_ids:
            print("Reading file: {}".format(file))
            image = cv2.imread("{}{}".format(path, file))
            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            ids.append(file.replace(".jpg", ""))
            x.append(feature_descriptor)
    if DescriptorType(feature_model).check_sift():
        """
        For SIFT, we flatten the image descriptor array into an array of
        keypoints. We return an extra list (pos) holding the number of
        keypoints for each image, so that the per-image feature descriptors
        (after dimensionality reduction) can be recovered correctly when
        inserting into the DB.
        """
        sift_x, pos = x[0], [x[0].shape[0]]
        for i in range(1, len(x)):
            pos.append(x[i].shape[0])
            sift_x = np.vstack((sift_x, x[i]))
        return sift_x, ids, pos
    # For all other feature descriptors, return only the ids and descriptor array.
    return np.array(x), ids
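# A minimal, self-contained sketch of the SIFT bookkeeping above: stack
# variable-length per-image keypoint arrays, record each image's keypoint
# count in pos, and recover the per-image blocks with np.split. Synthetic
# data only; illustrative, not part of the original module.
import numpy as np

x = [np.random.rand(5, 128), np.random.rand(3, 128), np.random.rand(7, 128)]
pos = [desc.shape[0] for desc in x]          # keypoints per image
sift_x = np.vstack(x)                        # (15, 128) stacked keypoints
per_image = np.split(sift_x, np.cumsum(pos)[:-1])
print([block.shape for block in per_image])  # [(5, 128), (3, 128), (7, 128)]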
def __init__(self, img, x, y, w, h):
    self.id = uuid.uuid1()
    self.descriptor = Descriptor(img, x, y, w, h)
    self.position = Position(x, y, w, h)
    self.last_seen = time.time()
    # Random BGR color used when drawing this object's bounding box.
    self.color = (random.randint(0, 255), random.randint(0, 255),
                  random.randint(0, 255))
    self.is_followed = False
def main():
    image_id = input("Enter image ID: ")
    constants_dict = read_json()
    read_path = constants_dict["READ_PATH"]
    files = os.listdir(read_path)
    # Raises ValueError if the requested image is not in the directory.
    file = files[files.index("{}.jpg".format(image_id))]
    img = cv2.imread(read_path + file)
    desc = Descriptor(img)
    print(desc.sift())
    print(desc.lbp())
def process_files(path, feature_model, dimension_reduction):
    files = os.listdir(path)
    ids, x = [], []
    for file in files:
        print("Reading file: {}".format(file))
        image = cv2.imread("{}{}".format(path, file))
        feature_descriptor = Descriptor(
            image, feature_model, dimension_reduction).feature_descriptor
        ids.append(file.replace(".jpg", ""))
        x.append(feature_descriptor)
    return ids, np.array(x)
def loadClassifier(self, filepath=None, classifier_data=None):
    """
    Load a classifier trained by the functions in train.py. Either a dict
    (classifier_data) or a pickled file (filepath) may be supplied.
    """
    if filepath is not None:
        filepath = os.path.abspath(filepath)
        if not os.path.isfile(filepath):
            raise FileNotFoundError("File " + filepath + " does not exist.")
        with open(filepath, "rb") as f:
            classifier_data = pickle.load(f)
    if classifier_data is None:
        raise ValueError("Invalid classifier data supplied.")

    self.classifier = classifier_data["classifier"]
    self.scaler = classifier_data["scaler"]
    self.cv_color_const = classifier_data["cv_color_const"]
    self.channels = classifier_data["channels"]

    # Simply loading the descriptor from the dict with
    #   self.descriptor = classifier_data["descriptor"]
    # produces an error. Thus, we instantiate a new descriptor object
    # using the same parameters on which the classifier was trained.
    self.descriptor = Descriptor(
        hog_features=classifier_data["hog_features"],
        hist_features=classifier_data["hist_features"],
        spatial_features=classifier_data["spatial_features"],
        hog_lib=classifier_data["hog_lib"],
        size=classifier_data["size"],
        hog_bins=classifier_data["hog_bins"],
        pix_per_cell=classifier_data["pix_per_cell"],
        cells_per_block=classifier_data["cells_per_block"],
        block_stride=classifier_data["block_stride"],
        block_norm=classifier_data["block_norm"],
        transform_sqrt=classifier_data["transform_sqrt"],
        signed_gradient=classifier_data["signed_gradient"],
        hist_bins=classifier_data["hist_bins"],
        spatial_size=classifier_data["spatial_size"])
    return self
def load_model(self, file_path):
    file_path = os.path.abspath(file_path)
    if not os.path.isfile(file_path):
        raise FileNotFoundError("File " + file_path + " not found.")
    with open(file_path, "rb") as f:
        model_data = pickle.load(f)

    self.model = model_data["model"]
    self.scaler = model_data["scaler"]
    self.color_const = model_data["color_const"]
    self.channels = model_data["channels"]

    # Re-instantiate the descriptor with the parameters the model was trained on.
    self.descriptor = Descriptor(
        hog=model_data["hog"],
        histogram=model_data["histogram"],
        spatial=model_data["spatial"],
        hog_size=model_data["hog_size"],
        hog_bins=model_data["hog_bins"],
        cell_size=model_data["cell_size"],
        cells_per_block=model_data["cells_per_block"],
        histogram_bins=model_data["histogram_bins"],
        spatial_size=model_data["spatial_size"])
    return self
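# A minimal sketch (assumed, not from the original source) of the save side
# that load_model() above expects: the trained model, the fitted scaler, and
# the descriptor parameters in one dict, pickled to disk. save_model and its
# **params are hypothetical; key names mirror those read by load_model.
import pickle

def save_model(file_path, model, scaler, **params):
    # params: hog, histogram, spatial, hog_size, hog_bins, cell_size,
    # cells_per_block, histogram_bins, spatial_size, color_const, channels.
    model_data = {"model": model, "scaler": scaler}
    model_data.update(params)
    with open(file_path, "wb") as f:
        pickle.dump(model_data, f)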
def process_files(positive_dir, negative_dir, color_space="bgr",
                  channels=[0, 1, 2], hog=False, histogram=False,
                  spatial=False, hog_size=(64, 64), hog_bins=9,
                  cell_size=(8, 8), cells_per_block=(2, 2),
                  histogram_bins=16, spatial_size=(16, 16)):
    # Take care of training files.
    positive_dir = os.path.abspath(positive_dir)
    negative_dir = os.path.abspath(negative_dir)
    if not os.path.isdir(positive_dir):
        raise FileNotFoundError("Directory " + positive_dir + " not found.")
    if not os.path.isdir(negative_dir):
        raise FileNotFoundError("Directory " + negative_dir + " not found.")

    positive_files = [os.path.join(positive_dir, file)
                      for file in os.listdir(positive_dir)
                      if os.path.isfile(os.path.join(positive_dir, file))]
    negative_files = [os.path.join(negative_dir, file)
                      for file in os.listdir(negative_dir)
                      if os.path.isfile(os.path.join(negative_dir, file))]
    print("{} positive files and {} negative files found.\n".format(
        len(positive_files), len(negative_files)))

    # Color space info.
    color_space = color_space.lower()
    if color_space == "hls":
        color_const = cv2.COLOR_BGR2HLS
    elif color_space == "hsv":
        color_const = cv2.COLOR_BGR2HSV
    elif color_space == "luv":
        color_const = cv2.COLOR_BGR2Luv
    elif color_space in ("ycrcb", "ycc"):
        color_const = cv2.COLOR_BGR2YCrCb
    elif color_space == "yuv":
        color_const = cv2.COLOR_BGR2YUV
    else:
        color_const = -1

    # Store feature vectors for both positive and negative files.
    positive_features = []
    negative_features = []
    time_begin = time.time()

    # Create feature descriptor object.
    descriptor = Descriptor(hog=hog, histogram=histogram, spatial=spatial,
                            hog_size=hog_size, hog_bins=hog_bins,
                            cell_size=cell_size,
                            cells_per_block=cells_per_block,
                            histogram_bins=histogram_bins,
                            spatial_size=spatial_size)

    # Extract features from each file.
    for i, file_path in enumerate(positive_files + negative_files):
        image = cv2.imread(file_path)
        if image is None:
            continue
        if color_const > -1:
            image = cv2.cvtColor(image, color_const)
        feature_vector = descriptor.get_features(image)
        if i < len(positive_files):
            positive_features.append(feature_vector)
        else:
            negative_features.append(feature_vector)

    print("Feature extraction completed in {:.1f} seconds\n".format(
        time.time() - time_begin))
    num_features = len(positive_features[0])

    # Scale features.
    scaler = StandardScaler().fit(positive_features + negative_features)
    positive_features = scaler.transform(positive_features)
    negative_features = scaler.transform(negative_features)

    # Shuffle the feature vectors and split them 75/20/5 into training,
    # cross-validation, and test sets. np.random.shuffle is used because
    # random.shuffle can corrupt rows of a 2D NumPy array.
    np.random.shuffle(positive_features)
    np.random.shuffle(negative_features)

    num_positive_train = int(round(0.75 * len(positive_features)))
    num_negative_train = int(round(0.75 * len(negative_features)))
    num_positive_val = int(round(0.2 * len(positive_features)))
    num_negative_val = int(round(0.2 * len(negative_features)))

    positive_train = positive_features[:num_positive_train]
    negative_train = negative_features[:num_negative_train]
    positive_val = positive_features[num_positive_train:(num_positive_train + num_positive_val)]
    negative_val = negative_features[num_negative_train:(num_negative_train + num_negative_val)]
    positive_test = positive_features[(num_positive_train + num_positive_val):]
    negative_test = negative_features[(num_negative_train + num_negative_val):]

    print("Randomized images into training, cross-validation, and test sets.\n")
    print("{} images in positive training set.".format(len(positive_train)))
    print("{} images in positive cross-validation set.".format(len(positive_val)))
    print("{} images in positive test set.".format(len(positive_test)))
    print("{} total positive images.\n".format(
        len(positive_train) + len(positive_val) + len(positive_test)))
    print("{} images in negative training set.".format(len(negative_train)))
    print("{} images in negative cross-validation set.".format(len(negative_val)))
    print("{} images in negative test set.".format(len(negative_test)))
    print("{} total negative images.\n".format(
        len(negative_train) + len(negative_val) + len(negative_test)))

    # Store data and parameters in a dictionary.
    feature_data = {
        "positive_train": positive_train,
        "negative_train": negative_train,
        "positive_val": positive_val,
        "negative_val": negative_val,
        "positive_test": positive_test,
        "negative_test": negative_test,
        "scaler": scaler,
        "hog": hog,
        "histogram": histogram,
        "spatial": spatial,
        "color_space": color_space,
        "color_const": color_const,
        "channels": channels,
        "hog_size": hog_size,
        "hog_bins": hog_bins,
        "cell_size": cell_size,
        "cells_per_block": cells_per_block,
        "histogram_bins": histogram_bins,
        "spatial_size": spatial_size,
        "num_features": num_features
    }
    return feature_data
from os import path
import sys

import cv2

# Project-local modules; import paths are assumed, since the original
# snippet did not show them.
import slidingwindow
from descriptor import Descriptor
from svm import SVM

print(sys.argv)

train = False
count = 1
name_video = '../sample-extractor/cutvideo.mp4'
if len(sys.argv) >= 2:
    if sys.argv[1] == "train":
        train = True
    else:
        name_video = sys.argv[1]

if not train:
    print("- LOADING EXISTING SVM MODEL -")
    descriptor = Descriptor('')
    svm = SVM()
    svm.load("../models/trained_model.xml")

    vidcap = cv2.VideoCapture(name_video)
    success, image = vidcap.read()
    while success:
        for cut in slidingwindow.cut_frame(image):
            # Only classify windows of the expected 48x128 size.
            if cut[0].shape[0] == 128 and cut[0].shape[1] == 48:
                description = descriptor.describeImage(cut[0])
                result = int(list(svm.test(description))[0][0])
                print(result)
                if result == 1:
                    cv2.imwrite(
                        "../figs/official/positive_" + str(count) + ".jpg",
                        cut[0])
                    count += 1
        # Advance to the next frame (the flattened snippet ended before
        # this line; without it the loop re-processes the first frame).
        success, image = vidcap.read()
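# A minimal sketch (an assumption, not the project's slidingwindow module)
# of what cut_frame could look like: yield fixed-size 48x128 windows and
# their top-left coordinates while stepping across the frame.
import numpy as np

def cut_frame(image, win_w=48, win_h=128, step=16):
    h, w = image.shape[:2]
    for y in range(0, h - win_h + 1, step):
        for x in range(0, w - win_w + 1, step):
            yield image[y:y + win_h, x:x + win_w], (x, y)

frame = np.zeros((480, 640, 3), dtype=np.uint8)
print(sum(1 for _ in cut_frame(frame)))  # number of windows generated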
def processFiles(pos_dir, neg_dir, recurse=False, output_file=False,
                 output_filename=None, color_space="bgr", channels=[0, 1, 2],
                 hog_features=False, hist_features=False,
                 spatial_features=False, hog_lib="cv", size=(64, 64),
                 hog_bins=9, pix_per_cell=(8, 8), cells_per_block=(2, 2),
                 block_stride=None, block_norm="L1", transform_sqrt=True,
                 signed_gradient=False, hist_bins=16, spatial_size=(16, 16)):
    """
    Extract features from positive samples and negative samples. Store
    feature vectors in a dict and optionally save to pickle file.

    @param pos_dir (str): Path to directory containing positive samples.
    @param neg_dir (str): Path to directory containing negative samples.
    @param recurse (bool): Traverse directories recursively (else, top-level only).
    @param output_file (bool): Save processed samples to file.
    @param output_filename (str): Output file filename.
    @param color_space (str): Color space conversion.
    @param channels (list): Image channel indices to use.

    For remaining arguments, refer to Descriptor class:
    @see descriptor.Descriptor#__init__(...)

    @return feature_data (dict): Lists of sample features split into
        training, validation, test sets; scaler object; parameters used to
        construct descriptor and process images.

    NOTE: OpenCV HOGDescriptor currently only supports 1-channel and
    3-channel images, not 2-channel images.
    """
    if not (hog_features or hist_features or spatial_features):
        raise RuntimeError(
            "No features selected (set hog_features=True, "
            "hist_features=True, and/or spatial_features=True.)")

    pos_dir = os.path.abspath(pos_dir)
    neg_dir = os.path.abspath(neg_dir)
    if not os.path.isdir(pos_dir):
        raise FileNotFoundError("Directory " + pos_dir + " does not exist.")
    if not os.path.isdir(neg_dir):
        raise FileNotFoundError("Directory " + neg_dir + " does not exist.")

    print("Building file list...")
    if recurse:
        pos_files = [os.path.join(rootdir, file)
                     for rootdir, _, files in os.walk(pos_dir)
                     for file in files]
        neg_files = [os.path.join(rootdir, file)
                     for rootdir, _, files in os.walk(neg_dir)
                     for file in files]
    else:
        pos_files = [os.path.join(pos_dir, file)
                     for file in os.listdir(pos_dir)
                     if os.path.isfile(os.path.join(pos_dir, file))]
        neg_files = [os.path.join(neg_dir, file)
                     for file in os.listdir(neg_dir)
                     if os.path.isfile(os.path.join(neg_dir, file))]
    print("{} positive files and {} negative files found.\n".format(
        len(pos_files), len(neg_files)))

    # Get color space information.
    color_space = color_space.lower()
    if color_space == "gray":
        color_space_name = "grayscale"
        cv_color_const = cv2.COLOR_BGR2GRAY
        channels = [0]
    elif color_space == "hls":
        color_space_name = "HLS"
        cv_color_const = cv2.COLOR_BGR2HLS
    elif color_space == "hsv":
        color_space_name = "HSV"
        cv_color_const = cv2.COLOR_BGR2HSV
    elif color_space == "lab":
        color_space_name = "Lab"
        cv_color_const = cv2.COLOR_BGR2Lab
    elif color_space == "luv":
        color_space_name = "Luv"
        cv_color_const = cv2.COLOR_BGR2Luv
    elif color_space in ("ycrcb", "ycc"):
        color_space_name = "YCrCb"
        cv_color_const = cv2.COLOR_BGR2YCrCb
    elif color_space == "yuv":
        color_space_name = "YUV"
        cv_color_const = cv2.COLOR_BGR2YUV
    else:
        color_space_name = "BGR"
        cv_color_const = -1

    # Get names of desired features.
    features = [feature_name for feature_name, feature_bool
                in zip(["HOG", "color histogram", "spatial"],
                       [hog_features, hist_features, spatial_features])
                if feature_bool]
    feature_str = ", ".join(features)

    # Get information about channel indices.
    if len(channels) == 2 and hog_features and hog_lib == "cv":
        warnings.warn("OpenCV HOG does not support 2-channel images",
                      RuntimeWarning)
    channel_index_str = ", ".join(str(ch_index) for ch_index in channels)
    print("Converting images to " + color_space_name + " color space and "
          "extracting " + feature_str + " features from channel(s) "
          + channel_index_str + ".\n")

    # Store feature vectors for positive samples in pos_features and for
    # negative samples in neg_features.
    pos_features = []
    neg_features = []
    start_time = time.time()

    # Get feature descriptor object to call on each sample.
    descriptor = Descriptor(hog_features=hog_features,
                            hist_features=hist_features,
                            spatial_features=spatial_features,
                            hog_lib=hog_lib, size=size, hog_bins=hog_bins,
                            pix_per_cell=pix_per_cell,
                            cells_per_block=cells_per_block,
                            block_stride=block_stride, block_norm=block_norm,
                            transform_sqrt=transform_sqrt,
                            signed_gradient=signed_gradient,
                            hist_bins=hist_bins, spatial_size=spatial_size)

    # Iterate through files and extract features.
    for i, filepath in enumerate(pos_files + neg_files):
        image = cv2.imread(filepath)
        if cv_color_const > -1:
            image = cv2.cvtColor(image, cv_color_const)
        if len(image.shape) > 2:
            image = image[:, :, channels]

        feature_vector = descriptor.getFeatureVector(image)
        if i < len(pos_files):
            pos_features.append(feature_vector)
        else:
            neg_features.append(feature_vector)

    print("Features extracted from {} files in {:.1f} seconds\n".format(
        len(pos_features) + len(neg_features), time.time() - start_time))

    # Store the length of the feature vector produced by the descriptor.
    num_features = len(pos_features[0])

    # Instantiate scaler and scale features.
    scaler = StandardScaler()
    scaler.fit(np.concatenate((pos_features, neg_features), axis=0))
    pos_features = scaler.transform(pos_features)
    neg_features = scaler.transform(neg_features)

    # Randomize feature vectors and split 75/20/5 into training, validation,
    # and test sets. np.random.shuffle is used because random.shuffle can
    # corrupt rows of a 2D NumPy array.
    print("Shuffling samples into training, cross-validation, and test sets.\n")
    np.random.shuffle(pos_features)
    np.random.shuffle(neg_features)

    # pos_train/pos_val/pos_test and neg_train/neg_val/neg_test hold the
    # training, validation, and test sets of the positive and negative samples.
    pos_train, pos_val, pos_test = np.split(
        pos_features,
        [int(.75 * len(pos_features)), int(.95 * len(pos_features))])
    neg_train, neg_val, neg_test = np.split(
        neg_features,
        [int(.75 * len(neg_features)), int(.95 * len(neg_features))])

    # Store sample data and parameters in dict. The Descriptor object seems
    # to produce errors when unpickling and has been commented out below;
    # it will be re-instantiated by the Detector object later.
    feature_data = {
        "pos_train": pos_train,
        "neg_train": neg_train,
        "pos_val": pos_val,
        "neg_val": neg_val,
        "pos_test": pos_test,
        "neg_test": neg_test,
        # "descriptor": descriptor,
        "scaler": scaler,
        "hog_features": hog_features,
        "hist_features": hist_features,
        "spatial_features": spatial_features,
        "color_space": color_space,
        "cv_color_const": cv_color_const,
        "channels": channels,
        "hog_lib": hog_lib,
        "size": size,
        "hog_bins": hog_bins,
        "pix_per_cell": pix_per_cell,
        "cells_per_block": cells_per_block,
        "block_stride": block_stride,
        "block_norm": block_norm,
        "transform_sqrt": transform_sqrt,
        "signed_gradient": signed_gradient,
        "hist_bins": hist_bins,
        "spatial_size": spatial_size,
        "num_features": num_features
    }

    # Pickle to file if desired.
    if output_file:
        if output_filename is None:
            output_filename = (datetime.now().strftime("%Y%m%d%H%M")
                               + "_data.pkl")
        pickle.dump(feature_data, open(output_filename, "wb"))
        print("Sample and parameter data saved to {}\n".format(output_filename))

    return feature_data
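# Hypothetical usage sketch of processFiles above. The module name "train"
# and the directory paths are placeholder assumptions.
from train import processFiles

feature_data = processFiles("samples/pos", "samples/neg", recurse=True,
                            color_space="yuv", channels=[0, 1, 2],
                            hog_features=True, hist_features=True,
                            output_file=True)
print(feature_data["num_features"])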
import sys
import time
import argparse

import grpc
import movie_pb2
import movie_pb2_grpc
from descriptor import Descriptor

parser = argparse.ArgumentParser()
parser.add_argument("--model", help="the path of the model to save or load",
                    required=True)
parser.add_argument("--address",
                    help="the IP and port this service listens on",
                    default="[::]:5011")
parser.add_argument("--topk", help="top k", type=int, default=50)
args = parser.parse_args()

descriptor = Descriptor()
descriptor.load_model(args.model, "max")


class movieServicer(movie_pb2_grpc.FindMovieServiceServicer):

    def FindMovies(self, request, context):
        query = request.query
        print(time.strftime('%Y-%m-%d/%H:%M:%S',
                            time.localtime(time.time())) + '\t' + query)
        sys.stdout.flush()
        ngram_desc = descriptor.match_desc_max(query)
        titles = descriptor.rank_titles(ngram_desc, args.topk)
        movies = [title for title, _, _ in titles]
        return movie_pb2.FindMovieReply(movies=movies)
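# A minimal sketch (not shown in the original snippet) of wiring this
# servicer into a gRPC server. add_FindMovieServiceServicer_to_server is
# the registration function grpcio generates for this service; its exact
# name is assumed from the service name.
from concurrent import futures

server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
movie_pb2_grpc.add_FindMovieServiceServicer_to_server(movieServicer(), server)
server.add_insecure_port(args.address)
server.start()
server.wait_for_termination()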
def helper(feature_model, dimension_reduction, k, label_choice, image_id):
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = Labels(label_choice).label

    image = cv2.imread("{}{}{}".format(Config().read_all_path(), image_id, ".jpg"))
    image_feature_vector = Descriptor(
        image, feature_model, dimension_reduction).feature_descriptor

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(
            label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        (complementary_label_feature_vector, complementary_label_ids,
         complementary_label_pos) = functions.process_files(
            path, feature_model, dimension_reduction,
            complementary_label_filtered_image_ids)
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            image_feature_vector,
        ))
        pos = (label_pos + complementary_label_pos
               + [image_feature_vector.shape[0]])
    else:
        label_feature_vector, label_ids = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        complementary_label_feature_vector, complementary_label_ids = \
            functions.process_files(
                path, feature_model, dimension_reduction,
                complementary_label_filtered_image_ids)
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            np.array([image_feature_vector]),
        ))

    ids = label_ids + complementary_label_ids + [image_id]
    _, latent_symantics = LatentSymantics(
        feature_vector, k, dimension_reduction).latent_symantics

    records = functions.set_records(ids, descriptor_type, symantics_type, k,
                                    latent_symantics, pos, 5)
    for record in records:
        if record["image_id"] == image_id:
            continue
        elif record["image_id"] in label_ids:
            record[label] = value
        elif record["image_id"] in complementary_label_ids:
            record[label] = complementary_value

    Database().insert_many(records)
# Flag flipped by the input thread when the operator requests shutdown.
# (The original initialized this to True, which would exit the accept loop
# immediately; False is the evident intent.)
isShuttingDown = False
input_thread = threading.Thread(target=get_user_input)
input_thread.start()

while True:
    if isShuttingDown:
        break

    # Accept a new connection on the server socket.
    connectionSocket, addr = serverSocket.accept()

    # Ask for a nickname.
    nickname_message = Message(data="Enter a nickname for yourself: ")
    connectionSocket.send(nickname_message.encode().encode('utf-8'))

    # Wait for the nickname.
    nickname_payload = connectionSocket.recv(1024).decode('utf-8')
    nickname_message = Message()
    nickname_message.decode(nickname_payload)
    nickname = nickname_message.data

    # Initialize the client descriptor for its thread.
    client = Descriptor(nickname, addr[0], addr[1], connectionSocket,
                        get_connected_clients, global_sender, send_to_client)

    # Start the client thread.
    clients.append(client)
    client.start()
def helper(self, feature_model, dimension_reduction, k):
    unlabelled_path = "C:/Users/himan/OneDrive/Desktop/MWDB/phase3_sample_data/Unlabelled/Set 1/"
    files = os.listdir(unlabelled_path)
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = ("dorsal", 1, 0)

    unlabelled_image_feature_vector = []
    unlabelled_image_ids = []
    for i, file in enumerate(files):
        print(file)
        image = cv2.imread("{}{}".format(unlabelled_path, file))
        image_feature_vector = Descriptor(
            image, feature_model, dimension_reduction).feature_descriptor
        unlabelled_image_feature_vector.append(image_feature_vector)
        unlabelled_image_ids.append(file)

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(
            label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions_phase2.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        (complementary_label_feature_vector, complementary_label_ids,
         complementary_label_pos) = functions_phase2.process_files(
            path, feature_model, dimension_reduction,
            complementary_label_filtered_image_ids)
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            unlabelled_image_feature_vector,
        ))
        # pos = label_pos + complementary_label_pos + [image_feature_vector.shape[0]]
    else:
        label_feature_vector, label_ids = functions_phase2.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        complementary_label_feature_vector, complementary_label_ids = \
            functions_phase2.process_files(
                path, feature_model, dimension_reduction,
                complementary_label_filtered_image_ids)
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            unlabelled_image_feature_vector,
        ))

    ids = label_ids + complementary_label_ids + unlabelled_image_ids
    _, latent_symantics = LatentSymantics(
        feature_vector, k, dimension_reduction).latent_symantics
    # for i, ids in unlabelled_image_ids:
    #     _, latent_symantics = LatentSymantics(
    #         unlabelled_image_feature_vector[i], k, dimension_reduction
    #     ).latent_symantics

    records = functions_phase2.set_records(
        ids, descriptor_type, symantics_type, k, latent_symantics, pos, 5)
    for record in records:
        if record["image_id"] in label_ids:
            record[label] = value
        elif record["image_id"] in complementary_label_ids:
            record[label] = complementary_value
        else:
            continue

    Database().insert_many(records)
def clustering(path, c):
    mongo_url = "mongodb://localhost:27017/"
    database_name = "mwdb_phase3"
    lbld_collection_name = "labelled_hands"
    unlbld_collection_name = "unlabelled_hands"
    meta_collection_name = "metadata"
    lbld_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/labelled_set1.csv"
    unlabelled_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/Unlabelled/unlablled_set1.csv"
    # Convergence threshold for k-means (assumed value; the original snippet
    # referenced `tolerance` without defining it).
    tolerance = 0.0001

    try:
        connection = MongoClient(mongo_url)
        database = connection[database_name]
        lbld_collection = database[lbld_collection_name]
        unlbld_collection = database[unlbld_collection_name]
        meta_collection = database[meta_collection_name]

        # Store labelled images.
        df = pd.read_csv(lbld_csv)
        lbld_records = df.to_dict(orient='records')
        lbld_collection.delete_many({})  # remove() is deprecated in pymongo 3+
        lbld_collection.insert_many(lbld_records)

        # Store unlabelled images.
        df = pd.read_csv(unlabelled_csv)
        unlbld_records = df.to_dict(orient='records')
        unlbld_collection.delete_many({})
        unlbld_collection.insert_many(unlbld_records)

        ids1, ids2 = [], []
        feature_vector1, feature_vector2, feature_vector3 = [], [], []
        colors = ['red', 'blue', 'green', 'cyan', 'magenta']
        markers = ['o', '<', 's', '+', 'v', '^', '.', '>', ',', 'd']
        clust_labels = []
        cent_labels = []
        for i in range(c):
            # Plain concatenation; the original used str.join by mistake.
            clust_labels.append("Cluster " + str(i))
            cent_labels.append("Centroid " + str(i))

        # Extract features: dorsal.
        for subject in lbld_collection.find(
                {"aspectOfHand": {"$regex": "dorsal"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids1.append(image_id.replace(".jpg", ""))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # Min-max normalize features.
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector1.append(features_norm)

        _, d_latent_semantics = LatentSymantics(
            np.array(feature_vector1), 2, 1).latent_symantics

        # K-means on the dorsal latent semantics.
        centroids, prev_centroids, classes, d_img_classes = [], [], [], []
        iteration = 1  # iteration counter, capped at 500 passes
        isOptimal = False
        for i in range(c):
            centroids.append(d_latent_semantics[i])
            prev_centroids.append(d_latent_semantics[i])

        while not isOptimal and iteration < 501:
            classes = []
            d_img_classes = []
            for i in range(c):
                classes.append([])
                d_img_classes.append([])

            # Assign each feature vector to its nearest centroid.
            for i in range(d_latent_semantics.shape[0]):
                features = d_latent_semantics[i]
                d_distances = [euclidean(features, centroid)
                               for centroid in centroids]
                classification = d_distances.index(min(d_distances))
                classes[classification].append(features)
                d_img_classes[classification].append(ids1[i])

            # Recalculate centroids.
            for i in range(len(classes)):
                centroids[i] = np.mean(classes[i], axis=0)

            isOptimal = True
            for i in range(len(centroids)):
                if sum((centroids[i] - prev_centroids[i])
                       / prev_centroids[i] * 100.0) > tolerance:
                    isOptimal = False
                    break
                prev_centroids[i] = centroids[i]
            iteration += 1

        # # Visualize clusters -- takes a long time to show, so commented out.
        # for i in range(c):
        #     plt.scatter(centroids[i][0], centroids[i][1], s=300, c="black",
        #                 marker="x", label=cent_labels[i])
        #     for features in classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30,
        #                     marker=markers[i], label=clust_labels[i])
        # plt.show()

        print("Dorsal Clusters:")
        for i in range(len(d_img_classes)):
            print("Cluster %d:" % i)
            print(d_img_classes[i])

        # ------------------------------------------------------------------
        # Extract features: palmar.
        for subject in lbld_collection.find(
                {"aspectOfHand": {"$regex": "palmar"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids2.append(image_id.replace(".jpg", ""))
            # Min-max normalize features.
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector2.append(features_norm)

        _, p_latent_semantics = LatentSymantics(
            np.array(feature_vector2), 2, 1).latent_symantics

        # K-means on the palmar latent semantics.
        p_centroids, p_prev_centroids, p_classes, p_img_classes = [], [], [], []
        p_iteration = 1
        p_isOptimal = False
        for i in range(c):
            p_centroids.append(p_latent_semantics[i])
            p_prev_centroids.append(p_latent_semantics[i])
            p_classes.append([])
            p_img_classes.append([])

        while not p_isOptimal and p_iteration < 501:
            p_classes = []
            p_img_classes = []
            for i in range(c):
                p_classes.append([])
                p_img_classes.append([])

            # Assign each feature vector to its nearest centroid.
            for i in range(p_latent_semantics.shape[0]):
                features = p_latent_semantics[i]
                p_distances = [euclidean(features, centroid)
                               for centroid in p_centroids]
                classification = p_distances.index(min(p_distances))
                p_classes[classification].append(features)
                p_img_classes[classification].append(ids2[i])

            # Recalculate centroids.
            for i in range(len(p_classes)):
                p_centroids[i] = np.mean(p_classes[i], axis=0)

            p_isOptimal = True
            for i in range(len(p_centroids)):
                if sum((p_centroids[i] - p_prev_centroids[i])
                       / p_prev_centroids[i] * 100.0) > tolerance:
                    p_isOptimal = False
                    break
                p_prev_centroids[i] = p_centroids[i]
            p_iteration += 1

        # # Visualize clusters -- takes a long time to show, so commented out.
        # for i in range(c):
        #     plt.scatter(p_centroids[i][0], p_centroids[i][1], s=130, marker="x")
        #     for features in p_classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30,
        #                     marker=markers[i])
        # plt.show()

        print("Palmar Clusters:")
        for i in range(len(p_img_classes)):
            print("Cluster %d" % i)
            print(p_img_classes[i])

        # ------------------------------------------------------------------
        # Classification. test_path (a glob pattern for the unlabelled test
        # images) is assumed to be defined elsewhere in the module.
        # mean_dorsal = np.mean(centroids, axis=0)
        # mean_palmar = np.mean(p_centroids, axis=0)
        image_name = []
        dorsal_cnt = 0
        palmar_cnt = 0
        d_cnt = 0
        p_cnt = 0
        for image_path in glob.glob(test_path):
            image = cv2.imread(image_path)
            # Get filename.
            image_name.append(os.path.basename(image_path))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # Min-max normalize features.
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector3.append(features_norm)

        _, latent_semantics = LatentSymantics(
            np.array(feature_vector3), 2, 1).latent_symantics

        for i in range(len(latent_semantics)):
            ddistances = [euclidean(latent_semantics[i], centroid)
                          for centroid in centroids]
            pdistances = [euclidean(latent_semantics[i], centroid)
                          for centroid in p_centroids]
            subject_img = unlbld_collection.find_one(
                {"imageName": image_name[i]}, {"aspectOfHand": 1})
            if "dorsal" in subject_img['aspectOfHand']:
                d_cnt += 1
            else:
                p_cnt += 1
            if min(ddistances) < min(pdistances):
                if "dorsal" in subject_img['aspectOfHand']:
                    dorsal_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "dorsal"))
            else:
                if "palmar" in subject_img['aspectOfHand']:
                    palmar_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "palmar"))

        print("Dorsal Accuracy %d" % ((dorsal_cnt * 100) / d_cnt))
        print("Palmar Accuracy %d" % ((palmar_cnt * 100) / p_cnt))
    except Exception:
        traceback.print_exc()
        print("Connection refused... ")
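# A minimal, self-contained NumPy sketch of the k-means loop used above:
# assign points to the nearest centroid, recompute centroids, stop when
# they stabilize. Synthetic data only; illustrative, not the project code.
import numpy as np

def kmeans(points, c, iters=500, tol=1e-4):
    centroids = points[:c].copy()
    for _ in range(iters):
        # Distance from every point to every centroid, then nearest index.
        dists = np.linalg.norm(points[:, None, :] - centroids[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        # Keep the old centroid if a cluster ends up empty.
        new_centroids = np.array([points[labels == j].mean(axis=0)
                                  if np.any(labels == j) else centroids[j]
                                  for j in range(c)])
        if np.abs(new_centroids - centroids).sum() < tol:
            break
        centroids = new_centroids
    return centroids, labels

pts = np.random.rand(100, 2)
centroids, labels = kmeans(pts, c=3)
print(centroids)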
def insert_images_in_database(feature_model, dimension_reduction, k,
                              identifier, set1_dir=True, set2_dir=True):
    """
    :param feature_model: 1 - CM, 2 - LBP, 3 - HOG, 4 - SIFT
    :param dimension_reduction: 1 - PCA, 2 - SVD, 3 - NMF, 4 - LDA
    :param k: reduced dimension value
    :param identifier: 0 - Read all, 1 - Read from Labelled, 2 - Read from Unlabelled
    :param set1_dir (Optional): True - Read from Set1 folder of Labelled/Unlabelled, False otherwise
    :param set2_dir (Optional): True - Read from Set2 folder of Labelled/Unlabelled, False otherwise
    :return: None

    Default case: Read from both Set1 and Set2 folders.
    """
    # Read images and extract features.
    if identifier == 0:
        read_all_path = Config().read_all_path()
        files = os.listdir(read_all_path)
        connection = Database().open_connection()
        db = connection[Config().database_name()]
        collection = db[Config().collection_name()]
        for i, file in enumerate(files):
            print("Reading file: {} | {} % Done".format(
                file, ((i + 1) * 100.0) / len(files)))
            image = cv2.imread("{}{}".format(read_all_path, file))
            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            image_id = file.replace(".jpg", "")
            collection.insert_one({
                "image_id": image_id,
                "vector": feature_descriptor.tolist()
            })
        connection.close()
        query_results = Database().retrieve_many()
        ids = [item["image_id"] for item in query_results]
        x = np.array([item["vector"] for item in query_results])
    elif identifier == 1:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
    else:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_testing_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_testing_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_testing_set1_path(), feature_model,
                dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(
                Config().read_testing_set2_path(), feature_model,
                dimension_reduction)

    # Find latent symantics.
    _, latent_symantics = LatentSymantics(
        x, k, dimension_reduction).latent_symantics

    # Insert data into the database.
    if identifier == 0:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records)
    elif identifier == 1:
        records = functions.set_records(ids, latent_symantics, training=True)
        Database().insert_many(records, collection_type="training")
    else:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records, collection_type="testing")

    print("Done...")
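# Hypothetical usage of insert_images_in_database above, following the
# parameter encodings in its docstring (3 = HOG, 1 = PCA); the call
# otherwise depends on the project's Config/Database modules being set up.
insert_images_in_database(feature_model=3, dimension_reduction=1, k=20,
                          identifier=1, set1_dir=True, set2_dir=False)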