Example no. 1
def process_files(path,
                  feature_model,
                  dimension_reduction,
                  filtered_image_ids=None):
    files = os.listdir(path)

    ids, x = [], []
    for file in files:
        if not filtered_image_ids or file.replace(".jpg", "") in filtered_image_ids:
            print("Reading file: {}".format(file))
            image = cv2.imread("{}{}".format(path, file))

            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            ids.append(file.replace(".jpg", ""))
            x.append(feature_descriptor)

    if DescriptorType(feature_model).check_sift():
        """
    	    For SIFT, we flatten the image descriptor array into an array of keypoints.
    	    We return an extra list (pos) representing the number of keypoints for each image. 
    	    This is done to extract the feature descriptors (after dimensionality reduction) of 
    	    each image correctly while inserting into the DB.
    	"""
        pos = [descriptor.shape[0] for descriptor in x]
        sift_x = np.vstack(x)
        return sift_x, ids, pos
    """
        For all other feature descriptors, return only the ids and descriptor array.
    """
    return np.array(x), ids
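
The pos list returned above is what lets a caller carve the stacked keypoint matrix back into per-image blocks. A minimal sketch (assuming NumPy and the tuple returned above; the helper name is hypothetical):

import numpy as np

def split_by_keypoint_counts(stacked, pos):
    # stacked: (total_keypoints, k) array after dimensionality reduction
    # pos: number of keypoints contributed by each image, in order
    # np.cumsum gives the row offsets where each image's block ends.
    boundaries = np.cumsum(pos)[:-1]
    return np.split(stacked, boundaries)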
Example no. 2
    def __init__(self, img, x, y, w, h):
        self.id = uuid.uuid1()
        self.descriptor = Descriptor(img, x, y, w, h)
        self.position = Position(x, y, w, h)
        self.last_seen = time.time()
        self.color = (random.randint(0, 255), random.randint(0, 255),
                      random.randint(0, 255))
        self.is_followed = False
Example no. 3
def main():
    image_id = input("Enter image ID: ")

    constants_dict = read_json()
    read_path = constants_dict["READ_PATH"]

    files = os.listdir(read_path)

    file = files[files.index("{}.jpg".format(image_id))]  # ValueError if missing

    img = cv2.imread(read_path + file)
    desc = Descriptor(img)

    print(desc.sift())
    print(desc.lbp())
Example no. 4
def process_files(path, feature_model, dimension_reduction):
    files = os.listdir(path)

    ids, x = [], []
    for file in files:
        print("Reading file: {}".format(file))
        image = cv2.imread("{}{}".format(path, file))

        feature_descriptor = Descriptor(
            image, feature_model, dimension_reduction
        ).feature_descriptor
        ids.append(file.replace(".jpg", ""))
        x.append(feature_descriptor)

    return ids, np.array(x)
Example no. 5
    def loadClassifier(self, filepath=None, classifier_data=None):
        """
        Load a classifier trained by the functions in train.py. Either a dict
        (classifier_data) or pickled file (filepath) may be supplied.
        """

        if filepath is not None:
            filepath = os.path.abspath(filepath)
            if not os.path.isfile(filepath):
                raise FileNotFoundError("File " + filepath +
                                        " does not exist.")
            with open(filepath, "rb") as f:
                classifier_data = pickle.load(f)

        if classifier_data is None:
            raise ValueError("Invalid classifier data supplied.")

        self.classifier = classifier_data["classifier"]
        self.scaler = classifier_data["scaler"]
        self.cv_color_const = classifier_data["cv_color_const"]
        self.channels = classifier_data["channels"]

        # Simply loading the descriptor from the dict with
        #   self.descriptor = classifier_data["descriptor"]
        # produces an error. Thus, we instantiate a new descriptor object
        # using the same parameters on which the classifier was trained.
        self.descriptor = Descriptor(
            hog_features=classifier_data["hog_features"],
            hist_features=classifier_data["hist_features"],
            spatial_features=classifier_data["spatial_features"],
            hog_lib=classifier_data["hog_lib"],
            size=classifier_data["size"],
            hog_bins=classifier_data["hog_bins"],
            pix_per_cell=classifier_data["pix_per_cell"],
            cells_per_block=classifier_data["cells_per_block"],
            block_stride=classifier_data["block_stride"],
            block_norm=classifier_data["block_norm"],
            transform_sqrt=classifier_data["transform_sqrt"],
            signed_gradient=classifier_data["signed_gradient"],
            hist_bins=classifier_data["hist_bins"],
            spatial_size=classifier_data["spatial_size"])

        return self
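
On the training side, a plausible companion (a hypothetical sketch; the exact keys are inferred from the dict read back above) would pickle the classifier, scaler, and plain parameters rather than the Descriptor object itself, since the comment above notes the descriptor does not survive unpickling:

import pickle

def save_classifier(classifier, scaler, params, filepath="classifier.pkl"):
    # params holds the plain values loadClassifier() reads back:
    # cv_color_const, channels, hog_features, hist_features, size, ...
    classifier_data = {"classifier": classifier, "scaler": scaler}
    classifier_data.update(params)
    with open(filepath, "wb") as f:
        pickle.dump(classifier_data, f)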
Example no. 6
    def load_model(self, file_path):
        file_path = os.path.abspath(file_path)
        if not os.path.isfile(file_path):
            raise FileNotFoundError("File " + file_path + " not found.")

        with open(file_path, "rb") as f:
            model_data = pickle.load(f)

        self.model = model_data["model"]
        self.scaler = model_data["scaler"]
        self.color_const = model_data["color_const"]
        self.channels = model_data["channels"]
        self.descriptor = Descriptor(
            hog=model_data["hog"],
            histogram=model_data["histogram"],
            spatial=model_data["spatial"],
            hog_size=model_data["hog_size"],
            hog_bins=model_data["hog_bins"],
            cell_size=model_data["cell_size"],
            cells_per_block=model_data["cells_per_block"],
            histogram_bins=model_data["histogram_bins"],
            spatial_size=model_data["spatial_size"])

        return self
Example no. 7
def process_files(positive_dir,
                  negative_dir,
                  color_space="bgr",
                  channels=[0, 1, 2],
                  hog=False,
                  histogram=False,
                  spatial=False,
                  hog_size=(64, 64),
                  hog_bins=9,
                  cell_size=(8, 8),
                  cells_per_block=(2, 2),
                  histogram_bins=16,
                  spatial_size=(16, 16)):
    # Resolve and validate the training directories.
    positive_dir = os.path.abspath(positive_dir)
    negative_dir = os.path.abspath(negative_dir)
    if not os.path.isdir(positive_dir):
        raise FileNotFoundError("Directory " + positive_dir + " not found.")
    if not os.path.isdir(negative_dir):
        raise FileNotFoundError("Directory " + negative_dir + " not found.")
    positive_files = [
        os.path.join(positive_dir, file) for file in os.listdir(positive_dir)
        if os.path.isfile(os.path.join(positive_dir, file))
    ]
    negative_files = [
        os.path.join(negative_dir, file) for file in os.listdir(negative_dir)
        if os.path.isfile(os.path.join(negative_dir, file))
    ]
    print("{} positive files and {} negative files found.\n".format(
        len(positive_files), len(negative_files)))

    # color space info
    color_space = color_space.lower()
    if color_space == "hls":
        color_const = cv2.COLOR_BGR2HLS
    elif color_space == "hsv":
        color_const = cv2.COLOR_BGR2HSV
    elif color_space == "luv":
        color_const = cv2.COLOR_BGR2Luv
    elif color_space == "ycrcb" or color_space == "ycc":
        color_const = cv2.COLOR_BGR2YCrCb
    elif color_space == "yuv":
        color_const = cv2.COLOR_BGR2YUV
    else:
        color_const = -1

    # store feature vectors for both positive and negative files
    positive_features = []
    negative_features = []
    time_begin = time.time()

    # create feature descriptor object
    descriptor = Descriptor(hog=hog,
                            histogram=histogram,
                            spatial=spatial,
                            hog_size=hog_size,
                            hog_bins=hog_bins,
                            cell_size=cell_size,
                            cells_per_block=cells_per_block,
                            histogram_bins=histogram_bins,
                            spatial_size=spatial_size)

    # extract features from each file
    for i, file_path in enumerate(positive_files + negative_files):
        image = cv2.imread(file_path)
        if image is None:
            continue

        if color_const > -1:
            image = cv2.cvtColor(image, color_const)

        feature_vector = descriptor.get_features(image)

        if i < len(positive_files):
            positive_features.append(feature_vector)
        else:
            negative_features.append(feature_vector)

    print("Features extraction completed in {:.1f} seconds\n".format(
        time.time() - time_begin))

    num_features = len(positive_features[0])

    # scale features
    scaler = StandardScaler().fit(positive_features + negative_features)
    positive_features = scaler.transform(positive_features)
    negative_features = scaler.transform(negative_features)

    # Shuffle the feature vectors, then split them 75/20/5 into training,
    # cross-validation, and test sets. np.random.shuffle is used because the
    # stdlib random.shuffle can duplicate rows of a 2-D NumPy array.
    np.random.shuffle(positive_features)
    np.random.shuffle(negative_features)

    num_positive_train = int(round(0.75 * len(positive_features)))
    num_negative_train = int(round(0.75 * len(negative_features)))
    num_positive_val = int(round(0.2 * len(positive_features)))
    num_negative_val = int(round(0.2 * len(negative_features)))

    positive_train = positive_features[0:num_positive_train]
    negative_train = negative_features[0:num_negative_train]

    positive_val = positive_features[num_positive_train:(num_positive_train +
                                                         num_positive_val)]
    negative_val = negative_features[num_negative_train:(num_negative_train +
                                                         num_negative_val)]

    positive_test = positive_features[(num_positive_train + num_positive_val):]
    negative_test = negative_features[(num_negative_train + num_negative_val):]

    print(
        "Randomized images into training, cross-validation, and test sets.\n")
    print("{} images in positive training set.".format(len(positive_train)))
    print("{} images in positive cross-validation set.".format(
        len(positive_val)))
    print("{} images in positive test set.".format(len(positive_test)))
    print("{} total positive images.\n".format(
        len(positive_train) + len(positive_val) + len(positive_test)))
    print("{} images in negative training set.".format(len(negative_train)))
    print("{} images in negative cross-validation set.".format(
        len(negative_val)))
    print("{} images in negative test set.".format(len(negative_test)))
    print("{} total negative images.\n".format(
        len(negative_train) + len(negative_val) + len(negative_test)))

    # store data and parameters in a dictionary
    feature_data = {
        "positive_train": positive_train,
        "negative_train": negative_train,
        "positive_val": positive_val,
        "negative_val": negative_val,
        "positive_test": positive_test,
        "negative_test": negative_test,
        "scaler": scaler,
        "hog": hog,
        "histogram": histogram,
        "spatial": spatial,
        "color_space": color_space,
        "color_const": color_const,
        "channels": channels,
        "hog_size": hog_size,
        "hog_bins": hog_bins,
        "cell_size": cell_size,
        "cells_per_block": cells_per_block,
        "histogram_bins": histogram_bins,
        "spatial_size": spatial_size,
        "num_features": num_features
    }

    return feature_data
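
A hedged usage sketch of the returned dict (the classifier choice and paths are illustrative, not from the original project): the splits are already scaled, so they can feed a scikit-learn estimator directly.

import numpy as np
from sklearn.svm import LinearSVC

data = process_files("positive/", "negative/", hog=True)  # placeholder paths
X_train = np.vstack((data["positive_train"], data["negative_train"]))
y_train = np.concatenate((np.ones(len(data["positive_train"])),
                          np.zeros(len(data["negative_train"]))))
clf = LinearSVC().fit(X_train, y_train)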
Example no. 8
from os import path
import sys

print(sys.argv)
train = False
count = 1
name_video = '../sample-extractor/cutvideo.mp4'
if len(sys.argv) >= 2:
    if sys.argv[1] == "train":
        train = True
    else:
        name_video = sys.argv[1]

if not train:
    print("- LOADING EXISTING SVM MODEL -")
    descriptor = Descriptor('')
    svm = SVM()
    svm.load("../models/trained_model.xml")
    vidcap = cv2.VideoCapture(name_video)
    success, image = vidcap.read()
    while success:
        for cut in slidingwindow.cut_frame(image):
            if cut[0].shape[0] == 128 and cut[0].shape[1] == 48:
                description = descriptor.describeImage(cut[0])
                result = int(list(svm.test(description))[0][0])
                print(result)
                if result == 1:
                    cv2.imwrite(
                        "../figs/official/positive_" + str(count) + ".jpg",
                        cut[0])
                    count += 1
        # Read the next frame; without this the loop never advances.
        success, image = vidcap.read()
Example no. 9
def processFiles(pos_dir,
                 neg_dir,
                 recurse=False,
                 output_file=False,
                 output_filename=None,
                 color_space="bgr",
                 channels=[0, 1, 2],
                 hog_features=False,
                 hist_features=False,
                 spatial_features=False,
                 hog_lib="cv",
                 size=(64, 64),
                 hog_bins=9,
                 pix_per_cell=(8, 8),
                 cells_per_block=(2, 2),
                 block_stride=None,
                 block_norm="L1",
                 transform_sqrt=True,
                 signed_gradient=False,
                 hist_bins=16,
                 spatial_size=(16, 16)):
    """
    Extract features from positive samples and negative samples.
    Store feature vectors in a dict and optionally save to pickle file.

    @param pos_dir (str): Path to directory containing positive samples.
    @param neg_dir (str): Path to directory containing negative samples.
    @param recurse (bool): Traverse directories recursively (else, top-level only).
    @param output_file (bool): Save processed samples to file.
    @param output_filename (str): Output file filename.
    @param color_space (str): Color space conversion.
    @param channels (list): Image channel indices to use.
    
    For remaining arguments, refer to Descriptor class:
    @see descriptor.Descriptor#__init__(...)

    @return feature_data (dict): Lists of sample features split into training,
        validation, test sets; scaler object; parameters used to
        construct descriptor and process images.

    NOTE: OpenCV HOGDescriptor currently only supports 1-channel and 3-channel
    images, not 2-channel images.
    """

    if not (hog_features or hist_features or spatial_features):
        raise RuntimeError(
            "No features selected (set hog_features=True, " +
            "hist_features=True, and/or spatial_features=True.)")

    pos_dir = os.path.abspath(pos_dir)
    neg_dir = os.path.abspath(neg_dir)

    if not os.path.isdir(pos_dir):
        raise FileNotFoundError("Directory " + pos_dir + " does not exist.")
    if not os.path.isdir(neg_dir):
        raise FileNotFoundError("Directory " + neg_dir + " does not exist.")

    print("Building file list...")
    if recurse:
        pos_files = [
            os.path.join(rootdir, file)
            for rootdir, _, files in os.walk(pos_dir) for file in files
        ]
        neg_files = [
            os.path.join(rootdir, file)
            for rootdir, _, files in os.walk(neg_dir) for file in files
        ]
    else:
        pos_files = [
            os.path.join(pos_dir, file) for file in os.listdir(pos_dir)
            if os.path.isfile(os.path.join(pos_dir, file))
        ]
        neg_files = [
            os.path.join(neg_dir, file) for file in os.listdir(neg_dir)
            if os.path.isfile(os.path.join(neg_dir, file))
        ]

    print("{} positive files and {} negative files found.\n".format(
        len(pos_files), len(neg_files)))

    # Get color space information.
    color_space = color_space.lower()
    if color_space == "gray":
        color_space_name = "grayscale"
        cv_color_const = cv2.COLOR_BGR2GRAY
        channels = [0]
    elif color_space == "hls":
        color_space_name = "HLS"
        cv_color_const = cv2.COLOR_BGR2HLS
    elif color_space == "hsv":
        color_space_name = "HSV"
        cv_color_const = cv2.COLOR_BGR2HSV
    elif color_space == "lab":
        color_space_name = "Lab"
        cv_color_const = cv2.COLOR_BGR2Lab
    elif color_space == "luv":
        color_space_name = "Luv"
        cv_color_const = cv2.COLOR_BGR2Luv
    elif color_space == "ycrcb" or color_space == "ycc":
        color_space_name = "YCrCb"
        cv_color_const = cv2.COLOR_BGR2YCrCb
    elif color_space == "yuv":
        color_space_name = "YUV"
        cv_color_const = cv2.COLOR_BGR2YUV
    else:
        color_space_name = "BGR"
        cv_color_const = -1

    # Get names of desired features.
    features = [
        feature_name for feature_name, feature_bool in
        zip(["HOG", "color histogram", "spatial"],
            [hog_features, hist_features, spatial_features]) if feature_bool
    ]

    feature_str = ", ".join(features)

    # Get information about channel indices.
    if len(channels) == 2 and hog_features and hog_lib == "cv":
        warnings.warn("OpenCV HOG does not support 2-channel images",
                      RuntimeWarning)

    channel_index_str = ", ".join(str(ch_index) for ch_index in channels)

    print("Converting images to " + color_space_name + " color space and " +
          "extracting " + feature_str + " features from channel(s) " +
          channel_index_str + ".\n")

    # Store feature vectors for positive samples in list pos_features and
    # for negative samples in neg_features.
    pos_features = []
    neg_features = []
    start_time = time.time()

    # Get feature descriptor object to call on each sample.
    descriptor = Descriptor(hog_features=hog_features,
                            hist_features=hist_features,
                            spatial_features=spatial_features,
                            hog_lib=hog_lib,
                            size=size,
                            hog_bins=hog_bins,
                            pix_per_cell=pix_per_cell,
                            cells_per_block=cells_per_block,
                            block_stride=block_stride,
                            block_norm=block_norm,
                            transform_sqrt=transform_sqrt,
                            signed_gradient=signed_gradient,
                            hist_bins=hist_bins,
                            spatial_size=spatial_size)

    # Iterate through files and extract features.
    for i, filepath in enumerate(pos_files + neg_files):
        image = cv2.imread(filepath)
        if image is None:
            continue
        if cv_color_const > -1:
            image = cv2.cvtColor(image, cv_color_const)

        if len(image.shape) > 2:
            image = image[:, :, channels]

        feature_vector = descriptor.getFeatureVector(image)

        if i < len(pos_files):
            pos_features.append(feature_vector)
        else:
            neg_features.append(feature_vector)

    print("Features extracted from {} files in {:.1f} seconds\n".format(
        len(pos_features) + len(neg_features),
        time.time() - start_time))

    # Store the length of the feature vector produced by the descriptor.
    num_features = len(pos_features[0])

    # Instantiate scaler and scale features.
    scaler = StandardScaler()
    scaler.fit(np.concatenate((pos_features, neg_features), axis=0))
    pos_features = scaler.transform(pos_features)
    neg_features = scaler.transform(neg_features)

    # Randomize the feature vectors, then split 75/20/5 into training,
    # validation, and test sets. np.random.shuffle is used because the
    # stdlib random.shuffle can duplicate rows of a 2-D NumPy array.
    print(
        "Shuffling samples into training, cross-validation, and test sets.\n")
    np.random.shuffle(pos_features)
    np.random.shuffle(neg_features)

    # pos_train/pos_val/pos_test and neg_train/neg_val/neg_test hold the
    # training, validation, and test splits of the positive and negative sets.
    pos_train, pos_val, pos_test = np.split(
        pos_features,
        [int(.75 * len(pos_features)),
         int(.95 * len(pos_features))])
    neg_train, neg_val, neg_test = np.split(
        neg_features,
        [int(.75 * len(neg_features)),
         int(.95 * len(neg_features))])

    # Store sample data and parameters in dict.
    # Descriptor class object seems to produce errors when unpickling and
    # has been commented out below. The descriptor will be re-instantiated
    # by the Detector object later.
    feature_data = {
        "pos_train": pos_train,
        "neg_train": neg_train,
        "pos_val": pos_val,
        "neg_val": neg_val,
        "pos_test": pos_test,
        "neg_test": neg_test,
        #"descriptor": descriptor,
        "scaler": scaler,
        "hog_features": hog_features,
        "hist_features": hist_features,
        "spatial_features": spatial_features,
        "color_space": color_space,
        "cv_color_const": cv_color_const,
        "channels": channels,
        "hog_lib": hog_lib,
        "size": size,
        "hog_bins": hog_bins,
        "pix_per_cell": pix_per_cell,
        "cells_per_block": cells_per_block,
        "block_stride": block_stride,
        "block_norm": block_norm,
        "transform_sqrt": transform_sqrt,
        "signed_gradient": signed_gradient,
        "hist_bins": hist_bins,
        "spatial_size": spatial_size,
        "num_features": num_features
    }

    # Pickle to file if desired.
    if output_file:
        if output_filename is None:
            output_filename = (datetime.now().strftime("%Y%m%d%H%M") +
                               "_data.pkl")

        with open(output_filename, "wb") as f:
            pickle.dump(feature_data, f)
        print(
            "Sample and parameter data saved to {}\n".format(output_filename))

    return feature_data
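
As a quick sanity check of the split arithmetic above (a standalone sketch, not part of the original project): boundaries at 75% and 95% produce the documented 75/20/5 proportions.

import numpy as np

samples = np.arange(100).reshape(100, 1)
train, val, test = np.split(samples, [int(.75 * len(samples)),
                                      int(.95 * len(samples))])
print(len(train), len(val), len(test))  # 75 20 5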
Example no. 10
import sys
import time
import grpc
import movie_pb2
import movie_pb2_grpc
import argparse
from descriptor import Descriptor

parser = argparse.ArgumentParser()
parser.add_argument("--model",help="the path of the model to save or load",\
        required=True)
parser.add_argument("--address",
                    help="the ip and port this service want to listen",
                    default="[::]:5011")
parser.add_argument("--topk", help="top k", type=int, default=50)
args = parser.parse_args()
descriptor = Descriptor()
descriptor.load_model(args.model, "max")


class movieServicer(movie_pb2_grpc.FindMovieServiceServicer):
    def FindMovies(self, request, context):
        query = request.query
        print(
            time.strftime('%Y-%m-%d/%H:%M:%S', time.localtime(time.time())) +
            '\t' + query)
        sys.stdout.flush()
        ngram_desc = descriptor.match_desc_max(query)
        titles = descriptor.rank_titles(ngram_desc, args.topk)
        movies = [title for title, _, _ in titles]
        return movie_pb2.FindMovieReply(movies=movies)
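
The snippet defines the servicer but omits the server bootstrap. A minimal sketch (assuming the conventional grpcio codegen registrar for a service named FindMovieService):

from concurrent import futures

def serve():
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    movie_pb2_grpc.add_FindMovieServiceServicer_to_server(
        movieServicer(), server)
    server.add_insecure_port(args.address)
    server.start()
    server.wait_for_termination()

if __name__ == "__main__":
    serve()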
Example no. 11
def helper(feature_model, dimension_reduction, k, label_choice, image_id):
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = Labels(label_choice).label

    image = cv2.imread("{}{}{}".format(Config().read_all_path(), image_id,
                                       ".jpg"))
    image_feature_vector = Descriptor(image, feature_model,
                                      dimension_reduction).feature_descriptor

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"] for item in Database().retrieve_metadata_with_labels(
            label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        complementary_label_feature_vector, complementary_label_ids, complementary_label_pos = functions.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            image_feature_vector,
        ))
        pos = label_pos + complementary_label_pos + [
            image_feature_vector.shape[0]
        ]
    else:
        label_feature_vector, label_ids = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        complementary_label_feature_vector, complementary_label_ids = functions.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            np.array([image_feature_vector]),
        ))

    ids = label_ids + complementary_label_ids + [image_id]

    _, latent_symantics = LatentSymantics(feature_vector, k,
                                          dimension_reduction).latent_symantics

    records = functions.set_records(ids, descriptor_type, symantics_type, k,
                                    latent_symantics, pos, 5)

    for record in records:
        if record["image_id"] == image_id:
            continue
        elif record["image_id"] in label_ids:
            record[label] = value
        elif record["image_id"] in complementary_label_ids:
            record[label] = complementary_value

    Database().insert_many(records)
Example no. 12
            isShuttingDown = True


input_thread = threading.Thread(target=get_user_input)
input_thread.start()

while True:
    if isShuttingDown:
        break

    # accepts a new connection into the socket server
    connectionSocket, addr = serverSocket.accept()

    # ask for a nickname
    nickname_message = Message(data="Enter a nickname: ")
    connectionSocket.send(nickname_message.encode().encode('utf-8'))

    # wait for nickname
    nickname_payload = connectionSocket.recv(1024).decode('utf-8')
    nickname_message = Message()
    nickname_message.decode(nickname_payload)
    nickname = nickname_message.data

    # initializes the client descriptor for its thread
    client = Descriptor(nickname, addr[0], addr[1], connectionSocket,
                        get_connected_clients, global_sender, send_to_client)

    # starts the client thread
    clients.append(client)
    client.start()
Example no. 13
    def helper(self, feature_model, dimension_reduction, k):
        unlabelled_path = "C:/Users/himan/OneDrive/Desktop/MWDB/phase3_sample_data/Unlabelled/Set 1/"
        files = os.listdir(unlabelled_path)
        path, pos = Config().read_path(), None
        descriptor_type = DescriptorType(feature_model).descriptor_type
        symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
        label, value, complementary_value = ("dorsal", 1, 0)
        unlabelled_image_feature_vector = []
        unlabelled_image_ids = []

        for i, file in enumerate(files):
            print(file)

            image = cv2.imread("{}{}".format(unlabelled_path, file))
            image_feature_vector = Descriptor(
                image, feature_model, dimension_reduction
            ).feature_descriptor
            unlabelled_image_feature_vector.append(image_feature_vector)
            unlabelled_image_ids.append(file)

        label_filtered_image_ids = [
            item["image_id"]
            for item in Database().retrieve_metadata_with_labels(label, value)
        ]
        complementary_label_filtered_image_ids = [
            item["image_id"]
            for item in Database().retrieve_metadata_with_labels(label, complementary_value)
        ]

        if DescriptorType(feature_model).check_sift():
            label_feature_vector, label_ids, label_pos = functions_phase2.process_files(
                path, feature_model, dimension_reduction, label_filtered_image_ids
            )
            complementary_label_feature_vector, complementary_label_ids, complementary_label_pos = functions_phase2.process_files(
                path,
                feature_model,
                dimension_reduction,
                complementary_label_filtered_image_ids,
            )
            feature_vector = np.concatenate(
                (
                    label_feature_vector,
                    complementary_label_feature_vector,
                    unlabelled_image_feature_vector,
                )
            )
            # pos = label_pos + complementary_label_pos + [image_feature_vector.shape[0]]
        else:
            label_feature_vector, label_ids = functions_phase2.process_files(
                path, feature_model, dimension_reduction, label_filtered_image_ids
            )
            complementary_label_feature_vector, complementary_label_ids = functions_phase2.process_files(
                path,
                feature_model,
                dimension_reduction,
                complementary_label_filtered_image_ids,
            )

            feature_vector = np.concatenate(
                (
                    label_feature_vector,
                    complementary_label_feature_vector,
                    unlabelled_image_feature_vector,
                )
            )

        ids = label_ids + complementary_label_ids + unlabelled_image_ids

        _, latent_symantics = LatentSymantics(
            feature_vector, k, dimension_reduction
        ).latent_symantics

        # for i, ids in unlabelled_image_ids:
        #     _, latent_symantics = LatentSymantics(
        #         unlabelled_image_feature_vector[i], k, dimension_reduction
        #     ).latent_symantics

        records = functions_phase2.set_records(
            ids, descriptor_type, symantics_type, k, latent_symantics, pos, 5
        )

        for record in records:
            if record["image_id"] in label_ids:
                record[label] = value
            elif record["image_id"] in complementary_label_ids:
                record[label] = complementary_value

        Database().insert_many(records)
Example no. 14
def clustering(path, c):
    mongo_url = "mongodb://localhost:27017/"
    database_name = "mwdb_phase3"
    lbld_collection_name = "labelled_hands"
    unlbld_collection_name = "unlabelled_hands"
    meta_collection_name = "metadata"
    lbld_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/labelled_set1.csv"
    unlabelled_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/Unlabelled/unlablled_set1.csv"
    try:
        connection = MongoClient(mongo_url)
        database = connection[database_name]
        lbld_collection = database[lbld_collection_name]
        unlbld_collection = database[unlbld_collection_name]
        meta_collection = database[meta_collection_name]
        # storing labelled images
        df = pd.read_csv(lbld_csv)
        lbld_records = df.to_dict(orient='records')
        lbld_collection.delete_many({})
        lbld_collection.insert_many(lbld_records)

        # storing unlabelled images
        df = pd.read_csv(unlabelled_csv)
        unlbld_records = df.to_dict(orient='records')
        unlbld_collection.delete_many({})
        unlbld_collection.insert_many(unlbld_records)

        ids1, ids2, feature_vector1, feature_vector2, feature_vector3 = [], [], [], [], []
        colors = ['red', 'blue', 'green', 'cyan', 'magenta']
        markers = ['o', '<', 's', '+', 'v', '^', '.', '>', ',', 'd']
        clust_labels = []
        cent_labels = []
        cluster = "Cluster "
        cent = "Centroid "
        for i in range(c):
            clust_labels.append(cluster + str(i))
            cent_labels.append(cent + str(i))
        # extracting features
        # dorsal
        for subject in lbld_collection.find({"aspectOfHand": {"$regex": "dorsal"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids1.append(image_id.replace(".jpg", ""))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # normalize features
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector1.append(features_norm)

        _, d_latent_semantics = LatentSymantics(
            np.array(feature_vector1), 2, 1
        ).latent_symantics
        # K means
        centroids, prev_centroids, classes, X, centroid_norm, d_img_classes = [], [], [], [], [], []
        max_iterations = 1
        isOptimal = False
        for i in range(c):
            centroids.append(d_latent_semantics[i])
            prev_centroids.append(d_latent_semantics[i])
        while not isOptimal and max_iterations < 501:
            d_distances = []
            classes = []
            d_img_classes = []
            for i in range(c):
                classes.append([])
                d_img_classes.append([])
            # Calculating clusters for each feature
            for i in range(d_latent_semantics.shape[0]):
                features = d_latent_semantics[i]
                d_distances = [euclidean(features, centroid) for centroid in centroids]
                classification = d_distances.index(min(d_distances))
                classes[classification].append(features)
                d_img_classes[classification].append(ids1[i])
            # Recalculating centroids
            for i in range(len(classes)):
                centroids[i] = np.mean(classes[i], axis=0)
            isOptimal = True
            for i in range(len(centroids)):
                if sum((centroids[i] - prev_centroids[i]) / prev_centroids[i] * 100.0) > tolerance:
                    isOptimal = False
                    break
                prev_centroids[i] = centroids[i]
            max_iterations += 1
        # # Visualize clusters -- takes longer time to show so commented
        # for i in range(c):
        #     plt.scatter(centroids[i][0], centroids[i][1], s=300, c="black", marker="x", label=cent_labels[i])
        #     for features in classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30, marker=markers[i], label=clust_labels[i])
        # plt.show()
        print "Dorsal CLusters: "
        for i in range(len(d_img_classes)):
            print ("Cluster %d: " % i)
            print d_img_classes[i]
        # ---------------------------------------------------------------------------------------------------------------------
        # extracting features
        # palmar
        for subject in lbld_collection.find({"aspectOfHand": {"$regex": "palmar"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids2.append(image_id.replace(".jpg", ""))
            # normalize features
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector2.append(features_norm)
        _, p_latent_semantics = LatentSymantics(
            np.array(feature_vector2), 2, 1
        ).latent_symantics
        # K means
        p_centroids, p_prev_centroids, p_classes, p_X, p_centroid_norm, p_img_classes = [], [], [], [], [], []
        p_max_iterations = 1
        p_isOptimal = False
        for i in range(c):
            p_centroids.append(p_latent_semantics[i])
            p_prev_centroids.append(p_latent_semantics[i])
            p_classes.append([])
            p_img_classes.append([])
        while not p_isOptimal and p_max_iterations < 501:
            p_distances = []
            p_classes = []
            p_img_classes = []
            for i in range(c):
                p_classes.append([])
                p_img_classes.append([])
            # Calculating clusters for each feature
            for i in range(p_latent_semantics.shape[0]):
                features = p_latent_semantics[i]
                p_distances = [euclidean(features, centroid) for centroid in p_centroids]
                classification = p_distances.index(min(p_distances))
                p_classes[classification].append(features)
                p_img_classes[classification].append(ids2[i])
            # Recalculating centroids
            for i in range(len(p_classes)):
                p_centroids[i] = np.mean(p_classes[i], axis=0)
            p_isOptimal = True
            for i in range(len(p_centroids)):
                if sum((p_centroids[i] - p_prev_centroids[i]) / p_prev_centroids[i] * 100.0) > tolerance:
                    p_isOptimal = False
                    break
                p_prev_centroids[i] = p_centroids[i]
            p_max_iterations += 1

        # # Visualize clusters -- takes longer time to show so commented
        # for i in range(c):
        #     plt.scatter(p_centroids[i][0], p_centroids[i][1], s=130, marker="x")
        #     for features in p_classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30, marker=markers[i])
        # plt.show()
        print "Palmar CLusters: "
        for i in range(len(p_img_classes)):
            print ("Cluster %d" % i)
            print p_img_classes[i]
        # ----------------------------------------------------------------------------------------------------------------------
        # Classification
        # mean_dorsal = np.mean(centroids, axis=0)
        # mean_palmar = np.mean(p_centroids, axis=0)
        image_name = []
        dorsal_cnt = 0
        palmar_cnt = 0
        d_cnt = 0
        p_cnt = 0
        for image_path in glob.glob(test_path):
            image = cv2.imread(image_path)
            # get filename
            image_name.append(os.path.basename(image_path))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # normalize features
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector3.append(features_norm)
        _, latent_semantics = LatentSymantics(np.array(feature_vector3), 2, 1).latent_symantics
        for i in range(len(latent_semantics)):
            ddistances = [euclidean(latent_semantics[i], centroid) for centroid in centroids]
            pdistances = [euclidean(latent_semantics[i], centroid) for centroid in p_centroids]

            subject_img = unlbld_collection.find_one({"imageName": image_name[i]}, {"aspectOfHand": 1})
            if "dorsal" in subject_img['aspectOfHand']:
                d_cnt += 1
            else:
                p_cnt += 1
            if min(ddistances) < min(pdistances):
                if "dorsal" in subject_img['aspectOfHand']:
                    dorsal_cnt += 1
                print ("Image ID: %s, %s" % (image_name[i], "dorsal"))
            else:
                if "palmar" in subject_img['aspectOfHand']:
                    palmar_cnt += 1
                print ("Image ID: %s, %s" % (image_name[i], "palmar"))
        print ("Dorsal Accuracy %d" % ((dorsal_cnt*100)/d_cnt))
        print ("Palmar Accuracy %d" % ((palmar_cnt*100)/p_cnt))

    except Exception:
        traceback.print_exc()
        print("Connection refused or processing failed... ")
Example no. 15
def insert_images_in_database(feature_model,
                              dimension_reduction,
                              k,
                              identifier,
                              set1_dir=True,
                              set2_dir=True):
    """
    :param feature_model: 1 - CM, 2 - LBP, 3 - HOG, 4 - SIFT
    :param dimension_reduction: 1 - PCA, 2 - SVD, 3 - NMF, 4 - LDA
    :param k: reduced dimension value
    :param identifier: 0 - Read all, 1 - Read from Labelled, 2 - Read from Unlabelled
    :param set1_dir (Optional): True - Read from Set1 folder of Labelled/Unlabelled, False otherwise
    :param set2_dir (Optional): True - Read from Set2 folder of Labelled/Unlabelled, False otherwise
    :return: None

    Default case: Read from both Set1 and Set2 folders
    """

    # Read images and extract features
    if identifier == 0:
        read_all_path = Config().read_all_path()
        files = os.listdir(read_all_path)
        connection = Database().open_connection()
        db = connection[Config().database_name()]
        collection = db[Config().collection_name()]

        for i, file in enumerate(files):
            print("Reading file: {} | {} % Done".format(
                file, ((i + 1) * 100.0) / len(files)))
            image = cv2.imread("{}{}".format(read_all_path, file))

            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            image_id = file.replace(".jpg", "")
            collection.insert_one({
                "image_id": image_id,
                "vector": feature_descriptor.tolist()
            })

        connection.close()
        query_results = Database().retrieve_many()
        ids = [item["image_id"] for item in query_results]
        x = np.array([item["vector"] for item in query_results])

    elif identifier == 1:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
    else:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_testing_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_testing_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(Config().read_testing_set1_path(),
                                             feature_model,
                                             dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(Config().read_testing_set2_path(),
                                             feature_model,
                                             dimension_reduction)

    # Compute latent semantics
    _, latent_symantics = LatentSymantics(x, k,
                                          dimension_reduction).latent_symantics

    # Insert records into the database
    if identifier == 0:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records)
    elif identifier == 1:
        records = functions.set_records(ids, latent_symantics, training=True)
        Database().insert_many(records, collection_type="training")
    else:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records, collection_type="testing")
    print("Done... ")