def get_prediction(image, trained_model, probability_limit):
    """
    Score a single image with a trained model.

    :param image: image to evaluate; passed to ``features.get_feature_vector``
        and echoed back as the first element of the result
    :param trained_model: trained model exposing ``predict_proba``
    :param probability_limit: limit forwarded to ``get_classification``
    :return: image name, the first three probabilities of the first
        prediction row, and the overall classification
    """
    feature_vector = features.get_feature_vector(image)
    probabilities = trained_model.predict_proba(feature_vector)
    overall_label = get_classification(probabilities, probability_limit)

    # Only the first row of the probability matrix is reported.
    first_row = probabilities[0]
    return image, first_row[0], first_row[1], first_row[2], overall_label
Exemple #2
0
def get_prediction(image, trained_model, probability_limit):
    """
    Return the class probabilities and the overall classification of an image.

    :param image: image to evaluate (also returned as the first tuple element)
    :param trained_model: trained model providing ``predict_proba``
    :param probability_limit: limit handed on to ``get_classification``
    :return: image name, probability for each of the three reported classes
        and overall classification
    """
    probs = trained_model.predict_proba(features.get_feature_vector(image))
    verdict = get_classification(probs, probability_limit)

    # The per-class values are read from the first prediction row.
    per_class = (probs[0][0], probs[0][1], probs[0][2])
    return (image,) + per_class + (verdict,)
def get_training_data(training_data_directory):
    """
    Takes labelled folders of images, featurizes them and returns training data that can be used to train models

    Every immediate sub-directory of ``training_data_directory`` is treated as
    one class: its name is used as the label and each non-hidden file inside
    it is featurized with ``features.get_feature_vector``.

    :param training_data_directory: directory where the training data is
        stored; a trailing path separator is optional
    :return: X and y -- the list of feature vectors and the parallel list of
        labels (sub-directory names)
    """
    X = []
    y = []

    for label in os.listdir(training_data_directory):
        # os.path.join copes with a missing trailing separator, which plain
        # string concatenation turned into a silently empty dataset.
        label_directory = os.path.join(training_data_directory, label)
        if not os.path.isdir(label_directory):
            continue

        for file_name in os.listdir(label_directory):
            # Entries starting with a dot are skipped.
            if file_name.startswith('.'):
                continue

            X.append(features.get_feature_vector(
                os.path.join(label_directory, file_name)))
            y.append(label)

    return X, y
Exemple #4
0
def get_training_data(training_data_directory):
    """
    Takes labelled folders of images, featurizes them and returns training data that can be used to train models

    The name of each immediate sub-directory is the label for every
    non-hidden file it contains; files are featurized with
    ``features.get_feature_vector``.

    :param training_data_directory: directory where the training data is
        stored (with or without a trailing path separator)
    :return: X and y -- feature vectors and their labels, in matching order
    """

    X = []
    y = []

    for class_name in os.listdir(training_data_directory):
        # Build paths with os.path.join so the caller does not have to
        # remember to terminate the directory with a separator.
        class_directory = os.path.join(training_data_directory, class_name)
        if os.path.isdir(class_directory):
            for entry in os.listdir(class_directory):
                # Ignore dot-files.
                if not entry.startswith('.'):
                    featurevector = features.get_feature_vector(
                        os.path.join(class_directory, entry))

                    X.append(featurevector)
                    y.append(class_name)

    return X, y
def extract_feature_vector(image):
    """Return the feature vector of ``image``.

    The image is first converted to the colour space configured in
    ``s.color_space``; the converted image is then featurized.
    """
    return f.get_feature_vector(
        utils.convert_color_space(image, color_space=s.color_space))
Exemple #6
0
def worker(nproc):
    """Compute train/test feature vectors for this worker's share of triples.

    The whole training split is read to build the graph and the per-relation
    candidate targets, but only the triples whose line index falls inside this
    worker's slice (``nproc`` out of ``N_THREADS``) are featurized. Rows are
    appended to ``output/{DATASET}/train.csv.{nproc}`` and
    ``output/{DATASET}/test.csv.{nproc}``, and the thread time spent in the
    two feature loops is appended to ``compute_times.txt``.

    :param nproc: zero-based index of this worker; worker 0 is the only one
        that prints progress messages and wraps its loops in ``tqdm``
    """

    def _print(*args, **kwargs):
        # Avoid printing the same stuff multiple times
        if nproc == 0:
            print(*args, **kwargs)

    def _regular_iterator(ls):
        for l in ls:
            yield l

    # Only the first worker shows a progress bar.
    iterator = tqdm if nproc == 0 else _regular_iterator

    graph = nx.MultiDiGraph() if DIRECTIONAL_GRAPH else nx.MultiGraph()
    possible_targets = {}
    positive_train_triples = []

    train_lines = count_file_lines(PATH_TRAIN)
    test_lines = count_file_lines(PATH_TEST)

    # Start and end ranges for the triples that this thread will process
    start_range_train = int(nproc * train_lines / N_THREADS)
    end_range_train = int((nproc + 1) * train_lines / N_THREADS)

    start_range_test = int(nproc * test_lines / N_THREADS)
    end_range_test = int((nproc + 1) * test_lines / N_THREADS)

    # Optional restriction to a subset of relations (first tab-separated
    # column of each line); None means "no restriction".
    rels_to_study = None
    rels_study_path = f"datasets/{DATASET}/relations_to_study.txt"
    if isfile(rels_study_path):
        rels_to_study = []
        with open(rels_study_path, "r") as f:
            for line in f:
                # Lines read from a file always contain at least "\n", so
                # blank lines can only be detected after stripping.
                line = line.strip()
                if line:
                    rels_to_study.append(line.split("\t")[0])

    # Load the data from the training split
    _print("Loading training data")
    with open(PATH_TRAIN, "r") as f:
        for i, line in enumerate(f):
            spl = line.strip().split("\t")

            # Skip negative examples in the training split, since we generate our own negatives
            if len(spl) >= 4 and spl[3] != "1":
                continue

            s, r, t = spl[:3]
            possible_targets.setdefault(r, []).append(t)

            # Every positive triple goes into the graph, but only the ones in
            # this worker's slice are kept for feature computation.
            graph.add_edge(s, t, rel=r, key=r)
            if start_range_train <= i < end_range_train and (
                    rels_to_study is None or r in rels_to_study):
                positive_train_triples.append((s, r, t))

    _print("Removing duplicate targets")
    # Remove duplicates from the possible targets dict
    possible_targets = {
        rel: list(set(targets)) for rel, targets in possible_targets.items()
    }

    with open(PATH_RELS, "r") as f:
        relations = [x.strip().split("\t")[0] for x in f.readlines()]

    # Generate the negatives by replacing the target entity with a random one
    # from the same range
    _print("Generating negatives")
    negative_train_triples = generate_negatives(positive_train_triples,
                                                possible_targets)
    # Positives carry no originating triple (None) alongside the labelled
    # 4-tuple; the negatives are concatenated as returned by the generator.
    labelled_triples_train = [
        ((s, r, t, 1), None) for s, r, t in positive_train_triples
    ] + negative_train_triples

    _print("Computing features for the training split")
    centrality_indices = degree_centrality(graph)

    # From here on an absent or empty restriction means "all relations".
    if not rels_to_study:
        rels_to_study = relations

    # The context manager guarantees the CSV is closed even if feature
    # computation raises part-way through.
    with open(f"output/{DATASET}/train.csv.{nproc}", "a") as training_csv:
        t1 = time.thread_time()
        for (s, r, t, label), orig in iterator(labelled_triples_train):
            fvec = get_feature_vector(graph, (s, r, t),
                                      relations,
                                      bool(label),
                                      orig,
                                      centrality_indices=centrality_indices,
                                      rels_to_study=rels_to_study)
            training_csv.write(
                f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
        t2 = time.thread_time()

    _print("Loading testing data")
    labelled_triples_test = []
    with open(PATH_TEST, "r") as f:
        for i, line in enumerate(f):
            if start_range_test <= i < end_range_test:
                spl = line.strip().split("\t")
                s, r, t, lbl = spl[:4]
                # rels_to_study is always a list at this point, so no None
                # check is needed.
                if r in rels_to_study:
                    labelled_triples_test.append(
                        (s, r, t, 1 if lbl == "1" else 0))

    _print("Computing features for the testing split")
    with open(f"output/{DATASET}/test.csv.{nproc}", "a") as testing_csv:
        t3 = time.thread_time()
        for s, r, t, label in iterator(labelled_triples_test):
            try:
                fvec = get_feature_vector(
                    graph, (s, r, t),
                    relations,
                    centrality_indices=centrality_indices,
                    rels_to_study=rels_to_study)
            except NodeNotFound:
                # Since the testing data does not appear in the training split,
                # an entity present in the testing split may not appear in the
                # graph generated by the training split.
                continue
            testing_csv.write(
                f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")
        t4 = time.thread_time()

    # Only the two feature-computation loops are timed; loading is excluded.
    elapsed_seconds = (t2 - t1) + (t4 - t3)

    with open("compute_times.txt", "a") as f:
        f.write(
            f"{DATASET};c{MAX_CONTEXT_SIZE};thread{nproc};{elapsed_seconds}\n")
Exemple #7
0
    def search_for_matches(self, image, region_of_interest=None, scale=1.0, visualize=False):
        """Run a sliding-window search over (part of) the given image.

        HOG features are computed once for the whole search region and
        sub-sampled per window; each window is scaled with ``self.scaler`` and
        classified with ``self.classifier``.

        :param image: the image the search is performed on.

        :param region_of_interest: region the search is limited to. If
            unspecified (None), the full image is searched. Format:
            `((top-left-x, top-left-y), (bottom-right-x, bottom-right-y))`

        :param scale: scale of the search windows; the search region is
            shrunk by this factor before a fixed 64-pixel window is slid
            over it.

        :param visualize: If True, additionally returns a copy of ``image``
            with every matching window drawn on it.

        :return: list of boxes ``((x1, y1), (x2, y2))`` in coordinates of
            ``image`` for the windows predicted as 1; when ``visualize`` is
            True, a tuple ``(boxes, annotated_image)``.
        """
        if visualize:
            # note: format for the annotated copy is BGR
            annotated = np.copy(image)

        if region_of_interest is None:
            region_of_interest = ((0, 0), (image.shape[1], image.shape[0]))

        top_left, bottom_right = region_of_interest[0], region_of_interest[1]
        x_start, y_start = top_left[0], top_left[1]
        x_stop, y_stop = bottom_right[0], bottom_right[1]

        region = utils.convert_color_space(
            image[y_start:y_stop, x_start:x_stop, :], s.color_space)

        # scaling the input if necessary
        if scale != 1:
            scaled_width = int(region.shape[1] / scale)
            scaled_height = int(region.shape[0] / scale)
            region = cv2.resize(region, (scaled_width, scaled_height))

        # cars looked smaller and closer to the horizon. so I can limit the searching area
        # for smaller scale (which is used for searching for "small" car) to the upper part
        # of the search region
        keep_fraction = min(0.5 * scale, 1)
        region = region[:int(keep_fraction * region.shape[0]), :]

        # side length (in pixels) of the sliding window
        window_px = 64

        # HOG / sliding parameters from the settings module
        px_per_cell = s.hog_pixels_per_cell
        cells_per_blk = s.hog_cells_per_block
        cell_step = s.sliding_window_cells_increment

        # HOG features for the whole search region, computed once
        region_hog = f.get_hog(region,
                               pixels_per_cell=px_per_cell,
                               cells_per_block=cells_per_blk,
                               orientations=s.hog_orientations,
                               channel=s.hog_channel)

        # number of blocks per sliding window
        blocks_in_window = (window_px // px_per_cell) - cells_per_blk + 1

        # number of (complete) blocks along x / along y
        blocks_x = (region.shape[1] // px_per_cell) - cells_per_blk + 1
        blocks_y = (region.shape[0] // px_per_cell) - cells_per_blk + 1

        # number of window positions along x / along y
        windows_x = (blocks_x - blocks_in_window) // cell_step + 1
        windows_y = (blocks_y - blocks_in_window) // cell_step + 1

        # boxes of all windows classified as matches
        matches = []

        for col in range(windows_x):
            cell_x = col * cell_step
            px_x = cell_x * px_per_cell
            for row in range(windows_y):
                cell_y = row * cell_step
                px_y = cell_y * px_per_cell

                patch = region[px_y:px_y + window_px, px_x:px_x + window_px]
                patch_hog = region_hog[:,
                                       cell_y:cell_y + blocks_in_window,
                                       cell_x:cell_x + blocks_in_window].ravel()
                feature_vec = f.get_feature_vector(patch, subsampled_hog_features=patch_hog)

                verdict = self.classifier.predict(
                    self.scaler.transform(feature_vec.reshape(1, -1)))
                if verdict == 1:
                    # map the window back to coordinates of the input image
                    left = scale * px_x + x_start
                    top = scale * px_y + y_start
                    side = scale * window_px
                    box = ((int(left), int(top)),
                           (int(left + side), int(top + side)))
                    matches.append(box)
                    if visualize:
                        cv2.rectangle(annotated, box[0], box[1], (255, 0, 0), 3)

        if visualize:
            return matches, annotated
        return matches
Exemple #8
0
def main():
    """Run the classify-and-send loop on data received over ``comm``.

    After the handshake, windows of raw data are fetched, turned into a
    feature vector and classified. A final prediction is sent when either

    * two consecutive windows yield the same prediction, or
    * the prediction history kept by ``FreqPredictor`` reaches 5 entries,
      in which case its ``get_predict()`` result is sent.

    After sending, both the short queue and the history are reset. The loop
    ends when the communication layer returns ``None`` instead of data.
    """
    comm = Communicate(IP_ADDR)
    print('Handshake started')
    comm.get_handshake()
    print('Handshake completed')
    classifier = Classifier(FILE_PATH)
    print(classifier)
    freqPredict = FreqPredictor()
    if comm.has_handshake():
        print("starting a new iteration: ")
    input("Press any key to continue")
    # Holds the most recent prediction(s) awaiting confirmation.
    state_queue = deque()

    while True:
        if not comm.has_handshake():
            # NOTE(review): the loop keeps spinning while the handshake is
            # down -- confirm whether a re-handshake or a break is intended.
            print('Handshake broken')
            continue

        # Get data from IMU
        raw_data = comm.getData2(window=45)
        # Identity check: "== None" is ambiguous (or raises) for array-like
        # data.
        if raw_data is None:
            print("Comms Error: None Type")
            break

        # Process data
        feature_vector = get_feature_vector(raw_data)
        predict = classifier.predict_once(feature_vector).lower()

        freqPredict.store_moves(predict)
        state_queue.append(predict)

        if len(state_queue) == 2 and predict == state_queue[0]:
            # Two identical predictions in a row: accept immediately.
            final_predict = state_queue.popleft()
            print('Final Prediction (Queue):', final_predict)
        elif freqPredict.get_hist_count() == 5:
            # No agreement yet, but enough history to pick from.
            final_predict = freqPredict.get_predict()
            print('Final Prediction (Hist):', final_predict)
        else:
            # Nothing to send; if the new prediction disagrees with the
            # pending one, it becomes the new pending prediction.
            if predict != state_queue[0]:
                state_queue.clear()
                state_queue.append(predict)
            continue

        send_prediction(final_predict, comm)
        state_queue.clear()
        freqPredict.clear_hist()
Exemple #9
0
def load_dataset(files_directory, pickle_directory):
    """Build the pairwise mention dataset and split it into train/test sets.

    For every file matching ``files_directory + "*"`` the mention dictionary
    is loaded, its keys are sorted, and each mention is paired with up to the
    next 19 mentions in that order. A pair is labelled 1 when both mentions
    share the same ``'ID'`` and 0 otherwise.

    The dataset is always rebuilt from the files; the resulting lists are
    written to ``x_all.pkl`` / ``y_all.pkl`` inside ``pickle_directory``.

    :param files_directory: path prefix of the mention-dictionary files
    :param pickle_directory: path prefix where the ``*_classes.p`` files are
        read from and the ``x_all.pkl`` / ``y_all.pkl`` files are written to
    :return: ``(x_train, y_train, x_test, y_test, rule_based_wrong_count,
        rule_based_correct_count)``; the split uses 40% of the pairs for
        testing with a fixed random state
    """
    files = glob.glob(files_directory + "*")
    x = []
    y = []
    # No code path updates these counters, so both are always returned as 0;
    # they are kept for interface compatibility.
    rule_based_wrong_count = 0
    rule_based_correct_count = 0

    for fnum, fname in enumerate(files, 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('processing file number %s of %s files' % (fnum, len(files)))
        mentionDictionary = load_dict(fname)
        classes = load_dict(pickle_directory +
                            fname.split('/')[-1].split('.')[0] +
                            '_classes.p')
        keys = sorted(mentionDictionary.keys())
        len_keys = len(keys)
        for i in range(len_keys):
            print('%s of %s' % (i, len_keys))
            mention1 = mentionDictionary[keys[i]]
            # Only pair with the following mentions inside a window of 20.
            for j in range(i + 1, min(len_keys, i + 20)):
                mention2 = mentionDictionary[keys[j]]
                x.append(
                    features.get_feature_vector(mention1, mention2, classes))
                y.append(1 if mention1['ID'] == mention2['ID'] else 0)

    # Context managers make sure the pickles are flushed and closed.
    with open(pickle_directory + 'x_all.pkl', 'wb') as x_file:
        pkl.dump(x, x_file)
    with open(pickle_directory + 'y_all.pkl', 'wb') as y_file:
        pkl.dump(y, y_file)

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=.4,
                                                        random_state=42)

    print('%s %s %s %s' % (len(x_train), len(y_train), len(x_test),
                           len(y_test)))
    return x_train, y_train, x_test, y_test, rule_based_wrong_count, rule_based_correct_count