def create_forest():
    """
    Create new random decision forest

    Reads the training images via ``read_images()``, extracts one feature
    vector per image with ``get_feature_vector()`` and fits a
    ``RandomForestClassifier`` on the result.

    Returns:
        The fitted classifier.

    Raises:
        ValueError: if ``read_images()`` yields no images.
    """
    print('Creating random forest')

    # One pass keeps classes and feature vectors aligned and also works
    # when read_images() hands back a one-shot iterator.
    image_classes = []
    feature_vectors = []
    for icls, img_path in read_images():
        image_classes.append(icls)
        feature_vectors.append(get_feature_vector(img_path))

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not feature_vectors:
        raise ValueError('Must have at least one feature vector to train on')

    # number of trees = floor(sqrt(length of the first feature vector))
    # NOTE(review): sqrt(n_features) is the usual heuristic for
    # `max_features`, not for the tree count -- confirm this is intended.
    sqrt_feat_num = int(np.sqrt(len(feature_vectors[0])))
    print(' ', sqrt_feat_num, 'estimator%s' % ('' if sqrt_feat_num == 1 else 's'))

    # create forest
    clf = RandomForestClassifier(
        n_estimators=sqrt_feat_num,
        n_jobs=-1,
        max_depth=None,
        # scikit-learn requires an int >= 2 here (1 raises an error); 2 is
        # the smallest legal value and still lets trees grow fully, since a
        # node holding a single sample cannot be split anyway.
        min_samples_split=2,
    )
    clf = clf.fit(feature_vectors, image_classes)

    return clf
def process_batch_custom(datum, max_people):
    """
    Build pairwise features, membership targets and pad flags per member

    For every index ``member`` along the first axis of ``datum['labels']``
    a row is built by pairing it with every index ``agent``:

    * features: ``get_feature_vector(features[member], features[agent])``
    * target:   ``labels[member] == labels[agent]``
    * pad flag: ``-2`` when ``member == agent``, otherwise ``0``

    Rows shorter than ``max_people`` are filled up by repeating the first
    feature/target entry and flagging the filler positions with ``-2``.

    Args:
        datum: mapping with a ``'features'`` and a ``'labels'`` entry;
            ``labels`` must expose ``.shape``.
        max_people: length every row is padded up to (rows that are already
            longer are left as they are).

    Returns:
        Tuple ``(batch_x, batch_y, batch_pad)`` of lists holding one float32
        array per member; targets and pad flags get an extra axis at
        position 1.
    """
    features = datum['features']
    labels = datum['labels']

    def _as_column(values):
        # float32 array with a new axis inserted at position 1
        return np.expand_dims(np.array(values).astype(np.float32), axis=1)

    batch_x, batch_y, batch_pad = [], [], []

    for member in range(labels.shape[0]):
        agents = range(labels.shape[0])

        pair_feat = [
            get_feature_vector(features[member], features[agent])
            for agent in agents
        ]
        pair_membership = [labels[member] == labels[agent] for agent in agents]
        # the member paired with itself is flagged like a padded slot
        pair_pad = [-2 if member == agent else 0 for agent in agents]

        # fill up to max_people by repeating the first entry
        n_remaining = max_people - len(pair_membership)
        if n_remaining > 0:
            pair_feat.extend([pair_feat[0]] * n_remaining)
            pair_membership.extend([pair_membership[0]] * n_remaining)
            pair_pad.extend([-2] * n_remaining)

        batch_x.append(np.array(pair_feat).astype(np.float32))
        batch_y.append(_as_column(pair_membership))
        batch_pad.append(_as_column(pair_pad))

    return batch_x, batch_y, batch_pad
def compute_score(root_dir, clf):
    """
    Test model with all images in given directory

    Every entry of ``root_dir`` is turned into a feature vector with
    ``get_feature_vector()``; the text before the first underscore of its
    name is used as the expected class. The value of ``clf.score()`` on
    these samples is printed; nothing is returned.

    Raises:
        AssertionError: if an entry name contains no underscore.
    """
    samples = []
    expected = []

    for entry in os.listdir(root_dir):
        # "<class>_<rest>": an empty separator means there was no underscore
        label, sep, _ = entry.partition('_')
        assert sep, 'Invalid filename'

        samples.append(get_feature_vector(os.path.join(root_dir, entry)))
        expected.append(label)

    accuracy = clf.score(samples, expected)
    print('Accuracy:', accuracy)
def classify_image(image_path, clf):
    """
    Classify given image

    Extracts the feature vector of ``image_path`` with
    ``get_feature_vector()`` and prints the results of ``clf.predict()``
    and ``clf.predict_proba()`` for it. Nothing is returned.
    """
    # the classifier is handed a list holding this single sample
    samples = [get_feature_vector(image_path)]

    predicted = clf.predict(samples)
    probabilities = clf.predict_proba(samples)
    print(predicted, probabilities)