def calibrate(gmm, pos_bboxes, neg_bboxes, title='', show_plot=False):
    # imports needed by this function (mirrors the local-import style of train_gmm)
    import numpy as np
    import irsg_utils as iu
    import sklearn.linear_model as lm
    from sklearn.utils import shuffle

    # score the positive box pairs under the GMM and label them 1
    pos_features = iu.get_gmm_features(pos_bboxes, in_format='xywh')
    pos_scores = iu.gmm_pdf(pos_features, gmm.weights_, gmm.means_, gmm.covariances_)
    n_pos = len(pos_features)
    pos_labels = np.ones(n_pos) #* ((n_pos + 1.)/(n_pos + 2.))

    # score the negative box pairs under the GMM and label them 0
    neg_features = iu.get_gmm_features(neg_bboxes, in_format='xywh')
    neg_scores = iu.gmm_pdf(neg_features, gmm.weights_, gmm.means_, gmm.covariances_)
    n_neg = len(neg_features)
    neg_labels = np.zeros(n_neg) #* (1. / (n_neg + 2.))

    all_features = np.concatenate((pos_features, neg_features))
    all_labels = np.concatenate((pos_labels, neg_labels))
    all_scores = np.concatenate((pos_scores, neg_scores))
    all_log_scores = np.log(all_scores + np.finfo(float).eps)

    # Platt scaling: fit a logistic regression from GMM density to P(relationship)
    shuff_scores, shuff_labels = shuffle(all_scores, all_labels)
    platt_cal = lm.LogisticRegression(penalty='l2')
    platt_cal.fit(shuff_scores.reshape(-1, 1), shuff_labels)
    platt_params = (platt_cal.coef_[0][0], platt_cal.intercept_[0])

    if show_plot:
        # TODO: output plots with fname
        import matplotlib.pyplot as plt
        plt.scatter(all_scores, all_labels)
        x_vals = np.linspace(0.0, np.max(all_scores), num=100)
        y_vals = 1. / (1. + np.exp(-(platt_params[0] * x_vals + platt_params[1])))
        plt.plot(x_vals, y_vals)
        plt.title(title)
        plt.show()

    return platt_params
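# Usage sketch (added for illustration, not part of the original pipeline): applying the
# Platt parameters returned by calibrate() to fresh box pairs. The (N, 2, 4) xywh pair
# layout is inferred from gen_neg_bboxes() and is an assumption here; the function relies
# on the same module-level np/iu aliases used by the other functions in this file.
def example_apply_platt(gmm, platt_params, bbox_pairs):
    feats = iu.get_gmm_features(bbox_pairs, in_format='xywh')
    density = iu.gmm_pdf(feats, gmm.weights_, gmm.means_, gmm.covariances_)
    a, b = platt_params
    # same logistic form used throughout this file: P = 1 / (1 + exp(-(a * pdf + b)))
    return 1. / (1. + np.exp(-(a * density + b)))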
def train_gmm(pos_boxes, neg_boxes=None, n_components=3):
    import numpy as np
    import irsg_utils as iutl
    import sklearn.mixture as skl
    import sklearn.linear_model as lm

    pos_features = iutl.get_gmm_features(pos_boxes, in_format='xywh')
    n_pos = len(pos_features)
    pos_labels = np.ones(n_pos) #* ((n_pos + 1.)/(n_pos + 2.))

    neg_features = iutl.get_gmm_features(neg_boxes, in_format='xywh')
    n_neg = len(neg_features)
    neg_labels = np.zeros(n_neg) #* (1. / (n_neg + 2.))

    all_features = np.concatenate((pos_features, neg_features))
    all_labels = np.concatenate((pos_labels, neg_labels))

    # fit the relationship GMM on the positive examples only
    gmm = skl.GaussianMixture(n_components, covariance_type='full', verbose=1,
                              max_iter=500, n_init=50, tol=1e-6, init_params='random')
    gmm.fit(pos_features)

    # test X and Y fit for GMM
    #gmm_ = skl.GaussianMixture(n_components, covariance_type='full', verbose=1, n_init=25, tol=1e-6)
    #gmm_.fit(all_features, all_labels)

    # use GMM scoring
    #pos_scores = gmm.score_samples(pos_features)
    #neg_scores = gmm.score_samples(neg_features)
    pos_scores = iutl.gmm_pdf(pos_features, gmm.weights_, gmm.means_, gmm.covariances_)
    neg_scores = iutl.gmm_pdf(neg_features, gmm.weights_, gmm.means_, gmm.covariances_)
    all_scores = np.concatenate((pos_scores, neg_scores))
    all_log_scores = all_scores #np.log(all_scores + np.finfo(float).eps)

    # Platt scaling: map raw GMM density to P(relationship | box pair)
    from sklearn.utils import shuffle
    shuff_scores, shuff_labels = shuffle(all_log_scores, all_labels)
    platt_cal = lm.LogisticRegression(penalty='l1', solver='liblinear')  # liblinear supports the L1 penalty
    platt_cal.fit(shuff_scores.reshape(-1, 1), shuff_labels)
    platt_params = (platt_cal.coef_[0][0], platt_cal.intercept_[0])

    return gmm, platt_params
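# Usage sketch (illustrative): train_gmm expects arrays of (subject, object) xywh box pairs;
# the (N, 2, 4) layout is inferred from gen_neg_bboxes() below and is an assumption here.
# pos_pairs/neg_pairs are hypothetical names.
#
#   pos_pairs = np.asarray(positive_pairs, dtype=float)   # shape (N_pos, 2, 4), xywh
#   neg_pairs = np.asarray(negative_pairs, dtype=float)   # shape (N_neg, 2, 4), xywh
#   rel_gmm, (platt_a, platt_b) = train_gmm(pos_pairs, neg_boxes=neg_pairs, n_components=3)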
def __init__(self, sub_bboxes, obj_bboxes, gmm):
    box_pairs = np.array([x for x in it.product(sub_bboxes, obj_bboxes)])
    box_vec = iu.get_gmm_features(box_pairs, in_format='xywh')
    density = iu.gmm_pdf(box_vec, gmm.gmm_weights, gmm.gmm_mu, gmm.gmm_sigma)
    density = np.reshape(density, (len(sub_bboxes), len(obj_bboxes)))
    self.prob = 1. / (1. + np.exp(-(gmm.platt_a * density + gmm.platt_b)))
def gen_neg_bboxes(gmm, pos_bboxes, n_samples=1000000, neg_size=100):
    # take many samples from the gmm and use the lowest N as low probability configs
    samples = gmm.sample(n_samples)[0]
    scores = iu.gmm_pdf(samples, gmm.weights_, gmm.means_, gmm.covariances_)
    sorted_ixs = np.argsort(scores)
    neg_samples = samples[sorted_ixs[:neg_size]]

    # generate gmms for sampling subject bboxes
    subject_bboxes = pos_bboxes[:, 0]
    print(' generating subject bbox GMM...', end='')
    sys.stdout.flush()
    subject_bbox_gmm = skl.GaussianMixture(3, covariance_type='full', verbose=0, max_iter=500,
                                           n_init=50, tol=1e-6, init_params='random')
    subject_bbox_gmm.fit(subject_bboxes)
    print('done!')

    # generate a subject bbox for each neg sample, then derive the object bbox
    # from the sampled relative features (absolute xywh coordinates)
    subject_samples = subject_bbox_gmm.sample(neg_size)[0]
    obj_w = neg_samples[:, 2] * subject_samples[:, 2]
    obj_x = subject_samples[:, 0] + 0.5 * subject_samples[:, 2] - subject_samples[:, 2] * neg_samples[:, 0] - 0.5 * obj_w
    obj_h = neg_samples[:, 3] * subject_samples[:, 3]
    obj_y = subject_samples[:, 1] + 0.5 * subject_samples[:, 3] - subject_samples[:, 3] * neg_samples[:, 1] - 0.5 * obj_h
    object_samples = np.vstack((obj_x, obj_y, obj_w, obj_h)).T

    # clamp subject and object samples to [0, +inf] and convert to int
    subject_samples = np.array(subject_samples, dtype=int)
    subject_samples[:, 0] = np.clip(subject_samples[:, 0], 0, None)
    subject_samples[:, 1] = np.clip(subject_samples[:, 1], 0, None)
    subject_samples[:, 2] = np.clip(subject_samples[:, 2], 1, None)
    subject_samples[:, 3] = np.clip(subject_samples[:, 3], 1, None)

    object_samples = np.array(object_samples, dtype=int)
    object_samples[:, 0] = np.clip(object_samples[:, 0], 0, None)
    object_samples[:, 1] = np.clip(object_samples[:, 1], 0, None)
    object_samples[:, 2] = np.clip(object_samples[:, 2], 1, None)
    object_samples[:, 3] = np.clip(object_samples[:, 3], 1, None)

    # pack into (neg_size, 2, 4) arrays of (subject, object) xywh pairs
    neg_bboxes = np.hstack((subject_samples, object_samples))
    neg_bboxes = np.reshape(neg_bboxes, (neg_size, 2, 4))
    return neg_bboxes
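# Usage sketch (illustrative): one plausible way to combine train_gmm() and gen_neg_bboxes():
# fit the GMM, mine hard negatives by sampling it, then re-run Platt calibration against them.
# The pos_pairs/initial_neg_pairs names and the (N, 2, 4) xywh pair layout are assumptions.
def example_mine_negatives_and_recalibrate(pos_pairs, initial_neg_pairs):
    gmm, _ = train_gmm(pos_pairs, neg_boxes=initial_neg_pairs)
    hard_neg_pairs = gen_neg_bboxes(gmm, pos_pairs, n_samples=100000, neg_size=100)
    return gmm, calibrate(gmm, pos_pairs, hard_neg_pairs, title='hard-negative recalibration')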
def get_relationship_csv(gmm_params, subject_bbox, subject_score, object_bbox, object_score):
    """ generate csv output for the relations defined by the bboxes and gmm

    Args:
        gmm_params: a RelationshipParameters object
        subject_bbox: numpy array of subject xywh
        subject_score: softmax calibrated confidence score p(class|box)
        object_bbox: numpy array of object xywh
        object_score: softmax calibrated confidence score p(class|box)

    Returns:
        comma separated values - binary relation confidence, binary relation PDF,
        subject x, y, w, h, rcnn confidence, object x, y, w, h, rcnn confidence,
        and the four GMM relationship features
    """
    bbox_pair = np.array((subject_bbox, object_bbox))
    bbox_pair = bbox_pair[np.newaxis, :, :]

    input_vec = iutl.get_gmm_features(bbox_pair, in_format='xywh')
    pdf_score = iutl.gmm_pdf(input_vec, gmm_params.gmm_weights, gmm_params.gmm_mu, gmm_params.gmm_sigma)
    prob_score = 1. / (1. + np.exp(-(gmm_params.platt_a * pdf_score + gmm_params.platt_b)))

    ret_str = '{:0.6f}, {:0.6f}, {}, {}, {}, {}, {:0.3f}, {}, {}, {}, {}, {:0.3f}, {:0.3f}, {:0.3f}, {:0.3f}, {:0.3f}'.format(
        prob_score[0], pdf_score[0],
        subject_bbox[0], subject_bbox[1], subject_bbox[2], subject_bbox[3], subject_score,
        object_bbox[0], object_bbox[1], object_bbox[2], object_bbox[3], object_score,
        input_vec[0][0], input_vec[0][1], input_vec[0][2], input_vec[0][3])
    return ret_str
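# Usage sketch (illustrative): get_relationship_csv() expects an object exposing gmm_weights,
# gmm_mu, gmm_sigma, platt_a, and platt_b. The namedtuple below is a hypothetical stand-in for
# the RelationshipParameters class referenced in the docstring; sub_box/obj_box are xywh arrays
# and the 0.91/0.87 detector scores are placeholder values.
#
#   import collections
#   RelParams = collections.namedtuple('RelParams', 'gmm_weights gmm_mu gmm_sigma platt_a platt_b')
#   params = RelParams(gmm.weights_, gmm.means_, gmm.covariances_, platt_params[0], platt_params[1])
#   csv_line = get_relationship_csv(params, sub_box, 0.91, obj_box, 0.87)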
def __init__(self, sub_bboxes, obj_bboxes, gmm):
    # score every (subject, object) box pair under the relationship GMM
    self.box_pairs = np.array([x for x in it.product(sub_bboxes, obj_bboxes)])
    self.box_vec = iu.get_gmm_features(self.box_pairs, in_format='xywh')
    self.density = iu.gmm_pdf(self.box_vec, gmm.gmm_weights, gmm.gmm_mu, gmm.gmm_sigma)
    # Platt-scale the densities and keep pair indices sorted by descending probability
    self.prob = 1. / (1. + np.exp(-(gmm.platt_a * self.density + gmm.platt_b)))
    self.sort_ixs = np.argsort(self.prob)[::-1]
def get_binary_scores(query, qry_to_model_map, model_components):
    import itertools

    use_scaling = True
    do_binary_xform = True

    binary_models_dict = model_components.binary_components
    unary_obj_descriptors = model_components.unary_components

    # normalize the query's binary triples into a flat list
    bin_relations = query.binary_triples
    relationships = []
    if isinstance(bin_relations, np.ndarray):
        for rel in bin_relations:
            relationships.append(rel)
    else:
        relationships.append(bin_relations)

    bin_fn_list = []
    for rel in relationships:
        # get object boxes and generate box pairs
        subject_name = query.objects[rel.subject].names
        object_name = query.objects[rel.object].names

        # specific: <subject>_<relationship>_<object>
        specific_rel = subject_name + '_'
        specific_rel += rel.predicate.replace(' ', '_')
        specific_rel += '_' + object_name

        # wildcard: *_<relationship>_*
        wildcard_rel = rel.predicate.replace(' ', '_')

        # get the model string
        relationship_key = ''
        if specific_rel in binary_models_dict:
            relationship_key = specific_rel
        elif wildcard_rel in binary_models_dict:
            relationship_key = wildcard_rel
        else:
            continue

        # generate box pairs
        sub_boxes = get_boxes(subject_name, unary_obj_descriptors)
        n_sub_boxes = len(sub_boxes)
        obj_boxes = get_boxes(object_name, unary_obj_descriptors)
        n_obj_boxes = len(obj_boxes)

        box_pairs = np.array([x for x in itertools.product(sub_boxes, obj_boxes)])
        gmm_features = iutl.get_gmm_features(box_pairs, in_format='xywh')
        params = binary_models_dict[relationship_key]

        # run the features through the relationship model
        scores = iutl.gmm_pdf(gmm_features, params.gmm_weights, params.gmm_mu, params.gmm_sigma)
        if params.platt_a is not None and params.platt_b is not None:
            scores = 1. / (1. + np.exp(-(params.platt_a * scores + params.platt_b)))
        #scores = -np.log(scores)
        bin_fns = np.reshape(scores, (n_sub_boxes, n_obj_boxes))

        sub_var_ix = qry_to_model_map[rel.subject]
        obj_var_ix = qry_to_model_map[rel.object]
        #var_ixs = [sub_var_ix, obj_var_ix]
        #if obj_var_ix < sub_var_ix:
        #    bin_fns = bin_fns.T
        #    var_ixs = [obj_var_ix, sub_var_ix]

        bin_fn_list.append((sub_var_ix, subject_name, obj_var_ix, object_name, relationship_key, bin_fns))

    #bf = bin_fn_list[0][5]
    #box_ixs = np.unravel_index(np.argmax(bf), bf.shape)
    #bin_results = get_rel_data(model_components, (2,2), bf)
    #return bin_results
    return bin_fn_list
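# Usage sketch (illustrative), mirroring the commented-out probe code above: each tuple
# returned by get_binary_scores() carries an (n_sub_boxes, n_obj_boxes) score matrix, and an
# argmax over it picks the best-scoring (subject box, object box) pair for that relationship.
def example_best_box_pair(bin_fn_tuple):
    sub_var_ix, subject_name, obj_var_ix, object_name, rel_key, bin_fns = bin_fn_tuple
    best_sub_ix, best_obj_ix = np.unravel_index(np.argmax(bin_fns), bin_fns.shape)
    return rel_key, best_sub_ix, best_obj_ix, bin_fns[best_sub_ix, best_obj_ix]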
def gen_factor_graph(query, model_components, objects_per_class, verbose=False, use_scaling=True, max_rels=None):
    import itertools
    verbose_tab = '  '

    do_unary_xform = True
    do_binary_xform = True

    unary_obj_descriptors = model_components.unary_components
    binary_models_dict = model_components.binary_components

    n_vars = []
    fg_to_sg = []
    sg_to_unary = []
    fg_functions = []
    zero_slices = []

    #---------------------------------------------------------------------------
    # GENERATE UNARY FUNCTIONS

    for sg_obj_ix, sg_object in enumerate(query.objects):
        if verbose:
            print('{}using model for object "{}"'.format(verbose_tab, sg_object.names))

        # find the unary descriptor for this scene graph object
        img_obj_ix = -1
        for ix, img_obj in enumerate(unary_obj_descriptors):
            if img_obj.name == sg_object.names:
                img_obj_ix = ix
        if img_obj_ix == -1:
            continue

        n_labels = len(unary_obj_descriptors[img_obj_ix].boxes)
        n_vars.append(n_labels)
        fg_to_sg.append(sg_obj_ix)
        sg_to_unary.append(img_obj_ix)
        zero_slices.append(None)

    gm = ogm.gm(n_vars, operator='adder')

    # add unary functions to gm
    unary_fn_count = np.zeros_like(objects_per_class, dtype=int)
    for ix in fg_to_sg:
        unary_ix = sg_to_unary[ix]
        scores = np.copy(unary_obj_descriptors[unary_ix].scores)
        if objects_per_class[unary_ix] > 1:
            zero_ix = (unary_fn_count[unary_ix] + 1) % objects_per_class[unary_ix]
            zero_slices[ix] = np.index_exp[zero_ix:zero_ix+1]
            #if zero_slices[ix] is not None:
            scores[zero_slices[ix]] = 0.0
            unary_fn_count[unary_ix] += 1
        if do_unary_xform:
            # convert detection scores to energies: -ln(score)
            scores += np.finfo(float).eps
            scores = -np.log(scores)
        fn_id = gm.addFunction(scores)
        fg_functions.append((1, fn_id, [ix]))

    #---------------------------------------------------------------------------
    # GENERATE BINARY FUNCTIONS

    # prep the relationships
    bin_relations = query.binary_triples
    relationships = []
    if isinstance(bin_relations, np.ndarray):
        for rel in bin_relations:
            relationships.append(rel)
    else:
        relationships.append(bin_relations)

    # generate a function for each relationship
    for rel in relationships:
        # get object boxes and generate box pairs
        subject_name = query.objects[rel.subject].names
        object_name = query.objects[rel.object].names

        # specific: <subject>_<relationship>_<object>
        specific_rel = subject_name + '_'
        specific_rel += rel.predicate.replace(' ', '_')
        specific_rel += '_' + object_name

        # wildcard: *_<relationship>_*
        wildcard_rel = rel.predicate.replace(' ', '_')

        # get the model string
        relationship_key = ''
        if specific_rel in binary_models_dict:
            if verbose:
                print('{}using relationship model for "{}"'.format(verbose_tab, specific_rel))
            relationship_key = specific_rel
        elif wildcard_rel in binary_models_dict:
            if verbose:
                print('{}no relationship model for "{}", using "{}"'.format(verbose_tab, specific_rel, wildcard_rel))
            relationship_key = wildcard_rel
        else:
            if verbose:
                print('{}no relationship models for "{}" or "{}", skipping relationship'.format(verbose_tab, specific_rel, wildcard_rel))
            continue

        bin_fns = None
        if max_rels is not None:
            # use precomputed relationship scores for this image
            # ('rc' is not defined in this function; it is expected to come from the enclosing scope)
            bin_fns = max_rels[relationship_key][rc.image_filename]
            bin_fns += np.finfo(float).eps
            bin_fns = -np.log(bin_fns)
        else:
            # generate box pairs
            sub_boxes = get_boxes(subject_name, unary_obj_descriptors)
            n_sub_boxes = len(sub_boxes)
            obj_boxes = get_boxes(object_name, unary_obj_descriptors)
            n_obj_boxes = len(obj_boxes)

            box_pairs = np.array([x for x in itertools.product(sub_boxes, obj_boxes)])
            gmm_features = iutl.get_gmm_features(box_pairs, in_format='xywh')
            params = binary_models_dict[relationship_key]

            # run the features through the relationship model
            scores = iutl.gmm_pdf(gmm_features, params.gmm_weights, params.gmm_mu, params.gmm_sigma)
            if do_binary_xform:
                if use_scaling and params.platt_a is not None and params.platt_b is not None:
                    scores = 1. / (1. + np.exp(-(params.platt_a * scores + params.platt_b)))
                scores += np.finfo(float).eps  # float epsilon so that we don't try ln(0)
                scores = -np.log(scores)
            bin_fns = np.reshape(scores, (n_sub_boxes, n_obj_boxes))

        # penalize rows/columns that were zeroed out in the unary functions
        if zero_slices[rel.subject] is not None:
            zero_slice = zero_slices[rel.subject][0]
            bin_fns[zero_slice, :] = -np.log(np.finfo(float).eps)
        if zero_slices[rel.object] is not None:
            zero_slice = zero_slices[rel.object][0]
            bin_fns[:, zero_slice] = -np.log(np.finfo(float).eps)

        sub_var_ix = fg_to_sg[rel.subject]
        obj_var_ix = fg_to_sg[rel.object]
        var_ixs = [sub_var_ix, obj_var_ix]
        # OpenGM expects factor variable indices in ascending order; transpose if needed
        if obj_var_ix < sub_var_ix:
            bin_fns = bin_fns.T
            var_ixs = [obj_var_ix, sub_var_ix]
        fid = gm.addFunction(bin_fns)
        fg_functions.append((2, fid, var_ixs))

    #---------------------------------------------------------------------------
    # ADD FUNCTIONS TO GM

    for fn_tup in fg_functions:
        if fn_tup[0] == 1:
            gm.addFactor(fn_tup[1], fn_tup[2][0])
        else:
            gm.addFactor(fn_tup[1], fn_tup[2])

    return gm, sg_to_unary
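# Usage sketch (illustrative): one way the returned graphical model might be queried with the
# OpenGM python bindings (the 'ogm' alias used above). The inference API shown here
# (BeliefPropagation, infer(), arg()) is an assumption about the installed OpenGM version,
# not taken from this module; since the factors store -log scores, the best configuration
# is the minimizing labeling.
def example_run_inference(query, model_components, objects_per_class):
    gm, sg_to_unary = gen_factor_graph(query, model_components, objects_per_class)
    bp = ogm.inference.BeliefPropagation(gm, accumulator='minimizer')
    bp.infer()
    best_labels = bp.arg()   # one box index per factor graph variable
    return best_labels, sg_to_unary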