# Linear (dot-product) kernels over per-business feature summaries: every
# business's image features are reduced to a single vector (mean or max of
# each feature over all of its images) before the dot product is taken.
def mean_linear_kernel(X1, X2):
    """Linear kernel on the per-business mean of each feature.

    Business IDs are read from ``X1[:, 0]`` and ``X2[:, 0]``, cast to ``int``
    and looked up in the module-level ``features_by_bid`` mapping.

    Returns ``np.dot(f1, f2.T)``, where row ``i`` of ``f1`` / ``f2`` is the
    pooled vector of the i-th business in ``X1`` / ``X2``.
    """
    def pooled(X):
        # assumes each features_by_bid entry is a 2-D (images x features)
        # array, so axis=1 of the transpose runs over images -- TODO confirm
        return np.array([
            np.mean(features_by_bid[int(bid)].T, axis=1) for bid in X[:, 0]
        ])

    return np.dot(pooled(X1), pooled(X2).T)


def max_linear_kernel(X1, X2):
    """Linear kernel on the per-business maximum of each feature.

    Same lookup as ``mean_linear_kernel`` (IDs from column 0, cast to
    ``int``, resolved through ``features_by_bid``), but each business is
    summarised with ``max`` instead of ``mean``.

    Returns ``np.dot(f1, f2.T)`` of the pooled vectors.
    """
    def pooled(X):
        return np.array([
            features_by_bid[int(bid)].T.max(axis=1) for bid in X[:, 0]
        ])

    return np.dot(pooled(X1), pooled(X2).T)


# The precomputed EMD matrix is only loaded when a prefix was supplied.
if emd_matrix_prefix is not None:
    emd_matrix = EMDMatrix.load(emd_matrix_prefix)


# NOTE(review): the original indentation was lost; this assumes EMD_kernel is
# defined next to the ``if`` above rather than inside it -- confirm.
def EMD_kernel(X1, X2, gamma):
    """Exponential kernel ``exp(-gamma * D)`` over stored EMD distances.

    ``D`` is whatever ``emd_matrix.for_business_ids`` returns for the
    business IDs in ``X1[:, 0]`` and ``X2[:, 0]``.

    Raises:
        Exception: if ``for_business_ids`` returns ``None``.
    """
    emd = emd_matrix.for_business_ids(X1[:, 0], X2[:, 0])
    if emd is None:
        raise Exception('Provided EMD matrix does not cover needed business IDs.')
    # The squaring step (emd **= 2) is left disabled, as in the original.
    # Scale out of place: an in-place ``emd *= -gamma`` would overwrite the
    # array handed back by for_business_ids, which may be a view of the loaded
    # matrix (not visible here), and fails outright for integer dtypes.
    return np.exp(-gamma * emd)


def accuracy_for_label(label_num):
    label = shuffled_labels[:,label_num]
    data = shuffled_bids.reshape(-1, 1)
    params = ''
features_prefix = sys.argv[1] fdata = FeatureData(features_prefix) features_by_bid = {} for f, bid in izip(fdata.features, fdata.business_ids): if bid in features_by_bid: features_by_bid[bid].append(f) else: features_by_bid[bid] = [f] for bid in features_by_bid.keys(): features_by_bid[bid] = np.array(features_by_bid[bid]) business_ids = np.array(sorted(features_by_bid.keys())[:2]) print "bids: ", business_ids print "recalculated:" for bid1 in business_ids: for bid2 in business_ids: print "D(%d, %d): %.4f" % (bid1, bid2, emd(features_by_bid[bid1], features_by_bid[bid2])) print "from file:" emd_matrix = EMDMatrix.load(sys.argv[2]) print emd_matrix.for_business_ids(business_ids, business_ids)
self.business_ids = np.load(file_prefix + '-business_ids.npy') features_prefix = sys.argv[1] fdata = FeatureData(features_prefix) features_by_bid = {} for f, bid in izip(fdata.features, fdata.business_ids): if bid in features_by_bid: features_by_bid[bid].append(f) else: features_by_bid[bid] = [f] for bid in features_by_bid.keys(): features_by_bid[bid] = np.array(features_by_bid[bid]) business_ids = np.array(sorted(features_by_bid.keys())[:2]) print "bids: ", business_ids print "recalculated:" for bid1 in business_ids: for bid2 in business_ids: print "D(%d, %d): %.4f" % ( bid1, bid2, emd(features_by_bid[bid1], features_by_bid[bid2])) print "from file:" emd_matrix = EMDMatrix.load(sys.argv[2]) print emd_matrix.for_business_ids(business_ids, business_ids)
return np.dot(f1, f2.T) def max_linear_kernel(X1, X2): f1 = np.array([ np.ndarray.max(features_by_bid[int(bid)].T, axis=1) for bid in X1[:, 0] ]) f2 = np.array([ np.ndarray.max(features_by_bid[int(bid)].T, axis=1) for bid in X2[:, 0] ]) return np.dot(f1, f2.T) if emd_matrix_prefix is not None: emd_matrix = EMDMatrix.load(emd_matrix_prefix) def EMD_kernel(X1, X2, gamma): emd = emd_matrix.for_business_ids(X1[:, 0], X2[:, 0]) if emd is None: raise Exception( 'Provided EMD matrix does not cover needed business IDs.') # emd **= 2 emd *= -gamma return np.exp(emd) def accuracy_for_label(label_num):