Ejemplo n.º 1
0
# Linear (dot-product) kernel on mean-pooled features: every feature is
# averaged over all images of a business before the dot product is taken.
def mean_linear_kernel(X1, X2):
	"""Return the matrix of dot products between mean-pooled business features.

	Column 0 of ``X1`` and ``X2`` holds business IDs; each is converted with
	``int`` and used as a key into the module-level ``features_by_bid``
	mapping.  The looked-up array is transposed and averaged along axis 1.
	(Assumes each ``features_by_bid`` entry is a 2-D images-by-features
	array -- TODO confirm against the code that builds it.)

	The result is ``np.dot(f1, f2.T)``, where ``f1`` / ``f2`` stack one pooled
	vector per row of ``X1`` / ``X2``.
	"""
	def pooled_rows(X):
		# One averaged feature vector per row of X, kept in row order.
		rows = []
		for bid in X[:,0]:
			rows.append(np.mean(features_by_bid[int(bid)].T, axis=1))
		return np.array(rows)

	left = pooled_rows(X1)
	right = pooled_rows(X2)
	return np.dot(left, right.T)

def max_linear_kernel(X1, X2):
	"""Return the matrix of dot products between max-pooled business features.

	Column 0 of ``X1`` and ``X2`` holds business IDs; each is converted with
	``int`` and used as a key into the module-level ``features_by_bid``
	mapping.  The looked-up array is transposed and reduced with
	``np.ndarray.max`` along axis 1.  (Assumes each ``features_by_bid`` entry
	is a 2-D images-by-features array -- TODO confirm.)

	The result is ``np.dot(f1, f2.T)``, where ``f1`` / ``f2`` stack one pooled
	vector per row of ``X1`` / ``X2``.
	"""
	def pooled_rows(X):
		# One max-pooled feature vector per row of X, kept in row order.
		rows = []
		for bid in X[:,0]:
			per_business = features_by_bid[int(bid)]
			rows.append(np.ndarray.max(per_business.T, axis=1))
		return np.array(rows)

	left = pooled_rows(X1)
	right = pooled_rows(X2)
	return np.dot(left, right.T)

# Load the EMD matrix from `emd_matrix_prefix`, but only when a prefix was given.
# NOTE(review): when `emd_matrix_prefix` is None this statement does not bind
# `emd_matrix`, yet EMD_kernel reads that global -- confirm EMD_kernel is only
# used when a prefix is supplied (or that `emd_matrix` is bound elsewhere).
if emd_matrix_prefix is not None:
	emd_matrix = EMDMatrix.load(emd_matrix_prefix)

def EMD_kernel(X1, X2, gamma):
	"""Exponential kernel ``exp(-gamma * D)`` over distances from ``emd_matrix``.

	Column 0 of ``X1`` and of ``X2`` is passed to
	``emd_matrix.for_business_ids`` to obtain the distance array ``D``;
	``gamma`` scales the distances before exponentiation.

	Returns:
		A newly allocated array.  The array handed back by ``emd_matrix`` is
		never modified.

	Raises:
		Exception: if ``for_business_ids`` returns ``None``.
	"""
	emd = emd_matrix.for_business_ids(X1[:,0], X2[:,0])
	if emd is None:
		raise Exception('Provided EMD matrix does not cover needed business IDs.')
	# Scale into a fresh array instead of ``emd *= -gamma``: an in-place update
	# would overwrite the stored distances if ``for_business_ids`` returns a
	# view or cached array, and it fails for integer-typed distances when
	# ``gamma`` is a float.
	# NOTE: the squaring step (``emd **= 2``) remains disabled, as before.
	return np.exp(np.multiply(emd, -gamma))

def accuracy_for_label(label_num):
	label = shuffled_labels[:,label_num]
	data = shuffled_bids.reshape(-1, 1)

	params = ''
Ejemplo n.º 2
0
features_prefix = sys.argv[1]
fdata = FeatureData(features_prefix)

features_by_bid = {}
for f, bid in izip(fdata.features, fdata.business_ids):
	if bid in features_by_bid:
		features_by_bid[bid].append(f)
	else:
		features_by_bid[bid] = [f]

for bid in features_by_bid.keys():
	features_by_bid[bid] = np.array(features_by_bid[bid])

business_ids = np.array(sorted(features_by_bid.keys())[:2])
print "bids: ", business_ids

print "recalculated:"

for bid1 in business_ids:
	for bid2 in business_ids:
		print "D(%d, %d): %.4f" % (bid1, bid2,
		                           emd(features_by_bid[bid1], features_by_bid[bid2]))

print "from file:"

emd_matrix = EMDMatrix.load(sys.argv[2])
print emd_matrix.for_business_ids(business_ids, business_ids)



Ejemplo n.º 3
0
        self.business_ids = np.load(file_prefix + '-business_ids.npy')


features_prefix = sys.argv[1]
fdata = FeatureData(features_prefix)

features_by_bid = {}
for f, bid in izip(fdata.features, fdata.business_ids):
    if bid in features_by_bid:
        features_by_bid[bid].append(f)
    else:
        features_by_bid[bid] = [f]

for bid in features_by_bid.keys():
    features_by_bid[bid] = np.array(features_by_bid[bid])

business_ids = np.array(sorted(features_by_bid.keys())[:2])
print "bids: ", business_ids

print "recalculated:"

for bid1 in business_ids:
    for bid2 in business_ids:
        print "D(%d, %d): %.4f" % (
            bid1, bid2, emd(features_by_bid[bid1], features_by_bid[bid2]))

print "from file:"

emd_matrix = EMDMatrix.load(sys.argv[2])
print emd_matrix.for_business_ids(business_ids, business_ids)
Ejemplo n.º 4
0
    return np.dot(f1, f2.T)


def max_linear_kernel(X1, X2):
    """Return the matrix of dot products between max-pooled business features.

    Column 0 of ``X1`` and ``X2`` holds business IDs; each is converted with
    ``int`` and used as a key into the module-level ``features_by_bid``
    mapping.  The looked-up array is transposed and reduced with
    ``np.ndarray.max`` along axis 1.  (Assumes each ``features_by_bid`` entry
    is a 2-D images-by-features array -- TODO confirm.)

    The result is ``np.dot(f1, f2.T)``, where ``f1`` / ``f2`` stack one pooled
    vector per row of ``X1`` / ``X2``.
    """
    def pooled_rows(X):
        # One max-pooled feature vector per row of X, kept in row order.
        rows = []
        for bid in X[:, 0]:
            per_business = features_by_bid[int(bid)]
            rows.append(np.ndarray.max(per_business.T, axis=1))
        return np.array(rows)

    left = pooled_rows(X1)
    right = pooled_rows(X2)
    return np.dot(left, right.T)


# Load the EMD matrix from `emd_matrix_prefix`, but only when a prefix was given.
# NOTE(review): when `emd_matrix_prefix` is None this statement does not bind
# `emd_matrix`, yet EMD_kernel reads that global -- confirm EMD_kernel is only
# used when a prefix is supplied (or that `emd_matrix` is bound elsewhere).
if emd_matrix_prefix is not None:
    emd_matrix = EMDMatrix.load(emd_matrix_prefix)


def EMD_kernel(X1, X2, gamma):
    """Exponential kernel ``exp(-gamma * D)`` over distances from ``emd_matrix``.

    Column 0 of ``X1`` and of ``X2`` is passed to
    ``emd_matrix.for_business_ids`` to obtain the distance array ``D``;
    ``gamma`` scales the distances before exponentiation.

    Returns:
        A newly allocated array.  The array handed back by ``emd_matrix`` is
        never modified.

    Raises:
        Exception: if ``for_business_ids`` returns ``None``.
    """
    emd = emd_matrix.for_business_ids(X1[:, 0], X2[:, 0])
    if emd is None:
        raise Exception(
            'Provided EMD matrix does not cover needed business IDs.')

    # Scale into a fresh array instead of ``emd *= -gamma``: an in-place update
    # would overwrite the stored distances if ``for_business_ids`` returns a
    # view or cached array, and it fails for integer-typed distances when
    # ``gamma`` is a float.
    # NOTE: the squaring step (``emd **= 2``) remains disabled, as before.
    return np.exp(np.multiply(emd, -gamma))


def accuracy_for_label(label_num):