def libsvm_train(Kxx, cx, subset, c, pos_weight = 1.0, eps = 1e-3, verbose = 0, probability = 0):
    """Train a class-weighted SVM on a precomputed kernel matrix.

    Kxx must be a square, C-contiguous matrix; cx holds the labels (the
    values 1 and -1 are counted) and subset the indices of the examples
    to train on, each of which must lie in [0, Kxx.shape[0]).  Both cx
    and subset must be C-contiguous.

    The two classes are weighted from the counts of +1 and -1 labels found
    in cx[subset]; the weight attached to label +1 is additionally
    multiplied by pos_weight.  c, eps and probability are copied into the
    libsvm parameter structure, verbose is handed to svm_set_verbose.

    Returns (dual_coeffs, bias_term), or
    (dual_coeffs, bias_term, probA, probB) when probability is true.
    dual_coeffs is a float64 array with one entry per element of subset.

    Raises AssertionError if an input check fails or if the training call
    does not return a positive value.
    """
    # --- input sanity checks
    assert Kxx.shape[0] == Kxx.shape[1], Kxx.shape
    assert Kxx.flags.c_contiguous, Kxx.shape
    assert subset.flags.c_contiguous
    assert cx.flags.c_contiguous
    assert np.all(subset < Kxx.shape[0])
    assert np.all(subset >= 0)

    # --- libsvm parameters, starting from the library defaults
    svm_param = libsvm_precomputed.svm_parameter()
    libsvm_precomputed.svm_param_set_default(svm_param)

    # Two per-class weights: slot 0 is label -1, slot 1 is label +1.
    # NOTE(review): the vectors are kept in local variables for the whole
    # function, as in the original; presumably svm_param only stores raw
    # pointers to them -- confirm before dropping these references.
    svm_param.nr_weight = 2
    class_labels = yael.ivec(2)
    svm_param.weight_label = class_labels
    class_labels[0] = -1
    class_labels[1] = 1

    class_weights = yael.dvec(2)
    svm_param.weight = class_weights
    selected_labels = cx[subset]
    n_positive = (selected_labels == 1).sum()
    n_negative = (selected_labels == -1).sum()
    n_labelled = float(n_positive + n_negative)
    # each class is weighted by the relative frequency of the other one
    class_weights[0] = 2 * n_positive / n_labelled
    class_weights[1] = 2 * n_negative / n_labelled * pos_weight

    svm_param.C = c
    svm_param.nu = 0
    svm_param.p = 0
    svm_param.shrinking = 1
    svm_param.probability = probability
    svm_param.eps = eps

    libsvm_precomputed.svm_set_verbose(verbose)

    # --- output buffers filled in by the training call
    n_train = subset.size
    alphas = np.empty((n_train,), dtype = np.float64)
    # slot 0 is read back as the bias term, slots 1 and 2 as probA / probB
    extra_out = yael.dvec(3)

    # --- run the solver
    status = libsvm_precomputed.svm_train_precomputed(
        n_train,
        yael.numpy_to_ivec_ref(subset),
        yael.numpy_to_ivec_ref(cx),
        yael.numpy_to_fvec_ref(Kxx),
        Kxx.shape[1],
        svm_param,
        yael.numpy_to_dvec_ref(alphas),
        extra_out)
    assert status > 0

    bias = extra_out[0]

    if not probability:
        return alphas, bias

    return alphas, bias, extra_out[1], extra_out[2]
elif a == '-d': d = int(args.pop(0)) elif a == '-nt': nt = threads.parse_nt(args.pop(0)) else: print >> sys.stderr, "unknown arg", a sys.exit(1) (nb, sizeb, szb) = yutils.vecfile_stats(fb_name, d, fmt_b) (nq, sizeq, szq) = yutils.vecfile_stats(fq_name, d, fmt_q) (vb, nb) = yutils.load_vectors_fmt(fb_name, fmt_b, d) print 'nb=%d db=%d nq=%d ' % (nb, sizeb, nq) nn = yael.ivec(slice_size * k) nndis = yael.fvec(slice_size * k) fnn = open(fnn_name, 'w') fdis = open(fdis_name, 'w') t0 = time.time() for istart in xrange(0, nq, slice_size): iend = min(istart + slice_size, nq) nqslice = iend - istart # Read the queries (vq, nqslice2) = yutils.load_vectors_fmt(fq_name, fmt_q, d,
from yael import yael

# Seed yael's random generator with a fixed value.
yael.common_srandom(12345)

# Problem size: n points of dimension d, clustered into k centroids.
d = 16
n = 1024
k = 4

# d * n random floats; the raw buffer is then wrapped through
# fvec.acquirepointer (presumably so that Python owns the memory -- confirm).
pts = yael.fvec_new_rand(d * n)
pts = yael.fvec.acquirepointer(pts)

# Buffers sized for k centroids of dimension d and one integer per point.
cents = yael.fvec(d * k)

# OR-ed into the k-means flags below (presumably the thread count -- confirm).
nt = 1

assign = yael.ivec(n)


def d_chi2(a, b):
    """Return (a - b) squared, divided by (a + b)."""
    delta = a - b
    total = a + b
    return delta ** 2 / total


print("clustering %d uniform %dD pts in %d centroids" % (n, d, k))

# One pass per distance type; the plain L2 case carries no extra flag.
for name, flags in (("L2", 0), ("L1", yael.KMEANS_L1), ("Chi2", yael.KMEANS_CHI2)):
    print("%s clustering" % name)
    t0 = time.time()
    flags = flags | (nt | yael.KMEANS_QUIET)
from yael import yael yael.common_srandom(12345) d=16 n=1024 k=4 pts = yael.fvec_new_rand(d * n) pts = yael.fvec.acquirepointer(pts) cents = yael.fvec(d * k) nt = 1 assign = yael.ivec(n) def d_chi2(a, b): return (a - b) ** 2 / (a + b) print "clustering %d uniform %dD pts in %d centroids" % (n, d, k) for name, flags in ("L2", 0), ("L1", yael.KMEANS_L1), ("Chi2", yael.KMEANS_CHI2): print "%s clustering" % name t0 = time.time() flags |= nt | yael.KMEANS_QUIET for run in range(10):
d = int(args.pop(0)) elif a=='-nt': nt = threads.parse_nt(args.pop(0)) else: print >> sys.stderr, "unknown arg",a sys.exit(1) (nb, sizeb, szb) = yutils.vecfile_stats (fb_name, d, fmt_b) (nq, sizeq, szq) = yutils.vecfile_stats (fq_name, d, fmt_q) (vb, nb) = yutils.load_vectors_fmt (fb_name, fmt_b, d) print 'nb=%d db=%d nq=%d ' % (nb, sizeb, nq) nn = yael.ivec (slice_size * k) nndis = yael.fvec (slice_size * k) fnn = open (fnn_name, 'w') fdis = open (fdis_name, 'w') t0 = time.time() for istart in xrange (0, nq, slice_size): iend = min (istart + slice_size, nq) nqslice = iend - istart # Read the queries (vq, nqslice2) = yutils.load_vectors_fmt (fq_name, fmt_q, d, nqslice, istart, verbose=False) assert nqslice == nqslice2