Exemple #1
0
def prepare(bags, class_prior, L, U, T):
  """
  Build a train/test split from the original dataset, retrying on failure.

  The bags extracted for labels 1 and -1 (with labels attached) are
  shuffled in place and handed to ``_prepare``. When ``_prepare`` raises,
  a warning is written to stderr and the call is repeated, at most five
  times in total.

  Parameters
  ----------
  bags        : original dataset
  class_prior : the ratio of positive samples
  L           : the number of labeled samples in output dataset
  U           : the number of unlabeled samples in output dataset
  T           : the number of test samples in output dataset

  Returns
  -------
  Whatever ``_prepare`` returns, or ``None`` if every attempt failed.
  """
  # original data
  p_bags = MI.extract_bags(bags,  1, with_label = True)
  n_bags = MI.extract_bags(bags, -1, with_label = True)
  random.shuffle(p_bags)
  random.shuffle(n_bags)
  P = len(p_bags)
  N = len(n_bags)

  max_attempts = 5
  for _ in range(max_attempts):
    try:
      return _prepare(p_bags, n_bags, P, N, class_prior, L, U, T)
    except Exception:
      # Only ordinary errors trigger a retry; KeyboardInterrupt and
      # SystemExit must still be able to stop the program.
      # if the obtained split is invalid, try sampling again
      sys.stderr.write("Warning: Retry train-test-split (recommend to change the splitting number)\n")

  # All attempts failed: keep the historical contract of returning None.
  return None
Exemple #2
0
def train(bags, s, l, args):
    """
    Fit a kernel model by solving a sequence of quadratic programs (cvxopt).

    Parameters
    ----------
    bags : dataset passed to ``MI.extract_bags``
    s    : kernel width (appears as ``2 * s**2`` in the exponent)
    l    : coefficient of the identity block in the quadratic term
    args : not used inside this function

    Returns
    -------
    A callable that takes an iterable of instances and returns the maximum
    of the per-instance classifier outputs.
    """
    # Instances of the bags selected with 0 and with 1, stacked row-wise.
    P = np.vstack(MI.extract_bags(bags, 0))
    Q = np.vstack(MI.extract_bags(bags, 1))

    n = len(P)
    m = len(Q)

    # Kernel values of P (resp. Q) against all instances X = [P; Q].
    # NOTE(review): presumably `r` returns row-wise sums as a column so the
    # exponent is a squared Euclidean distance -- confirm against `r`.
    X = np.vstack((P, Q))
    KP = np.exp(-(r(P**2) - 2 * P.dot(X.T) + r(X**2).T) / (2 * s**2))
    KQ = np.exp(-(r(Q**2) - 2 * Q.dot(X.T) + r(X**2).T) / (2 * s**2))

    # initialization step
    # The QP variable has 2 * (n + m) entries: the first n + m are the
    # coefficients `a`, followed by n entries and then m entries that act as
    # per-instance auxiliary variables for P and Q.
    # Quadratic term: l * I on the coefficient block, zero elsewhere.
    L = np.r_[np.c_[l * np.eye(n + m),
                    np.zeros((n + m, n)),
                    np.zeros((n + m, m))], np.c_[np.zeros((n, n + m)),
                                                 np.zeros((n, n)),
                                                 np.zeros((n, m))],
              np.c_[np.zeros((m, n + m)),
                    np.zeros((m, n)),
                    np.zeros((m, m))], ]
    # Linear term: 0 on the coefficients, 1/n and 1/m on the auxiliaries.
    k = np.r_[np.zeros((n + m, 1)), np.ones((n, 1)) / n, np.ones((m, 1)) / m, ]
    # Inequality constraints G x <= h, in four row blocks:
    #   1) -aux_P            <=  0
    #   2) KP a - aux_P      <= -1
    #   3) -aux_Q            <=  0
    #   4) KQ a - aux_Q      <=  1
    G = np.r_[np.c_[np.zeros((n, n + m)), -np.eye(n),
                    np.zeros((n, m))], np.c_[KP, -np.eye(n),
                                             np.zeros((n, m))],
              np.c_[np.zeros((m, n + m)),
                    np.zeros((m, n)), -np.eye(m)], np.c_[KQ,
                                                         np.zeros((m, n)),
                                                         -np.eye(m)], ]
    h = np.r_[np.zeros((n, 1)), -np.ones((n, 1)),
              np.zeros((m, 1)),
              np.ones((m, 1)), ]

    # Initial solution; only the coefficient part of the solution is kept.
    result = cvxopt.solvers.qp(matrix(L), matrix(k), matrix(G), matrix(h))
    a = np.array(result['x'])[:n + m]

    # Fixed number of refinement rounds.
    T = 10
    for t in range(T):
        # tighten the upper-bound
        # Boolean indicators computed from the current coefficients.
        b = KP.dot(a) >= 1
        c = KQ.dot(a) >= -1

        # minimize the upper-bound
        # Only the coefficient block of the linear term changes; L, G and h
        # are reused unchanged.
        k = np.r_[-KP.T.dot(b) / n - KQ.T.dot(c) / m,
                  np.ones((n, 1)) / n,
                  np.ones((m, 1)) / m, ]

        result = cvxopt.solvers.qp(matrix(L), matrix(k), matrix(G), matrix(h))
        a = np.array(result['x'])[:n + m]

    def classifier(x):
        # Score one instance: kernel of x against all training instances X,
        # weighted by the final coefficients `a`.
        x = x.reshape(1, -1)
        return a.T.dot(
            np.exp(-(r(X**2) - 2 * X.dot(x.T) + r(x**2).T) / (2 * s**2)))

    # The lambda's parameter X shadows the training X only inside the lambda;
    # `classifier` still closes over the training instances.
    return lambda X: np.max([classifier(x) for x in X])
Exemple #3
0
def prediction_error(bags, model, theta):
    """
    Sum the per-bag risk of ``model`` over ``bags`` and subtract ``theta``.

    The risk function is built by ``nc_risk`` from ``theta``, the number of
    bags extracted with 1, the number extracted with 0 and
    ``zero_one_loss``. Each bag contributes
    ``risk(model(bag.data()), label)``, where the label is wrapped as a
    1x1 float32 ``Variable``.
    """
    count_one = len(MI.extract_bags(bags, 1))
    count_zero = len(MI.extract_bags(bags, 0))
    risk = nc_risk(theta, count_one, count_zero, zero_one_loss)

    total = 0
    for bag in bags:
        output = model(bag.data())
        target = Variable(np.array([[bag.label()]]).astype(np.float32))
        total += float(risk(output, target).data)

    return total - theta
Exemple #4
0
def validation_error(validation_set, training_set, s, l, t):
  """
  Evaluate ``t' H t - 2 h' t`` for coefficients ``t`` on a validation set.

  ``H`` is built from the training instances only; ``h`` is the difference
  between the mean kernel values against the validation instances
  extracted with 1 and those extracted with 0. ``l`` is accepted but not
  used in the computation.
  """
  def sq_terms(A, B):
    # Same expression (and operation order) as the inlined original.
    return r(A**2) - 2*A.dot(B.T) + r(B**2).T

  train_one = np.vstack(MI.extract_bags(training_set, 1))
  train_zero = np.vstack(MI.extract_bags(training_set, 0))
  X = np.vstack((train_one, train_zero))
  d = X.shape[1]

  P = np.vstack(MI.extract_bags(validation_set, 1))
  Q = np.vstack(MI.extract_bags(validation_set, 0))

  H = (np.pi * s**2)**(d/2) * np.exp(- sq_terms(X, X) / (4*s**2))

  mean_P = np.exp(- sq_terms(X, P) / (2*s**2)).mean(axis=1)
  mean_Q = np.exp(- sq_terms(X, Q) / (2*s**2)).mean(axis=1)
  h = mean_P - mean_Q

  return t.dot(H.dot(t)) - 2*h.T.dot(t)
Exemple #5
0
def _class_prior(bags, basis, r):
  """
  Estimate the class prior from the bags extracted with 1 and with 0.

  cf. (du Plessis et al., 2014)

  Parameters
  ----------
  bags  : dataset passed to ``MI.extract_bags``
  basis : callable mapping one bag to a 1-D feature vector
  r     : coefficient of the identity matrix added to ``H``

  Returns
  -------
  ``(2 h' G^-1 h - h' G^-1 H G^-1 h) ** -1`` with ``G = H + r I``.
  """
  p_bags = MI.extract_bags(bags, 1)
  u_bags = MI.extract_bags(bags, 0)
  n1 = len(p_bags)
  n0 = len(u_bags)

  # Evaluate the basis once per bag instead of twice.
  p_feats = [basis(B) for B in p_bags]
  H = 1./n1 * np.sum([np.outer(phi, phi) for phi in p_feats], axis=0)
  h = 1./n0 * np.sum([basis(B) for B in u_bags], axis=0)

  # The regulariser must match the dimension of H (the basis dimension),
  # which is not necessarily n1 + n0.
  G = H + r * np.eye(H.shape[0])
  G_ = np.linalg.inv(G)

  G_h = G_.dot(h)
  return (2*h.T.dot(G_h)-h.T.dot(G_.dot(H.dot(G_h))))**(-1)
Exemple #6
0
def train(bags, width, reg, args):
  """
  Fit an LSDD-based model and return a bag-level scoring function.

  The instances of the bags extracted with 1 and with 0 are stacked and
  passed to ``LSDD`` together with ``width`` and ``reg``. The returned
  callable scores every instance of its argument and returns the maximum
  score. ``args`` is accepted but not used.
  """
  inst_one = np.vstack(MI.extract_bags(bags, 1))
  inst_zero = np.vstack(MI.extract_bags(bags, 0))

  coef = LSDD(inst_one, inst_zero, width, reg)
  centers = np.vstack((inst_one, inst_zero))

  def classifier(x):
    # Score a single instance against all training instances.
    row = x.reshape(1, -1)
    exponent = - (r(centers**2) - 2*centers.dot(row.T) + r(row**2).T) / (2*width**2)
    return coef.T.dot(np.exp(exponent))

  def bag_score(X):
    return np.max([classifier(x) for x in X])

  return bag_score
Exemple #7
0
def train_lsdd(data, args):
    """
    Select LSDD hyper-parameters by 5-fold cross validation, then train.

    Every (width, reg) pair of a fixed grid is scored by the mean
    ``validation_error`` over the folds produced by
    ``MI.cross_validation(data, 5)``; the pair with the smallest mean error
    is used to train the final model on all of ``data``. Progress is
    printed when ``args.verbose`` is set; the elapsed time of the final
    training is always printed.

    Returns
    -------
    (model, best_param) where ``best_param`` is a dict with the keys
    'width' and 'reg'.
    """
    width_grid = [1.0e-2, 1.0e-4, 1.0e-6]
    reg_grid = [1.0, 1.0e-03, 1.0e-06]

    def fit(samples, width, reg, measure_time=False):
        # Train one model, optionally reporting the wall-clock time.
        if measure_time:
            started = time.time()

        model = MI.UU.LSDD.train(samples, width, reg, args)
        metadata = {'width': width, 'reg': reg}

        if measure_time:
            finished = time.time()
            print("#  elapsed time = {}".format(finished - started))

        return model, metadata

    def fold_error(fold_train, fold_val, width, reg):
        # Fit the LSDD coefficients on one fold and score them on the other.
        coef = MI.UU.LSDD.LSDD(np.vstack(MI.extract_bags(fold_train, 1)),
                               np.vstack(MI.extract_bags(fold_train, 0)),
                               width, reg)
        return MI.UU.LSDD.validation_error(fold_val, fold_train, width, reg,
                                           coef)

    # cross validation
    best_param = {}
    best_error = np.inf
    if args.verbose:
        print("# *** Cross Validation ***")

    for width, reg in itertools.product(width_grid, reg_grid):
        fold_errors = [
            fold_error(fold_train, fold_val, width, reg)
            for fold_train, fold_val in MI.cross_validation(data, 5)
        ]
        error = np.mean(fold_errors)

        if args.verbose:
            print("#  width = {:.3e} / reg = {:.3e} / error = {:.3e}".format(
                width, reg, error))

        if error < best_error:
            best_error = error
            best_param = {'width': width, 'reg': reg}

    if args.verbose:
        print("# {}".format('-' * 80))

    model, _ = fit(data,
                   best_param['width'],
                   best_param['reg'],
                   measure_time=True)

    return model, best_param
Exemple #8
0
def train_sl(bags, basis, bdim, theta, r, args):
    """
    Fit a linear-in-basis classifier in closed form.

    Each bag is mapped to ``[1, basis(bag)]``; the parameter vector is
    obtained by solving a regularised linear system built from the bags
    extracted with 1 (``P1``) and with 0 (``P0``).

    Parameters
    ----------
    bags  : dataset passed to ``MI.extract_bags``
    basis : callable mapping one bag to a 1-D feature vector of length bdim
    bdim  : dimension of the basis output
    theta : weight applied to the ``P1`` term
    r     : coefficient of the identity matrix in the linear system
    args  : not used inside this function

    Returns
    -------
    A callable ``clf(X)`` computing ``alpha' basis(X) + beta``.
    """
    p_bags = MI.extract_bags(bags, 1)
    u_bags = MI.extract_bags(bags, 0)
    N1 = len(p_bags)
    N0 = len(u_bags)
    P1 = np.array([np.r_[1, basis(B)].T for B in p_bags])
    P0 = np.array([np.r_[1, basis(B)].T for B in u_bags])

    lhs = 0.5 / N0 * P0.T.dot(P0) + r * np.eye(bdim + 1)
    rhs = theta / N1 * P1.T.dot(np.ones((N1, 1))) \
        - 0.5 / N0 * P0.T.dot(np.ones((N0, 1)))
    param = np.linalg.inv(lhs).dot(rhs)

    # First entry is the intercept, the rest are the basis weights.
    alpha = param[1:]
    # Extract the scalar explicitly: float() on an array with ndim > 0 is
    # deprecated in recent NumPy releases.
    beta = float(param.ravel()[0])
    clf = lambda X: alpha.T.dot(basis(X)) + beta

    return clf
Exemple #9
0
def minimax_basis(bags, degree=1):
    """
    Build basis function based on minimax kernel.

    Each bag is summarised by the concatenation of its column-wise minimum
    and maximum; the kernel between two bags is
    ``(summary(X) . summary(Y) + 1) ** degree``.

    Parameters
    ----------
    bags   : dataset passed to ``MI.extract_bags``; the bags extracted with
             1, 0 and -1 (in that order) are the kernel centers.
    degree : Degree of polynomial kernel (converted with ``int``).

    Returns
    -------
    A callable mapping a bag X to the array of kernel values between X and
    every center.
    """
    degree = int(degree)

    centers = (MI.extract_bags(bags, 1)
               + MI.extract_bags(bags, 0)
               + MI.extract_bags(bags, -1))

    def summary(X):
        return np.r_[X.min(axis=0), X.max(axis=0)]

    def poly_kernel(X, Y):
        return (summary(X).dot(summary(Y)) + 1)**degree

    def basis(X):
        return np.array([poly_kernel(X, center) for center in centers])

    return basis
Exemple #10
0
def nsk_basis(bags, width=1.0e-01):
    """
    Build basis function based on normalized set kernel.

    The instance kernel is ``exp(-width * ||x - c||**2)``. The set kernel of
    two bags is the sum of the instance kernel over all instance pairs, and
    the normalized set kernel divides it by the square root of the product
    of the two self-kernels. The bags extracted with 1, 0 and -1 (in that
    order) are the kernel centers.

    Returns
    -------
    A callable mapping a bag X to the array of normalized set kernel values
    between X and every center.
    """
    def instance_kernel(x, c):
        return np.exp(-width * np.linalg.norm(x - c)**2)

    centers = (MI.extract_bags(bags, 1)
               + MI.extract_bags(bags, 0)
               + MI.extract_bags(bags, -1))

    def set_kernel(S0, S1):
        # (un-normalized) set kernel
        return sum(instance_kernel(a, b) for a, b in itertools.product(S0, S1))

    def normalized_set_kernel(S0, S1):
        cross = set_kernel(S0, S1)
        return cross / np.sqrt(set_kernel(S0, S0) * set_kernel(S1, S1))

    def basis(X):
        return np.array([normalized_set_kernel(X, center) for center in centers])

    return basis
Exemple #11
0
def train_dh(bags, basis, bdim, theta, r, args):
    """
    Fit a linear-in-basis classifier by solving a quadratic program.

    The QP variable is ``[alpha (bdim), beta (1), xi (N0)]`` where N0 is the
    number of bags extracted with 0. The objective is
    ``0.5 x' H x + f' x`` subject to ``L x <= k``; the solver backend is
    chosen by the module-level ``_SOLVER`` ('cvxopt', 'openopt' or
    'gurobi').

    Parameters
    ----------
    bags  : dataset passed to ``MI.extract_bags``
    basis : callable mapping one bag to a 1-D feature vector of length bdim
    bdim  : dimension of the basis output
    theta : weight applied to the terms built from the bags extracted with 1
    r     : coefficient of the identity block in the quadratic term
    args  : not used inside this function

    Returns
    -------
    A callable ``clf(X)`` computing ``alpha' basis(X) + beta``.

    Raises
    ------
    ValueError : when the Gurobi backend raises ``GurobiError``.
    """
    if _SOLVER == 'cvxopt':
        import cvxopt
        from cvxopt import matrix
        from cvxopt.solvers import qp
        cvxopt.solvers.options['show_progress'] = False

    elif _SOLVER == 'openopt':
        from openopt import QP
        import warnings
        warnings.simplefilter(action="ignore", category=FutureWarning)

    elif _SOLVER == 'gurobi':
        import sys
        gurobi_path = \
            "/home/local/bin/gurobi650/linux64/lib/python3.4_utf32/gurobipy"
        # Register the path only once; appending on every call would grow
        # sys.path with duplicates.
        if gurobi_path not in sys.path:
            sys.path.append(gurobi_path)
        import gurobipy
        from MI.gurobi_helper.helper import quadform, dot, mvmul

    p_bags = MI.extract_bags(bags, 1)
    u_bags = MI.extract_bags(bags, 0)
    N1 = len(p_bags)
    N0 = len(u_bags)
    d = bdim
    P1 = np.array([basis(B).T for B in p_bags])
    P0 = np.array([basis(B).T for B in u_bags])

    # Quadratic term: r * I on alpha, zero on beta and xi.
    H = np.r_[
        np.c_[r * np.eye(d), np.zeros((d, 1)), np.zeros((d, N0))],
        np.c_[np.zeros((1, d)), 0, np.zeros((1, N0))],
        np.c_[np.zeros((N0, d)), np.zeros((N0, 1)), np.zeros((N0, N0))],
    ]
    # Linear term: -theta/N1 * column sums of P1 on alpha, -theta on beta,
    # 1/N0 on every xi.
    f = np.r_[
        -theta / N1 * P1.T.sum(axis=1).reshape((-1, 1)),
        [[-theta]],
        1. / N0 * np.ones((N0, 1)),
    ]
    # Inequality constraints L x <= k, in three row blocks:
    #   0.5 * (P0 alpha + beta) - xi <= -0.5
    #          P0 alpha + beta  - xi <=  0
    #                           - xi <=  0
    L = np.r_[
        np.c_[0.5 * P0, 0.5 * np.ones((N0, 1)), -np.eye(N0)],
        np.c_[P0, np.ones((N0, 1)), -np.eye(N0)],
        np.c_[np.zeros((N0, d)), np.zeros((N0, 1)), -np.eye(N0)],
    ]
    k = np.r_[-0.5 * np.ones((N0, 1)), np.zeros((N0, 1)), -np.zeros((N0, 1))]

    if _SOLVER == 'cvxopt':
        result = qp(matrix(H), matrix(f), matrix(L), matrix(k))
        gamma = np.array(result['x'])

    elif _SOLVER == 'openopt':
        # A small multiple of the identity is added to H for this backend.
        problem = QP(H + 1e-3 * np.eye(H.shape[0]), f, A=L, b=k)
        result = problem.solve('qlcp')
        gamma = result.xf

    elif _SOLVER == 'gurobi':
        # model and target variables
        m = gurobipy.Model('qp')
        m.setParam('OutputFlag', False)
        opt_dim = H.shape[0]
        x = [
            m.addVar(lb=-gurobipy.GRB.INFINITY, name='x{}'.format(i))
            for i in range(opt_dim)
        ]
        m.update()

        # objective function and constraints
        obj = 0.5 * quadform(H.tolist(), x) + dot(f.reshape(-1).tolist(), x)
        constrs = [lhs <= rhs for lhs, rhs in zip(mvmul(L.tolist(), x), k)]

        # solve
        m.setObjective(obj)
        for i, constr in enumerate(constrs):
            m.addConstr(constr, 'c{}'.format(i))

        try:
            m.optimize()
            gamma = np.array([v.x for v in m.getVars()])

        except gurobipy.GurobiError as err:
            # Keep the ValueError contract but preserve the solver's cause.
            raise ValueError("Gurobi failed to solve the QP") from err

    # Split the solution into basis weights and intercept; xi is discarded.
    alpha = gamma[:d]
    beta = gamma[d]
    clf = lambda X: alpha.T.dot(basis(X)) + beta

    return clf