Code Example #1
File: lp.py  Project: shunzh/RLCodeBase
from pycpx import CPlexModel   # imports implied by the excerpt below
import config                  # project module providing the VERBOSE/DEBUG flags
import util                    # project module providing util.Counter

def lp(S, A, r, T, s0):
  """
  Solve the LP problem to find out the optimal occupancy
  
  Args:
    S: state set
    A: action set
    r: reward
    T: transition function
    s0: init state
  """
  m = CPlexModel()
  if not config.VERBOSE or config.DEBUG: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
 
  v = m.new(len(S), name='v')

  # Bellman inequalities: v[s] must dominate the one-step backup for every action
  for s in Sr:
    for a in A:
      m.constrain(v[s] >= r(S[s], a) + sum(v[sp] * T(S[s], a, S[sp]) for sp in Sr))
  
  # objective: minimize the value at the initial state
  obj = m.minimize(v[s0])
  ret = util.Counter()
  for s in Sr:
    ret[S[s]] = m[v][s]
  return ret
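
A minimal usage sketch on a hypothetical two-state MDP (all names and numbers below are illustrative, not from the project); it assumes pycpx/CPLEX plus the project's config and util modules are importable, and that s0 is passed as the index of the initial state in S:

# Toy MDP: state 1 is terminal (no outgoing transitions), which keeps the
# undiscounted LP bounded.
S = [0, 1]
A = ['stay', 'move']

def r(s, a):
    # reward 1 for leaving state 0, 0 otherwise
    return 1 if (s == 0 and a == 'move') else 0

def T(s, a, sp):
    if s == 1:
        return 0.0                      # terminal: episode ends
    if a == 'move':
        return 1.0 if sp == 1 else 0.0  # move deterministically to state 1
    return 1.0 if sp == 0 else 0.0      # stay in state 0

values = lp(S, A, r, T, s0=0)
print(values[0])   # expected: 1.0, the optimal return from state 0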
Code Example #2
File: lp.py  Project: ashdtu/constrained-MDP
def lp(S, A, r, T, s0):
  """
  Solve the LP problem to find out the optimal occupancy

  Args:
    S: state set
    A: action set
    r: reward
    T: transition function
    s0: init state
  """
  m = CPlexModel()
  if not config.VERBOSE or config.DEBUG: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))

  v = m.new(len(S), name='v')

  for s in Sr:
    for a in A:
      m.constrain(v[s] >= r(S[s], a) + sum(v[sp] * T(S[s], a, S[sp]) for sp in Sr))

  # obj
  obj = m.minimize(v[s0])
  ret = util.Counter()
  for s in Sr:
    ret[S[s]] = m[v][s]
  return ret
Code Example #3
def rewardUncertainMILP(S, A, R, T, s0, terminal, k, optV, gamma=1):
    """
  The algorithm is adapted from
  Viappiani, Paolo and Boutilier, Craig. "Optimal set recommendations based on regret."

  This algorithm would find the minimax-regret policy query in our problem.
  Not sure how to use this algorithm.
  """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    M = 100000  # big-M constant used to deactivate regret bounds for unselected policies

    # state range
    Sr = range(len(S))
    # action range
    Ar = range(len(A))

    mr = m.new(name='mr')  # minimax (setwise) regret
    # decision variables
    x = m.new((k, len(S), len(A)), lb=0, name='x')  # occupancy measure of each of the k query policies
    v = m.new((k, len(R)), name='v')                # regret of policy i under reward r (active when I[i, r] == 1)
    I = m.new((k, len(R)), vtype=bool, name='I')    # I[i, r] == 1 iff policy i is the one evaluated against reward r

    for r in range(len(R)):
        # mr must dominate the regret of the selected policy for every candidate reward
        m.constrain(mr >= sum(v[i, r] for i in range(k)))

    for r in range(len(R)):
        for i in range(k):
            # regret of policy i under reward r; the big-M term makes the bound
            # vacuous whenever I[i, r] == 0
            m.constrain(v[i, r] >= optV[r] - sum(x[i, s, a] * R[r](S[s], A[a])
                                                 for s in Sr for a in Ar) +
                        (I[i, r] - 1) * M)

    # make sure x is a valid occupancy
    for i in range(k):
        for sp in Sr:
            m.constrain(
                sum(x[i, s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                    for s in Sr for a in Ar) == (S[sp] == s0))

    for r in range(len(R)):
        # each candidate reward is assigned to exactly one policy in the query
        m.constrain(sum(I[i, r] for i in range(k)) == 1)

    for r in range(len(R)):
        for i in range(k):
            m.constrain(v[i, r] >= 0)

    obj = m.minimize(mr)

    return obj, m[I]
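
Reading the constraints off the code above, the model is a big-M linearization of the setwise max-regret criterion; a sketch of the encoded program, in the notation of the arguments (x_i the occupancy measure of the i-th policy in the query, V*_r = optV[r]):

\begin{align*}
\min_{mr,\,x,\,v,\,I}\quad & mr \\
\text{s.t.}\quad
  & mr \ge \textstyle\sum_{i=1}^{k} v_{ir} && \forall r \\
  & v_{ir} \ge V^{*}_{r} - \textstyle\sum_{s,a} x_{isa}\, R_{r}(s,a) + (I_{ir}-1)\,M && \forall i,\, r \\
  & \textstyle\sum_{a} x_{i s' a} - \gamma \sum_{s,a} x_{isa}\, T(s,a,s') = \mathbf{1}[\,s' = s_0\,] && \forall i,\, s' \\
  & \textstyle\sum_{i=1}^{k} I_{ir} = 1, \quad v_{ir} \ge 0, \quad x_{isa} \ge 0, \quad I_{ir} \in \{0,1\}
\end{align*}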
Code Example #4
File: lp.py  Project: shunzh/RLCodeBase
def rewardUncertainMILP(S, A, R, T, s0, terminal, k, optV, gamma=1):
  """
  The algorithm is adapted from
  Viappiani, Paolo and Boutilier, Craig. "Optimal set recommendations based on regret."

  This algorithm would find the minimax-regret policy query in our problem.
  Not sure how to use this algorithm.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  M = 100000

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))
  
  mr = m.new(name='mr')
  # decision variables
  x = m.new((k, len(S), len(A)), lb=0, name='x')
  v = m.new((k, len(R)), name='v')
  I = m.new((k, len(R)), vtype=bool, name='I')
  
  for r in range(len(R)):
    # mr must dominate the regret of the selected policy for every candidate reward
    m.constrain(mr >= sum(v[i, r] for i in range(k)))
  
  for r in range(len(R)):
    for i in range(k):
      m.constrain(v[i, r] >= optV[r] - sum(x[i, s, a] * R[r](S[s], A[a]) for s in Sr for a in Ar) + (I[i, r] - 1) * M)

  # make sure x is a valid occupancy
  for i in range(k):
    for sp in Sr:
      m.constrain(sum(x[i, s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp])) for s in Sr for a in Ar) == (S[sp] == s0))
  
  for r in range(len(R)):
    m.constrain(sum(I[i, r] for i in range(k)) == 1)
  
  for r in range(len(R)):
    for i in range(k):
      m.constrain(v[i, r] >= 0)
  
  obj = m.minimize(mr)
  
  return obj, m[I]
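
A rough usage sketch on a hypothetical toy instance (every name and number below is illustrative, not from either project): two candidate rewards disagree on which action is good at the initial state, so with k = 2 the minimax setwise regret should come out to 0 (one policy per reward). It assumes pycpx/CPLEX and the config module are importable, as in the excerpts above:

S = [0, 1]                        # state 1 is terminal: T returns 0 from it
A = ['a0', 'a1']

def T(s, a, sp):
    if s == 1:
        return 0.0                       # no outgoing transitions from the terminal state
    return 1.0 if sp == 1 else 0.0       # both actions move from state 0 to state 1

R = [lambda s, a: 1 if (s == 0 and a == 'a0') else 0,   # reward that favors a0
     lambda s, a: 1 if (s == 0 and a == 'a1') else 0]   # reward that favors a1
optV = [1, 1]                     # optimal value of s0 under each candidate reward

regret, indicators = rewardUncertainMILP(S, A, R, T, s0=0, terminal=None,
                                         k=2, optV=optV)
# regret should be 0; indicators shows which policy covers which reward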
Code Example #5
import numpy as np             # imports implied by the excerpt below
from pycpx import CPlexModel

def solve_GT_LP_relax_noisy_CPLEX(A, y, lambda_reg, do_binary=False):
    ''' solve the LP formulation of noisy boolean 
    compressed sensing using CPLEX'''

    M, N = A.shape

    assert np.linalg.norm(y - y**2) <= 1e-10, "Inputs must be binary"

    inds_1 = np.where(y == 1)[0]
    inds_0 = np.where(y == 0)[0]
    A1 = A[inds_1, :]
    A0 = A[inds_0, :]
    ### introduce random small perturbations to avoid degenerate solutions
    ### (currently unused: see the commented-out perturbed objective below)
    w_pert_x = np.ones(N) + 0.001 * np.random.rand(N)
    w_pert_xi = np.ones(M) + 0.001 * np.random.rand(M, 1)

    ### try cplex directly:
    M1 = len(inds_1)
    M0 = len(inds_0)

    w_pert_xi_pos = w_pert_xi[inds_1]
    w_pert_xi_neg = w_pert_xi[inds_0]

    m = CPlexModel(verbosity=0)

    if do_binary:  ### solve the binary problem using cuts + branch and bound
        x_sdp = m.new(N, vtype='bool', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='bool', lb=0)
        xi_sdp1 = m.new(M1, vtype='bool', lb=0)
    else:
        x_sdp = m.new(N, vtype='real', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='real', lb=0)
        xi_sdp1 = m.new(M1, vtype='real', lb=0)
    m.constrain(A1 * x_sdp + xi_sdp1 >= 1)
    m.constrain(A0 * x_sdp == 0 + xi_sdp0)
    value = m.minimize(x_sdp.sum() + lambda_reg *
                       (xi_sdp0.sum() + xi_sdp1.sum()))
    #m.minimize(w_pert_x*x_sdp + lambda_reg*w_pert_xi*xi_sdp)
    x_hat = m[x_sdp]
    xi_hat0 = m[xi_sdp0]
    xi_hat1 = m[xi_sdp1]
    xi_hat = np.zeros(M)
    xi_hat[inds_0] = xi_hat0
    xi_hat[inds_1] = xi_hat1
    return x_hat, xi_hat
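
A hypothetical usage sketch (instance sizes, seed, and the 0.5 threshold are illustrative): build a small noiseless group-testing instance and read the recovered support off the LP relaxation. It needs numpy and a working pycpx/CPLEX install:

import numpy as np

np.random.seed(0)
N, M, k = 30, 15, 3                                # items, tests, defectives
x_true = np.zeros(N)
x_true[np.random.choice(N, k, replace=False)] = 1  # sparse boolean signal
A = (np.random.rand(M, N) < 0.2).astype(float)     # random pooling matrix
y = (A.dot(x_true) > 0).astype(float)              # boolean OR of pooled items

x_hat, xi_hat = solve_GT_LP_relax_noisy_CPLEX(A, y, lambda_reg=1.0)
support_hat = np.where(x_hat > 0.5)[0]             # threshold the relaxed solution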
Code Example #6
File: cplexfun.py  Project: lavanyaj/pycpx
    m.constrain(totalSize <= sizeBin)
    upperBound = sum(sizeItems)
    m.constrain(totalSize <= binUsed[b] * upperBound)
    m.constrain(binUsed[b] <= totalSize)
    pass

# each item assigned to exactly one bin
for i in range(numItems):
    numBinsForItem = sum([assign[i,b] for b in range(numBins)])
    m.constrain(numBinsForItem == 1)
    pass

numBinsUsed = sum(binUsed)

try:
    m.minimize(numBinsUsed, starting_dict={assign:startAssign}, emphasis=1,\
                   time_limit=100)
    pass
except Exception as e:
    logging.exception(e)
    pass

print "Number of constraints: %.1f" % m.getNRows()
print "Number of variables: %.1f" % m.getNCols()
print "Number of quadratic constraints: %.1f" % m.getNQCs()

m.minimize(numBinsUsed, starting_dict={assign:startAssign}, emphasis=1,\
               tree_limit=2, work_dir="mapper/output")


print(m[binUsed])
print(int(m[numBinsUsed]))
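
The excerpt above starts midway through a per-bin loop of a bin-packing model, so the declarations it relies on are not shown. A minimal, purely hypothetical sketch of the setup it appears to assume (identifiers mirror the excerpt; sizes and the warm start are illustrative, and only pycpx calls already seen elsewhere on this page are used):

import logging
import numpy as np
from pycpx import CPlexModel

numItems, numBins = 6, 4
sizeItems = [4, 3, 3, 2, 2, 1]
sizeBin = 8

m = CPlexModel()
assign = m.new((numItems, numBins), vtype=bool, name='assign')  # item-to-bin assignment
binUsed = m.new(numBins, vtype=bool, name='binUsed')            # whether each bin is opened
startAssign = np.zeros((numItems, numBins))                     # warm start: everything in bin 0
startAssign[:, 0] = 1

for b in range(numBins):
    totalSize = sum([assign[i, b] * sizeItems[i] for i in range(numItems)])
    # ... the per-bin constraints shown at the top of the excerpt go here ...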