Example #1
File: lp.py Project: shunzh/RLCodeBase
def lp(S, A, r, T, s0):
  """
  Solve the LP problem to find the optimal state values

  Args:
    S: state set
    A: action set
    r: reward
    T: transition function
    s0: init state
  """
  m = CPlexModel()
  if not (config.VERBOSE or config.DEBUG): m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))

  v = m.new(len(S), name='v')

  for s in Sr:
    for a in A:
      m.constrain(v[s] >= r(S[s], a) + sum(v[sp] * T(S[s], a, S[sp]) for sp in Sr))

  # obj
  obj = m.minimize(v[S.index(s0)])
  ret = util.Counter()
  for s in Sr:
    ret[S[s]] = m[v][s]
  return ret
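
For reference, the linear program this snippet builds is the standard value-function LP (undiscounted, matching the code above): minimize the value of the initial state subject to one Bellman inequality per state-action pair,

\[
\begin{aligned}
\min_v \quad & v(s_0) \\
\text{s.t.} \quad & v(s) \ge r(s, a) + \sum_{s'} T(s, a, s')\, v(s') \qquad \forall s \in S,\ a \in A
\end{aligned}
\]

The minimization pushes v(s_0) down onto the tightest Bellman bound, so the solution is the optimal value function.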
Example #2
def opt(e_init, e_target, hrv_hist, hrv_en):
    # verbosity is how much log is reported back from CPlex. 3 is the most verbose
    verbosity = 3
    m = CPlexModel(verbosity)
    b = m.new((epochs_per_day, nodes, mod_levels),
              vtype=int,
              lb=0,
              ub=1,
              name='b')
    l = m.new((epochs_per_day, nodes, bin_num),
              vtype=float,
              lb=-1,
              ub=battery_cap,
              name='l')
    fixed_prob = np.linspace(0, 1, num=bin_num, endpoint=True, dtype=float)
    e_init_hist = np.zeros(e_init.shape, dtype=float)
    e_init_hist[:, 0] = 1
    hist_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    hist_rv[0] = e_init_hist
    # prepare the energy vector here
    en_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    en_rv[0] = e_init
    for i in xrange(1, epochs_per_day):
        en_rv[i], hist_rv[i] = next_battery_level(
            en_rv[i - 1], hist_rv[i - 1],
            hrv_en[i, :, :] - np.vectorize(energy)(b[i, :]),
            hrv_hist[i, :, :])
        m.constrain(en_rv[i] >= 0)
        m.constrain(sum(np.vectorize(time)(b[i, :])) <= D)
    m.maximize(objective_function(en_rv[-1], hist_rv[-1]))
    return m
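
Reading off the constraints, the model is roughly the following (a sketch; energy, time, D, objective_function, and the dimension constants are module-level names not shown in this snippet):

\[
\begin{aligned}
\max_b \quad & f(e_{T-1}, h_{T-1}) \\
\text{s.t.} \quad & e_i \ge 0 \quad \forall i, \qquad \sum \mathrm{time}(b_i) \le D \quad \forall i
\end{aligned}
\]

where b are the binary modulation choices and (e_i, h_i) are the battery-level values and histograms propagated by next_battery_level.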
Example #3
def findUndominatedReward(mdpH, mdpR, newPi, humanPi, localDifferentPis, domPis):
    """
  Implementation of the linear programming problem (Eq.2) in report 12.5
  Returns the objective value and a reward function (which is only useful when the obj value is > 0)
  
  newPi is \hat{\pi} in the linear programming problem in the report.
  The robot tries to see if there exists a reward function where newPi is better than the best policy in domPis.
  """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    S = mdpH.S
    robotA = mdpR.A
    humanA = mdpH.A

    # index of states and actions
    Sr = range(len(S))
    robotAr = range(len(robotA))
    humanAr = range(len(humanA))

    r = m.new(len(S), lb=0, ub=1, name='r')
    z = m.new(
        name='z'
    )  # when the optimal value is attained, z = \max_{domPi \in domPis} V^{domPi}_r

    for domPi in domPis:
        m.constrain(z >= sum(
            [domPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr]))

    # make sure r is consistent with humanPi
    for s in S:
        for a in humanA:
            # humanPi is better than a locally different policy which takes action a in state s
            m.constrain(sum(sum((humanPi[S[sp], humanA[ap]] - localDifferentPis[s, a][S[sp], humanA[ap]]) for ap in humanAr)\
                            * r[sp] for sp in Sr) >= 0)

    # maxi_r { V^{newPi}_r - \max_{domPi \in domPis} V^{domPi}_r }
    cplexObj = m.maximize(
        sum(newPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr) - z)

    obj = sum([newPi[S[s], robotA[a]] * m[r][s] for s in Sr
               for a in robotAr]) - m[z]

    # the reward depends only on the state, but convert it back to the S x A space
    rFunc = lambda s, a: m[r][S.index(s)]

    print 'cplexobj', cplexObj
    print 'obj', obj
    print 'newPi'
    printPi(newPi)
    print 'z', m[z], 'r', m[r]

    return obj, rFunc
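
In the notation of the report, writing V^\pi_r = \sum_{s,a} \pi(s, a)\, r(s) for the value of occupancy \pi under reward r, the program above is

\[
\begin{aligned}
\max_{r,\, z} \quad & V^{\hat\pi}_r - z \\
\text{s.t.} \quad & z \ge V^{\pi}_r \qquad \forall \pi \in \text{domPis} \\
& V^{\pi_H}_r \ge V^{\pi'}_r \qquad \text{for every locally different policy } \pi' \\
& 0 \le r(s) \le 1 \qquad \forall s
\end{aligned}
\]

A positive objective certifies a reward, consistent with the human's policy, under which newPi beats every dominating policy.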
Example #4
def rewardUncertainMILP(S, A, R, T, s0, terminal, k, optV, gamma=1):
    """
  The algorithm is adapted from
  Viappiani, Paolo and Boutilier, Craig. Optimal set recommendations based on regret.

  This algorithm would find the minimax-regret policy query in our problem.
  Not sure how to use this algorithm.
  """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    M = 100000

    # state range
    Sr = range(len(S))
    # action range
    Ar = range(len(A))

    mr = m.new(name='mr')
    # decision variables
    x = m.new((k, len(S), len(A)), lb=0, name='x')
    v = m.new((k, len(R)), name='v')
    I = m.new((k, len(R)), vtype=bool, name='I')

    for r in range(len(R)):
        m.constrain(mr >= sum(v[i, r] for i in range(k)))

    for r in range(len(R)):
        for i in range(k):
            m.constrain(v[i, r] >= optV[r] - sum(x[i, s, a] * R[r](S[s], A[a])
                                                 for s in Sr for a in Ar) +
                        (I[i, r] - 1) * M)

    # make sure x is a valid occupancy
    for i in range(k):
        for sp in Sr:
            m.constrain(
                sum(x[i, s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                    for s in Sr for a in Ar) == (S[sp] == s0))

    for r in range(len(R)):
        m.constrain(sum(I[i, r] for i in range(k)) == 1)

    for r in range(len(R)):
        for i in range(k):
            m.constrain(v[i, r] >= 0)

    obj = m.minimize(mr)

    return obj, m[I]
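
The structure is a standard minimax-regret MILP with indicator variables and a big-M: I[i, r] = 1 marks the query policy assigned to reward candidate r, and the -M term makes the regret bound vacuous for unassigned pairs, so minimization drives those v[i, r] to zero:

\[
\begin{aligned}
\min \quad & MR \\
\text{s.t.} \quad & MR \ge \sum_i v_{i,r} && \forall r \\
& v_{i,r} \ge V^*_r - \sum_{s,a} x_i(s, a)\, r(s, a) + (I_{i,r} - 1)\, M && \forall i, r \\
& v_{i,r} \ge 0, \qquad \sum_i I_{i,r} = 1 && \forall r \\
& x_i \text{ a valid occupancy measure} && \forall i
\end{aligned}
\]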
Example #5
def lwaLP(graph_object):
    capabilities = graph_object.capabilities
    no_vertices = graph_object.n
    attacker_strategy = list()
    m = CPlexModel()
    cv = m.new(no_vertices, vtype=float, ub=1, lb=0)
    U = m.new(vtype=float)
    diag = np.diag(capabilities)
    m.constrain(U <= -diag * (1 - cv))
    m.constrain(sum(cv) <= graph_object.R)
    m.maximize(U)
    for i in xrange(len(m[cv])):
        if m[cv][i] > 0:
            attacker_strategy.append(i)
    start = findStrategySet(m[cv], graph_object.R)
    return start, attacker_strategy
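
The model itself is small: with d the diagonal of the capability matrix and R the coverage budget from graph_object,

\[
\begin{aligned}
\max_{c,\, U} \quad & U \\
\text{s.t.} \quad & U \le -d_v\, (1 - c_v) \quad \forall v, \qquad \sum_v c_v \le R, \qquad 0 \le c_v \le 1
\end{aligned}
\]

Vertices with positive coverage in m[cv] are collected into the attacker strategy set.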
Example #6
def milp(S, A, R, T, s0, psi, maxV):
  """
  Solve the MILP problem in greedy construction of policy query
  
  Args:
    S: state set
    A: action set
    R: reward candidate set
    T: transition function
    s0: init state
    psi: prior belief on rewards
    maxV: maxV[i] = max_{\pi \in q} V_{r_i}^\pi
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  rLen = len(R)
  M = 10000 # a large number
  Sr = range(len(S))
  Ar = range(len(A))
  
  # decision variables
  # FIXME i removed upper bound of x. it shouldn't have such a bound without the transient-state assumption, right?
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(rLen, vtype=bool, name='z')
  y = m.new(rLen, name='y')

  # constraints on y
  m.constrain([y[i] <= sum([x[s, a] * R[i](S[s], A[a]) for s in Sr for a in Ar]) - maxV[i] + (1 - z[i]) * M for i in xrange(rLen)])
  m.constrain([y[i] <= z[i] * M for i in xrange(rLen)])
  
  # constraints on x (valid occupancy)
  for sp in Sr:
    if S[sp] == s0:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == 1)
    else:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == sum([x[s, a] * T(S[s], A[a], S[sp]) for s in Sr for a in Ar]))
  
  # obj
  obj = m.maximize(sum([psi[i] * y[i] for i in xrange(rLen)]))

  if config.VERBOSE:
    print 'obj', obj
    print 'x', m[x]
    print 'y', m[y]
    print 'z', m[z]
  
  # build occupancy as S x A -> x[.,.]
  # z[i] == 1 iff this policy is better than maxV on the i-th reward candidate
  res = util.Counter()
  for s in Sr:
    for a in Ar:
      res[S[s], A[a]] = m[x][s, a] 
  return res
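
The y/z constraints are a big-M linearization of y_i = max(0, V_i - maxV_i): z_i = 1 activates the improvement bound and z_i = 0 forces y_i <= 0, so the objective is the expected improvement of the new policy over the current query:

\[
\begin{aligned}
\max_{x,\, y,\, z} \quad & \sum_i \psi_i\, y_i \\
\text{s.t.} \quad & y_i \le \sum_{s,a} x(s, a)\, r_i(s, a) - \text{maxV}_i + (1 - z_i)\, M \\
& y_i \le z_i\, M \\
& x \text{ a valid occupancy measure rooted at } s_0
\end{aligned}
\]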
Example #7
def domPiMilp(S, A, r, T, s0, terminal, domPis, consIdx, gamma=1):
  """
  Finding dominating policies by representing constraints as possible negative rewards.
  Described in the report on aug.19, 2017.
  """
  rmax = 10000
  M = 0.001
  consLen = len(consIdx)

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))
  
  # decision variables
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(consLen, vtype=bool, name='z')
  #z = [0, 1, 0] # test for office nav domain
  t = m.new(name='t')
  
  # flow conservation
  for sp in Sr:
    # flow conservation: sum_{s,a} x(s,a) * (1[s == s'] - gamma * T(s,a,s')) = 1[s' == s0]
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (S[sp] == s0))
      #print S[sp], [(S[s], A[a]) for s in Sr for a in Ar if T(S[s], A[a], S[sp]) > 0]
    else:
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (S[sp] == s0))

  # t is the lower bound of the difference between x and y
  # note: i don't think expressions in constraints can call other functions
  for y in domPis:
    # note that y is indexed by elements in S x A, not numbered indices
    m.constrain(sum(x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar) -\
                sum(y[S[s], A[a]] *
                    (r(S[s], A[a]) + sum(- rmax * (S[s][consIdx[i]] != s0[consIdx[i]]) * z[i] for i in range(consLen)))\
                    for s in Sr for a in Ar)\
                >= t)
   
  for s in Sr:
    for i in range(consLen):
      if S[s][consIdx[i]] != s0[consIdx[i]]:
        for a in Ar:
          m.constrain(z[i] + M * x[s, a] <= 1)

  # obj
  obj = m.maximize(t)
  
  print m[z]
  
  return obj, {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}
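
The flow-conservation constraint used here (and in the other occupancy-based examples in this file) reads, in cleaner notation,

\[
\sum_{s,a} x(s, a)\, \big( \mathbb{1}[s = s'] - \gamma\, T(s, a, s') \big) = \mathbb{1}[s' = s_0] \qquad \forall s',
\]

with the (not terminal(...)) factor zeroing out any flow leaving terminal states.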
Example #8
def solve_GT_LP_relax_noisy_CPLEX(A, y, lambda_reg, do_binary=False):
    ''' solve the LP formulation of noisy boolean 
    compressed sensing using CPLEX'''

    M, N = A.shape

    assert np.linalg.norm(y - y**2) <= 1e-10, "Inputs must be binary"

    inds_1 = np.where(y == 1)[0]
    inds_0 = np.where(y == 0)[0]
    A1 = A[inds_1, :]
    A0 = A[inds_0, :]
    ###  introduce random small perturbations to avoid degenerate solutions
    w_pert_x = np.ones(N) + 0.001 * np.random.rand(N)
    w_pert_xi = np.ones(M) + 0.001 * np.random.rand(M)

    ### try cplex directly:
    M1 = len(inds_1)
    M0 = len(inds_0)

    w_pert_xi_pos = w_pert_xi[inds_1]
    w_pert_xi_neg = w_pert_xi[inds_0]

    m = CPlexModel(verbosity=0)

    if do_binary:  ### solve the binary problem using cuts + branch and bound
        x_sdp = m.new(N, vtype='bool', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='bool', lb=0)
        xi_sdp1 = m.new(M1, vtype='bool', lb=0)
    else:
        x_sdp = m.new(N, vtype='real', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='real', lb=0)
        xi_sdp1 = m.new(M1, vtype='real', lb=0)
    m.constrain(A1 * x_sdp + xi_sdp1 >= 1)
    m.constrain(A0 * x_sdp == 0 + xi_sdp0)
    value = m.minimize(x_sdp.sum() + lambda_reg *
                       (xi_sdp0.sum() + xi_sdp1.sum()))
    #m.minimize(w_pert_x*x_sdp + lambda_reg*w_pert_xi*xi_sdp)
    x_hat = m[x_sdp]
    xi_hat0 = m[xi_sdp0]
    xi_hat1 = m[xi_sdp1]
    xi_hat = np.zeros(M)
    xi_hat[inds_0] = xi_hat0
    xi_hat[inds_1] = xi_hat1
    return x_hat, xi_hat
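
The relaxation being solved is the usual LP for noisy Boolean compressed sensing, with slack variables absorbing test errors (positive tests must be explained or slacked; negative tests may only be violated through their slack):

\[
\begin{aligned}
\min_{x,\, \xi} \quad & \sum_j x_j + \lambda \Big( \sum_t \xi^{(0)}_t + \sum_t \xi^{(1)}_t \Big) \\
\text{s.t.} \quad & A_1 x + \xi^{(1)} \ge 1, \qquad A_0 x = \xi^{(0)} \\
& 0 \le x \le 1, \qquad \xi^{(0)},\, \xi^{(1)} \ge 0
\end{aligned}
\]

Passing do_binary=True swaps the real variables for booleans, turning the same model into the exact integer program.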
Example #9
def lpDualCPLEX(mdp,
                zeroConstraints=(),
                positiveConstraints=(),
                positiveConstraintsOcc=1):
    """
  DEPRECATED since we moved to Gurobi, but the function is kept here for sanity checks.
  Solve the dual problem of lp, maybe with some constraints
  Same arguments
  Note that this is a lower level function that does not consider feature extraction.
  r should be a reward function, not a reward parameter.
  """
    S = mdp.S
    A = mdp.A
    T = mdp.T
    r = mdp.r
    gamma = mdp.gamma
    alpha = mdp.alpha

    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    # useful constants
    Sr = range(len(S))
    Ar = range(len(A))

    x = m.new((len(S), len(A)), lb=0, name='x')

    # make sure x is a valid occupancy
    for sp in Sr:
        # flow conservation: sum_{s,a} x(s,a) * (1[s == s'] - gamma * T(s,a,s')) = alpha(s')
        m.constrain(
            sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                for s in Sr for a in Ar) == alpha(S[sp]))

    # == constraints
    if len(zeroConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)] for s, a in zeroConstraints) == 0)

    # >= constraints
    if len(positiveConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)]
                for s, a in positiveConstraints) >= positiveConstraintsOcc)

    # obj
    try:
        obj = m.maximize(sum([x[s, a] * r(S[s], A[a]) for s in Sr
                              for a in Ar]))
    except CPlexException as err:
        print 'Exception', err
        # infeasible; the caller should check the 'feasible' flag before reading 'obj' or 'pi'
        return {'feasible': False}

    return {
        'feasible': True,
        'obj': obj,
        'pi': {(S[s], A[a]): m[x][s, a]
               for s in Sr for a in Ar}
    }
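
A minimal usage sketch (not from the original project): a hypothetical two-state discounted MDP, assuming mdp may be any object exposing the S, A, T, r, gamma, alpha attributes read above.

from collections import namedtuple

MDP = namedtuple('MDP', ['S', 'A', 'T', 'r', 'gamma', 'alpha'])

toy = MDP(
    S=['s0', 's1'],
    A=['stay', 'go'],
    # deterministic transitions: 'go' moves to s1, 'stay' stays put
    T=lambda s, a, sp: 1.0 if (sp == 's1' if a == 'go' else sp == s) else 0.0,
    r=lambda s, a: 1.0 if s == 's1' else 0.0,  # reward for occupying s1
    gamma=0.9,
    alpha=lambda s: 1.0 if s == 's0' else 0.0,  # all initial mass on s0
)

res = lpDualCPLEX(toy)
if res['feasible']:
    print res['obj']  # optimal discounted value; occupancy measure in res['pi']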
Example #10
File: lp.py Project: shunzh/RLCodeBase
def decomposePiLP(S, A, T, s0, terminal, rawX, x, gamma=1):
  """
  DEPRECATED.
  This tries to decompose a policy into the optimal policy (following no constraints) and another policy \pi'.
  \pi' may be a dominating policy.
  Described in Eq. 2 on Aug.29, 2017.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  Ar = range(len(A))
 
  y = m.new((len(S), len(A)), lb=0, name='y')
  sigma = m.new(lb=0, ub=1, name='sigma')
  
  for s in Sr:
    for a in Ar:
      # note that x and rawX use S x A as domains
      m.constrain(sigma * rawX[S[s], A[a]] + y[s, a] == x[S[s], A[a]])

  # make sure y is a valid occupancy
  for sp in Sr:
    # flow conservation: sum_{s,a} y(s,a) * (1[s == s'] - gamma * T(s,a,s')) = (1 - sigma) * 1[s' == s0]
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))
    else:
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))
 
  obj = m.maximize(sigma)
  
  # return sigma and the value of y
  return obj, {(S[s], A[a]): m[y][s, a] for s in Sr for a in Ar}
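
Equivalently: find the largest sigma such that the given occupancy x splits into sigma times the unconstrained-optimal occupancy rawX plus a residual occupancy y,

\[
\begin{aligned}
\max_{\sigma,\, y} \quad & \sigma \\
\text{s.t.} \quad & \sigma\, x^*(s, a) + y(s, a) = x(s, a) \qquad \forall s, a \\
& y \ge 0 \text{ a valid occupancy with initial mass } (1 - \sigma)
\end{aligned}
\]

The residual y, returned alongside the objective, is the candidate dominating policy.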
Example #11
    def create_problem(self, data):

        host_attacked = data["attacked"]
        host_executing = data["executing"]
        metrics_used = data["metric"]
        damage_used = data["damage"]
        responses_used = data["response"]

        numResp = len(responses_used)
        damageMapper = {}
        costMapper = {}
        conflictMapper = []
        responseList = []

        for response in responses_used:
            responseList.append(response.name)

        logging.info('Responses Used: %s', numResp)

        if damage_used is not None:
            for elem in damage_used:
                damageMapper[elem.name] = elem.value

        for elem in metrics_used:
            costMapper[elem.name] = []

        costs = []
        for response in responses_used:
            cost = 0
            for r in response.metrics:
                cost = cost + r.value
                costMapper[r.name].append(cost)
            costs.append(cost)
            if response.conflicting_responses:
                for r in response.conflicting_responses:
                    tmp = [0] * numResp
                    tmp[responseList.index(response.name)] = 1
                    tmp[responseList.index(r)] = 1
                    conflictMapper.append(tmp)
        costs = np.array(costs)
        hostMatrix = []
        for host in host_attacked:
            hostRow = []
            host = host.name
            for response in responses_used:
                if host in response.dest:
                    hostRow.append(1)
                else:
                    hostRow.append(0)
            hostMatrix.append(hostRow)

        m = CPlexModel(verbosity=0)

        # Each Response can only be executed once
        x = m.new(numResp, vtype='bool', name='x')

        i = 0
        # all attacked hosts are freed
        for elem in hostMatrix:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) >= 1)
            i = i + 1

        logging.info('Freed Constraints: %s', i)

        i = 0
        # all single metrics used have to be below damage
        for key, elem in damageMapper.iteritems():
            elemArr = np.array(costMapper[key])
            m.constrain(sum(x.mult(elemArr)) <= elem)
            i = i + 1

        logging.info('Damage Constraints: %s', i)

        i = 0
        # no conflicting actions are executed
        for elem in conflictMapper:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) <= 1)
            i = i + 1

        logging.info('Conflicting Constraints: %s', i)

        return [m, x, costs]
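
A hypothetical follow-up, not shown in this class: the returned model, variable vector, and costs are presumably solved by the caller, e.g. by minimizing total response cost subject to the constraints added above.

# sketch only; 'planner' and the choice of objective are assumptions
m, x, costs = planner.create_problem(data)
m.minimize(sum(x.mult(costs)))  # cheapest response set that frees all hosts
selected = [i for i in xrange(len(m[x])) if m[x][i] > 0.5]  # indices of chosen responses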
Example #12
File: cplexfun.py Project: lavanyaj/pycpx
startAssign[19,33] = 1
startAssign[31,34] = 1
startAssign[36,34] = 1
startAssign[2,35] = 1
startAssign[24,35] = 1
startAssign[25,36] = 1
startAssign[35,36] = 1
startAssign[22,37] = 1
startAssign[14,38] = 1
startAssign[8,39] = 1
startAssign[16,39] = 1


for b in range(numBins):
    totalSize = sum([assign[i,b] * sizeItems[i] for i in range(numItems)])
    m.constrain(totalSize <= sizeBin)
    upperBound = sum(sizeItems)
    m.constrain(totalSize <= binUsed[b] * upperBound)
    m.constrain(binUsed[b] <= totalSize)
    pass

# each item assigned to exactly one bin
for i in range(numItems):
    numBinsForItem = sum([assign[i,b] for b in range(numBins)])
    m.constrain(numBinsForItem == 1)
    pass

numBinsUsed = sum(binUsed)

try:
    m.minimize(numBinsUsed, starting_dict={assign:startAssign}, emphasis=1,\