Esempi in Python per CPlexModel.maximize, esempi in Python per pycpx.CPlexModel.maximize

Esempio n. 1

0

Mostra file

File: milp.py Progetto: maryamb/code_samples

def opt(e_init, e_target, hrv_hist, hrv_en):
    """Build the per-epoch battery model and ask CPLEX to maximize it.

    Relies on names defined outside this function: ``epochs_per_day``,
    ``nodes``, ``mod_levels``, ``bin_num``, ``battery_cap``, ``D``,
    ``energy``, ``time``, ``next_battery_level`` and ``objective_function``.

    Args:
        e_init: initial energy, stored as ``en_rv[0]``; its ``shape`` is also
            used to size the initial histogram.
        e_target: accepted but not used by this function.
        hrv_hist: indexed as ``hrv_hist[i, :, :]`` for every epoch ``i >= 1``
            and forwarded to ``next_battery_level``.
        hrv_en: indexed as ``hrv_en[i, :, :]`` for every epoch ``i >= 1``;
            ``energy`` applied to ``b[i, :]`` is subtracted from it.

    Returns:
        The ``CPlexModel`` after ``maximize`` has been called on it.
    """
    # verbosity is how much log is reported back from CPlex. 3 is the most verbose
    verbosity = 3
    m = CPlexModel(verbosity)
    # 0/1 integer decision variables, one per (epoch, node, mod level)
    b = m.new((epochs_per_day, nodes, mod_levels),
              vtype=int,
              lb=0,
              ub=1,
              name='b')
    # NOTE(review): 'l' is registered with the model but never referenced in
    # any constraint or in the objective below -- confirm it is still needed.
    l = m.new((epochs_per_day, nodes, bin_num),
              vtype=float,
              lb=-1,
              ub=battery_cap,
              name='l')
    # Fixed: the NumPy function is ``linspace``; ``np.linespace`` does not
    # exist and raised AttributeError before the model could be built.
    # NOTE(review): fixed_prob is not consumed anywhere below.
    fixed_prob = np.linspace(0, 1, num=bin_num, endpoint=True, dtype=float)
    # initial histogram: a 1 in column 0 of every row, zeros elsewhere
    e_init_hist = np.zeros(e_init.shape, dtype=float)
    e_init_hist[:, 0] = 1
    hist_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    hist_rv[0] = e_init_hist
    # prepare the energy vector here
    en_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    en_rv[0] = e_init
    for i in xrange(1, epochs_per_day):
        # propagate energy and histogram from the previous epoch
        en_rv[i], hist_rv[i] = next_battery_level(
            en_rv[i - 1], hist_rv[i - 1],
            hrv_en[i, :, :] - (np.vectorize(energy))(b[i, :]),
            hrv_hist[i, :, :])
        m.constrain(en_rv[i] >= 0)
        m.constrain(sum(np.vectorize(time)(b[i, :])) <= D)
    m.maximize(objective_function(en_rv[-1], hist_rv[-1]))
    return m

Esempio n. 2

0

Mostra file

File: lp.py Progetto: shunzh/RLCodeBase

def decomposePiLP(S, A, T, s0, terminal, rawX, x, gamma=1):
  """
  DEPRECATED.

  Split the occupancy x into sigma * rawX + y and make sigma as large as
  possible. rawX is meant to be the occupancy of the optimal (unconstrained)
  policy and y the remaining policy pi', which may be a dominating policy.
  Described in Eq. 2 on Aug.29, 2017.

  Args:
    S: indexable collection of states
    A: indexable collection of actions
    T: transition function, called as T(state, action, nextState)
    s0: initial state, compared against elements of S
    terminal: predicate on states
    rawX: occupancy keyed by (state, action)
    x: occupancy keyed by (state, action)
    gamma: discount factor

  Returns:
    (objective value returned by maximize, dict mapping (state, action) to
    the solved value of y)
  """
  m = CPlexModel()
  if not config.VERBOSE:
    m.setVerbosity(0)

  # integer index ranges over states and actions
  stateIds = range(len(S))
  actionIds = range(len(A))

  y = m.new((len(S), len(A)), lb=0, name='y')
  sigma = m.new(lb=0, ub=1, name='sigma')

  # x and rawX are keyed by elements of S x A, y by integer indices
  for s in stateIds:
    for a in actionIds:
      m.constrain(sigma * rawX[S[s], A[a]] + y[s, a] == x[S[s], A[a]])

  def flowCoeff(s, a, sp, gate):
    # plain-number coefficient of y[s, a] in the balance equation of state sp;
    # the transition term is switched off when S[gate] is terminal
    return (s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[gate]))

  # make sure y is a valid occupancy, with no flow back from terminal states
  for sp in stateIds:
    if terminal(S[sp]):
      # gate on the target state itself
      lhs = sum(y[s, a] * flowCoeff(s, a, sp, sp) for s in stateIds for a in actionIds)
    else:
      # gate on the source state
      lhs = sum(y[s, a] * flowCoeff(s, a, sp, s) for s in stateIds for a in actionIds)
    m.constrain(lhs == (1 - sigma) * (S[sp] == s0))

  obj = m.maximize(sigma)

  # hand back sigma together with y re-keyed by (state, action)
  yValues = m[y]
  occupancy = {}
  for s in stateIds:
    for a in actionIds:
      occupancy[S[s], A[a]] = yValues[s, a]
  return obj, occupancy

Esempio n. 3

0

Mostra file

File: lp.py Progetto: ashdtu/constrained-MDP

def decomposePiLP(S, A, T, s0, terminal, rawX, x, gamma=1):
  """
  Decouple the occupancy x as sigma * rawX + y and maximize sigma.

  rawX is meant to be the occupancy of the optimal policy (following no
  constraints); y belongs to another policy pi', which may be a dominating
  policy. Described in Eq. 2 on Aug.29, 2017.

  Args:
    S: indexable collection of states
    A: indexable collection of actions
    T: transition function, called as T(state, action, nextState)
    s0: initial state, compared against elements of S
    terminal: predicate on states
    rawX: occupancy keyed by (state, action)
    x: occupancy keyed by (state, action)
    gamma: discount factor

  Returns:
    (objective value returned by maximize, dict mapping (state, action) to
    the solved value of y)
  """
  m = CPlexModel()
  if not config.VERBOSE:
    m.setVerbosity(0)

  nStates = len(S)
  nActions = len(A)
  Sr = range(nStates)
  Ar = range(nActions)

  y = m.new((nStates, nActions), lb=0, name='y')
  sigma = m.new(lb=0, ub=1, name='sigma')

  # tie the three occupancies together; x and rawX use S x A as domains
  for s in Sr:
    for a in Ar:
      stateAction = (S[s], A[a])
      m.constrain(sigma * rawX[stateAction] + y[s, a] == x[stateAction])

  def netFlow(sp, gateOf):
    # left-hand side of the balance equation for state index sp;
    # gateOf(s) names the state whose terminal flag cancels the transition term
    return sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[gateOf(s)])))
               for s in Sr for a in Ar)

  # y must be a valid occupancy; nothing flows back out of terminal states
  for sp in Sr:
    if not terminal(S[sp]):
      balance = netFlow(sp, lambda s: s)
    else:
      balance = netFlow(sp, lambda s, sp=sp: sp)
    m.constrain(balance == (1 - sigma) * (S[sp] == s0))

  obj = m.maximize(sigma)

  # sigma plus the solved y, keyed by (state, action)
  solvedY = m[y]
  return obj, dict(((S[s], A[a]), solvedY[s, a]) for s in Sr for a in Ar)

Esempio n. 4

0

Mostra file

def lpDualCPLEX(mdp,
                zeroConstraints=(),
                positiveConstraints=(),
                positiveConstraintsOcc=1):
    """
  DEPRECATED since we moved to gurobi. but leave the function here for sanity check
  Solve the dual problem of lp, maybe with some constraints
  Same arguments
  Note that this is a lower level function that does not consider feature extraction.
  r should be a reward function, not a reward parameter.
  """
    S = mdp.S
    A = mdp.A
    T = mdp.T
    r = mdp.r
    gamma = mdp.gamma
    alpha = mdp.alpha

    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    # useful constants
    Sr = range(len(S))
    Ar = range(len(A))

    x = m.new((len(S), len(A)), lb=0, name='x')

    # make sure x is a valid occupancy
    for sp in Sr:
        # x (x(s) - \gamma * T) = \sigma
        m.constrain(
            sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                for s in Sr for a in Ar) == alpha(S[sp]))

    # == constraints
    if len(zeroConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)] for s, a in zeroConstraints) == 0)

    # >= constraints
    if len(positiveConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)]
                for s, a in positiveConstraints) >= positiveConstraintsOcc)

    # obj
    try:
        obj = m.maximize(sum([x[s, a] * r(S[s], A[a]) for s in Sr
                              for a in Ar]))
    except CPlexException as err:
        print 'Exception', err
        # we return obj value as None and occ measure as {}. this should be handled correctly
        return {'feasible': False}

    return {
        'feasible': True,
        'obj': obj,
        'pi': {(S[s], A[a]): m[x][s, a]
               for s in Sr for a in Ar}
    }

Esempio n. 5

0

Mostra file

File: lwa.py Progetto: luvagarwal/TerroristPlotDetection

def lwaLP(graph_object):
    """Solve the coverage LP for a graph and collect the covered vertices.

    Builds a CPlexModel with one coverage variable in [0, 1] per vertex and a
    scalar U, constrains U by the negated capability matrix applied to the
    uncovered share, bounds the total coverage by ``graph_object.R`` and
    maximizes U.

    Args:
        graph_object: object providing ``capabilities``, ``n`` (number of
            vertices) and ``R`` (bound on the summed coverage).

    Returns:
        ``(start, attacker_strategy)`` where ``start`` is the result of
        ``findStrategySet`` on the solved coverage vector and
        ``attacker_strategy`` lists the indices of vertices whose solved
        coverage is strictly positive.
    """
    capabilities = graph_object.capabilities
    no_vertices = graph_object.n
    m = CPlexModel()
    cv = m.new(no_vertices, vtype=float, ub=1, lb=0)
    U = m.new(vtype=float)
    diag = np.diag(capabilities)
    m.constrain(U <= -diag * (1 - cv))
    m.constrain(sum(cv) <= graph_object.R)
    m.maximize(U)
    # Fixed: the original looped over xrange(m[cv]), i.e. passed the solved
    # value array to xrange, which raises TypeError. Iterate the solution
    # itself instead.
    coverage = m[cv]
    attacker_strategy = [i for i, value in enumerate(coverage) if value > 0]
    start = findStrategySet(coverage, graph_object.R)
    return start, attacker_strategy

Esempio n. 6

0

Mostra file

File: lp.py Progetto: shunzh/RLCodeBase

def domPiMilp(S, A, r, T, s0, terminal, domPis, consIdx, gamma=1):
  """
  Finding dominating policies by representing constraints as possible negative rewards.
  Described in the report on aug.19, 2017.
  """
  rmax = 10000
  M = 0.001
  consLen = len(consIdx)

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))
  
  # decision variables
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(consLen, vtype=bool, name='z')
  #z = [0, 1, 0] # test for office nav domain
  t = m.new(name='t')
  
  # flow conservation
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (S[sp] == s0))
      #print S[sp], [(S[s], A[a]) for s in Sr for a in Ar if T(S[s], A[a], S[sp]) > 0]
    else:
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (S[sp] == s0))

  # t is the lower bound of the difference between x and y
  # note: i don't think expressions in constraints can call other functions
  for y in domPis:
    # note that y is indexed by elements in S x A, not numbered indices
    m.constrain(sum(x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar) -\
                sum(y[S[s], A[a]] *
                    (r(S[s], A[a]) + sum(- rmax * (S[s][consIdx[i]] != s0[consIdx[i]]) * z[i] for i in range(consLen)))\
                    for s in Sr for a in Ar)\
                >= t)
   
  for s in Sr:
    for i in range(consLen):
      if S[s][consIdx[i]] != s0[consIdx[i]]:
        for a in Ar:
          m.constrain(z[i] + M * x[s, a] <= 1)

  # obj
  obj = m.maximize(t)
  
  print m[z]
  
  return obj, {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}

Esempio n. 7

0

Mostra file

File: lp.py Progetto: shunzh/RLCodeBase

def milp(S, A, R, T, s0, psi, maxV):
  """
  Solve the MILP problem in greedy construction of policy query
  
  Args:
    S: state set
    A: action set
    R: reward candidate set
    T: transition function
    s0: init state
    psi: prior belief on rewards
    maxV: maxV[i] = max_{\pi \in q} V_{r_i}^\pi
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  rLen = len(R)
  M = 10000 # a large number
  Sr = range(len(S))
  Ar = range(len(A))
  
  # decision variables
  # FIXME i removed upper bound of x. it shoundn't have such bound without transient-state assumption, right?
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(rLen, vtype=bool, name='z')
  y = m.new(rLen, name='y')

  # constraints on y
  m.constrain([y[i] <= sum([x[s, a] * R[i](S[s], A[a]) for s in Sr for a in Ar]) - maxV[i] + (1 - z[i]) * M for i in xrange(rLen)])
  m.constrain([y[i] <= z[i] * M for i in xrange(rLen)])
  
  # constraints on x (valid occupancy)
  for sp in Sr:
    if S[sp] == s0:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == 1)
    else:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == sum([x[s, a] * T(S[s], A[a], S[sp]) for s in Sr for a in Ar]))
  
  # obj
  obj = m.maximize(sum([psi[i] * y[i] for i in xrange(rLen)]))

  if config.VERBOSE:
    print 'obj', obj
    print 'x', m[x]
    print 'y', m[y]
    print 'z', m[z]
  
  # build occupancy as S x A -> x[.,.]
  # z[i] == 1 then this policy is better than maxV on the i-th reward candidate
  res = util.Counter()
  for s in Sr:
    for a in Ar:
      res[S[s], A[a]] = m[x][s, a] 
  return res

Esempio n. 8

0

Mostra file

File: lp.py Progetto: ashdtu/constrained-MDP

def domPiMilp(S, A, r, T, s0, terminal, domPis, consIdx, gamma=1):
  """
  Finding dominating policies by representing constraints as possible negative rewards.
  Described in the report on aug.19, 2017.
  """
  rmax = 10000
  M = 0.001
  consLen = len(consIdx)

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))
  
  # decision variables
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(consLen, vtype=bool, name='z')
  #z = [0, 1, 0] # test for office nav domain
  t = m.new(name='t')
  
  # flow conservation
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (S[sp] == s0))
      #print S[sp], [(S[s], A[a]) for s in Sr for a in Ar if T(S[s], A[a], S[sp]) > 0]
    else:
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (S[sp] == s0))

  # t is the lower bound of the difference between x and y
  # note: i don't think expressions in constraints can call other functions
  for y in domPis:
    # note that y is indexed by elements in S x A, not numbered indices
    m.constrain(sum(x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar) -\
                sum(y[S[s], A[a]] *
                    (r(S[s], A[a]) + sum(- rmax * (S[s][consIdx[i]] != s0[consIdx[i]]) * z[i] for i in range(consLen)))\
                    for s in Sr for a in Ar)\
                >= t)
   
  for s in Sr:
    for i in range(consLen):
      if S[s][consIdx[i]] != s0[consIdx[i]]:
        for a in Ar:
          m.constrain(z[i] + M * x[s, a] <= 1)

  # obj
  obj = m.maximize(t)
  
  print m[z]
  
  return obj, {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}

Esempio n. 9

0

Mostra file

File: lp.py Progetto: ashdtu/constrained-MDP

def milp(S, A, R, T, s0, psi, maxV):
  """
  Solve the MILP problem in greedy construction of policy query
  
  Args:
    S: state set
    A: action set
    R: reward candidate set
    T: transition function
    s0: init state
    psi: prior belief on rewards
    maxV: maxV[i] = max_{\pi \in q} V_{r_i}^\pi
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  rLen = len(R)
  M = 10000 # a large number
  Sr = range(len(S))
  Ar = range(len(A))
  
  # decision variables
  # FIXME i removed upper bound of x. it shoundn't have such bound without transient-state assumption, right?
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(rLen, vtype=bool, name='z')
  y = m.new(rLen, name='y')

  # constraints on y
  m.constrain([y[i] <= sum([x[s, a] * R[i](S[s], A[a]) for s in Sr for a in Ar]) - maxV[i] + (1 - z[i]) * M for i in xrange(rLen)])
  m.constrain([y[i] <= z[i] * M for i in xrange(rLen)])
  
  # constraints on x (valid occupancy)
  for sp in Sr:
    if S[sp] == s0:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == 1)
    else:
      m.constrain(sum([x[sp, ap] for ap in Ar]) == sum([x[s, a] * T(S[s], A[a], S[sp]) for s in Sr for a in Ar]))
  
  # obj
  obj = m.maximize(sum([psi[i] * y[i] for i in xrange(rLen)]))

  if config.VERBOSE:
    print 'obj', obj
    print 'x', m[x]
    print 'y', m[y]
    print 'z', m[z]
  
  # build occupancy as S x A -> x[.,.]
  # z[i] == 1 then this policy is better than maxV on the i-th reward candidate
  res = util.Counter()
  for s in Sr:
    for a in Ar:
      res[S[s], A[a]] = m[x][s, a] 
  return res

Esempio n. 10

0

Mostra file

File: findRewardUncertainDomPis.py Progetto: shunzh/RLCodeBase

def findUndominatedReward(mdpH, mdpR, newPi, humanPi, localDifferentPis,
                          domPis):
    """
  Implementation of the linear programming problem (Eq.2) in report 12.5
  Returns the objective value and a reward function (which is only useful when the obj value is > 0)
  
  newPi is \hat{\pi} in the linear programming problem in the report.
  The robot tries to see if there exists a reward function where newPi is better than the best policy in domPis.
  """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    S = mdpH.S
    robotA = mdpR.A
    humanA = mdpH.A

    # index of states and actions
    Sr = range(len(S))
    robotAr = range(len(robotA))
    humanAr = range(len(humanA))

    r = m.new(len(S), lb=0, ub=1, name='r')
    z = m.new(
        name='z'
    )  # when the optimal value is attained, z = \max_{domPi \in domPis} V^{domPi}_r

    for domPi in domPis:
        m.constrain(z >= sum(
            [domPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr]))

    # make sure r is consistent with humanPi
    for s in S:
        for a in humanA:
            # humanPi is better than a locally different policy which takes action a in state a
            m.constrain(sum(sum((humanPi[S[sp], humanA[ap]] - localDifferentPis[s, a][S[sp], humanA[ap]]) for ap in humanAr)\
                            * r[sp] for sp in Sr) >= 0)

    # maxi_r { V^{newPi}_r - \max_{domPi \in domPis} V^{domPi}_r }
    cplexObj = m.maximize(
        sum(newPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr) - z)

    obj = sum([newPi[S[s], robotA[a]] * m[r][s] for s in Sr
               for a in robotAr]) - m[z]

    # the reward function has the same values for same states, but need to convert back to the S x A space
    rFunc = lambda s, a: m[r][Sr.index(s)]

    print 'cplexobj', cplexObj
    print 'obj', obj
    print 'newPi'
    printPi(newPi)
    print 'z', m[z], 'r', m[r]

    return obj, rFunc

Esempio n. 11

0

Mostra file

File: findRewardUncertainDomPis.py Progetto: shunzh/RLCodeBase

def findUndominatedReward(mdpH, mdpR, newPi, humanPi, localDifferentPis, domPis):
  """
  Implementation of the linear programming problem (Eq.2) in report 12.5
  Returns the objective value and a reward function (which is only useful when the obj value is > 0)
  
  newPi is \hat{\pi} in the linear programming problem in the report.
  The robot tries to see if there exists a reward function where newPi is better than the best policy in domPis.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)
  
  S = mdpH.S
  robotA = mdpR.A
  humanA = mdpH.A

  # index of states and actions
  Sr = range(len(S))
  robotAr = range(len(robotA))
  humanAr = range(len(humanA))
  
  r = m.new(len(S), lb=0, ub=1, name='r')
  z = m.new(name='z') # when the optimal value is attained, z = \max_{domPi \in domPis} V^{domPi}_r

  for domPi in domPis:
    m.constrain(z >= sum([domPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr]))
  
  # make sure r is consistent with humanPi
  for s in S:
    for a in humanA:
      # humanPi is better than a locally different policy which takes action a in state a
      m.constrain(sum(sum((humanPi[S[sp], humanA[ap]] - localDifferentPis[s, a][S[sp], humanA[ap]]) for ap in humanAr)\
                      * r[sp] for sp in Sr) >= 0)
    
  # maxi_r { V^{newPi}_r - \max_{domPi \in domPis} V^{domPi}_r }
  cplexObj = m.maximize(sum(newPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr) - z)
  
  obj = sum([newPi[S[s], robotA[a]] * m[r][s] for s in Sr for a in robotAr]) - m[z]

  # the reward function has the same values for same states, but need to convert back to the S x A space
  rFunc = lambda s, a: m[r][Sr.index(s)]

  print 'cplexobj', cplexObj
  print 'obj', obj
  print 'newPi'
  printPi(newPi)
  print 'z', m[z], 'r', m[r]

  return obj, rFunc

Esempio n. 12

0

Mostra file

File: lp.py Progetto: shunzh/RLCodeBase

def lpDualCPLEX(mdp, zeroConstraints=(), positiveConstraints=(), positiveConstraintsOcc=1):
  """
  Solve the dual problem of lp, maybe with some constraints.

  Note that this is a lower level function that does not consider feature
  extraction. mdp.r should be a reward function, not a reward parameter.

  Args:
    mdp: object providing S, A, T, r, gamma and alpha
    zeroConstraints: (state, action) pairs whose summed occupancy must equal 0
    positiveConstraints: (state, action) pairs whose summed occupancy must be
      at least positiveConstraintsOcc
    positiveConstraintsOcc: lower bound used for positiveConstraints

  Returns:
    {'feasible': False} when maximize raises CPlexException, otherwise
    {'feasible': True, 'obj': objective value,
     'pi': dict mapping (state, action) to the solved occupancy}
  """
  # The defaults are immutable tuples rather than shared list objects; the
  # function only takes len() of them and iterates over them, so callers that
  # pass lists are unaffected.

  S = mdp.S
  A = mdp.A
  T = mdp.T
  r = mdp.r
  gamma = mdp.gamma
  alpha = mdp.alpha

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  Ar = range(len(A))

  x = m.new((len(S), len(A)), lb=0, name='x')

  # make sure x is a valid occupancy
  for sp in Sr:
    # sum_{s,a} x(s,a) * ((s == sp) - gamma * T(s,a,sp)) = alpha(sp)
    m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp])) for s in Sr for a in Ar) == alpha(S[sp]))

  # == constraints
  if len(zeroConstraints) > 0:
    m.constrain(sum(x[S.index(s), A.index(a)] for s, a in zeroConstraints) == 0)

  # >= constraints
  if len(positiveConstraints) > 0:
    m.constrain(sum(x[S.index(s), A.index(a)] for s, a in positiveConstraints) >= positiveConstraintsOcc)

  # obj
  try:
    obj = m.maximize(sum([x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar]))
  except CPlexException as err:
    # single pre-formatted argument: emits the same text as the old
    # two-item print statement and is valid under Python 2 and Python 3
    print('Exception %s' % (err,))
    # no 'obj' or 'pi' key is returned here; callers must check 'feasible' first
    return {'feasible': False}

  return {'feasible': True, 'obj': obj, 'pi': {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}}