Ejemplo n.º 1
def decomposePiLP(S, A, T, s0, terminal, rawX, x, gamma=1):
  r"""
  This tries to decouple a policy into the optimal policy (following no constraints) and another policy \pi'.
  \pi' may be a dominating policy.
  Described in Eq. 2 on Aug.29, 2017.

  Maximizes sigma in [0, 1] subject to sigma * rawX + y == x entry-wise, where
  y >= 0 has to satisfy the flow constraints below with initial mass
  (1 - sigma) placed on s0.

    S: state set
    A: action set
    T: transition function, called as T(s, a, sp)
    s0: init state
    terminal: predicate on states; true for terminal states
    rawX: indexed by (state, action) pairs (presumably the occupancy of the
          unconstrained optimal policy -- confirm with the caller)
    x: indexed by (state, action) pairs (presumably the occupancy being decomposed)
    gamma: discount factor

  Returns the objective value (the maximized sigma) and the value of y as a
  dict keyed by (state, action).
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  Ar = range(len(A))

  y = m.new((len(S), len(A)), lb=0, name='y')
  sigma = m.new(lb=0, ub=1, name='sigma')

  for s in Sr:
    for a in Ar:
      # note that x and rawX use S x A as domains, while y uses numbered indices
      m.constrain(sigma * rawX[S[s], A[a]] + y[s, a] == x[S[s], A[a]])

  # make sure y is a valid occupancy
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      # inflow into a non-terminal state only counts transitions leaving non-terminal states
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))
    else:
      # sp is terminal, so the (not terminal(S[sp])) factor is 0 and the inflow term vanishes
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))

  obj = m.maximize(sigma)

  # return sigma and the value of y
  return obj, {(S[s], A[a]): m[y][s, a] for s in Sr for a in Ar}
Ejemplo n.º 2
def decomposePiLP(S, A, T, s0, terminal, rawX, x, gamma=1):
  r"""
  This tries to decouple a policy into the optimal policy (following no constraints) and another policy \pi'.
  \pi' may be a dominating policy.
  Described in Eq. 2 on Aug.29, 2017.

  The program maximizes a scalar sigma in [0, 1] such that
  sigma * rawX + y == x holds for every (state, action) pair, with y >= 0
  obeying the flow constraints below for an initial mass of (1 - sigma) on s0.

    S: state set
    A: action set
    T: transition function, called as T(s, a, sp)
    s0: init state
    terminal: predicate on states; true for terminal states
    rawX: indexed by (state, action) pairs (presumably the occupancy of the
          unconstrained optimal policy -- confirm with the caller)
    x: indexed by (state, action) pairs (presumably the occupancy being decomposed)
    gamma: discount factor

  Returns (objective value, y) where y is a dict keyed by (state, action).
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  Ar = range(len(A))

  y = m.new((len(S), len(A)), lb=0, name='y')
  sigma = m.new(lb=0, ub=1, name='sigma')

  # x and rawX use S x A as domains; y is addressed by numbered indices
  for s in Sr:
    for a in Ar:
      m.constrain(sigma * rawX[S[s], A[a]] + y[s, a] == x[S[s], A[a]])

  # make sure y is a valid occupancy
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      # only transitions that leave non-terminal states feed a non-terminal sp
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))
    else:
      # for a terminal sp the (not terminal(S[sp])) factor is 0, dropping the inflow term
      m.constrain(sum(y[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (1 - sigma) * (S[sp] == s0))

  obj = m.maximize(sigma)

  # return sigma and the value of y
  return obj, {(S[s], A[a]): m[y][s, a] for s in Sr for a in Ar}
Ejemplo n.º 3
def opt(e_init, e_target, hrv_hist, hrv_en):
    # Builds a CPlexModel over per-epoch variables b and l, propagates the
    # energy / histogram arrays epoch by epoch through next_battery_level,
    # adds per-epoch constraints, maximizes objective_function on the last
    # epoch and returns the model.
    #
    # NOTE(review): epochs_per_day, nodes, mod_levels, bin_num, D, energy,
    # time, next_battery_level and objective_function are not defined in this
    # function -- presumably module-level names; verify.
    # NOTE(review): e_target, l and fixed_prob are never read below.

    # verbosity is how much log is reported back from CPlex. 3 is the most verbose
    verbosity = 3
    m = CPlexModel(verbosity)
    # NOTE(review): the next two m.new(...) calls are never closed; their
    # remaining arguments appear to be missing from this copy of the code and
    # must be restored before this function can run.
    b = m.new((epochs_per_day, nodes, mod_levels),
    l = m.new((epochs_per_day, nodes, bin_num),
    # NOTE(review): numpy has no `linespace`; this looks like a typo for
    # np.linspace -- confirm and fix.
    fixed_prob = np.linespace(0, 1, num=bin_num, endpoint=True, dtype=float)
    # histogram for the initial epoch: all mass in column 0
    e_init_hist = np.zeros(e_init.shape, dtype=float)
    e_init_hist[:, 0] = 1
    hist_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    hist_rv[0] = e_init_hist
    # prepare the energy vector here
    en_rv = np.zeros((epochs_per_day, nodes, bin_num), dtype=float)
    en_rv[0] = e_init
    # epoch i is derived from epoch i - 1; epoch 0 holds the initial values
    for i in xrange(1, epochs_per_day):
        en_rv[i], hist_rv[i] = next_battery_level(en_rv[i - 1], hist_rv[i - 1],\
        hrv_en[i, :, :] - (np.vectorize(energy))(b[i, :]), hrv_hist[i, :, :])
        m.constrain(en_rv[i] >= 0)
        m.constrain(sum(np.vectorize(time)(b[i, :])) <= D)
    # objective is evaluated on the last epoch only
    m.maximize(objective_function(en_rv[-1], hist_rv[-1]))
    return m
Ejemplo n.º 4
def milp(S, A, R, T, s0, psi, maxV):
  r"""
  Solve the MILP problem in greedy construction of policy query

    S: state set
    A: action set
    R: reward candidate set; each R[i] is called as R[i](s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state
    psi: prior belief on rewards
    maxV: maxV[i] = max_{\pi \in q} V_{r_i}^\pi

  Returns a util.Counter mapping (state, action) to the occupancy x found by
  the solver.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  rLen = len(R)
  M = 10000 # a large number
  Sr = range(len(S))
  Ar = range(len(A))
  Rr = range(rLen)

  # decision variables
  # FIXME i removed upper bound of x. it shouldn't have such bound without transient-state assumption, right?
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(rLen, vtype=bool, name='z')
  y = m.new(rLen, name='y')

  # constraints on y (big-M): when z[i] == 1, y[i] is bounded by the
  # improvement over maxV[i]; when z[i] == 0, y[i] <= 0
  m.constrain([y[i] <= sum([x[s, a] * R[i](S[s], A[a]) for s in Sr for a in Ar]) - maxV[i] + (1 - z[i]) * M for i in Rr])
  m.constrain([y[i] <= z[i] * M for i in Rr])

  # constraints on x (valid occupancy)
  for sp in Sr:
    if S[sp] == s0:
      # the initial state carries one unit of occupancy
      m.constrain(sum([x[sp, ap] for ap in Ar]) == 1)
    else:
      # every other state: outflow equals inflow
      m.constrain(sum([x[sp, ap] for ap in Ar]) == sum([x[s, a] * T(S[s], A[a], S[sp]) for s in Sr for a in Ar]))

  # obj
  obj = m.maximize(sum([psi[i] * y[i] for i in Rr]))

  if config.VERBOSE:
    # single-argument print emits the same text on Python 2 and 3
    print('obj %s' % (obj,))
    print('x %s' % (m[x],))
    print('y %s' % (m[y],))
    print('z %s' % (m[z],))

  # build occupancy as S x A -> x[.,.]
  # z[i] == 1 then this policy is better than maxV on the i-th reward candidate
  res = util.Counter()
  for s in Sr:
    for a in Ar:
      res[S[s], A[a]] = m[x][s, a]
  return res
Ejemplo n.º 5
def domPiMilp(S, A, r, T, s0, terminal, domPis, consIdx, gamma=1):
  r"""
  Finding dominating policies by representing constraints as possible negative rewards.
  Described in the report on aug.19, 2017.

    S: state set; states are indexable, S[s][consIdx[i]] is compared with s0[consIdx[i]]
    A: action set
    r: reward function, called as r(s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state
    terminal: predicate on states; true for terminal states
    domPis: iterable of occupancies, each indexed by (state, action) pairs
    consIdx: indices into a state used by the constraints
    gamma: discount factor

  Returns the objective value (the maximized t) and x as a dict keyed by
  (state, action). Also prints the value of z.
  """
  rmax = 10000
  M = 0.001
  consLen = len(consIdx)

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))

  # decision variables
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(consLen, vtype=bool, name='z')
  #z = [0, 1, 0] # test for office nav domain
  t = m.new(name='t')

  # flow conservation
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      # inflow into a non-terminal state only counts transitions leaving non-terminal states
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (S[sp] == s0))
      #print S[sp], [(S[s], A[a]) for s in Sr for a in Ar if T(S[s], A[a], S[sp]) > 0]
    else:
      # sp is terminal, so the (not terminal(S[sp])) factor is 0 and the inflow term vanishes
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (S[sp] == s0))

  # t is the lower bound of the difference between x and y
  # note: i don't think expressions in constraints can call other functions
  for y in domPis:
    # note that y is indexed by elements in S x A, not numbered indices
    m.constrain(sum(x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar) -
                sum(y[S[s], A[a]] *
                    (r(S[s], A[a]) + sum(- rmax * (S[s][consIdx[i]] != s0[consIdx[i]]) * z[i] for i in range(consLen)))
                    for s in Sr for a in Ar)
                >= t)

  # for states that differ from s0 on feature consIdx[i], link z[i] to the occupancy x
  for s in Sr:
    for i in range(consLen):
      if S[s][consIdx[i]] != s0[consIdx[i]]:
        for a in Ar:
          m.constrain(z[i] + M * x[s, a] <= 1)

  # obj
  obj = m.maximize(t)
  print(m[z])
  return obj, {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}
Ejemplo n.º 6
def milp(S, A, R, T, s0, psi, maxV):
  r"""
  Solve the MILP problem in greedy construction of policy query

    S: state set
    A: action set
    R: reward candidate set; each R[i] is called as R[i](s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state
    psi: prior belief on rewards
    maxV: maxV[i] = max_{\pi \in q} V_{r_i}^\pi

  Returns a util.Counter keyed by (state, action) holding the solved
  occupancy x.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  rLen = len(R)
  M = 10000 # a large number
  Sr = range(len(S))
  Ar = range(len(A))
  Rr = range(rLen)

  # decision variables
  # FIXME i removed upper bound of x. it shouldn't have such bound without transient-state assumption, right?
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(rLen, vtype=bool, name='z')
  y = m.new(rLen, name='y')

  # constraints on y (big-M): with z[i] == 1 the first bound is active,
  # with z[i] == 0 the second one forces y[i] <= 0
  m.constrain([y[i] <= sum([x[s, a] * R[i](S[s], A[a]) for s in Sr for a in Ar]) - maxV[i] + (1 - z[i]) * M for i in Rr])
  m.constrain([y[i] <= z[i] * M for i in Rr])

  # constraints on x (valid occupancy)
  for sp in Sr:
    if S[sp] == s0:
      # one unit of occupancy leaves the initial state
      m.constrain(sum([x[sp, ap] for ap in Ar]) == 1)
    else:
      # flow conservation for all remaining states
      m.constrain(sum([x[sp, ap] for ap in Ar]) == sum([x[s, a] * T(S[s], A[a], S[sp]) for s in Sr for a in Ar]))

  # obj
  obj = m.maximize(sum([psi[i] * y[i] for i in Rr]))

  if config.VERBOSE:
    # single-argument print emits the same text on Python 2 and 3
    print('obj %s' % (obj,))
    print('x %s' % (m[x],))
    print('y %s' % (m[y],))
    print('z %s' % (m[z],))

  # build occupancy as S x A -> x[.,.]
  # z[i] == 1 then this policy is better than maxV on the i-th reward candidate
  res = util.Counter()
  for s in Sr:
    for a in Ar:
      res[S[s], A[a]] = m[x][s, a]
  return res
Ejemplo n.º 7
def domPiMilp(S, A, r, T, s0, terminal, domPis, consIdx, gamma=1):
  r"""
  Finding dominating policies by representing constraints as possible negative rewards.
  Described in the report on aug.19, 2017.

    S: state set; states are indexable, S[s][consIdx[i]] is compared with s0[consIdx[i]]
    A: action set
    r: reward function, called as r(s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state
    terminal: predicate on states; true for terminal states
    domPis: iterable of occupancies, each indexed by (state, action) pairs
    consIdx: indices into a state used by the constraints
    gamma: discount factor

  Returns (objective value, x) where x is a dict keyed by (state, action).
  The value of z is printed as a side effect.
  """
  rmax = 10000
  M = 0.001
  consLen = len(consIdx)

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))

  # decision variables
  x = m.new((len(S), len(A)), lb=0, name='x')
  z = m.new(consLen, vtype=bool, name='z')
  #z = [0, 1, 0] # test for office nav domain
  t = m.new(name='t')

  # flow conservation
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    # and make sure there is no flow back from the terminal states
    if not terminal(S[sp]):
      # only transitions that leave non-terminal states feed a non-terminal sp
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[s]))) for s in Sr for a in Ar) == (S[sp] == s0))
      #print S[sp], [(S[s], A[a]) for s in Sr for a in Ar if T(S[s], A[a], S[sp]) > 0]
    else:
      # for a terminal sp the (not terminal(S[sp])) factor is 0, dropping the inflow term
      m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]) * (not terminal(S[sp]))) for s in Sr for a in Ar) == (S[sp] == s0))

  # t is the lower bound of the difference between x and y
  # note: i don't think expressions in constraints can call other functions
  for y in domPis:
    # note that y is indexed by elements in S x A, not numbered indices
    m.constrain(sum(x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar) -
                sum(y[S[s], A[a]] *
                    (r(S[s], A[a]) + sum(- rmax * (S[s][consIdx[i]] != s0[consIdx[i]]) * z[i] for i in range(consLen)))
                    for s in Sr for a in Ar)
                >= t)

  # states whose feature consIdx[i] differs from s0: tie z[i] to the occupancy x
  for s in Sr:
    for i in range(consLen):
      if S[s][consIdx[i]] != s0[consIdx[i]]:
        for a in Ar:
          m.constrain(z[i] + M * x[s, a] <= 1)

  # obj
  obj = m.maximize(t)
  print(m[z])
  return obj, {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}
Ejemplo n.º 8
def findUndominatedReward(mdpH, mdpR, newPi, humanPi, localDifferentPis,
                          domPis):
    r"""
    Implementation of the linear programming problem (Eq.2) in report 12.5
    Returns the objective value and a reward function (which is only useful when the obj value is > 0)
    newPi is \hat{\pi} in the linear programming problem in the report.
    The robot tries to see if there exists a reward function where newPi is better than the best policy in domPis.

      mdpH: human mdp; mdpH.S (states) and mdpH.A (human actions) are read
      mdpR: robot mdp; mdpR.A (robot actions) is read
      newPi: occupancy indexed by (state, robot action)
      humanPi: occupancy indexed by (state, human action)
      localDifferentPis: indexed by (state, human action), each entry an
                         occupancy indexed by (state, human action)
      domPis: iterable of occupancies indexed by (state, robot action)

    The returned reward function takes (s, a) and ignores a.
    """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    S = mdpH.S
    robotA = mdpR.A
    humanA = mdpH.A

    # index of states and actions
    Sr = range(len(S))
    robotAr = range(len(robotA))
    humanAr = range(len(humanA))

    r = m.new(len(S), lb=0, ub=1, name='r')
    z = m.new(
        name='z'
    )  # when the optimal value is attained, z = \max_{domPi \in domPis} V^{domPi}_r

    for domPi in domPis:
        m.constrain(z >= sum(
            [domPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr]))

    # make sure r is consistent with humanPi
    for s in S:
        for a in humanA:
            # humanPi is better than a locally different policy which takes action a in state s
            m.constrain(sum(sum((humanPi[S[sp], humanA[ap]] - localDifferentPis[s, a][S[sp], humanA[ap]]) for ap in humanAr)
                            * r[sp] for sp in Sr) >= 0)

    # maxi_r { V^{newPi}_r - \max_{domPi \in domPis} V^{domPi}_r }
    cplexObj = m.maximize(
        sum(newPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr) - z)

    # recompute the objective from the solved values of r and z
    obj = sum([newPi[S[s], robotA[a]] * m[r][s] for s in Sr
               for a in robotAr]) - m[z]

    # the reward function has the same values for same states, but need to convert back to the S x A space.
    # r is indexed by the position of the state in S, so look the state up in S (not in the index range Sr)
    rFunc = lambda s, a: m[r][S.index(s)]

    # single-argument print emits the same text on Python 2 and 3
    print('cplexobj %s' % (cplexObj,))
    print('obj %s' % (obj,))
    print('newPi')
    print('z %s r %s' % (m[z], m[r]))

    return obj, rFunc
Ejemplo n.º 9
def rewardUncertainMILP(S, A, R, T, s0, terminal, k, optV, gamma=1):
    r"""
    The algorithm adapted from
    Viappiani, Paolo and Boutilier, Craig. Optimal set recommendations based on regret

    This algorithm would find the minimax-regret policy query in our problem.
    Not sure how to use this algorithm.

      S: state set
      A: action set
      R: reward candidate set; each R[r] is called as R[r](s, a)
      T: transition function, called as T(s, a, sp)
      s0: init state
      terminal: not used by this function
      k: number of occupancies (policies) in the query
      optV: optV[r] is compared against the value of each occupancy under R[r]
      gamma: discount factor

    Returns the objective value (the minimized mr) and the value of the
    boolean assignment matrix I, which has shape (k, len(R)).
    """
    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    M = 100000

    # state range
    Sr = range(len(S))
    # action range
    Ar = range(len(A))

    mr = m.new(name='mr')
    # decision variables
    x = m.new((k, len(S), len(A)), lb=0, name='x')
    v = m.new((k, len(R)), name='v')
    I = m.new((k, len(R)), vtype=bool, name='I')

    # mr bounds, for every reward candidate, the regret summed over the k policies
    for r in range(len(R)):
        m.constrain(mr >= sum(v[i, r] for i in range(k)))

    # big-M: the regret bound on v[i, r] is only active when I[i, r] == 1
    for r in range(len(R)):
        for i in range(k):
            m.constrain(v[i, r] >= optV[r] - sum(x[i, s, a] * R[r](S[s], A[a])
                                                 for s in Sr for a in Ar) +
                        (I[i, r] - 1) * M)

    # make sure x is a valid occupancy
    for i in range(k):
        for sp in Sr:
            m.constrain(
                sum(x[i, s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                    for s in Sr for a in Ar) == (S[sp] == s0))

    # each reward candidate is assigned to exactly one of the k policies
    for r in range(len(R)):
        m.constrain(sum(I[i, r] for i in range(k)) == 1)

    for r in range(len(R)):
        for i in range(k):
            m.constrain(v[i, r] >= 0)

    obj = m.minimize(mr)

    return obj, m[I]
Ejemplo n.º 10
def findUndominatedReward(mdpH, mdpR, newPi, humanPi, localDifferentPis, domPis):
  r"""
  Implementation of the linear programming problem (Eq.2) in report 12.5
  Returns the objective value and a reward function (which is only useful when the obj value is > 0)
  newPi is \hat{\pi} in the linear programming problem in the report.
  The robot tries to see if there exists a reward function where newPi is better than the best policy in domPis.

    mdpH: human mdp; mdpH.S (states) and mdpH.A (human actions) are read
    mdpR: robot mdp; mdpR.A (robot actions) is read
    newPi: occupancy indexed by (state, robot action)
    humanPi: occupancy indexed by (state, human action)
    localDifferentPis: indexed by (state, human action), each entry an
                       occupancy indexed by (state, human action)
    domPis: iterable of occupancies indexed by (state, robot action)

  The returned reward function takes (s, a) and ignores a.
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  S = mdpH.S
  robotA = mdpR.A
  humanA = mdpH.A

  # index of states and actions
  Sr = range(len(S))
  robotAr = range(len(robotA))
  humanAr = range(len(humanA))

  r = m.new(len(S), lb=0, ub=1, name='r')
  z = m.new(name='z') # when the optimal value is attained, z = \max_{domPi \in domPis} V^{domPi}_r

  for domPi in domPis:
    m.constrain(z >= sum([domPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr]))

  # make sure r is consistent with humanPi
  for s in S:
    for a in humanA:
      # humanPi is better than a locally different policy which takes action a in state s
      m.constrain(sum(sum((humanPi[S[sp], humanA[ap]] - localDifferentPis[s, a][S[sp], humanA[ap]]) for ap in humanAr)
                      * r[sp] for sp in Sr) >= 0)

  # maxi_r { V^{newPi}_r - \max_{domPi \in domPis} V^{domPi}_r }
  cplexObj = m.maximize(sum(newPi[S[s], robotA[a]] * r[s] for s in Sr for a in robotAr) - z)
  # recompute the objective from the solved values of r and z
  obj = sum([newPi[S[s], robotA[a]] * m[r][s] for s in Sr for a in robotAr]) - m[z]

  # the reward function has the same values for same states, but need to convert back to the S x A space.
  # r is indexed by the position of the state in S, so look the state up in S (not in the index range Sr)
  rFunc = lambda s, a: m[r][S.index(s)]

  # single-argument print emits the same text on Python 2 and 3
  print('cplexobj %s' % (cplexObj,))
  print('obj %s' % (obj,))
  print('newPi')
  print('z %s r %s' % (m[z], m[r]))

  return obj, rFunc
Ejemplo n.º 11
def lp(S, A, r, T, s0):
  """
  Solve the LP problem to find out the optimal occupancy

    S: state set
    A: action set
    r: reward, called as r(s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state (an element of S)

  Returns a util.Counter mapping each state to the solved value of v for that
  state. Raises ValueError if s0 is not in S.
  """
  m = CPlexModel()
  if not config.VERBOSE or config.DEBUG: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))

  v = m.new(len(S), name='v')

  # v[s] dominates the one-step backup of every action
  for s in Sr:
    for a in A:
      m.constrain(v[s] >= r(S[s], a) + sum(v[sp] * T(S[s], a, S[sp]) for sp in Sr))

  # obj
  # v is addressed by numbered indices, so translate the init state to its position in S
  m.minimize(v[S.index(s0)])

  ret = util.Counter()
  for s in Sr:
    ret[S[s]] = m[v][s]
  return ret
Ejemplo n.º 12
def lpDualCPLEX(mdp,
                zeroConstraints=[],
                positiveConstraints=[],
                positiveConstraintsOcc=1):
    r"""
    DEPRECATED since we moved to gurobi. but leave the function here for sanity check
    Solve the dual problem of lp, maybe with some constraints
    Same arguments
    Note that this is a lower level function that does not consider feature extraction.
    r should be a reward function, not a reward parameter.

      mdp: provides S, A, T, r, gamma and alpha (alpha is called per state
           and used as the right-hand side of the flow constraints)
      zeroConstraints: (state, action) pairs whose summed occupancy must be 0
      positiveConstraints: (state, action) pairs whose summed occupancy must
                           be at least positiveConstraintsOcc
      positiveConstraintsOcc: lower bound used with positiveConstraints

    Returns {'feasible': False} when the solver raises CPlexException,
    otherwise {'feasible': True, 'obj': objective value, 'pi': occupancy as a
    dict keyed by (state, action)}. The default lists are never mutated.
    """
    S = mdp.S
    A = mdp.A
    T = mdp.T
    r = mdp.r
    gamma = mdp.gamma
    alpha = mdp.alpha

    m = CPlexModel()
    if not config.VERBOSE: m.setVerbosity(0)

    # useful constants
    Sr = range(len(S))
    Ar = range(len(A))

    x = m.new((len(S), len(A)), lb=0, name='x')

    # make sure x is a valid occupancy
    for sp in Sr:
        # x (x(s) - \gamma * T) = \sigma
        m.constrain(
            sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp]))
                for s in Sr for a in Ar) == alpha(S[sp]))

    # == constraints
    if len(zeroConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)] for s, a in zeroConstraints) == 0)

    # >= constraints
    if len(positiveConstraints) > 0:
        m.constrain(
            sum(x[S.index(s), A.index(a)]
                for s, a in positiveConstraints) >= positiveConstraintsOcc)

    # obj
    try:
        obj = m.maximize(sum([x[s, a] * r(S[s], A[a]) for s in Sr
                              for a in Ar]))
    except CPlexException as err:
        print('Exception %s' % (err,))
        # we return obj value as None and occ measure as {}. this should be handled correctly
        return {'feasible': False}

    return {
        'feasible': True,
        'obj': obj,
        'pi': {(S[s], A[a]): m[x][s, a]
               for s in Sr for a in Ar}
    }
Ejemplo n.º 13
def lp(S, A, r, T, s0):
  """
  Solve the LP problem to find out the optimal occupancy

    S: state set
    A: action set
    r: reward, called as r(s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state (an element of S)

  Returns a util.Counter keyed by state with the solved value of v for each
  state. Raises ValueError if s0 is not in S.
  """
  m = CPlexModel()
  if not config.VERBOSE or config.DEBUG: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  v = m.new(len(S), name='v')

  # one constraint per (state, action): v[s] is at least the one-step backup
  for s in Sr:
    for a in A:
      m.constrain(v[s] >= r(S[s], a) + sum(v[sp] * T(S[s], a, S[sp]) for sp in Sr))

  # obj
  # v is addressed by numbered indices, so translate the init state to its position in S
  m.minimize(v[S.index(s0)])

  ret = util.Counter()
  for s in Sr:
    ret[S[s]] = m[v][s]
  return ret
Ejemplo n.º 14
def lwaLP(graph_object):
    # Builds a CPlexModel with one variable in [0, 1] per vertex (cv) and a
    # scalar U, constrains them using graph_object.capabilities and
    # graph_object.R, and returns the result of findStrategySet together with
    # attacker_strategy.
    #
    # NOTE(review): findStrategySet is not defined in this function --
    # presumably a sibling helper; verify.
    capabilites = graph_object.capabilities
    no_vertices = graph_object.n
    attacker_strategy = list()
    m = CPlexModel()
    cv = m.new(no_vertices, vtype=float, ub=1, lb=0)
    U = m.new(vtype=float)
    # diagonal matrix with the capabilities on the diagonal
    diag = np.diag(capabilites)
    m.constrain(U <= -diag * (1 - cv))
    # the entries of cv may sum to at most graph_object.R
    m.constrain(sum(cv) <= graph_object.R)
    # NOTE(review): no m.maximize / m.minimize call is visible before m[cv] is
    # read below -- the solve step appears to be missing from this copy.
    # NOTE(review): xrange() receives m[cv] rather than a count (no_vertices
    # looks intended), and the `if` has no body, so attacker_strategy is never
    # filled here -- lines appear to be missing; restore before running.
    for i in xrange(m[cv]):
        if m[cv][i] > 0:
    start = findStrategySet(m[cv], graph_object.R)
    return start, attacker_strategy
Ejemplo n.º 15
def rewardUncertainMILP(S, A, R, T, s0, terminal, k, optV, gamma=1):
  r"""
  The algorithm adapted from
  Viappiani, Paolo and Boutilier, Craig. Optimal set recommendations based on regret

  This algorithm would find the minimax-regret policy query in our problem.
  Not sure how to use this algorithm.

    S: state set
    A: action set
    R: reward candidate set; each R[r] is called as R[r](s, a)
    T: transition function, called as T(s, a, sp)
    s0: init state
    terminal: not used by this function
    k: number of occupancies (policies) in the query
    optV: optV[r] is compared against the value of each occupancy under R[r]
    gamma: discount factor

  Returns the objective value (the minimized mr) and the value of the boolean
  assignment matrix I, which has shape (k, len(R)).
  """
  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  M = 100000

  # state range
  Sr = range(len(S))
  # action range
  Ar = range(len(A))

  mr = m.new(name='mr')
  # decision variables
  x = m.new((k, len(S), len(A)), lb=0, name='x')
  v = m.new((k, len(R)), name='v')
  I = m.new((k, len(R)), vtype=bool, name='I')

  # mr bounds, for every reward candidate, the regret summed over the k policies
  for r in range(len(R)):
    m.constrain(mr >= sum(v[i, r] for i in range(k)))

  # big-M: the regret bound on v[i, r] is only active when I[i, r] == 1
  for r in range(len(R)):
    for i in range(k):
      m.constrain(v[i, r] >= optV[r] - sum(x[i, s, a] * R[r](S[s], A[a]) for s in Sr for a in Ar) + (I[i, r] - 1) * M)

  # make sure x is a valid occupancy
  for i in range(k):
    for sp in Sr:
      m.constrain(sum(x[i, s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp])) for s in Sr for a in Ar) == (S[sp] == s0))

  # each reward candidate is assigned to exactly one of the k policies
  for r in range(len(R)):
    m.constrain(sum(I[i, r] for i in range(k)) == 1)

  for r in range(len(R)):
    for i in range(k):
      m.constrain(v[i, r] >= 0)

  obj = m.minimize(mr)
  return obj, m[I]
Ejemplo n.º 16
def solve_GT_LP_relax_noisy_CPLEX(A, y, lambda_reg, do_binary=False):
    ''' solve the LP formulation of noisy boolean
    compressed sensing using CPLEX

    A: 2-D array; A.shape is unpacked as (M, N)
    y: binary vector (asserted); rows of A are split by y == 1 / y == 0
    lambda_reg: weight of the slack terms in the objective
    do_binary: if true, all variables are boolean (solved with cuts +
        branch and bound); otherwise real-valued relaxations are used

    Returns (x_hat, xi_hat): the solved x and a length-M array holding the
    solved slack of every row, placed at that row's original index.
    '''

    M, N = A.shape

    assert np.linalg.norm(y - y**2) <= 1e-10, "Inputs must be binary"

    inds_1 = np.where(y == 1)[0]
    inds_0 = np.where(y == 0)[0]
    A1 = A[inds_1, :]
    A0 = A[inds_0, :]
    ###  introduce random small perturbations to avoid degenerate solutions
    # (only consumed by the commented-out perturbed objective below)
    w_pert_x = np.ones(N) + 0.001 * np.random.rand(N)
    # rand(M) keeps this a length-M vector; rand(M, 1) would broadcast to (M, M)
    w_pert_xi = np.ones(M) + 0.001 * np.random.rand(M)

    ### try cplex directy:
    M1 = len(inds_1)
    M0 = len(inds_0)

    w_pert_xi_pos = w_pert_xi[inds_1]
    w_pert_xi_neg = w_pert_xi[inds_0]

    m = CPlexModel(verbosity=0)

    if do_binary:  ### solve the binary problem using cuts + branch and bound
        x_sdp = m.new(N, vtype='bool', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='bool', lb=0)
        xi_sdp1 = m.new(M1, vtype='bool', lb=0)
    else:  ### LP relaxation: same variables, real-valued
        x_sdp = m.new(N, vtype='real', lb=0, ub=1)
        xi_sdp0 = m.new(M0, vtype='real', lb=0)
        xi_sdp1 = m.new(M1, vtype='real', lb=0)

    # rows with y == 1 must reach 1 (with slack); rows with y == 0 must equal their slack
    m.constrain(A1 * x_sdp + xi_sdp1 >= 1)
    m.constrain(A0 * x_sdp == 0 + xi_sdp0)
    m.minimize(x_sdp.sum() + lambda_reg *
               (xi_sdp0.sum() + xi_sdp1.sum()))
    #m.minimize(w_pert_x*x_sdp + lambda_reg*w_pert_xi*xi_sdp)

    x_hat = m[x_sdp]
    xi_hat0 = m[xi_sdp0]
    xi_hat1 = m[xi_sdp1]
    # scatter the two slack vectors back to the original row order
    xi_hat = np.zeros(M)
    xi_hat[inds_0] = xi_hat0
    xi_hat[inds_1] = xi_hat1
    return x_hat, xi_hat
Ejemplo n.º 17
def lpDualCPLEX(mdp, zeroConstraints=[], positiveConstraints=[], positiveConstraintsOcc=1):
  r"""
  Solve the dual problem of lp, maybe with some constraints
  Same arguments

  Note that this is a lower level function that does not consider feature extraction.
  r should be a reward function, not a reward parameter.

    mdp: provides S, A, T, r, gamma and alpha (alpha is called per state and
         used as the right-hand side of the flow constraints)
    zeroConstraints: (state, action) pairs whose summed occupancy must be 0
    positiveConstraints: (state, action) pairs whose summed occupancy must be
                         at least positiveConstraintsOcc
    positiveConstraintsOcc: lower bound used with positiveConstraints

  Returns {'feasible': False} when the solver raises CPlexException, otherwise
  {'feasible': True, 'obj': objective value, 'pi': occupancy as a dict keyed
  by (state, action)}. The default lists are never mutated.
  """
  S = mdp.S
  A = mdp.A
  T = mdp.T
  r = mdp.r
  gamma = mdp.gamma
  alpha = mdp.alpha

  m = CPlexModel()
  if not config.VERBOSE: m.setVerbosity(0)

  # useful constants
  Sr = range(len(S))
  Ar = range(len(A))

  x = m.new((len(S), len(A)), lb=0, name='x')

  # make sure x is a valid occupancy
  for sp in Sr:
    # x (x(s) - \gamma * T) = \sigma
    m.constrain(sum(x[s, a] * ((s == sp) - gamma * T(S[s], A[a], S[sp])) for s in Sr for a in Ar) == alpha(S[sp]))

  # == constraints
  if len(zeroConstraints) > 0:
    m.constrain(sum(x[S.index(s), A.index(a)] for s, a in zeroConstraints) == 0)

  # >= constraints
  if len(positiveConstraints) > 0:
    m.constrain(sum(x[S.index(s), A.index(a)] for s, a in positiveConstraints) >= positiveConstraintsOcc)

  # obj
  try:
    obj = m.maximize(sum([x[s, a] * r(S[s], A[a]) for s in Sr for a in Ar]))
  except CPlexException as err:
    print('Exception %s' % (err,))
    # we return obj value as None and occ measure as {}. this should be handled correctly
    return {'feasible': False}

  return {'feasible': True, 'obj': obj, 'pi': {(S[s], A[a]): m[x][s, a] for s in Sr for a in Ar}}
Ejemplo n.º 18
    def create_problem(self, data):
        # Builds a CPlexModel with one boolean variable per response and adds
        # three families of constraints (hosts freed, damage limits, response
        # conflicts). Returns [m, x, costs].
        #
        # data is read with the keys "attacked", "executing", "metric",
        # "damage" and "response".
        #
        # NOTE(review): several loop bodies below are empty or incomplete in
        # this copy of the code (marked inline); the lists they were meant to
        # fill stay empty as written. Restore them before running.

        host_attacked = data["attacked"]
        # NOTE(review): host_executing is never used below.
        host_executing = data["executing"]
        metrics_used = data["metric"]
        damage_used = data["damage"]
        responses_used = data["response"]

        numResp = len(responses_used)
        damageMapper = {}
        costMapper = {}
        conflictMapper = []
        responseList = []

        # NOTE(review): this loop has no body; responseList is looked up by
        # response name further down, so it was presumably filled here.
        for response in responses_used:

        logging.info('Responses Used: %s', numResp)

        # damage limit per name
        if damage_used is not None:
            for elem in damage_used:
                damageMapper[elem.name] = elem.value

        # one (initially empty) cost list per metric name
        for elem in metrics_used:
            costMapper[elem.name] = []

        costs = []
        for response in responses_used:
            # total cost of a response is the sum of its metric values
            cost = 0
            for r in response.metrics:
                cost = cost + r.value
            if response.conflicting_responses:
                for r in response.conflicting_responses:
                    # 0/1 row marking this response and the one it conflicts with
                    tmp = [0] * numResp
                    tmp[responseList.index(response.name)] = 1
                    tmp[responseList.index(r)] = 1
        # NOTE(review): cost is never stored in costs and tmp is never stored
        # in conflictMapper in the visible code.
        costs = np.array(costs)
        hostMatrix = []
        for host in host_attacked:
            hostRow = []
            host = host.name
            for response in responses_used:
                # NOTE(review): this `if` has no body; hostRow / hostMatrix are
                # never filled in the visible code.
                if host in response.dest:

        m = CPlexModel(verbosity=0)

        # Each Response can only be executed once
        x = m.new(numResp, vtype='bool', name='x')

        i = 0
        # all attacked hosts are freed
        for elem in hostMatrix:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) >= 1)
            i = i + 1

        logging.info('Freed Constraints: %s', i)

        i = 0
        # all single metrics used have to be below damage
        for key, elem in damageMapper.iteritems():
            elemArr = np.array(costMapper[key])
            m.constrain(sum(x.mult(elemArr)) <= elem)
            i = i + 1

        logging.info('Damage Constraints: %s', i)

        i = 0
        # no conflicting actions are executed
        for elem in conflictMapper:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) <= 1)
            i = i + 1

        logging.info('Conflicting Constraints: %s', i)

        # model, decision variables and per-response cost array
        return [m, x, costs]
Ejemplo n.º 19
import logging
from pycpx import CPlexModel
import numpy as np

# Script-level set-up of a CPlexModel; from the names (sizeItems, sizeBin,
# assign, binUsed) this looks like a bin-packing model -- confirm.
m = CPlexModel(verbosity=3)

# size of every item
sizeItems = [1,4,3,2,1,3, 4, 1, 4, 5, 2, 5, 6, 2, 8, 4, 3, 4, 1, 1, 1, 2,\
                 7, 6, 5, 5, 3, 2, 2, 1, 6, 5, 7, 5, 4, 3, 3, 2, 2, 1, 2, 3]
# the bin size is set to the largest single item
sizeBin = max(sizeItems)

numItems = len(sizeItems)
# as many bins as items
numBins = numItems

# 0/1 integer matrix of shape (numItems, numBins)
# NOTE(review): this call ends with a line continuation and is never closed;
# its remaining argument(s) appear to be missing from this copy of the code.
assign = m.new((numItems,numBins), vtype=int, lb=0, ub=1,\

# 0/1 integer variable per bin
binUsed = m.new((numBins), vtype=int, lb=0, ub=1, name='binUsed')

# 0/1 matrix with the same shape as assign; the listed (item, bin) entries are
# set to 1 (presumably a starting assignment for the solver -- how it is used
# is not visible here)
startAssign = np.zeros((numItems,numBins))
startAssign[20,8] = 1
startAssign[32,8] = 1
startAssign[30,9] = 1
startAssign[38,9] = 1
startAssign[11,11] = 1
startAssign[26,11] = 1
startAssign[0,14] = 1
startAssign[4,14] = 1
startAssign[7,14] = 1
startAssign[29,14] = 1
startAssign[39,14] = 1
Ejemplo n.º 20
    def create_problem(self, data): 
        # Sets up a CPlexModel holding one boolean decision variable per
        # response, then adds constraints for freed hosts, damage limits and
        # conflicting responses. Returns [m, x, costs].
        #
        # data is read with the keys "attacked", "executing", "metric",
        # "damage" and "response".
        #
        # NOTE(review): several loop bodies below are empty or incomplete in
        # this copy of the code (marked inline); the lists they were meant to
        # fill stay empty as written. Restore them before running.

        host_attacked = data["attacked"]
        # NOTE(review): host_executing is never used below.
        host_executing = data["executing"]
        metrics_used = data["metric"]
        damage_used = data["damage"]
        responses_used = data["response"]

        numResp = len(responses_used)
        damageMapper = {}
        costMapper = {}
        conflictMapper = []
        responseList = []

        # NOTE(review): this loop has no body; responseList is looked up by
        # response name further down, so it was presumably filled here.
        for response in responses_used:

        logging.info('Responses Used: %s', numResp)

        # damage limit per name
        if damage_used is not None:
            for elem in damage_used:
                damageMapper[elem.name] = elem.value

        # one (initially empty) cost list per metric name
        for elem in metrics_used:
            costMapper[elem.name] = []
        costs = []
        for response in responses_used:
            # total cost of a response is the sum of its metric values
            cost = 0
            for r in response.metrics:
                cost = cost + r.value
            if response.conflicting_responses:
                for r in response.conflicting_responses:
                    # 0/1 row marking this response and the one it conflicts with
                    tmp = [0] * numResp
                    tmp[responseList.index(response.name)] = 1
                    tmp[responseList.index(r)] = 1
        # NOTE(review): cost is never stored in costs and tmp is never stored
        # in conflictMapper in the visible code.
        costs = np.array(costs)
        hostMatrix = []
        for host in host_attacked:
            hostRow = []
            host = host.name
            for response in responses_used:
                # NOTE(review): this `if` has no body; hostRow / hostMatrix are
                # never filled in the visible code.
                if host in response.dest:

        m = CPlexModel(verbosity = 0)
        # Each Response can only be executed once
        x = m.new(numResp,vtype='bool', name='x')

        i = 0
        # all attacked hosts are freed
        for elem in hostMatrix:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) >= 1)
            i = i + 1

        logging.info('Freed Constraints: %s', i)

        i = 0
        # all single metrics used have to be below damage
        for key, elem in damageMapper.iteritems():
            elemArr = np.array(costMapper[key])
            m.constrain(sum(x.mult(elemArr)) <= elem)
            i = i + 1

        logging.info('Damage Constraints: %s', i)

        i = 0
        # no conflicting actions are executed
        for elem in conflictMapper:
            elemArr = np.array(elem)
            m.constrain(sum(x.mult(elemArr)) <= 1)
            i = i + 1
        logging.info('Conflicting Constraints: %s', i)

        # model, decision variables and per-response cost array
        return [m, x, costs]