예제 #1
0
파일: baseline2.py 프로젝트: linuxfl/cs229
# Parameters for generating simulated impressions.
maxctr = 0.02
basecost = 10
baseIntensity = 2
index = 0

# Initial-state values, currently disabled (never assigned):
#   hours = 24
#   imps = 0
#   clicks = 0

# Campaign settings handed to the environment.
dailyBudget = 100
ctrThres = 0
maxBid = 20
cpcGoal = 100

# NOTE(review): `dt` and `now` are not defined in this excerpt; they are
# presumably set earlier in the full script -- confirm before running.
env = PacingEnv(
    dt, dailyBudget, ctrThres, maxBid, cpcGoal, now, index, maxctr,
    basecost, baseIntensity
)


class BaselineAlgo:
    """Baseline pacing algorithm.

    Holds two tunables, ``shortS`` (short-term sensitivity) and ``pacing``,
    both initialised to 1.
    """

    def __init__(self):
        # Short-term sensitivity; 1 is a placeholder start value -- a better
        # initial value may exist.
        self.shortS, self.pacing = 1, 1

    def act(self, state, env, debugrec):
        """Read the environment state and derive the desired budget ratio.

        ``state`` and ``debugrec`` are not used by the visible body.
        Raises ``ZeroDivisionError`` when ``env.state[1]`` equals 287,
        because the divisor below becomes zero.

        NOTE(review): the visible body stops after computing the ratio and
        returns ``None``; presumably the full source continues -- confirm.
        """
        # Steps left out of 288 (presumably 5-minute slots in a day -- confirm).
        steps_left = 288 - env.state[1] - 1
        # Budget is read from env.state[0] and floored at zero.
        budget_left = max(0, env.state[0])
        target_ratio = budget_left / float(steps_left)
예제 #2
0
from __future__ import division
import numpy as np
import torch
from torch.autograd import Variable
import os
import psutil
import gc
from pacingenv import PacingEnv
import matplotlib.pyplot as plt

import trainddpg
import buffer

# Environment the trainer's dimensions are read from.
env = PacingEnv()
# env = gym.make('Pendulum-v0')

# Run settings; only MAX_BUFFER is consumed within this excerpt.
MAX_EPISODES = 5
MAX_STEPS = 1000
MAX_BUFFER = 1000000
MAX_TOTAL_REWARD = 300

# Sizes are taken from the first entry of each space's shape.
S_DIM = env.observation_space.shape[0]
A_DIM = env.action_space.shape[0]
# NOTE(review): this is the whole action space object, not a numeric
# bound -- confirm that is what downstream code expects.
A_MAX = env.action_space

# Format each value into its message inside a single print call:
# `print(label), value` only emits the label when print is a function,
# silently dropping the value.
print(' State Dimensions :- {}'.format(S_DIM))
print(' Action Dimensions :- {}'.format(A_DIM))
print(' Action Max :- {}'.format(A_MAX))

# Memory buffer sized by MAX_BUFFER, shared with the DDPG trainer.
ram = buffer.MemoryBuffer(MAX_BUFFER)
trainer = trainddpg.Trainer(S_DIM, A_DIM, ram)