index = 0 #generate simulated impressions maxctr = 0.02 basecost = 10 baseIntensity = 2 """ #initial state hours = 24 imps = 0 clicks = 0 """ dailyBudget = 100 ctrThres = 0 maxBid = 20 cpcGoal = 100 env = PacingEnv(dt, dailyBudget, ctrThres, maxBid, cpcGoal, now, index, maxctr, basecost, baseIntensity) class BaselineAlgo: def __init__(self): #short term sensitivity self.shortS = 1 #there could be better initial value but we'll make do with this self.pacing = 1 return def act(self, state, env, debugrec): #get states time = 288 - env.state[1] - 1 budget = max(0, env.state[0]) desired_ratio = budget / float(time) #print 'budget is: '+ str(budget)
# DDPG training setup: build the pacing environment, read the state/action
# sizes from it, and create the memory buffer and trainer.
from __future__ import division
import numpy as np
import torch
from torch.autograd import Variable
import os
import psutil
import gc
from pacingenv import PacingEnv
import matplotlib.pyplot as plt

import trainddpg
import buffer

env = PacingEnv()
# env = gym.make('Pendulum-v0')

# Run limits and buffer capacity. Only MAX_BUFFER is used in this part of the
# script; the others are presumably consumed by code further down -- confirm.
MAX_EPISODES = 5
MAX_STEPS = 1000
MAX_BUFFER = 1000000
MAX_TOTAL_REWARD = 300

# Sizes of the observation and action vectors, taken from the environment.
S_DIM = env.observation_space.shape[0]
A_DIM = env.action_space.shape[0]
# NOTE(review): A_MAX is the action_space object itself, not a numeric bound;
# confirm that this is what downstream code expects.
A_MAX = env.action_space

# The previous form, `print(label), value`, only showed the value under the
# Python 2 print statement; under Python 3 it printed the label alone and
# discarded the value in a throwaway tuple. Formatting label and value into a
# single string produces the same output on both interpreters (the doubled
# space reproduces the separator the Python 2 print statement inserted).
print(' State Dimensions :-  %s' % (S_DIM,))
print(' Action Dimensions :-  %s' % (A_DIM,))
print(' Action Max :-  %s' % (A_MAX,))

# Buffer capped at MAX_BUFFER entries, shared with the trainer.
ram = buffer.MemoryBuffer(MAX_BUFFER)
trainer = trainddpg.Trainer(S_DIM, A_DIM, ram)