def __init__(self, n_arms):
    """Initialise the environment state: arms, graph and advertiser data.

    Args:
        n_arms: number of arms to draw. Each arm is an integer vector of
            length ``auctionHouse.NB_CATEGORIES`` with entries in
            ``[0, auctionHouse.MAX_BID]``.
    """
    self.dim = auctionHouse.NB_CATEGORIES

    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound of MAX_BID + 1 covers the same inclusive range
    # [0, MAX_BID] and matches how self.bids is drawn below.
    self.arms = np.random.randint(
        0, auctionHouse.MAX_BID + 1, size=(n_arms, self.dim))

    self.graph = graph.Graph()

    # bids of other advertisers: one row per advertiser, one column per
    # category
    self.bids = np.random.randint(
        0, auctionHouse.MAX_BID + 1,
        (auctionHouse.NB_ADVERTISERS, auctionHouse.NB_CATEGORIES))

    # the environment knows the ad quality of the ads
    # (drawn from N(0.5, 0.1), then clipped to [0.1, 0.9])
    self.adQualitiesVector = np.clip(
        np.random.normal(0.5, 0.1, auctionHouse.NB_ADVERTISERS), 0.1, 0.9)

    # the environment also knows the click value for each advertiser
    # (drawn from N(2, 0.5), then clipped to [0.5, 5.0])
    self.valuesOfClick = np.clip(
        np.random.normal(2, 0.5, auctionHouse.NB_ADVERTISERS), 0.5, 5.0)

    self.history = []
    self.learningAdvertiserWonAuctions = []
    # keep track of history of auctions
    self.learningAdvertiserWonAuctionsHistory = []

    # chances for each advertiser to change its bid at each time step
    self.changeProbability = 0.1
import Q1.MonteCarlo as MonteCarlo
import matplotlib.pyplot as plt

# Reward obtained when a click happens.
VALUE_OF_CLICK = 3
# Probability of clicking on the ad, given that it was looked at.
AD_QUALITY = 0.7

# Fixed seed for NumPy's global random generator.
np.random.seed(0)

# Ad qualities of all advertisers, drawn from N(0.5, 0.1) and clipped to
# [0.1, 0.9]. Index 0 is the learning advertiser and is overwritten with
# AD_QUALITY.
AdQualitiesVector = np.random.normal(
    loc=0.5, scale=0.1, size=auctionHouse.NB_ADVERTISERS
).clip(0.1, 0.9)
AdQualitiesVector[0] = AD_QUALITY

### GRAPH
# NOTE(review): from here on the name `graph` refers to this instance and no
# longer to the module it was created from.
graph = graph.Graph()

### BIDS/AUCTIONS
# Randomised bids, one row per advertiser and one column per category
# (may be improved later).
bids = np.random.randint(
    low=0,
    high=auctionHouse.MAX_BID + 1,
    size=(auctionHouse.NB_ADVERTISERS, auctionHouse.NB_CATEGORIES),
)
# The learning advertiser (row 0) starts with every bid at 0.
bids[0, :] = 0

# Bookkeeping used later to draw the evolution of the reward.
rewardHistory = []
rewardHistoryWithoutRollbacks = []
labels = []  # annotations for the plotted points

previousReward = 0  # with all bids at 0, the reward will be 0
currentImprovedCategory = 0  # to jump to 0 at next iteration
import environment.graph as graph
import numpy as np
import time

g = graph.Graph()

# Seed NumPy's global random generator from the current wall-clock time.
np.random.seed(int(time.time()))

max_output = g.nbNodes

## Build a random, undirected connection matrix
connection_matrix = g.generateConnectivityMatrix()
print(connection_matrix)
print(g.activation_probabilities_matrix)

# Update the transition probabilities with a weight of 1 in each of the
# five positions, then show the resulting probability matrix.
g.changeTransitionProbabilities2(
    [1] * 5,
    g.activation_probabilities_matrix,
)
print(g.prob_matrix)

g.display()