def playGame(DDPG_config, train_indicator=1):    #1 means Train, 0 means simply Run
    """Run (or train) a DDPG agent against an OMNeT++ network environment.

    DDPG_config: dict of experiment hyperparameters and settings (keys such as
        'ENV', 'RSEED', 'MU', 'THETA', 'SIGMA', 'BUFFER_SIZE', ... are read below).
    train_indicator: 1 = train the networks, 0 = only run a saved experiment.

    NOTE(review): if train_indicator is negative, `folder` is never assigned and
    the later uses of it will raise UnboundLocalError — presumably callers only
    pass 0 or 1; confirm.
    """
    # SETUP STARTS HERE
    if train_indicator > 0:
        folder = setup_run(DDPG_config)
    elif train_indicator == 0:
        folder = DDPG_config['EXPERIMENT']

    # RSEED == 0 is treated as "no fixed seed" (np.random.seed(None) reseeds
    # from the OS entropy source).
    if DDPG_config['RSEED'] == 0:
        DDPG_config['RSEED'] = None
    np.random.seed(DDPG_config['RSEED'])

    ACTIVE_NODES = DDPG_config['ACTIVE_NODES']

    # Generate an environment
    # NOTE(review): an unrecognized 'ENV' value leaves `env` unassigned and the
    # next line raises UnboundLocalError — config is assumed pre-validated.
    if DDPG_config['ENV'] == 'balancing':
        env = OmnetBalancerEnv(DDPG_config, folder)
    elif DDPG_config['ENV'] == 'label':
        env = OmnetLinkweightEnv(DDPG_config, folder)

    action_dim, state_dim = env.a_dim, env.s_dim

    # Ornstein-Uhlenbeck exploration-noise parameters.
    MU = DDPG_config['MU']
    THETA = DDPG_config['THETA']
    SIGMA = DDPG_config['SIGMA']

    ou = OU(action_dim, MU, THETA, SIGMA)    #Ornstein-Uhlenbeck Process

    BUFFER_SIZE = DDPG_config['BUFFER_SIZE']
    BATCH_SIZE = DDPG_config['BATCH_SIZE']
    GAMMA = DDPG_config['GAMMA']
    EXPLORE = DDPG_config['EXPLORE']
    EPISODE_COUNT = DDPG_config['EPISODE_COUNT']
    MAX_STEPS = DDPG_config['MAX_STEPS']

    # EXPLORE <= 1 is interpreted as a *fraction* of the total number of steps;
    # convert it to an absolute step count for the epsilon decay below.
    if EXPLORE <= 1:
        EXPLORE = EPISODE_COUNT * MAX_STEPS * EXPLORE

    # SETUP ENDS HERE

    reward = 0        # NOTE(review): unused below; kept as-is
    done = False
    wise = False      # early-stop flag; only ever set by the commented-out L2 check
    step = 0
    epsilon = 1       # exploration scale, decayed by 1/EXPLORE every step
    indicator = 0     # NOTE(review): unused below; kept as-is

    #Tensorflow GPU optimization
    # (TF1-style session; allow_growth avoids grabbing all GPU memory up front)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, DDPG_config)
    critic = CriticNetwork(sess, state_dim, action_dim, DDPG_config)
    buff = ReplayBuffer(BUFFER_SIZE)    #Create replay buffer

    # Layer names whose weight drift (L2 norm between consecutive steps) is
    # logged to weightsL2Log.csv — presumably actor (a_*) and critic (c_*)
    # layers; confirm against the network definitions.
    ltm = ['a_h0', 'a_h1', 'a_V', 'c_w1', 'c_a1', 'c_h1', 'c_h3', 'c_V']
    layers_to_mind = {}
    L2 = {}
    for k in ltm:
        layers_to_mind[k] = 0
        L2[k] = 0
    # write the CSV header row (mode 'w' truncates any previous log)
    vector_to_file(ltm, folder + 'weightsL2' + 'Log.csv', 'w')

    #Now load the weight
    # Best-effort: a fresh experiment has no saved weights, so failures are
    # swallowed and training starts from scratch.
    try:
        actor.model.load_weights(folder + "actormodel.h5")
        critic.model.load_weights(folder + "criticmodel.h5")
        actor.target_model.load_weights(folder + "actormodel.h5")
        critic.target_model.load_weights(folder + "criticmodel.h5")
        print("Weight load successfully")
    except:
        print("Cannot find the weight")

    print("OMNeT++ Experiment Start.")

    # initial state of simulator
    s_t = env.reset()
    loss = 0

    for i in range(EPISODE_COUNT):

        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        total_reward = 0

        for j in range(MAX_STEPS):
            print('step ', j)
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            # deterministic action from the current policy
            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))

            # Add OU noise only while training and while epsilon is positive;
            # (step % 1000) // 100 != 9 suppresses noise on steps 900-999 of
            # every 1000 — presumably a periodic noise-free evaluation window.
            if train_indicator and epsilon > 0 and (step % 1000) // 100 != 9:
                noise_t[0] = epsilon * ou.evolve()

            a = a_t_original[0]
            n = noise_t[0]
            # If a+n stays inside (0, 1) use it, otherwise reflect the noise
            # (a-n); clip to [0, 1] either way so the action is always valid.
            a_t[0] = np.where((a + n > 0) & (a + n < 1), a + n, a - n).clip(min=0, max=1)

            # execute action
            s_t1, r_t, done = env.step(a_t[0], j)
            # print(s_t1)
            print('reward ', r_t)

            buff.add(s_t, a_t[0], r_t, s_t1, done)    #Add replay buffer

            scale = lambda x: x    # identity placeholder — hook for state/reward scaling

            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)

            states = scale(np.asarray([e[0] for e in batch]))
            actions = scale(np.asarray([e[1] for e in batch]))
            rewards = scale(np.asarray([e[2] for e in batch]))
            new_states = scale(np.asarray([e[3] for e in batch]))
            dones = np.asarray([e[4] for e in batch])
            y_t = np.zeros([len(batch), action_dim])

            # Q(s', mu'(s')) from the *target* networks (standard DDPG target)
            target_q_values = critic.target_model.predict([new_states, actor.target_model.predict(new_states)])

            # Bellman targets: r for terminal transitions, r + gamma*Q' otherwise
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            # Only update once the buffer can supply a full batch.
            if train_indicator and len(batch) >= BATCH_SIZE:
                loss = critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                # does this give an output like train_on_batch above? NO
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()
                with open(folder + 'lossLog.csv', 'a') as file:
                    file.write(pretty(loss) + '\n')

            total_reward += r_t
            s_t = s_t1

            # Track how much each monitored layer's weights moved this step
            # (L2 norm of the flattened delta), then remember the new weights.
            for layer in actor.model.layers + critic.model.layers:
                if layer.name in layers_to_mind.keys():
                    L2[layer.name] = np.linalg.norm(np.ravel(layer.get_weights()[0]) - layers_to_mind[layer.name])
                    # vector_to_file(np.ravel(layer.get_weights()[0]), folder + 'weights_' + layer.name + 'Log.csv', 'a')
                    layers_to_mind[layer.name] = np.ravel(layer.get_weights()[0])

            # if max(L2.values()) <= 0.02:
            #     wise = True

            if train_indicator and len(batch) >= BATCH_SIZE:
                vector_to_file([L2[x] for x in ltm], folder + 'weightsL2' + 'Log.csv', 'a')

            vector_to_file(a_t_original[0], folder + 'actionLog.csv', 'a')
            vector_to_file(noise_t[0], folder + 'noiseLog.csv', 'a')

            if 'PRINT' in DDPG_config.keys() and DDPG_config['PRINT']:
                print("Episode", "%5d" % i, "Step", "%5d" % step, "Reward", "%.6f" % r_t)
                print("Epsilon", "%.6f" % max(epsilon, 0))
                # Re-shape the flat action vector into an ACTIVE_NODES x
                # ACTIVE_NODES matrix, inserting -1 on the diagonal
                # (a node has no action toward itself).
                att_ = np.split(a_t[0], ACTIVE_NODES)
                for _ in range(ACTIVE_NODES):
                    att_[_] = np.insert(att_[_], _, -1)
                att_ = np.concatenate(att_)
                print("Action\n", att_.reshape(ACTIVE_NODES, ACTIVE_NODES))
                print(max(L2, key=L2.get), pretty(max(L2.values())))

            step += 1
            if done or wise:
                break

            if step % 1000 == 0:    # writes at every 1000 step
                if (train_indicator):
                    # rolling checkpoint + numbered snapshot + architecture JSON
                    actor.model.save_weights(folder + "actormodel.h5", overwrite=True)
                    actor.model.save_weights(folder + "actormodel" + str(step) + ".h5")
                    with open(folder + "actormodel.json", "w") as outfile:
                        outfile.write(actor.model.to_json(indent=4) + '\n')

                    critic.model.save_weights(folder + "criticmodel.h5", overwrite=True)
                    critic.model.save_weights(folder + "criticmodel" + str(step) + ".h5")
                    with open(folder + "criticmodel.json", "w") as outfile:
                        outfile.write(critic.model.to_json(indent=4) + '\n')

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Reward " + str(total_reward))
        print("Total Step: " + str(step))
        print("")

    env.end()  # This is for shutting down
    print("Finish.")
class Traffic():
    """Generator of nodes_num x nodes_num traffic matrices.

    The generation strategy is selected by the ``type`` string (e.g. 'NORM',
    'UNI', 'STAT:<csv values>', 'FILE:<name>', 'DIR:<path>'); ``generate()``
    dispatches on the part before the first ':'.
    """

    def __init__(self, nodes_num, type, capacity):
        """
        nodes_num: number of network nodes (matrix is nodes_num x nodes_num).
        type: strategy selector, optionally with a ':'-separated argument.
        capacity: nominal per-node capacity; rescaled by n/(n-1) because the
            diagonal (self-traffic) carries no real load.
        """
        self.nodes_num = nodes_num
        self.prev_traffic = None
        self.type = type
        self.capacity = capacity * nodes_num / (nodes_num - 1)
        # Dispatch table: strategy keyword -> bound generator method.
        self.dictionary = {}
        self.dictionary['NORM'] = self.normal_traffic
        self.dictionary['UNI'] = self.uniform_traffic
        self.dictionary['CONTROLLED'] = self.controlled_uniform_traffic
        self.dictionary['EXP'] = self.exp_traffic
        self.dictionary['OU'] = self.ou_traffic
        self.dictionary['STAT'] = self.stat_traffic
        self.dictionary['STATEQ'] = self.stat_eq_traffic
        self.dictionary['FILE'] = self.file_traffic
        self.dictionary['DIR'] = self.dir_traffic
        self.dictionary['STATIC'] = self.static_traffic
        if self.type.startswith('DIR:'):
            # Pre-sort directory entries in natural (human) order; dir_traffic
            # consumes them one per call.
            self.dir = sorted(listdir(self.type.split('DIR:')[-1]), key=lambda x: natural_key((x)))
        self.static = None  # lazy cache for STAT/STATEQ/FILE strategies
        # OU processes: one for total volume, one per node pair.
        self.total_ou = OU(1, self.capacity/2, 0.1, self.capacity/2)
        self.nodes_ou = OU(self.nodes_num**2, 1, 0.1, 1)

    def normal_traffic(self):
        """Random total volume ~ N(capacity/2, capacity/2), split by softmax."""
        t = np.random.normal(self.capacity/2, self.capacity/2)
        return np.asarray(t * softmax(np.random.randn(self.nodes_num, self.nodes_num))).clip(min=0.001)

    def uniform_traffic(self):
        """Random total volume ~ U(0, 1.25*capacity), split by softmax."""
        t = np.random.uniform(0, self.capacity*1.25)
        return np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)

    def controlled_uniform_traffic(self):
        """Like uniform_traffic, but resamples exactly one randomly chosen
        entry per call, keeping the rest of the previous matrix."""
        t = np.random.uniform(0, self.capacity*1.25)
        if self.prev_traffic is None:
            self.prev_traffic = np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)
        # One-hot choice mask: a single 1 among nodes_num**2 candidates.
        dist = [1]
        dist += [0]*(self.nodes_num**2 - 1)
        ch = np.random.choice(dist, [self.nodes_num]*2)
        tt = np.multiply(self.prev_traffic, 1 - ch)    # keep unselected entries
        nt = np.asarray(t * softmax(np.random.uniform(0, 1, size=[self.nodes_num]*2))).clip(min=0.001)
        nt = np.multiply(nt, ch)                       # fresh value for the selected entry
        self.prev_traffic = tt + nt
        return self.prev_traffic

    def exp_traffic(self):
        """Outer product of exponential samples, rescaled to a random
        exponential mean; diagonal entries are marked -1 (self-traffic)."""
        a = np.random.exponential(size=self.nodes_num)
        b = np.random.exponential(size=self.nodes_num)
        T = np.outer(a, b)
        np.fill_diagonal(T, -1)
        T[T!=-1] = np.asarray(np.random.exponential()*T[T!=-1]/np.average(T[T!=-1])).clip(min=0)
        return T

    def static_traffic(self):
        """All-zero traffic matrix."""
        a = np.full([self.nodes_num]*2, 0, dtype=float)
        # BUG FIX: the original built `a` but fell off the end, returning None,
        # so the 'STATIC' strategy produced no matrix at all.
        return a

    def stat_traffic(self):
        """Constant matrix parsed once from 'STAT:v1,v2,...' and cached."""
        if self.static is None:
            string = self.type.split('STAT:')[-1]
            v = np.asarray(tuple(float(x) for x in string.split(',')[:self.nodes_num**2]))
            M = np.split(v, self.nodes_num)
            self.static = np.vstack(M)
        return self.static

    def stat_eq_traffic(self):
        """Constant matrix with every entry equal to the 'STATEQ:' value."""
        if self.static is None:
            value = float(self.type.split('STATEQ:')[-1])
            self.static = np.full([self.nodes_num]*2, value, dtype=float)
        return self.static

    def ou_traffic(self):
        """Temporally correlated traffic: OU total volume split by a softmax
        over per-pair OU processes."""
        t = self.total_ou.evolve()[0]
        nt = t * softmax(self.nodes_ou.evolve())
        i = np.split(nt, self.nodes_num)
        return np.vstack(i).clip(min=0.001)

    def file_traffic(self):
        """Constant matrix loaded once from 'traffic/<FILE: arg>' (CSV)."""
        if self.static is None:
            fname = 'traffic/' + self.type.split('FILE:')[-1]
            v = np.loadtxt(fname, delimiter=',')
            self.static = np.split(v, self.nodes_num)
        return self.static

    def dir_traffic(self):
        """Load the next '.txt' matrix from the pre-sorted directory listing.

        Returns False when the listing is exhausted (callers use this as an
        end-of-data sentinel).
        """
        while len(self.dir) > 0:
            tm = self.dir.pop(0)
            if not tm.endswith('.txt'):
                continue
            fname = self.type.split('DIR:')[-1] + '/' + tm
            v = np.loadtxt(fname, delimiter=',')
            return np.split(v, self.nodes_num)
        return False

    def generate(self):
        """Produce the next traffic matrix using the configured strategy."""
        return self.dictionary[self.type.split(":")[0]]()