Example #1
def runner(filename, tracking=True):
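    # build an environment for the file, run it to collect samples, then drop it and return the sampler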
    play = env(filename, specs, sizes, exploror_sample_size, keep_history=True)
    sampler = play.run_for_data()
    if tracking:
        print(play.file_id, end=' ', flush=True)
    del play
    return sampler
Example #2
def init_infer_env(self, params):
    self.infer_environment = env(params, 'infer')
    self.rev_relation_vocab = self.infer_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.infer_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
Example #3
def init_dev_env(self, params):
    self.dev_test_environment = env(params, 'dev')
    self.test_environment = self.dev_test_environment
    self.rev_relation_vocab = self.test_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.test_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
Example #4
    def __init__(self, num_of_asset=10):
        self.num_of_asset = num_of_asset

        self.env = environment.env(train=0, number_of_asset=num_of_asset)
        self.w = np.ones(self.num_of_asset, np.float32) / self.num_of_asset

        self.UCRP_deque = deque()
        self.UBAH_deque = deque()
Example #5
    def __init__(self, params):

        # transfer parameters to self
        for key, val in params.items():
            setattr(self, key, val)

        self.agent = Agent(params)
        self.save_path = None
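        # build separate train/dev/test environments; evaluation uses the dev split by default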
        self.train_environment = env(params, 'train')
        self.dev_test_environment = env(params, 'dev')
        self.test_test_environment = env(params, 'test')
        self.test_environment = self.dev_test_environment
        self.rev_relation_vocab = self.train_environment.grapher.rev_relation_vocab
        self.rev_entity_vocab = self.train_environment.grapher.rev_entity_vocab
        self.max_hits_at_10 = 0
        self.ePAD = self.entity_vocab['PAD']
        self.rPAD = self.relation_vocab['PAD']
        # optimize
        self.baseline = ReactiveBaseline(l=self.Lambda)
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
Example #6
def main():
    f = open(map_path)
    zone = env(f)
    bond = Agent(zone)
    current_time = 0
    calculus = Evaluator()

    while current_time < life_time:
        run(zone, bond, calculus)
        current_time += 1
Example #7
def main():
	f = open(map_path)
	zone = env(f)
	bond = Agent(zone)
	current_time = 0
	calculus = Evaluator()

	while current_time < life_time:
		run(zone,bond,calculus)
		current_time+=1
Example #8
def init_train_env(self, params):
    self.train_environment = env(params, "train")
    self.rev_relation_vocab = self.train_environment.grapher.rev_relation_vocab
    self.rev_entity_vocab = self.train_environment.grapher.rev_entity_vocab
    self.max_hits_at_10 = 0
    self.ePAD = self.entity_vocab['PAD']
    self.rPAD = self.relation_vocab['PAD']
    # optimize
    self.baseline = ReactiveBaseline(l=self.Lambda)
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
Example #9
def __init__(self):
    self.num_episodes = 5000
    self.epilson = 0.99
    self.decay_epilson = 0.9995
    self.Q = np.zeros([5, 460, 4])
    self.goal_num = 4
    self.lr = 0.001
    self.y = 0.9
    self.env = environment.env()
    self.path = []
    self.visit_state = []
    self.final_state = 0
    self.showmap = []
Example #10
def main():
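    # gather rendering settings via environment.require / environment.env, then render the tiles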
    mapnikConfiguration = environment.require('MAPNIK_CONFIGURATION')
    logger.info('Using Mapnik configuration file %s', mapnikConfiguration)

    bbox = determineBoundingBox(environment.require('BBOX'))
    logger.info('Using bounding box %s', bbox)

    numThreads = int(environment.env('NUM_THREADS', 6))
    logger.info('Using %s threads', numThreads)

    minZoom = int(environment.env('MIN_ZOOM', 12))
    maxZoom = int(environment.env('MAX_ZOOM', 12))
    logger.info('Processing zoom levels [%s, %s]', minZoom, maxZoom)

    tmsScheme = environment.env('TMS_SCHEME', 'false') == 'true'
    if tmsScheme:
        logger.info('Using TMS scheme')

    skipIfExists = environment.env('SKIP_IF_EXISTS', 'true') != 'false'
    if skipIfExists:
        logger.info('Skipping tile generation if tile exists')

    renderTiles(bbox, mapnikConfiguration, OUTPUT_PATH, minZoom, maxZoom, numThreads, tmsScheme, skipIfExists)
Example #11
def __init__(self):
    self.gameStart = False
    self.status = False
    self.reward = 0
    super(view, self).__init__()
    self.n_actions = 361    # number of possible actions
    self.n_features = 361
    self.doneList = []
    self.allphoto = []
    self.initView()
    self.env = env()
    self.wobservation = None
    self.wobservation_ = None
    self.action1 = None
    self.RL = DeepQNetwork(self.n_actions, self.n_features)
Example #12
    def initialize_inference_env(self, params):
        # transfer parameters to self
        for key, val in params.items():
            setattr(self, key, val)

        self.agent = Agent(params)
        self.save_path = None

        self.inference_environment = env(params, 'inference')
        self.rev_relation_vocab = self.inference_environment.grapher.rev_relation_vocab
        self.rev_entity_vocab = self.inference_environment.grapher.rev_entity_vocab
        self.max_hits_at_10 = 0
        self.ePAD = self.entity_vocab['PAD']
        self.rPAD = self.relation_vocab['PAD']
        # optimize
        self.baseline = ReactiveBaseline(l=self.Lambda)
Example #13
def interactive_play(agent):
    input_code = input(
        'Please enter an input code, any pattern between 0000 and 5555')
    agent.reset_possible_states()
    guess = agent.get_best_action()
    envi = env(input_code)
    print(f"initial guess = {guess}")
    u = input('Press enter to let q-learning agent make the next guess')
    while guess != input_code:
        feedback = env.score(input_code, guess)
        agent.restrict_possible_states(guess, feedback)
        guess = agent.get_best_action()
        print(f"Next guess = {guess}")
        u = input()
    if guess == input_code:
        print("mastermind level maxx, guess is right!")
Example #14
def train(agent, n_episodes):
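    # each episode: draw a random secret code, then guess and learn from feedback until the code is found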
    for _ in range(n_episodes):
        input_code = env._number_from_index(random.randint(0, 6**4 - 1))
        envi = env(input_code)
        agent.reset_possible_states()
        action = agent.random_action()  # init action

        if action == input_code:  # if init guess is correct skip this episode
            continue

        run = True
        while run:
            feedback = env.get_feedback(action)
            reward = env.reward(action)
            agent.learn_from_move(action, feedback, reward)
            if action == input_code:
                break  # correct guess stop episode
            else:
                action = agent.random_action()  # else next guess
Example #15
def samplex_objective_function(type):
    assert type in ["facet", "random"]
    env().model_gen_options["objf choice"] = type
Example #16
def samplex_add_noise(n=True):
    env().model_gen_options["add noise"] = n
Example #17
def samplex_random_seed(s):
    env().model_gen_options["rngseed"] = s
Example #18
def samplex_add_noise(n=True):
    env().model_gen_options['add noise'] = n
Example #19
def samplex_solution_type(type):
    assert type in ["vertex", "interior", "CLT", "CLTvertex"]
    env().model_gen_options["solution type"] = type
Example #20
    def train(self):
        """
        Implement your training algorithm here
        """
        ###########################
        # YOUR IMPLEMENTATION HERE #

        # reward_buffer = deque([])
        current_loss = 0.0
        mean_reward = 0.0
        for i_episode in range(NUM_EPISODES):
            # Initialize the environment and state
            # self.env.reset()
            # last_screen = get_screen()
            # current_screen = get_screen()
            state = self.env.reset()
            # state = np.transpose(state,(2,0,1)) #New
            # state = torch.tensor([state])
            episode_Reward = 0.0
            for t in range(EPISODE_STEP_LIMIT):
                # Render here
                # self.env.env.render()
                self.steps_done += 1

                action = self.make_action(state, False)
                # 'Transition',('state', 'action', 'next_state', 'reward', 'done'))

                next_state, reward, done, _ = self.env.step(action)
                episode_Reward += reward

                state = np.transpose(state, (2, 0, 1))  #New
                next_state = np.transpose(next_state, (2, 0, 1))
                self.transition = (state, action, next_state, reward, done)
                self.push()

                # Move to the next state
                state = next_state

                # self.env.render()

                # Update the target network, copying all weights and biases in DQN
                # print("Steps : ",steps_done)
                if self.steps_done % TARGET_UPDATE == 0:
                    print("**********Updating Target********")
                    self.target_net.load_state_dict(
                        self.policy_net.state_dict())

                # Perform one step of the optimization (on the target network)
                # optimize step start
                # print("Memory Size", len(self.memory))
                # print("Completed 10,000 steps")
                if len(self.memory) > 10000 and len(self.memory) % 4 == 0:
                    if self.flag == 0:
                        print("Crossed 10000")
                        self.flag = 1

                    batch = self.replay_buffer(BATCH_SIZE)

                    # 'Transition',('state', 'action', 'next_state', 'reward', 'done'))
                    state_batch = torch.from_numpy(np.asarray(batch[0]))
                    action_batch = torch.from_numpy(np.asarray(batch[1]))
                    next_state_batch = torch.from_numpy(np.asarray(batch[2]))
                    reward_batch = torch.from_numpy(np.asarray(
                        batch[3])).to(device)
                    done_batch = torch.from_numpy(np.asarray(
                        batch[4])).to(device)

                    state_action_values = self.policy_net(
                        state_batch.to(device)).gather(
                            1, action_batch[:, None].to(device)).squeeze(1)

                    q_max = self.target_net(
                        next_state_batch.to(device)).max(1)[0].detach()

                    q_max[done_batch] = 0

                    expected_state_action_values = (
                        q_max) * GAMMA + reward_batch
                    #print (state_action_values.double().size())

                    #print (expected_state_action_values.double().size())
                    loss = F.smooth_l1_loss(
                        state_action_values.double(),
                        expected_state_action_values.double())

                    current_loss = loss
                    # print("Episode : ", i_episode, ", iteration : ",t, " Loss :  ", current_loss, " Steps : ", steps_done," Epsilon : ", self.eps_threshold, " Mean Reward : ", mean_reward)

                    # optimize the model
                    self.optimizer.zero_grad()
                    loss.backward()

                    self.optimizer.step()

                if done:
                    if len(self.reward_buffer) >= REWARD_BUFFER_SIZE:
                        self.reward_buffer.pop(0)
                    self.reward_buffer.append(episode_Reward)
                    mean_reward = np.mean(self.reward_buffer)
                    break

            if (i_episode % 500 == 0):
                env2 = env('BreakoutNoFrameskip-v4',
                           self.args,
                           atari_wrapper=True,
                           test=True)
                test(self, env2, total_episodes=100)
                writer.add_scalar('Test Mean Reward', self.test_mean_reward,
                                  i_episode)
                if self.test_mean_reward > self.max_reward_so_far:
                    torch.save(self.policy_net.state_dict(),
                               "best_weights_model.pt")
                    self.max_reward_so_far = self.test_mean_reward

            writer.add_scalar('Train Mean Reward', mean_reward, i_episode)
            writer.add_scalar('Training LOSS', current_loss, i_episode)

            # To calculate mean reward
            if i_episode % 100 == 0:
                # print("*****************")
                print("TRAIN Mean Reward after ", i_episode, " episodes is ",
                      mean_reward, " Epsilon ", self.eps_threshold)
            if i_episode % 500 == 0:
                torch.save(self.policy_net.state_dict(), "saved_model.pt")
                print("Saved Model after ", i_episode, " episodes")
        self.env.env.close()
        self.writer.close()
Example #21
parser = argparse.ArgumentParser()
parser.add_argument("--train", help="path of your actual train model")
parser.add_argument("--save",
                    default='new_policy_net.pth',
                    required=True,
                    help="path of your new train model")
parser.add_argument("--resolution",
                    default='1920x1080',
                    required=True,
                    help="insert your monitor 0 resolution")
args = parser.parse_args()
input_resolution = args.resolution.split('x')
path_save = args.save

resolution = [int(input_resolution[0]), int(input_resolution[1])]
env = environment.env(resolution)
time.sleep(3)

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0
Example #22
#hyperparameters
input_day_size = 50
filter_size = 3
num_of_feature = 4
num_of_asset = 8
num_episodes = 10000 if is_train == 1 else 1
money = 1e+8

#saving
save_frequency = 100
save_path = './weights'
save_model = 1
load_model = 1
selecting_random = True
if is_train == 0:
    env = environment.env(train=0, number_of_asset=num_of_asset)
    load_model = 1
    selecting_random = False
else:
    env = environment.env(number_of_asset=num_of_asset)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True

a_loss_sum = 0
s_loss_sum = 0

sess = tf.Session(config=config)

with tf.variable_scope('ESM'):
    selector = network.select_network(sess)
Example #23
import numpy as np

import epi_model as em
from environment import env
import agent

world = env()
player = agent.Agent(8, 5, 0.0005, 0.99)
# player.load()

n_runs = 500
losses = []

for i in range(n_runs):
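    # one episode: act, record each transition, and learn at every step; track the episode return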
    done = False
    loss = 0.0
    obs = world.reset()

    while not done:
        action = player.act(obs)
        next, reward, done, info = world.step(action)
        loss += reward
        player.record(obs, next, action, reward, done)
        obs = next
        player.learn()

    losses.append(loss)
    avg_loss = np.mean(losses[max(0, i - 100):(i + 1)])
    print("run: ", i, " loss: %i" % int(loss), " avg: %i" % int(avg_loss))

    if i % 10 == 9:
Example #24
import pygame
from time import sleep
import model
import environment
import patrol_learning_grid

patrol_env = environment.env()
f = open("./data/좌표1.txt", 'r')
lines = f.readlines()
f.close()
game = [[0 for j in range(50)] for i in range(50)]
temp_game = []
for line in lines:
    temp_game.append(list(map(int, line.split())))
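# mark the cells listed in the file with -1 (row order reversed; file rows 0 and 1 are skipped)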
k = 0
for i in range(49, 1, -1):
    for j in range(50):
        if j in temp_game[i]:
            game[k][j] = -1
    k = k + 1
n = 50  # represents no. of side squares(n*n total squares)
scrx = n * 15
scry = n * 15
background = (51, 51, 51)  # used to clear screen while rendering
screen = pygame.display.set_mode(
    (scrx, scry))  # creating a screen using Pygame
colors = [(51, 51, 51) for i in range(n**2)]
reward = patrol_env.goal_state
terminals = []
path = list(patrol_learning_grid.main())
Example #25
            plt.imshow(copy_maze)
            plt.pause(0.1)
            plt.clf()

            if agent_.env.maze.ravel()[s_] != 0:
                break


if __name__ == '__main__':

    train_settings()

    seed()

    brain_ = brain(size=args.arena_size, gamma=0.9, l_r=0.9)
    env_ = env(size=args.arena_size, cat_r=[-10, -20], cheese_r=[10, 20])

    agent_ = agent(env=env_, brain=brain_)

    plt.imshow(env_.maze)
    plt.pause(1)

    for i in range(args.random_steps):

        agent_.step()

        if i % 10 == 0:

            plt.imshow(agent_.brain.q_mat)
            plt.pause(0.01)
            plt.clf()
Example #26
import random as rn
import numpy as np
from environment import env
import brain
from DQN import DQN

### Setting the hyperparameters

max_memory = 3000
epochs = 1000
batch_size = 128
eps = 0.3
numb_actions = 5
direction_boundary = (numb_actions - 1) / 2  ### it will be the action corresponding to "do nothing"
temp_incr = 1.5  ### the difference of temperature between each action
### Creation of the environment
env = env(nb_users_ini = 20, data_transfer_ini = 30, starting_month = 0)
### Creation of the brain
brain = brain.NN(nb_actions = numb_actions)
model = brain.model
### Creation of the memory of the DQN Agent
DQN = DQN()

if env.train:
    previous_loss = 0
    patience = 0
    for epoch in range(0,epochs):
        loss = 0
        time_step = 0
        game_over = False
        total_reward = 0
        new_month = np.random.randint(0,12)
Example #27
def samplex_random_seed(s):
    env().model_gen_options['rngseed'] = s
Example #28
def samplex_objective_function(type):
    assert type in ['facet', 'random']
    env().model_gen_options['objf choice'] = type
Example #29
def samplex_solution_type(type):
    assert type in ['vertex', 'interior']
    env().model_gen_options['solution type'] = type
Example #30
def samplex_objective_function(type):
    assert type in ['facet', 'random']
    env().model_gen_options['objf choice'] = type
Example #31
"""
@author: pranavmanjunath
"""

import time
import matplotlib.pyplot as plt
import random
import pandas as pd
import numpy as np
import networkx as nx
import pylab
import environment


df, data, myvalue, rewards_test, environment_rows1, environment_columns1 = environment.env()


def get_next_action(current_row_index, epsilon, path):

    a = rewards_test[current_row_index]

    indices = []
    # m looks at the q values
    m = []
    max_values = []
    for i in range(26):
        if a[0, i] != 0:
            if i != path[len(path) - 2]:
                indices.append(i)
Example #32
parser = argparse.ArgumentParser()
parser.add_argument("--train", help="path of your actual train model")
parser.add_argument("--test", help="path of your actual train model")
parser.add_argument("--save",
                    default='new_policy_net.pth',
                    help="path of your new train model")
parser.add_argument("--resolution",
                    default='1920x1080',
                    help="insert your monitor 0 resolution")
args = parser.parse_args()
input_resolution = args.resolution.split('x')
path_save = args.save

resolution = [int(input_resolution[0]), int(input_resolution[1])]
env = environment.env(resolution, noise=True, noiseType='gauss')

print("Go to the game screen!")
for i in tqdm(range(100)):
    time.sleep(0.03)

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
Example #33
#hyperparameters
input_day_size = 50
filter_size = 3
num_of_feature = 4
num_of_asset = 8
num_episodes = 5000
money = 1e+8

#saving
save_frequency = 100
save_path = './weights/AAM/m_'
save_model = 1
load_model = 0
selecting_random = True

env = environment.env(number_of_asset=num_of_asset)
env_val = environment.env(number_of_asset=num_of_asset, train=0, validation=1)

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True

a_loss_sum = 0

sess = tf.Session(config=config)

with tf.variable_scope('AAM'):
    allocator = network.policy(sess, num_of_asset=num_of_asset)
#with tf.variable_scope('ESM'):
#    selector = network.select_network(sess)

sess.run(tf.global_variables_initializer())
Example #34
    return boundingBoxes


def mapDimensions(dimensions):
    width, height = dimensions
    print('Rendering map with page width paper size (%s m × %s m)' % (width, height))

    # Dots per inch (1 point = 1/72 inch, see https://pycairo.readthedocs.io/en/latest/reference/surfaces.html#class-pdfsurface-surface)
    dpi = 72
    # Dots per m
    dpm = dpi * 100 / 2.54

    mapWidth = int(width * dpm)
    mapHeight = int(height * dpm)

    return mapWidth, mapHeight


if __name__ == '__main__':
    boundingBox = determineBoundingBox(environment.require('BBOX'))
    pageOverlap = determinePageOverlap(environment.env('PAGE_OVERLAP', '5%'))
    # Default: 1 cm on the map is 1.5 km in the world
    scale = determineScale(environment.env('SCALE', '1:150000'))
    printPaperWidth, printPaperHeight = rotatePaper(
        determinePaperDimensions(environment.env('PAPER_SIZE', 'A4')),
        determineOrientation(environment.env('PAPER_ORIENTATION', ORIENTATION_PORTRAIT))
    )

    for bbox in boundingBoxes(boundingBox, pageOverlap, scale, (printPaperWidth, printPaperHeight)):
        print('%s:%s:%s:%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy))
Example #35
import environment

if environment.env() == "production":
    from .production import *
elif environment.env() == "development":
    from .development import *
Example #36
def samplex_solution_type(type):
    assert type in ['vertex', 'interior', 'CLT', 'CLTvertex']
    env().model_gen_options['solution type'] = type
Example #37
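            # Q-learning TD update: move Q[state][action] toward reward + discount_factor * max over actions of Q[next_state]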
            td_target = reward + discount_factor * Q[next_state][
                best_next_action]
            td_delta = td_target - Q[state][action]
            Q[state][action] += alpha * td_delta

            if done:
                break

            state = next_state
    return Q


if __name__ == '__main__':
    if sys.argv[1] == "--h":
        print("USAGE: python q_learning.py --<demo or train>")
        print("--demo  => run the demo app using CliffWalkingEnv")
        print("--train => run the demo app using RL")
        sys.exit()
    elif sys.argv[1] == "--train":
        n_episode = 5000
        env = env(n_episode)
        print("0:", datetime.datetime.now())
        Q = q_learning(env, n_episode)
        print("Q value: ", Q)
    elif sys.argv[1] == "--demo":
        from cliff_walking import CliffWalkingEnv
        n_episode = 5000
        print("Demo Training RUN")
        env = CliffWalkingEnv()
        Q = demo_q_learning(env, n_episode)
        print("Q value: ", Q)
Example #38
def samplex_stride(s=1):
    env().model_gen_options['stride'] = s