def __init__(self):
    """Initialize empty editor state with a single default parameter set."""
    # list of ParameterSet objects managed by this object
    self.paramSets = [parameters.ParameterSet()]
    # index into paramSets of the set currently selected/edited
    self._current_set = 0
    # path of the currently loaded file; None until a file is opened/saved
    self.filename = None
    # path to the chemistry database associated with the case, if any
    self.chemistry_db_path = None
    # undo/redo action handles; presumably wired up later by the UI layer
    # -- TODO confirm against the rest of the class
    self._undo_action = None
    self._redo_action = None
def act(self, action):
    """Advance the environment one step.

    Applies the caller-supplied action to the learner's paddle, drives the
    opposing paddle with a freshly constructed scripted bot controller,
    updates all entities, and returns the score component of the reward.
    """
    opponent_paddle = self.paddles[1]
    opponent_controller = EnvPongDraft.PaddleBot_Controller(
        self, opponent_paddle, params.ParameterSet({}))
    # slot 0: external action; slot 1: scripted opponent's choice
    joint_actions = [action, [1, 0, 0]]
    joint_actions[1] = opponent_controller.act()
    self.updateEntities(joint_actions)
    score_reward, hit_reward = self.detect_events()
    # hit_reward is deliberately ignored for now
    # (earlier variant: score_reward + 0.5 * hit_reward)
    return score_reward
def _run_task_episode(game, learner_controller, bot_controller, actions,
                      cum_reward, cum_hits, interm_hits,
                      players_range, reward_types_range,
                      max_exchanges, counter_attr):
    """Shared simulation loop (was duplicated four times in main()).

    Steps `game` until the counter attribute named by `counter_attr`
    exceeds `max_exchanges`, accumulating rewards and paddle hits in
    place into cum_reward / cum_hits / interm_hits.
    """
    ball_exchange_counter = 0
    while ball_exchange_counter <= max_exchanges:
        actions[0] = learner_controller.act()
        actions[1] = bot_controller.act()
        # observation holds the image data from the screen; unused here
        reward, hits, observation = game.step(actions)
        for i, j in itertools.product(players_range, reward_types_range):
            cum_reward[i][j] += reward[i][j]
        for i in players_range:
            cum_hits[i] += hits[i]
            interm_hits[i] += hits[i]
        # 'ball_cross_counter' for single-task runs,
        # 'task_iter_counter' for per-task episodes in switch mode
        ball_exchange_counter = getattr(game, counter_attr)


def main():
    """Run the Pong experiment selected by the module-level TASK_MODE.

    'classic' -- learner plays catch against a random bot for
                 NUM_BALL_EXCHANGE ball exchanges.
    'avoid'   -- learner avoids the ball against a random bot.
    'switch'  -- multi-task: alternate (SWITCH_MODE == 'alternate') or
                 randomly choose (SWITCH_MODE == 'random') between the two
                 tasks for CYCLES episodes, each lasting a random number of
                 exchanges in [MIN_BALL_EXCHANGE, MAX_BALL_EXCHANGE].

    Prints cumulative reward/hit statistics and returns None.
    """
    dummy_params = params.ParameterSet({})
    PongGame = EnvPongDraft.EnvPong()
    paddle_player = PongGame.paddles[0]
    paddle_bot = PongGame.paddles[1]
    # one controller per (paddle, behavior) combination; some are kept even
    # if unused below in case construction has side effects on the game
    # -- TODO confirm and prune
    paddle_learner_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_rand_controller = EnvPongDraft.PaddleBotRandom_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_bot_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_rand_controller = EnvPongDraft.PaddleBotRandom_Controller(
        PongGame, paddle_bot, dummy_params)

    actions = [[0, 0, 0], [0, 0, 0]]
    # cumulative reward (pos, neg) for two players (learning agent, opponent)
    cum_reward = [[0, 0], [0, 0]]
    # cumulative hits for learner and opponent paddle
    cum_hits = [0, 0]
    # per-task-episode hit counters for learner and opponent paddle
    interm_hits = [0, 0]
    PLAYERS = 2
    REWARD_TYPES = 2
    PLAYERS_RANGE = range(PLAYERS)
    REWARD_TYPES_RANGE = range(REWARD_TYPES)
    cycle_counter = 0

    # TODO: simulate full task switch procedure: draw switch interval from a
    # uniform range; decide the right progress measure (paddle hits work for
    # classic, ball middle-line crossings may work for both; hits are not a
    # helpful measure for avoid). Controller switching is kept outside the
    # env class, in this experiment driver.

    if TASK_MODE == 'classic':
        # active-task flags: [classic, avoid]
        PongGame.taskActive = [True, False]
        print('EXECUTING CLASSIC Pong.')
        _run_task_episode(PongGame,
                          paddle_learner_catch_controller,
                          paddle_bot_rand_controller,
                          actions, cum_reward, cum_hits, interm_hits,
                          PLAYERS_RANGE, REWARD_TYPES_RANGE,
                          NUM_BALL_EXCHANGE, 'ball_cross_counter')
    elif TASK_MODE == 'avoid':
        PongGame.taskActive = [False, True]
        print('EXECUTING AVOID Pong.')
        _run_task_episode(PongGame,
                          paddle_learner_avoid_controller,
                          paddle_bot_rand_controller,
                          actions, cum_reward, cum_hits, interm_hits,
                          PLAYERS_RANGE, REWARD_TYPES_RANGE,
                          NUM_BALL_EXCHANGE, 'ball_cross_counter')
    elif TASK_MODE == 'switch':
        print(
            'EXECUTING Multi Task Pong (switching between classic and avoid).')
        # start inverted: the first switch flips this to classic
        PongGame.taskActive = [False, True]
        while cycle_counter < CYCLES:
            print('Cycle: ', cycle_counter)
            # BUGFIX: was a bare `print` statement -- a no-op in Python 3;
            # a blank separator line was clearly intended
            print()
            # reset the per-task ball exchange counter
            PongGame.task_iter_counter = 0
            # switch task: only one task at a time is flagged True
            # (!! TEMPORARY solution for two tasks only !!)
            if SWITCH_MODE == 'alternate':
                # invert the flags to alternate tasks
                PongGame.taskActive[:] = [not t for t in PongGame.taskActive]
            elif SWITCH_MODE == 'random':
                PongGame.taskActive = [False, False]
                idx = random.randint(0, 1)
                PongGame.taskActive[idx] = True
            # index of the active task (the single True flag)
            task_id = PongGame.taskActive.index(True)
            # draw episode length from [MIN_BALL_EXCHANGE, MAX_BALL_EXCHANGE]
            task_ball_exchange = random.randint(MIN_BALL_EXCHANGE,
                                                MAX_BALL_EXCHANGE)
            print('Task switching.')
            print('Ball exchanges to perform: ', task_ball_exchange)
            if task_id == 0:
                # classic on
                print('Switching to CLASSIC.')
                print('PongGame.taskActive : ', PongGame.taskActive)
                _run_task_episode(PongGame,
                                  paddle_learner_catch_controller,
                                  paddle_bot_rand_controller,
                                  actions, cum_reward, cum_hits, interm_hits,
                                  PLAYERS_RANGE, REWARD_TYPES_RANGE,
                                  task_ball_exchange, 'task_iter_counter')
            elif task_id == 1:
                # avoid on
                print('Switching to AVOID.')
                print('PongGame.taskActive : ', PongGame.taskActive)
                _run_task_episode(PongGame,
                                  paddle_learner_avoid_controller,
                                  paddle_bot_catch_controller,
                                  actions, cum_reward, cum_hits, interm_hits,
                                  PLAYERS_RANGE, REWARD_TYPES_RANGE,
                                  task_ball_exchange, 'task_iter_counter')
            cycle_counter += 1
            print('---------------')

    print('---------------')
    print('Finished.')
    print('Cumulated reward: ', cum_reward)
    print('Cumulated hits: ', cum_hits)
    print('Class internal.')
    print('Cumulated reward : ', PongGame.total_reward)
    print('Cumulated hits : ', PongGame.total_hits)
    print('Ball crossings : ', PongGame.ball_cross_counter)
    print('Total ball crossings : ', sum(PongGame.ball_cross_counter))
    return
# create logfile recording time in seconds for different simulation steps # (initialization, parameters, simulation etc.) tic = time() # import main parameters dictionary for simulation from example_parallel_network_parameters import PSET # modify parameters accd. to parameterspace id ps_id OUTPUT = 'output' JOBDIR = 'jobs' PSETDIR = 'parameters' # get parameterset id, and load corresponding parameterset file ps_id = sys.argv[-1] pset = ps.ParameterSet(os.path.join(PSETDIR, ps_id + '.txt')) # patch up main ParameterSet object with values from ParameterSpace PSET = ps.ParameterSet(PSET.copy()) PSET.update(pset) # compute dipole moment PSET.COMPUTE_P = PSET.COMPUTE_LFP # record population contributions to extracellular signals PSET.rec_pop_contribution = PSET.COMPUTE_LFP # compute ECoG PSET.COMPUTE_ECOG = PSET.COMPUTE_LFP # set reference network size
# NOTE(review): this chunk begins mid-method -- the lines below are the tail
# of an evaluation routine whose `def` lies outside this view.
result = mean(losses)
log_model(self.model, "eval", type="loss", loss=result,
          total=sum(totals), ntrain=self.model.META["ntrain"])
# cache the latest evaluation loss on the model's metadata dict
self.model.META["loss"] = result
return result


def train_dataset(self, dataset, ntrain=100000, options=None, batch_size=None):
    # Wrap the dataset in a shuffling DataLoader and delegate to
    # train_dataloader (defined outside this view).
    loader = torchdata.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return self.train_dataloader(loader, ntrain=ntrain, options=options)


def evaluate_dataset(self, dataset, classification=False, batch_size=200):
    # Evaluation uses a non-shuffling loader for a reproducible ordering.
    loader = torchdata.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return self.evaluate_dataloader(loader, classification=classification)


# default hyperparameter search space: log-uniform learning rate and
# quantized log-uniform batch size
default_parameters = params.ParameterSet(
    params.LogParameter("lr", 1e-6, 1e2),
    params.QuantizedLogParameter("batch_size", 5, 500)
)


def strpar(p):
    # Render a parameter mapping as "k=v k=v ..." with compact formatting.
    def f(x):
        # floats and very large ints in scientific notation; everything
        # else truncated to the first 10 characters of str()
        if isinstance(x, float) or (isinstance(x, int) and x >= 1000000):
            x = "%.2e" % x
        else:
            x = str(x)[:10]
        return x
    return " ".join(
        ["{}={}".format(f(k), f(v)) for k, v in p.items()])


# NOTE(review): header only -- the body of plot_log continues past this chunk.
def plot_log(log, ax=None, value="loss", key="ntrain", selector="train", **kw):
# NOTE(review): loop-body fragment -- `i`, `paramset`, `savefolder`,
# `nest_output`, `parameterset_dest` and `log_dir` are bound outside this view.
# give each realization its own seed, then derive a unique parameterset id
paramset.update({'random_seed': paramset['random_seed'] + i})
ps_id = get_unique_id(paramset)
print(ps_id)

# append this id and its key parameter values to the file listing all
# process IDs by parameters
with open(os.path.join(savefolder, 'id_parameters.txt'), 'a') as f:
    f.write(ps_id + '\n')
    f.write('%.3f, %.3f, %.3f, %.3f' % (paramset['eta'], paramset['g'],
                                        paramset['J'],
                                        paramset['sigma_factor']) + '\n')

# put output_path into the dictionary, as we now have a unique ID;
# this will not affect the parameter space object PS
spike_output_path = os.path.join(nest_output, ps_id)
if not os.path.isdir(spike_output_path):
    os.mkdir(spike_output_path)
paramset.update({
    'ps_id': ps_id,
    'spike_output_path': spike_output_path,
    'savefolder': savefolder
})

# write using ps.ParameterSet native format
parameterset_file = os.path.join(parameterset_dest,
                                 '{}.pset'.format(ps_id))
ps.ParameterSet(paramset).save(url=parameterset_file)

# specify where to save output and errors
nest_output_file = os.path.join(log_dir, ps_id + '.txt')
# NOTE(review): fragment -- the two lines below are the tail of a preceding
# method whose `def` lies outside this view.
self._updateUndoRedo()
return True


def _loadFile(self, filename):
    # Load a parameter file: read it into a ParameterSet, submit the values,
    # import the chemistry DB it references (if any), and remember the path.
    try:
        case = open(filename, 'r')
    # NOTE(review): bare except -- also catches KeyboardInterrupt etc.;
    # consider narrowing to OSError
    except:
        traceback.print_exc()
        msg = 'Could not open file ' + filename + "\n, got exception:" + \
            traceback.format_exc()
        self._error(filename, 'Error opening file', msg)
        return
    p = parameters.ParameterSet()
    p.read(case)
    # submitNew returning False means no new values; load proceeds anyway
    if not self.submitNew(p):
        pass
        # self._info("Load aborted",
        #            "file did not contain new values of parameters")
    # pull in the chemistry database referenced by the file, if any
    db = p.getParamValue(parameters.CurrentDatabasePath)
    if db != "":
        self.importCurrentChemistryDB(p)
    self.setFilename(filename)


# NOTE(review): header only -- the body of openFile continues past this chunk.
def openFile(self, file):
# ball appearance
BALL_COLOR = WHITE
BALL_SHAPE = 'square'
# speed settings of the paddle and ball (per-axis components plus the
# resulting speed magnitude)
PADDLE_SPEED = 1.5
BALL_X_SPEED = 3
BALL_Y_SPEED = 2
BALL_SPEED = np.sqrt(BALL_X_SPEED**2.0 + BALL_Y_SPEED**2.0)
# learning-related constants; presumably discount factor and exploration
# parameter -- TODO confirm against the consumer of these values
GAMMA = 0.8
EPSILON = 1.5
BACKGROUND_COLOR = BLACK

# parameter set for the whole game, with nested sub-sets for balls/paddles
params_set = params.ParameterSet({}, label="pong_test")
BALLS_NUM = 1
PADDLES_NUM = 2
params_set['balls'] = params.ParameterSet({})
params_set['paddles'] = params.ParameterSet({})
# number of balls used in a game
params_set['balls']['num'] = BALLS_NUM
balls_list = []
params_set['balls']['list'] = balls_list
# NOTE(review): the body of this loop continues past this chunk
for i in range(BALLS_NUM):
    ball_params = params.ParameterSet({})
def main():
    """Run a Pong multi-task experiment with hard-wired controller switching.

    Alternates between catch (classic) and avoid controllers for both
    paddles every SWITCH_MARKER steps over STEPS total steps, accumulating
    rewards and paddle hits. Prints summary statistics and returns None.
    """
    dummy_params = params.ParameterSet({})
    PongGame = EnvPongDraft.EnvPong()
    paddle_player = PongGame.paddles[0]
    paddle_bot = PongGame.paddles[1]
    # different controllers to test each different task setting
    paddle_learner_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_bot_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_bot, dummy_params)

    STEPS = 10000
    # STEPS = 2500
    STEPS_RANGE = range(STEPS)
    actions = [[0, 0, 0], [0, 0, 0]]
    # cumulative reward (pos, neg) for two players (learning agent, opponent)
    cum_reward = [[0, 0], [0, 0]]
    # cumulative hits for learner and opponent paddle
    cum_hits = [0, 0]
    # per-episode hit counters for learner and opponent paddle
    interm_hits = [0, 0]
    PLAYERS = 2
    REWARD_TYPES = 2
    PLAYERS_RANGE = range(PLAYERS)
    REWARD_TYPES_RANGE = range(REWARD_TYPES)

    # switching is hard wired through the pre-defined SWITCH_MARKER step
    # count: here, switch every STEPS // SWITCH_STEPS steps
    SWITCH_STEPS = 8
    SWITCH_MARKER = STEPS // SWITCH_STEPS
    # start with the catch scenario (classical pong) on both paddles
    paddle_learner_controller = paddle_learner_catch_controller
    paddle_bot_controller = paddle_bot_catch_controller
    catch = True

    # BUGFIX (latent hazard): the outer loop variable was `i`, shadowed by
    # the inner `for i, j in itertools.product(...)` accumulation loop;
    # renamed to `step` / `p`, `r` so the two never collide.
    for step in STEPS_RANGE:
        if (step != 0) and (step % SWITCH_MARKER == 0):
            print('CONTROLLER SWITCH.')
            if catch:
                print('Was catch. Switching to avoid.')
                print('Hits in the catch episode were : ', interm_hits)
                paddle_learner_controller = paddle_learner_avoid_controller
                paddle_bot_controller = paddle_bot_avoid_controller
                catch = False
                interm_hits = [0, 0]
            else:
                print('Was avoid. Switching to catch.')
                print('Hits in the avoid episode were :', interm_hits)
                paddle_learner_controller = paddle_learner_catch_controller
                paddle_bot_controller = paddle_bot_catch_controller
                catch = True
                interm_hits = [0, 0]
        actions[0] = paddle_learner_controller.act()
        actions[1] = paddle_bot_controller.act()
        # observation holds the image data from the screen; unused here
        reward, hits, observation, hit_reward = PongGame.step(actions)
        for p, r in itertools.product(PLAYERS_RANGE, REWARD_TYPES_RANGE):
            cum_reward[p][r] += reward[p][r]
        for p in PLAYERS_RANGE:
            cum_hits[p] += hits[p]
            interm_hits[p] += hits[p]

    print('Finished.')
    print('Cumulated reward: ', cum_reward)
    print('Cumulated hits: ', cum_hits)
    print('Class internal.')
    print('Cumulated reward : ', PongGame.total_reward)
    print('Cumulated hits : ', PongGame.total_hits)
    print('Ball crossings : ', PongGame.ball_cross_counter)
    return