def action():
    """Start a fresh session for the requesting client and build its agent.

    Calls ``ckeck_training_deamon()`` first (presumably makes sure the
    background training daemon is available -- confirm against its
    definition). Then every session and sample record whose ``client_id``
    matches ``request.client_id`` is removed, a new session record stamped
    with ``datetime.now()`` is inserted, and the agent is created from the
    request's ``options`` and ``recycle_agent`` values.

    Returns:
        dict: ``client_id``, the ``document_id`` returned by
        ``db.sessions.insert``, the ``model`` produced by
        ``agent.get_model_base64()`` and the agent's ``runtime_parameters``
        as a plain attribute dictionary.
    """
    ckeck_training_deamon()

    query = db.Query()
    client_id = request.client_id

    # Drop leftovers from an earlier run of this client: sessions first,
    # then samples. The condition is rebuilt for every call.
    for collection in (db.sessions, db.samples):
        collection.remove(query.client_id == client_id)

    # Record that the session has started.
    session_record = {
        'client_id': client_id,
        'start_time': datetime.now(),
    }
    document_id = db.sessions.insert(session_record)

    # Create the agent.
    agent = agents.create_agent(
        client_id,
        request.options,
        request.recycle_agent,
    )

    response = {
        "client_id": client_id,
        "document_id": document_id,
        "model": agent.get_model_base64(),
        "runtime_parameters": agent.runtime_parameters.__dict__,
    }
    return response
def main():
    """Build the environment and agent, then train or evaluate.

    The environment is created from ``args.domain`` / ``args.task`` and the
    agent from ``args.model``. ``args.train`` takes precedence over
    ``args.eval``; when neither is set, nothing is run after construction.
    """
    args = parse_args()

    env = envs.create_env(args.domain, args.task, args.verbose)
    agent = agents.create_agent(args.model, env, args.verbose)

    if args.train:
        agent.train(
            env,
            args.save_model,
            args.verbose,
            args.display,
            args.save_training_curve,
        )
        return

    if args.eval:
        agent.eval(env, args.verbose, args.display)
def main():
    """Entry point of the UDP reinforcement-learning congestion-control client.

    Parses the command line, exports the episode count (and, when set, the
    verbosity) through the ``EPISODES`` / ``VERBOSE`` environment variables,
    prepares agent slot 0, and then either trains or runs an episode
    depending on ``args.train``.
    """
    # The title is the first positional argument of ArgumentParser, i.e. it
    # is used as the program name (``prog``), not as the description.
    parser = argparse.ArgumentParser(
        prog="UDP Reinforcement Learning Congestion Control Client"
    )
    args = parse_args(parser)

    os.environ["EPISODES"] = str(args.episodes)
    if args.verbose:
        os.environ["VERBOSE"] = str(args.verbose)

    agent_id = 0
    features_per_step = 3
    history_steps = 10
    action_count = 11
    # Size of the observation vector: per-step features times history length.
    observation_size = features_per_step * history_steps

    agents.create_agent(agent_id, observation_size, action_count, args.train)
    agents.load_weights(agent_id, "./", "weights.h5f")
    agents.reset(agent_id, True)
    print("\n")

    entry_point = train_agent if args.train else run_episode
    entry_point(args)
def main():
    """Train an agent with the options parsed by ``TrainOptions``.

    Steps: pick the torch device, make sure the output directory exists,
    build the environment, network, optimizer, exploration policy, replay
    memory and logger, assemble the agent, and start learning.
    """
    args = TrainOptions().parse()

    # Use CUDA only when it is both allowed by the options and available.
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # exist_ok=True avoids the check-then-create race of
    # "if not exists: makedirs" (another process may create the directory
    # between the two calls) and fails loudly if the path is a regular file.
    os.makedirs(args.output, exist_ok=True)

    env = create_env(args)

    network = create_network(args, env.action_space.n, env.observation_space.shape)
    network.to(device)
    optimizer = Adam(network.parameters(), lr=args.lr)

    policy = AnnealedEpsilonGreedyPolicy(
        epsilon_max=args.epsilon_max,
        epsilon_min=args.epsilon_min,
        exploration_steps=args.exp_steps,
    )
    memory = SimpleExperienceReplay(
        max_size=args.mem_max,
        batch_size=args.batch_size,
    )
    logger = Logger()

    agent = create_agent(args, env, network, policy, memory, optimizer, logger)

    # Train the agent.
    agent.learn(
        n_episodes=args.n_ep,
        ep_max_step=args.ep_max_step,
        replay_start_size=args.replay_start,
        save_every=args.freq_save_model,
        update_target_every=args.freq_target_update,
        render_every=args.freq_render,
    )
def __init__(self, opt):
    """Prepare agents, world, timers and bookkeeping for a training run.

    Args:
        opt: either a ``ParlaiParser`` (it is parsed here with
            ``parse_args()``) or an already parsed, dict-like options
            object. The options are modified in place: ``init_model``,
            ``dict_file`` and ``validation_metric_mode`` may be set.

    Raises:
        RuntimeError: if ``dict_build_first`` is set but neither
            ``dict_file`` nor ``model_file`` is given.
    """
    signal.signal(signal.SIGINT, signal.default_int_handler)

    if isinstance(opt, ParlaiParser):
        opt = opt.parse_args()

    def _positive_or_inf(key):
        """Return ``opt[key]`` if it is > 0, otherwise infinity (no limit)."""
        value = opt[key]
        return value if value > 0 else float('inf')

    # Possibly load from checkpoint.
    trainstats_suffix = '.trainstats'
    model_file = opt.get('model_file')
    if model_file and isfile(model_file + '.checkpoint'):
        opt['init_model'] = model_file + '.checkpoint'
        trainstats_suffix = '.checkpoint.trainstats'
    # TODO for testing only: a missing '<model_file>.checkpoint' is
    # currently tolerated; the RuntimeError that required one is disabled.

    # Possibly build a dictionary (not all models do this).
    if opt['dict_build_first'] and not (
        opt.get('dict_file') or opt.get('model_file')
    ):
        raise RuntimeError(
            'WARNING: For train_model, '
            'please specify either a '
            'model_file or dict_file.'
        )
    if opt['dict_build_first'] and 'dict_file' in opt:
        if opt.get('pytorch_teacher_task'):
            opt['dict_file'] = get_pyt_dict_file(opt)
        elif opt['dict_file'] is None and opt.get('model_file'):
            opt['dict_file'] = opt['model_file'] + '.dict'
        print("[ building dictionary first... ]")
        build_dict(opt, skip_if_built=True)

    # Create model and assign it to the specified task.
    self.agent = create_agent(opt)

    # Freeze a copy of the model to act as the static dialogue partner.
    static_agent = copy.deepcopy(self.agent)
    self.agent.id = ACTIVE
    static_agent.id = STATIC
    freeze_agent(static_agent)
    self.world = create_task(opt, self.agent, static_agent)

    # Set up timers.
    self.train_time = Timer()
    self.validate_time = Timer()
    self.log_time = Timer()
    self.save_time = Timer()
    print('[ training... ]')

    self.parleys = 0
    # Every limit below treats a non-positive option as "no limit".
    self.max_num_epochs = _positive_or_inf('num_epochs')
    self.max_train_time = _positive_or_inf('max_train_time')
    self.log_every_n_secs = _positive_or_inf('log_every_n_secs')
    self.val_every_n_secs = _positive_or_inf('validation_every_n_secs')
    self.save_every_n_secs = _positive_or_inf('save_every_n_secs')
    self.val_every_n_epochs = _positive_or_inf('validation_every_n_epochs')

    # Smart defaults for --validation-metric-mode: known metrics force the
    # mode, anything else keeps the given mode or falls back to 'max'.
    if opt['validation_metric'] in {'loss', 'ppl', 'mean_rank'}:
        opt['validation_metric_mode'] = 'min'
    elif opt['validation_metric'] in {
        'accuracy', 'hits@1', 'hits@5', 'f1', 'bleu'
    }:
        opt['validation_metric_mode'] = 'max'
    if opt.get('validation_metric_mode') is None:
        opt['validation_metric_mode'] = 'max'

    self.last_valid_epoch = 0
    # +1 when the metric is maximised, -1 when it is minimised.
    self.valid_optim = 1 if opt['validation_metric_mode'] == 'max' else -1
    self.valid_reports = []
    self.best_valid = None
    if opt.get('model_file') and isfile(opt['model_file'] + '.best_valid'):
        # The first line of the file holds the best validation value so far.
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(opt['model_file'] + ".best_valid", 'r') as f:
            self.best_valid = float(f.readline())

    self.impatience = 0
    self.saved = False
    self.valid_world = None
    self.opt = opt

    # We may have been preempted; make sure we note that amount.
    self._preempted_epochs = 0.0
    if opt.get('model_file') and isfile(opt['model_file'] + trainstats_suffix):
        # Looks like we were preempted. Load the accumulated training
        # stats so that counters continue from where they stopped.
        with open(opt['model_file'] + trainstats_suffix) as ts:
            obj = json.load(ts)
        self._preempted_epochs = obj.get('total_epochs', 0)
        self.train_time.total = obj.get('train_time', 0)
        self.impatience = obj.get('impatience', 0)
        self.valid_reports = obj.get('valid_reports', [])

    # The writer is only created when the option is exactly ``True``.
    if opt['tensorboard_log'] is True:
        self.writer = TensorboardLogger(opt)
"--episodes", type=int, default=2000, help="the number of episodes to run") parser.add_argument("-b", "--batch-size", type=int, default=50, help="the batch size") parser.add_argument("-t", "--target-dir", default="weights", help="the model weights will be saved to this directory") args = parser.parse_args() try: env = gym.make(args.env) except: print(f"Could not find env {args.env}") sys.exit(1) agent = agents.create_agent(args.agent, env) agent.train(args.episodes, args.batch_size) env.close() timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") file_name = f"{args.target_dir}/{args.agent}_{args.env}_{timestamp}.pt" torch.save(agent.state_dict(), file_name) print(f"Saved model weights to {file_name}")