Code Example #1
    def action():
        check_training_daemon()

        query = db.Query()

        # remove existing sessions
        db.sessions.remove(query.client_id == request.client_id)

        # remove existing samples
        db.samples.remove(query.client_id == request.client_id)

        # record the session started
        document_id = db.sessions.insert({
            'client_id': request.client_id,
            'start_time': datetime.now()
        })

        # create the agent
        agent = agents.create_agent(request.client_id, request.options,
                                    request.recycle_agent)

        return {
            "client_id": request.client_id,
            "document_id": document_id,
            "model": agent.get_model_base64(),
            "runtime_parameters": agent.runtime_parameters.__dict__
        }
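
The handler above returns the agent's model as a base64 string in the response dictionary. A minimal sketch of how a client might consume it (hypothetical; only the field names come from the handler above, and how the decoded bytes are deserialized depends on the project's model format):

import base64

# assumption: `response` is the parsed JSON body returned by action() above,
# e.g. obtained via requests.post(...).json()
model_bytes = base64.b64decode(response["model"])
runtime_parameters = response["runtime_parameters"]

# the decoded bytes can then be written to disk and loaded with whatever
# framework the project uses for its models
with open("model.bin", "wb") as f:
    f.write(model_bytes)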
Code Example #2
def main():
    args = parse_args()
    env = envs.create_env(args.domain, args.task, args.verbose)
    agent = agents.create_agent(args.model, env, args.verbose)

    if args.train:
        agent.train(env, args.save_model, args.verbose, args.display,
                    args.save_training_curve)
    elif args.eval:
        agent.eval(env, args.verbose, args.display)
Code Example #3
def main():
    name = "UDP Reinforcement Learning Congestion Control Client"
    parser = argparse.ArgumentParser(name)
    args = parse_args(parser)
    os.environ["EPISODES"] = str(args.episodes)
    if args.verbose:
        os.environ["VERBOSE"] = str(args.verbose)
    nb_observation_dim = 3
    history_horizon = 10
    nb_observation_space_dim = nb_observation_dim * history_horizon
    nb_actions = 11
    agents.create_agent(0, nb_observation_space_dim, nb_actions, args.train)
    agents.load_weights(0, "./", "weights.h5f")
    agents.reset(0, True)
    print("\n")

    if args.train:
        train_agent(args)
    else:
        run_episode(args)
Code Example #4
def main():
    args = TrainOptions().parse()
    device = torch.device('cuda') if (not args.no_cuda and torch.cuda.is_available()) else torch.device('cpu')
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    env = create_env(args)

    network = create_network(args, env.action_space.n, env.observation_space.shape)
    network.to(device)
    optimizer = Adam(network.parameters(), lr=args.lr)

    policy = AnnealedEpsilonGreedyPolicy(epsilon_max=args.epsilon_max,
                                         epsilon_min=args.epsilon_min, exploration_steps=args.exp_steps)
    memory = SimpleExperienceReplay(max_size=args.mem_max, batch_size=args.batch_size)
    logger = Logger()

    agent = create_agent(args, env, network, policy, memory, optimizer, logger)

    # train agent
    agent.learn(n_episodes=args.n_ep, ep_max_step=args.ep_max_step, replay_start_size=args.replay_start,
                save_every=args.freq_save_model, update_target_every=args.freq_target_update, render_every=args.freq_render)
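
Example #4 drives exploration with an annealed epsilon-greedy policy parameterized by epsilon_max, epsilon_min and exp_steps. A minimal sketch of the linear annealing schedule usually behind such a policy (hypothetical; the project's AnnealedEpsilonGreedyPolicy may differ in detail):

import random

class LinearEpsilonGreedy:
    """Linearly anneals epsilon from epsilon_max to epsilon_min over exploration_steps."""

    def __init__(self, epsilon_max, epsilon_min, exploration_steps):
        self.epsilon_max = epsilon_max
        self.epsilon_min = epsilon_min
        self.exploration_steps = exploration_steps
        self.step = 0

    def epsilon(self):
        # fraction of the exploration budget used so far, clipped to [0, 1]
        frac = min(self.step / self.exploration_steps, 1.0)
        return self.epsilon_max + frac * (self.epsilon_min - self.epsilon_max)

    def select_action(self, q_values, n_actions):
        # explore with probability epsilon, otherwise act greedily on q_values
        self.step += 1
        if random.random() < self.epsilon():
            return random.randrange(n_actions)
        return max(range(n_actions), key=lambda a: q_values[a])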
Code Example #5
File: train.py  Project: telin0411/dialogue-reinforce
    def __init__(self, opt):
        signal.signal(signal.SIGINT, signal.default_int_handler)

        if isinstance(opt, ParlaiParser):
            opt = opt.parse_args()
        # Possibly load from checkpoint
        trainstats_suffix = '.trainstats'
        if (opt.get('model_file')
                and isfile(opt['model_file'] + '.checkpoint')):
            opt['init_model'] = opt['model_file'] + '.checkpoint'
            trainstats_suffix = '.checkpoint.trainstats'
        else:
            pass
            # TODO for testing only
            # raise RuntimeError('WARNING: Reinforcement learning'
            #                    ' must be initialized by a model.checkpoint '
            #                    'file and {} does not exist.'.format(
            #                        opt['model_file'] + '.checkpoint'))
        # Possibly build a dictionary (not all models do this).
        if (opt['dict_build_first']
                and not (opt.get('dict_file') or opt.get('model_file'))):
            raise RuntimeError('WARNING: For train_model, '
                               'please specify either a '
                               'model_file or dict_file.')

        if opt['dict_build_first'] and 'dict_file' in opt:
            if opt.get('pytorch_teacher_task'):
                opt['dict_file'] = get_pyt_dict_file(opt)
            elif opt['dict_file'] is None and opt.get('model_file'):
                opt['dict_file'] = opt['model_file'] + '.dict'
            print("[ building dictionary first... ]")
            build_dict(opt, skip_if_built=True)

        # Create model and assign it to the specified task
        self.agent = create_agent(opt)

        # Freeze the model for the static dialogue partner
        static_agent = copy.deepcopy(self.agent)
        self.agent.id = ACTIVE

        static_agent.id = STATIC
        freeze_agent(static_agent)

        self.world = create_task(opt, self.agent, static_agent)

        # set up timers
        self.train_time = Timer()
        self.validate_time = Timer()
        self.log_time = Timer()
        self.save_time = Timer()
        print('[ training... ]')

        self.parleys = 0
        self.max_num_epochs = (opt['num_epochs']
                               if opt['num_epochs'] > 0 else float('inf'))

        self.max_train_time = (opt['max_train_time']
                               if opt['max_train_time'] > 0 else float('inf'))

        self.log_every_n_secs = (opt['log_every_n_secs'] if
                                 opt['log_every_n_secs'] > 0 else float('inf'))

        self.val_every_n_secs = (opt['validation_every_n_secs']
                                 if opt['validation_every_n_secs'] > 0 else
                                 float('inf'))

        self.save_every_n_secs = (opt['save_every_n_secs']
                                  if opt['save_every_n_secs'] > 0 else
                                  float('inf'))

        self.val_every_n_epochs = (opt['validation_every_n_epochs']
                                   if opt['validation_every_n_epochs'] > 0 else
                                   float('inf'))

        # smart defaults for --validation-metric-mode
        if opt['validation_metric'] in {'loss', 'ppl', 'mean_rank'}:
            opt['validation_metric_mode'] = 'min'
        elif opt['validation_metric'] in {
                'accuracy', 'hits@1', 'hits@5', 'f1', 'bleu'
        }:
            opt['validation_metric_mode'] = 'max'
        if opt.get('validation_metric_mode') is None:
            opt['validation_metric_mode'] = 'max'

        self.last_valid_epoch = 0
        self.valid_optim = (1
                            if opt['validation_metric_mode'] == 'max' else -1)
        self.valid_reports = []
        self.best_valid = None
        if (opt.get('model_file')
                and isfile(opt['model_file'] + '.best_valid')):
            with open(opt['model_file'] + ".best_valid", 'r') as f:
                x = f.readline()
                self.best_valid = float(x)
        self.impatience = 0
        self.saved = False
        self.valid_world = None
        self.opt = opt

        # we may have been preempted, make sure we note that amount
        self._preempted_epochs = 0.0
        if (opt.get('model_file')
                and isfile(opt['model_file'] + trainstats_suffix)):
            # looks like we were preempted. make sure we load up our total
            # training stats, etc
            with open(opt['model_file'] + trainstats_suffix) as ts:
                obj = json.load(ts)
                self._preempted_epochs = obj.get('total_epochs', 0)
                self.train_time.total = obj.get('train_time', 0)
                self.impatience = obj.get('impatience', 0)
                self.valid_reports = obj.get('valid_reports', [])

        if opt['tensorboard_log'] is True:
            self.writer = TensorboardLogger(opt)
Code Example #6
                    "--episodes",
                    type=int,
                    default=2000,
                    help="the number of episodes to run")
parser.add_argument("-b",
                    "--batch-size",
                    type=int,
                    default=50,
                    help="the batch size")
parser.add_argument("-t",
                    "--target-dir",
                    default="weights",
                    help="the model weights will be saved to this directory")
args = parser.parse_args()

try:
    env = gym.make(args.env)
except Exception:
    print(f"Could not find env {args.env}")
    sys.exit(1)

agent = agents.create_agent(args.agent, env)

agent.train(args.episodes, args.batch_size)
env.close()

timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_name = f"{args.target_dir}/{args.agent}_{args.env}_{timestamp}.pt"
torch.save(agent.state_dict(), file_name)
print(f"Saved model weights to {file_name}")