Example #1
    def __init__(self, env_name, policy, n_trajectories, trajectory_len, state_filter=None,
                **env_args):
        self.env = np.asarray([gym.make(env_name, **env_args) for _ in range(n_trajectories)])

        # Override each environment's built-in episode step limit.
        for env in self.env:
            env._max_episode_steps = trajectory_len

        self.action_space = self.env[0].action_space
        self.policy = policy
        self.n_trajectories = n_trajectories
        self.trajectory_len = trajectory_len
        self.state_filter = state_filter
        self.device = get_device()
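
A minimal construction sketch for the example above. The class name SinglePathSimulator and the policy network are assumptions made for illustration; only the __init__ signature is shown in the snippet.

import gym
import torch.nn as nn

# Hypothetical: a small policy net and an assumed class name, for illustration only.
policy = nn.Sequential(nn.Linear(4, 64), nn.Tanh(), nn.Linear(64, 2))
simulator = SinglePathSimulator('CartPole-v1', policy,
                                n_trajectories=16, trajectory_len=200)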
Example #2
def build_detector(name: str = "DSFDDetector",
                   confidence_threshold: float = 0.5,
                   nms_iou_threshold: float = 0.3,
                   device=get_device(),
                   max_resolution: int = None) -> Detector:
    assert name in available_detectors, \
        "Detector not available. Choose one of the following: " + \
        ", ".join(available_detectors)
    args = dict(type=name,
                confidence_threshold=confidence_threshold,
                nms_iou_threshold=nms_iou_threshold,
                device=device,
                max_resolution=max_resolution)
    detector = build_from_cfg(args, DETECTOR_REGISTRY)
    return detector
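
A minimal usage sketch. The detect call and the dummy image are assumptions about the Detector interface; only build_detector itself is shown above.

import numpy as np

detector = build_detector("DSFDDetector",
                          confidence_threshold=0.5,
                          nms_iou_threshold=0.3)
# Hypothetical inference call; the `detect` method is assumed, not shown above.
image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = detector.detect(image)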
Example #3
def cg_solver(Avp_fun, b, max_iter=10):
    '''
    Finds an approximate solution to a set of linear equations Ax = b

    Parameters
    ----------
    Avp_fun : callable
        a function that returns the product of the matrix A and the vector it
        is passed (i.e. it computes the matrix-vector product Av)

    b : torch.FloatTensor
        the right hand term in the set of linear equations Ax = b

    max_iter : int
        the maximum number of iterations (default is 10)

    Returns
    -------
    x : torch.FloatTensor
        the approximate solution to the system of equations defined by Avp_fun
        and b
    '''

    device = get_device()
    b = b.to(device)  # keep all tensors on the same device
    x = torch.zeros_like(b)
    r = b.clone()  # with x_0 = 0, the initial residual r_0 = b - Ax_0 = b
    p = b.clone()

    for i in range(max_iter):
        Avp = Avp_fun(p, retain_graph=True)

        alpha = torch.matmul(r, r) / torch.matmul(p, Avp)
        x += alpha * p

        # On the final iteration, return without updating the residual.
        if i == max_iter - 1:
            return x

        r_new = r - alpha * Avp
        beta = torch.matmul(r_new, r_new) / torch.matmul(r, r)
        r = r_new
        p = r + beta * p
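
A quick sanity-check sketch for cg_solver: conjugate gradient assumes A is symmetric positive-definite, so the sketch builds such a matrix, wraps the matrix-vector product in a callable matching the Avp_fun signature (including the retain_graph keyword the solver passes), and verifies Ax ≈ b. The construction of A is illustrative.

import torch

M = torch.randn(5, 5)
A = M @ M.T + 5 * torch.eye(5)  # symmetric positive-definite by construction
b = torch.randn(5)

def Avp_fun(v, retain_graph=True):
    # Move A to v's device so the product works wherever cg_solver placed v.
    return A.to(v.device) @ v

x = cg_solver(Avp_fun, b, max_iter=10)
print(torch.allclose(A.to(x.device) @ x, b.to(x.device), atol=1e-3))  # True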
Example #4
    def __init__(self,
                 policy,
                 value_fun,
                 simulator,
                 max_kl_div=0.01,
                 max_value_step=0.01,
                 vf_iters=1,
                 vf_l2_reg_coef=1e-3,
                 discount=0.995,
                 lam=0.98,
                 cg_damping=1e-3,
                 cg_max_iters=10,
                 line_search_coef=0.9,
                 line_search_max_iter=10,
                 line_search_accept_ratio=0.1,
                 model_name=None,
                 continue_from_file=False,
                 save_every=1):
        '''
        Parameters
        ----------

        policy : torch.nn.Sequential
            the policy to be optimized

        value_fun : torch.nn.Sequential
            the value function to be optimized and used when calculating the advantages

        simulator : Simulator
            the simulator to be used when generating training experiences

        max_kl_div : float
            the maximum kl divergence of the policy before and after each step
            (default is 0.01)

        max_value_step : float
            the learning rate for the value function (default is 0.01)

        vf_iters : int
            the number of times to optimize the value function over each set of
            training experiences (default is 1)

        vf_l2_reg_coef : float
            the regularization term when calculating the L2 loss of the value function
            (default is 0.001)

        discount : float
            the coefficient to use when discounting the rewards (default is 0.995)

        lam : float
            the bias reduction parameter to use when calculating advantages using GAE
            (default is 0.98)

        cg_damping : float
            the multiple of the identity matrix to add to the Hessian when calculating
            Hessian-vector products (default is 0.001)

        cg_max_iters : int
            the maximum number of iterations to use when solving for the optimal
            search direction using the conjugate gradient method (default is 10)

        line_search_coef : float
            the proportion by which to reduce the step length on each iteration of
            the line search (default is 0.9)

        line_search_max_iter : int
            the maximum number of line search iterations before returning 0.0 as the
            step length (default is 10)

        line_search_accept_ratio : float
            the minimum proportion of error to accept from linear extrapolation when
            doing the line search (default is 0.1)

        model_name : str
            an identifier for the model to be used when generating filepath names
            (default is None)

        continue_from_file : bool
            whether to continue training from a previous saved session (default is False)

        save_every : int
            the number of training iterations to go between saving the training session
            (default is 1)
        '''

        self.policy = policy
        self.value_fun = value_fun
        self.simulator = simulator
        self.max_kl_div = max_kl_div
        self.max_value_step = max_value_step
        self.vf_iters = vf_iters
        self.vf_l2_reg_coef = vf_l2_reg_coef
        self.discount = discount
        self.lam = lam
        self.cg_damping = cg_damping
        self.cg_max_iters = cg_max_iters
        self.line_search_coef = line_search_coef
        self.line_search_max_iter = line_search_max_iter
        self.line_search_accept_ratio = line_search_accept_ratio
        self.mse_loss = MSELoss(reduction='mean')
        self.value_optimizer = LBFGS(self.value_fun.parameters(),
                                     lr=max_value_step,
                                     max_iter=25)
        self.model_name = model_name
        self.continue_from_file = continue_from_file
        self.save_every = save_every
        self.episode_num = 0
        self.elapsed_time = timedelta(0)
        self.device = get_device()
        self.mean_rewards = []

        if not model_name and continue_from_file:
            raise Exception('Argument continue_from_file to __init__ method of ' \
                            'the TRPO class was set to True, but model_name was ' \
                            'not specified.')

        if not model_name and save_every:
            raise Exception('Argument save_every to __init__ method of the TRPO ' \
                            'class was set to a value greater than 0, but ' \
                            'model_name was not specified.')

        if continue_from_file:
            self.load_session()
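
A minimal construction sketch. The network architectures and the simulator instance (e.g. built from the class in Example #1) are assumptions; only the constructor signature is given above.

import torch.nn as nn

# Hypothetical CartPole-sized networks, for illustration only.
policy = nn.Sequential(nn.Linear(4, 64), nn.Tanh(),
                       nn.Linear(64, 2), nn.Softmax(dim=-1))
value_fun = nn.Sequential(nn.Linear(4, 64), nn.Tanh(), nn.Linear(64, 1))

# model_name is set so the save_every > 0 check in __init__ passes.
trpo = TRPO(policy, value_fun, simulator, max_kl_div=0.01,
            model_name='cartpole-trpo')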
Example #5
parser.add_argument('--model-name', type=str, dest='model_name', required=True,
                    help='The entry in trpo_experiments.yaml from which settings ' \
                    'should be loaded.')
parser.add_argument('--simulator', dest='simulator_type', type=str, default='single-path',
                    choices=['single-path', 'vine'], help='The type of simulator' \
                    ' to use when collecting training experiences.')

args = parser.parse_args()
continue_from_file = args.continue_from_file
model_name = args.model_name
simulator_type = args.simulator_type

with open(config_filename, 'r') as f:
    all_configs = yaml.load(f, Loader=yaml.FullLoader)
config = all_configs[model_name]

device = get_device()

# Find the input size, hidden dim sizes, and output size
env_name = config['env_name']
# env = gym.make(env_name)
data = read_file('./data/train.csv')
embeddings = Embeddings(read_embeddings('./data/embeddings.csv'))
env = gym.make(env_name,
               data=data,
               embeddings=embeddings,
               alpha=0.5,
               gamma=0.9,
               fixed_length=True,
               trajectory_length=5)
action_space = env.action_space
observation_space = env.observation_space
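
The comment above mentions finding the input and output sizes; one way to read them off the spaces, assuming Box observations and either a Discrete or Box action space:

from gym import spaces

state_dim = observation_space.shape[0]
if isinstance(action_space, spaces.Discrete):
    n_actions = action_space.n         # number of discrete actions
else:
    n_actions = action_space.shape[0]  # dimensionality of continuous actions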