def init_algorithm_params(self):
    '''Initialize other algorithm parameters'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
    ))
    util.set_attr(self, self.algorithm_spec, [
        'policy_name',
        'action_pdtype',
        'action_policy',
    ])
    self.action_policy = getattr(policy_util, self.action_policy)
    self.policy = None
    if 'word_policy' in self.algorithm_spec:
        params = deepcopy(ps.get(self.algorithm_spec, 'word_policy'))
        PolicyClass = getattr(word_policy, params.pop('name'))
    elif 'e2e' in self.algorithm_spec:
        params = deepcopy(ps.get(self.algorithm_spec, 'e2e'))
        PolicyClass = getattr(e2e, params.pop('name'))
    else:
        params = deepcopy(ps.get(self.algorithm_spec, 'policy'))
        PolicyClass = getattr(policy, params.pop('name'))
    self.policy = PolicyClass(**params)
def init_algorithm_params(self):
    '''Initialize other algorithm parameters.'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
        explore_var_spec=None,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # explore_var is epsilon, tau, etc. depending on the action policy
        # these control the trade-off between exploration and exploitation
        'explore_var_spec',
        'gamma',  # the discount factor
        'training_frequency',  # how often to train for batch training (once every training_frequency time steps)
    ])
    self.to_train = 0
    self.action_policy = getattr(policy_util, self.action_policy)
    self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
    self.body.explore_var = self.explore_var_scheduler.start_val
def init_algorithm_params(self):
    '''Initialize other algorithm parameters'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
        explore_var_spec=None,
        entropy_coef_spec=None,
        policy_loss_coef=1.0,
        val_loss_coef=1.0,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # theoretically, AC does not have policy update; but in this implementation we have such option
        'explore_var_spec',
        'gamma',  # the discount factor
        'lam',
        'num_step_returns',
        'entropy_coef_spec',
        'policy_loss_coef',
        'val_loss_coef',
        'sil_policy_loss_coef',
        'sil_val_loss_coef',
        'training_frequency',
        'training_batch_iter',
        'training_iter',
    ])
    super().init_algorithm_params()
def init_algorithm_params(self):
    # set default
    util.set_attr(self, dict(
        action_pdtype='Argmax',
        action_policy='epsilon_greedy',
        explore_var_spec=None,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # explore_var is epsilon, tau, etc. depending on the action policy
        # these control the trade-off between exploration and exploitation
        'explore_var_spec',
        'gamma',  # the discount factor
        'training_batch_iter',  # how many gradient updates per batch
        'training_iter',  # how many batches to train each time
        'training_frequency',  # how often to train (once every few timesteps)
        'training_start_step',  # how long before starting training
    ])
    super().init_algorithm_params()
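# Illustrative sketch only (not from the source): a hypothetical algorithm_spec fragment whose
# keys mirror the util.set_attr call above. All values are assumptions; 'linear_decay' is
# assumed to be an updater available in math_util (consumed by policy_util.VarScheduler).
example_dqn_like_algorithm_spec = {
    'action_pdtype': 'Argmax',
    'action_policy': 'epsilon_greedy',
    'explore_var_spec': {
        'name': 'linear_decay',
        'start_val': 1.0,
        'end_val': 0.1,
        'start_step': 0,
        'end_step': 10000,
    },
    'gamma': 0.99,
    'training_batch_iter': 8,
    'training_iter': 4,
    'training_frequency': 4,
    'training_start_step': 32,
}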
def init_algorithm_params(self):
    '''Initialize other algorithm parameters'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
        explore_var_spec=None,
        entropy_coef_spec=None,
        policy_loss_coef=1.0,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # theoretically, REINFORCE does not have policy update; but in this implementation we have such option
        'explore_var_spec',
        'gamma',  # the discount factor
        'entropy_coef_spec',
        'policy_loss_coef',
        'training_frequency',
    ])
    self.to_train = 0
    self.action_policy = getattr(policy_util, self.action_policy)
    self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
    self.body.explore_var = self.explore_var_scheduler.start_val
    if self.entropy_coef_spec is not None:
        self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
        self.body.entropy_coef = self.entropy_coef_scheduler.start_val
def __init__(self, net_spec, in_dim, out_dim):
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        init_fn=None,
        normalize=False,
        batch_norm=False,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'init_fn',
        'normalize',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # Guard against inappropriate algorithms and environments
    assert isinstance(out_dim, int)
    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()
    # fc body
    if ps.is_empty(self.fc_hid_layers):
        tail_in_dim = self.conv_out_dim
    else:
        # fc layer from flattened conv
        self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]
    # tails. avoid list for single-tail for compute speed
    self.v = nn.Linear(tail_in_dim, 1)  # state value
    self.adv = nn.Linear(tail_in_dim, out_dim)  # action-dependent raw advantage
    # nn.ModuleList expects an iterable of modules
    self.model_tails = nn.ModuleList([self.v, self.adv])
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
def __init__(self, agent, global_nets=None):
    super().__init__(agent, global_nets)
    util.set_attr(self, self.algorithm_spec, [
        'warmup_epi',
    ])
    # create the extra replay memory for warm-up
    MemoryClass = getattr(memory, self.memory_spec['warmup_name'])
    self.body.warmup_memory = MemoryClass(self.memory_spec, self.body)
def set_global_nets(algorithm, global_nets):
    '''For Hogwild, set attr built in init_global_nets above. Use in algorithm init.'''
    # set attr first so algorithm always has self.global_{net} to pass into train_step
    for net_name in algorithm.net_names:
        setattr(algorithm, f'global_{net_name}', None)
    # set attr created in init_global_nets
    if global_nets is not None:
        util.set_attr(algorithm, global_nets)
        logger.info(f'Set global_nets attr {list(global_nets.keys())} for Hogwild')
def __init__(self, memory_spec, body):
    super().__init__(memory_spec, body)
    # NOTE for OnPolicy replay, frequency = episode; for other classes below frequency = frames
    util.set_attr(self, self.body.agent.agent_spec['algorithm'], ['training_frequency'])
    # Don't want total experiences reset when memory is cleared
    self.is_episodic = True
    self.size = 0  # total experiences stored
    self.seen_size = 0  # total experiences seen cumulatively
    # declare what data keys to store
    self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
    self.reset()
def __init__(self, agent, global_nets=None):
    super().__init__(agent, global_nets)
    util.set_attr(self, self.algorithm_spec, [
        'warmup_epi',
    ])
    # create the extra replay memory for warm-up
    MemoryClass = getattr(memory, self.memory_spec['warmup_name'])
    self.body.warmup_memory = MemoryClass(self.memory_spec, self.body)
    # optionally restore a pre-collected warm-up memory from disk
    if self.memory_spec['warmup_memory_path'] != '':
        import pickle
        with open(self.memory_spec['warmup_memory_path'], 'rb') as f:
            self.body.warmup_memory = pickle.load(f)
def __init__(self, var_decay_spec=None):
    self._updater_name = 'no_decay' if var_decay_spec is None else var_decay_spec['name']
    self._updater = getattr(math_util, self._updater_name)
    util.set_attr(self, dict(
        start_val=np.nan,
    ))
    util.set_attr(self, var_decay_spec, [
        'start_val',
        'end_val',
        'start_step',
        'end_step',
    ])
    if not getattr(self, 'end_val', None):
        self.end_val = self.start_val
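# Illustrative sketch only (not from the source): a hypothetical var_decay_spec for the
# scheduler above. The keys mirror those read by util.set_attr; 'linear_decay' is assumed
# to be an updater function available in math_util, and the numeric values are made up.
example_var_decay_spec = {
    'name': 'linear_decay',
    'start_val': 1.0,
    'end_val': 0.1,
    'start_step': 0,
    'end_step': 10000,
}
# usage sketch: scheduler = VarScheduler(example_var_decay_spec)
# scheduler.start_val is 1.0; the named updater then moves the value toward end_val over the step range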
def __init__(self, spec, e=None):
    super(MultiWozEnv, self).__init__(spec, e)
    self.action_dim = self.observation_dim = 0
    util.set_attr(self, self.env_spec, [
        'observation_dim',
        'action_dim',
    ])
    worker_id = int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
    self.u_env = MultiWozEnvironment(self.env_spec, worker_id, self.action_dim)
    self.evaluator = self.u_env.evaluator
    self.patch_gym_spaces(self.u_env)
    self._set_attr_from_u_env(self.u_env)
    logger.info(util.self_desc(self))
def init_algorithm_params(self):
    '''Initialize other algorithm parameters'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
        explore_var_spec=None,
        entropy_coef_spec=None,
        policy_loss_coef=1.0,
        val_loss_coef=1.0,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # theoretically, AC does not have policy update; but in this implementation we have such option
        'explore_var_spec',
        'gamma',  # the discount factor
        'lam',
        'num_step_returns',
        'entropy_coef_spec',
        'policy_loss_coef',
        'val_loss_coef',
        'training_frequency',
    ])
    self.to_train = 0
    self.action_policy = getattr(policy_util, self.action_policy)
    self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
    self.body.explore_var = self.explore_var_scheduler.start_val
    if self.entropy_coef_spec is not None:
        self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
        self.body.entropy_coef = self.entropy_coef_scheduler.start_val
    # Select appropriate methods to calculate advs and v_targets for training
    if self.lam is not None:
        self.calc_advs_v_targets = self.calc_gae_advs_v_targets
    elif self.num_step_returns is not None:
        self.calc_advs_v_targets = self.calc_nstep_advs_v_targets
    else:
        self.calc_advs_v_targets = self.calc_ret_advs_v_targets
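# Illustrative sketch only (not from the source): hypothetical algorithm_spec fragments showing
# how the branch above chooses the advantage estimator. A non-None 'lam' selects GAE; otherwise
# a non-None 'num_step_returns' selects n-step returns; if both are None, plain discounted
# returns are used. All values are assumptions.
example_ac_gae_fragment = {'gamma': 0.99, 'lam': 0.95, 'num_step_returns': None}    # -> calc_gae_advs_v_targets
example_ac_nstep_fragment = {'gamma': 0.99, 'lam': None, 'num_step_returns': 5}     # -> calc_nstep_advs_v_targets
example_ac_ret_fragment = {'gamma': 0.99, 'lam': None, 'num_step_returns': None}    # -> calc_ret_advs_v_targets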
def __init__(self, memory_spec, body):
    util.set_attr(self, memory_spec, [
        'alpha',
        'epsilon',
        'batch_size',
        'max_size',
        'use_cer',
    ])
    super().__init__(memory_spec, body)
    self.epsilon = np.full((1,), self.epsilon)
    self.alpha = np.full((1,), self.alpha)
    # add a 'priorities' scalar to the data_keys and call reset again
    self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones', 'priorities']
    self.reset()
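# Illustrative sketch only (not from the source): a hypothetical memory_spec for the
# prioritized replay constructor above. Key names mirror the util.set_attr call; the class
# name and all values are assumptions.
example_prioritized_memory_spec = {
    'name': 'PrioritizedReplay',
    'alpha': 0.6,        # priority exponent
    'epsilon': 0.0001,   # small constant added to priorities to keep them non-zero
    'batch_size': 32,
    'max_size': 100000,
    'use_cer': False,    # combined experience replay toggle
}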
def __init__(self, memory_spec, body):
    super().__init__(memory_spec, body)
    util.set_attr(self, self.memory_spec, [
        'batch_size',
        'max_size',
        'use_cer',
    ])
    self.is_episodic = False
    self.batch_idxs = None
    self.size = 0  # total experiences stored
    self.seen_size = 0  # total experiences seen cumulatively
    self.head = -1  # index of most recent experience
    # generic next_state buffer to store last next_states (allow for multiple for venv)
    # self.ns_idx_offset = self.body.env.num_envs if body.env.is_venv else 1
    # self.ns_buffer = deque(maxlen=self.ns_idx_offset)
    # declare what data keys to store
    self.data_keys = ['states', 'actions', 'rewards', 'next_states', 'dones']
    self.reset()
def __init__(self, net_spec, in_dim, out_dim):
    nn.Module.__init__(self)
    Net.__init__(self, net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        init_fn=None,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # Guard against inappropriate algorithms and environments
    # Build model body
    dims = [self.in_dim] + self.hid_layers
    self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation)
    # output layers
    self.v = nn.Linear(dims[-1], 1)  # state value
    self.adv = nn.Linear(dims[-1], out_dim)  # action-dependent raw advantage
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
def init_algorithm_params(self):
    '''Initialize other algorithm parameters'''
    # set default
    util.set_attr(self, dict(
        action_pdtype='default',
        action_policy='default',
        explore_var_spec=None,
        entropy_coef_spec=None,
        minibatch_size=4,
        val_loss_coef=1.0,
    ))
    util.set_attr(self, self.algorithm_spec, [
        'action_pdtype',
        'action_policy',
        # theoretically, PPO does not have policy update; but in this implementation we have such option
        'explore_var_spec',
        'gamma',
        'lam',
        'clip_eps_spec',
        'entropy_coef_spec',
        'val_loss_coef',
        'minibatch_size',
        'training_frequency',  # horizon
        'training_epoch',
    ])
    self.to_train = 0
    self.action_policy = getattr(policy_util, self.action_policy)
    self.explore_var_scheduler = policy_util.VarScheduler(self.explore_var_spec)
    self.body.explore_var = self.explore_var_scheduler.start_val
    # extra variable decays for PPO
    self.clip_eps_scheduler = policy_util.VarScheduler(self.clip_eps_spec)
    self.body.clip_eps = self.clip_eps_scheduler.start_val
    if self.entropy_coef_spec is not None:
        self.entropy_coef_scheduler = policy_util.VarScheduler(self.entropy_coef_spec)
        self.body.entropy_coef = self.entropy_coef_scheduler.start_val
    # PPO uses GAE
    self.calc_advs_v_targets = self.calc_gae_advs_v_targets
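# Illustrative sketch only (not from the source): a hypothetical PPO algorithm_spec fragment
# matching the keys read above. 'no_decay' is the default updater name used by VarScheduler;
# every value shown here is an assumption.
example_ppo_algorithm_spec = {
    'action_pdtype': 'default',
    'action_policy': 'default',
    'explore_var_spec': None,
    'gamma': 0.99,
    'lam': 0.95,
    'clip_eps_spec': {'name': 'no_decay', 'start_val': 0.2},       # clipping epsilon schedule
    'entropy_coef_spec': {'name': 'no_decay', 'start_val': 0.01},  # entropy bonus schedule
    'val_loss_coef': 0.5,
    'minibatch_size': 64,
    'training_frequency': 128,  # horizon
    'training_epoch': 4,
}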
def __init__(self, spec, e=None):
    self.e = e or 0  # for multi-env
    self.done = False
    self.env_spec = spec['env'][self.e]
    # set default
    util.set_attr(self, dict(
        log_frequency=None,  # default to log at epi done
        frame_op=None,
        frame_op_len=None,
        normalize_state=False,
        reward_scale=None,
        num_envs=None,
    ))
    util.set_attr(self, spec['meta'], [
        'log_frequency',
        'eval_frequency',
    ])
    util.set_attr(self, self.env_spec, [
        'name',
        'frame_op',
        'frame_op_len',
        'normalize_state',
        'reward_scale',
        'num_envs',
        'max_t',
        'max_frame',
    ])
    seq_len = ps.get(spec, 'agent.0.net.seq_len')
    if seq_len is not None:  # infer if using RNN
        self.frame_op = 'stack'
        self.frame_op_len = seq_len
    if util.in_eval_lab_modes():  # use singleton for eval
        self.num_envs = 1
        self.log_frequency = None
    if spec['meta']['distributed'] != False:  # divide max_frame for distributed
        self.max_frame = int(self.max_frame / spec['meta']['max_session'])
    self.is_venv = (self.num_envs is not None and self.num_envs > 1)
    if self.is_venv:
        assert self.log_frequency is not None, f'Specify log_frequency when using venv'
    self.clock_speed = 1 * (self.num_envs or 1)  # tick with a multiple of num_envs to properly count frames
    self.clock = Clock(self.max_frame, self.clock_speed)
    self.to_render = util.to_render()
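# Illustrative sketch only (not from the source): hypothetical spec fragments with the env and
# meta keys read above. The environment name and all values are assumptions.
example_env_spec_entry = {
    'name': 'CartPole-v0',
    'frame_op': None,
    'frame_op_len': None,
    'normalize_state': False,
    'reward_scale': None,
    'num_envs': 4,        # >1 makes is_venv True, which then requires log_frequency
    'max_t': None,
    'max_frame': 100000,
}
example_meta_spec = {
    'log_frequency': 1000,
    'eval_frequency': 1000,
    'distributed': False,  # anything other than False divides max_frame by max_session
    'max_session': 4,
}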
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    cell_type: any of RNN, LSTM, GRU
    fc_hid_layers: list of fc layers preceding the RNN layers
    hid_layers_activation: activation function for the fc hidden layers
    out_layer_activation: activation function for the output layer, same shape as out_dim
    rnn_hidden_size: rnn hidden_size
    rnn_num_layers: number of recurrent layers
    bidirectional: if RNN should be bidirectional
    seq_len: length of the state history passed to the net
    init_fn: weight initialization function
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
    '''
    nn.Module.__init__(self)
    super().__init__(net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        out_layer_activation=None,
        cell_type='GRU',
        rnn_num_layers=1,
        bidirectional=False,
        init_fn=None,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'cell_type',
        'fc_hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'rnn_hidden_size',
        'rnn_num_layers',
        'bidirectional',
        'seq_len',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # restore proper in_dim from env stacked state_dim (stack_len, *raw_state_dim)
    self.in_dim = in_dim[1:] if len(in_dim) > 2 else in_dim[1]
    # fc body: state processing model
    if ps.is_empty(self.fc_hid_layers):
        self.rnn_input_dim = self.in_dim
    else:
        fc_dims = [self.in_dim] + self.fc_hid_layers
        self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation)
        self.rnn_input_dim = fc_dims[-1]
    # RNN model
    self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))(
        input_size=self.rnn_input_dim,
        hidden_size=self.rnn_hidden_size,
        num_layers=self.rnn_num_layers,
        batch_first=True,
        bidirectional=self.bidirectional)
    # tails. avoid list for single-tail for compute speed
    if ps.is_integer(self.out_dim):
        self.model_tail = net_util.build_fc_model([self.rnn_hidden_size, self.out_dim], self.out_layer_activation)
    else:
        if not ps.is_list(self.out_layer_activation):
            self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
        assert len(self.out_layer_activation) == len(self.out_dim)
        tails = []
        for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
            tail = net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ)
            tails.append(tail)
        self.model_tails = nn.ModuleList(tails)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
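# Illustrative sketch only (not from the source): a hypothetical net_spec for the recurrent
# network above, with keys mirroring the util.set_attr call. All values are assumptions.
example_recurrent_net_spec = {
    'cell_type': 'GRU',
    'fc_hid_layers': [64],
    'hid_layers_activation': 'relu',
    'out_layer_activation': None,
    'rnn_hidden_size': 64,
    'rnn_num_layers': 1,
    'bidirectional': False,
    'seq_len': 4,
    'init_fn': None,
    'clip_grad_val': 0.5,
    'loss_spec': {'name': 'MSELoss'},
    'optim_spec': {'name': 'Adam'},
    'lr_scheduler_spec': None,
    'update_type': 'replace',
    'update_frequency': 1,
    'polyak_coef': 0.0,
    'gpu': False,
}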
def __init__(self, net_spec, in_dim, out_dim):
    '''
    Multi state processing heads, single shared body, and multi action tails.
    There is one state and action head per body/environment
    Example:

              env 1 state       env 2 state
           _______|______    _______|______
          |    head 1    |  |    head 2    |
          |______________|  |______________|
                  |                 |
                  |_________________|
         _________________|_________________
        |             Shared body           |
        |___________________________________|
                          |
                  ________|________
                 |                 |
           ______|_______    ______|_______
          |    tail 1    |  |    tail 2    |
          |______________|  |______________|
                 |                 |
           env 1 action      env 2 action
    '''
    nn.Module.__init__(self)
    super().__init__(net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        out_layer_activation=None,
        init_fn=None,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    assert len(self.hid_layers) == 3, 'Your hidden layers must specify [*heads], [body], [*tails]. If not, use MLPNet'
    assert isinstance(self.in_dim, list), 'Hydra network needs in_dim as list'
    assert isinstance(self.out_dim, list), 'Hydra network needs out_dim as list'
    self.head_hid_layers = self.hid_layers[0]
    self.body_hid_layers = self.hid_layers[1]
    self.tail_hid_layers = self.hid_layers[2]
    if len(self.head_hid_layers) == 1:
        self.head_hid_layers = self.head_hid_layers * len(self.in_dim)
    if len(self.tail_hid_layers) == 1:
        self.tail_hid_layers = self.tail_hid_layers * len(self.out_dim)
    self.model_heads = self.build_model_heads(in_dim)
    heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers])
    dims = [heads_out_dim] + self.body_hid_layers
    self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation)
    self.model_tails = self.build_model_tails(self.out_dim, self.out_layer_activation)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
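# Illustrative sketch only (not from the source): hypothetical constructor arguments for the
# hydra network above. hid_layers must hold exactly three lists ([*heads], [body], [*tails]),
# and in_dim/out_dim are lists with one entry per environment. All values are assumptions.
example_hydra_in_dim = [4, 8]    # two state heads
example_hydra_out_dim = [2, 3]   # two action tails
example_hydra_net_spec_fragment = {
    'hid_layers': [
        [[32], [32]],  # one hidden-layer spec per head
        [64],          # shared body
        [[16], [16]],  # one hidden-layer spec per tail
    ],
    'hid_layers_activation': 'relu',
}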
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    hid_layers: list containing dimensions of the hidden layers
    hid_layers_activation: activation function for the hidden layers
    out_layer_activation: activation function for the output layer, same shape as out_dim
    init_fn: weight initialization function
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
    '''
    nn.Module.__init__(self)
    super().__init__(net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        out_layer_activation=None,
        init_fn=None,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'shared',
        'hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    dims = [self.in_dim] + self.hid_layers
    self.model = net_util.build_fc_model(dims, self.hid_layers_activation)
    # add last layer with no activation
    # tails. avoid list for single-tail for compute speed
    if ps.is_integer(self.out_dim):
        self.model_tail = net_util.build_fc_model([dims[-1], self.out_dim], self.out_layer_activation)
    else:
        if not ps.is_list(self.out_layer_activation):
            self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
        assert len(self.out_layer_activation) == len(self.out_dim)
        tails = []
        for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
            tail = net_util.build_fc_model([dims[-1], out_d], out_activ)
            tails.append(tail)
        self.model_tails = nn.ModuleList(tails)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()
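# Illustrative sketch only (not from the source): a hypothetical net_spec for the MLP above,
# with keys mirroring the util.set_attr call. All values are assumptions.
example_mlp_net_spec = {
    'shared': False,
    'hid_layers': [64, 64],
    'hid_layers_activation': 'relu',
    'out_layer_activation': None,
    'init_fn': None,
    'clip_grad_val': 0.5,
    'loss_spec': {'name': 'MSELoss'},
    'optim_spec': {'name': 'Adam'},
    'lr_scheduler_spec': None,
    'update_type': 'replace',
    'update_frequency': 1,
    'polyak_coef': 0.0,
    'gpu': False,
}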
def __init__(self, agent, global_nets=None):
    super().__init__(agent, global_nets)
    util.set_attr(self, self.algorithm_spec, [
        'warmup_epi',
    ])
def __init__(self, net_spec, in_dim, out_dim):
    '''
    net_spec:
    conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
        Assumed to all come before the flat layers.
        Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
        For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    fc_hid_layers: list of fc layers following the convolutional layers
    hid_layers_activation: activation function for the hidden layers
    out_layer_activation: activation function for the output layer, same shape as out_dim
    init_fn: weight initialization function
    normalize: whether to divide by 255.0 to normalize image input
    batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
    clip_grad_val: clip gradient norm if value is not None
    loss_spec: measure of error between model predictions and correct outputs
    optim_spec: parameters for initializing the optimizer
    lr_scheduler_spec: Pytorch optim.lr_scheduler
    update_type: method to update network weights: 'replace' or 'polyak'
    update_frequency: how many total timesteps per update
    polyak_coef: ratio of polyak weight update
    gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
    '''
    assert len(in_dim) == 3  # image shape (c,w,h)
    nn.Module.__init__(self)
    super().__init__(net_spec, in_dim, out_dim)
    # set default
    util.set_attr(self, dict(
        out_layer_activation=None,
        init_fn=None,
        normalize=False,
        batch_norm=True,
        clip_grad_val=None,
        loss_spec={'name': 'MSELoss'},
        optim_spec={'name': 'Adam'},
        lr_scheduler_spec=None,
        update_type='replace',
        update_frequency=1,
        polyak_coef=0.0,
        gpu=False,
    ))
    util.set_attr(self, self.net_spec, [
        'conv_hid_layers',
        'fc_hid_layers',
        'hid_layers_activation',
        'out_layer_activation',
        'init_fn',
        'normalize',
        'batch_norm',
        'clip_grad_val',
        'loss_spec',
        'optim_spec',
        'lr_scheduler_spec',
        'update_type',
        'update_frequency',
        'polyak_coef',
        'gpu',
    ])
    # conv body
    self.conv_model = self.build_conv_layers(self.conv_hid_layers)
    self.conv_out_dim = self.get_conv_output_size()
    # fc body
    if ps.is_empty(self.fc_hid_layers):
        tail_in_dim = self.conv_out_dim
    else:
        # fc body from flattened conv
        self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
        tail_in_dim = self.fc_hid_layers[-1]
    # tails. avoid list for single-tail for compute speed
    if ps.is_integer(self.out_dim):
        self.model_tail = net_util.build_fc_model([tail_in_dim, self.out_dim], self.out_layer_activation)
    else:
        if not ps.is_list(self.out_layer_activation):
            self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
        assert len(self.out_layer_activation) == len(self.out_dim)
        tails = []
        for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
            tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ)
            tails.append(tail)
        self.model_tails = nn.ModuleList(tails)
    net_util.init_layers(self, self.init_fn)
    self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
    self.to(self.device)
    self.train()