def train_eval(train, targets):
    features = train.columns
    folds = KFold(n_splits=5, shuffle=True, random_state=1420)
    oof = np.zeros(len(train))

    for fold_, (trn_idx, val_idx) in enumerate(folds.split(train.values, targets.values)):
        if fold_ > 0:
            break
        print("Fold {}".format(fold_))
        # n = len(trn_idx)
        trn_data, trn_label = train.iloc[trn_idx][features].values, targets.iloc[trn_idx].values
        val_data, val_label = train.iloc[val_idx][features].values, targets.iloc[val_idx].values

        model = MLPModel(args)
        model.train(trn_data, trn_label, val_data, val_label)
        oof[val_idx] = model.evaluate(val_data, val_label)

        # fold_importance_df = pd.DataFrame()
        # fold_importance_df["Feature"] = features
        # fold_importance_df["importance"] = clf.feature_importance()
        # fold_importance_df["fold"] = fold_ + 1
        # feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)

        mae = mean_absolute_error(targets.iloc[val_idx], oof[val_idx])
        mse = mean_squared_error(targets.iloc[val_idx], oof[val_idx])
        logging.info('mae: {:<8.5f}'.format(mae))
        logging.info('mse: {:<8.5f}'.format(mse))

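# train_eval() assumes an MLPModel wrapper exposing train() and evaluate(), with
# evaluate() returning per-sample predictions (they are stored as out-of-fold values).
# A minimal sketch under that assumption; the class name, layer sizes, and the use of
# sklearn's MLPRegressor as a stand-in are illustrative, not the original implementation.
import numpy as np
from sklearn.neural_network import MLPRegressor


class MLPModelSketch:
    def __init__(self, args=None):
        # hidden layer sizes here are illustrative defaults, not read from args
        self.net = MLPRegressor(hidden_layer_sizes=(64, 64), max_iter=200)

    def train(self, trn_data, trn_label, val_data, val_label):
        # the validation split is accepted for interface parity but unused in this sketch
        self.net.fit(trn_data, trn_label)

    def evaluate(self, val_data, val_label):
        # assumed contract: return predictions so the caller can compute MAE/MSE itself
        return self.net.predict(val_data)
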
def run_submission(self):
    self.debug_print(
        "Use the print function `self.debug_print(...)` for debugging purposes, do NOT use the default `print(...)`"
    )

    # create model
    model = MLPModel(time_step=15, window_size=10, weights='model.hdf5')

    while True:
        """
        NOTE: Only one of (get_next_data_as_string, get_next_data_as_list,
        get_next_data_as_numpy_array) can be used to get the row of data,
        please refer to the `OVERVIEW OF DATA` section above.
        Uncomment the one that will be used, and comment the others.
        """
        # data = self.get_next_data_as_list()
        data = self.get_next_data_as_numpy_array()
        # data = self.get_next_data_as_string()

        # prediction = self.get_prediction(data)
        prediction = model.predict(data)

        """
        submit_prediction(prediction) MUST be used to submit your prediction
        for the current row of data
        """
        self.submit_prediction(prediction)

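# MLPModel is built with time_step=15 while the loop above feeds one row at a time,
# so predict() presumably buffers incoming rows until a full window is available.
# A hypothetical sketch of such a buffering wrapper; the class, method names, and the
# zero-prediction warm-up behaviour are assumptions, not the competition's actual model.
from collections import deque

import numpy as np


class WindowedPredictorSketch:
    def __init__(self, inner_model, time_step=15):
        self.inner_model = inner_model          # assumed: accepts a (1, time_step, n_features) array
        self.buffer = deque(maxlen=time_step)

    def predict(self, row):
        self.buffer.append(np.asarray(row, dtype=np.float32))
        if len(self.buffer) < self.buffer.maxlen:
            return 0.0                          # warm-up: not enough history yet
        window = np.stack(self.buffer)[None, ...]
        return float(self.inner_model.predict(window))
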
def graph_builder(opts, observed=None, ground_truth=None, learning_rate=0.001,
                  mode=util.Modes.TRAIN):
    # Build the neural network
    predictions = MLPModel(opts, mode=mode)(observed)

    # Loss
    loss = opts.loss_scaling * tf.cast(
        tf.losses.absolute_difference(ground_truth, predictions,
                                      reduction=tf.losses.Reduction.MEAN),
        dtype=getattr(tf, opts.dtypes[0]))

    # Error metric
    rmse_metric = util.exp_rmspe(ground_truth, predictions)

    if mode == util.Modes.TRAIN:
        # Training
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

        # Wrap in a CrossReplicaOptimizer if we're replicating across multiple IPUs
        if opts.replication_factor > 1:
            optimizer = cross_replica_optimizer.CrossReplicaOptimizer(optimizer)

        # Batch norm variable update dependency
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Op to calculate every variable gradient
            grads = tf.gradients(loss, tf.trainable_variables())
            grads = list(zip(grads, tf.trainable_variables()))

            # Loss scaling
            grads = [(grad / opts.loss_scaling, var) for grad, var in grads]

            # Apply weight_decay directly to gradients
            if opts.weight_decay != 0:
                grads = [(grad + (opts.weight_decay * var), var)
                         if 'l2tag' in var.name and 'kernel' in var.name
                         else (grad, var) for grad, var in grads]

            # Clip gradients
            if opts.gradient_clipping:
                grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in grads]

            # Op to update all variables according to their gradient
            apply_grads = optimizer.apply_gradients(grads_and_vars=grads)

        return loss / opts.loss_scaling, rmse_metric, apply_grads
    elif mode == util.Modes.VALID:
        return loss / opts.loss_scaling, rmse_metric, None

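# Hypothetical driver for the ops returned by graph_builder(); the placeholder shapes,
# the `opts`/`util.Modes` objects, and the `batches` iterable are assumptions made for
# illustration only (TF1-style graph execution, matching the code above).
observed_ph = tf.placeholder(tf.float32, shape=[None, num_features])   # num_features is assumed
ground_truth_ph = tf.placeholder(tf.float32, shape=[None])
loss_op, rmse_op, train_op = graph_builder(
    opts, observed=observed_ph, ground_truth=ground_truth_ph, mode=util.Modes.TRAIN)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for x_batch, y_batch in batches:            # assumed: yields numpy arrays
        loss_val, rmse_val, _ = sess.run(
            [loss_op, rmse_op, train_op],
            feed_dict={observed_ph: x_batch, ground_truth_ph: y_batch})
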
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    graph = tf.Graph()
    with graph.as_default():
        env = gym.make("CartPole-v0")
        model = MLPModel(config=config.CartPoleConfig())
        agent = Agent(model=model)

        logdir = "/tmp/cart_pole"
        if not os.path.isdir(logdir):
            os.makedirs(logdir)

        game = Game(env=env,
                    agent=agent,
                    logdir=logdir,
                    should_render=True,
                    should_load=FLAGS.load)
        game.train(500)
        game.play(50)

if not os.path.exists('./iter_num/' + args.model_name):
    os.makedirs('./iter_num/' + args.model_name)
if not os.path.exists('./logs/' + args.model_name):
    os.makedirs('./logs/' + args.model_name)
if not os.path.exists('./labels/' + args.model_name):
    os.makedirs('./labels/' + args.model_name)
if not os.path.exists('./c/'):
    os.makedirs('./c/')

dataset = Dataset(args)
change_itr = range(8000, 100000, 4000)
logger = Logger('./logs/' + args.model_name)

if args.env_name == 'bimgame':
    model = ConvModel(3, args.num_subgoals, use_rnn=False).to(device)
else:
    model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

start_itr = 0
c = []
if args.one_class:
    if args.pretrained_ckpt is not None:
        model.load_state_dict(
            torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
        start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
        c = torch.from_numpy(
            np.load('./c/' + args.pretrained_ckpt + '.npy')).float().to(device)

    # computing initial c for one-class out-of-set estimation
    if len(c) == 0:
        c = get_c(dataset, model, args)

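# Hypothetical sketch of what get_c() could compute: in Deep SVDD-style one-class
# training, the centre c is commonly initialised as the mean network output over the
# dataset, nudged away from zero. The loader interface and forward signature are
# assumptions for illustration, not the original implementation.
import torch


def get_c_sketch(loader, model, eps=0.1):
    model.eval()
    outputs = []
    with torch.no_grad():
        for x in loader:                        # assumed: loader yields input tensors
            outputs.append(model(x.to(device)))
    c = torch.cat(outputs, dim=0).mean(dim=0)
    # keep entries away from zero so the one-class objective cannot collapse trivially
    c[(c.abs() < eps) & (c < 0)] = -eps
    c[(c.abs() < eps) & (c >= 0)] = eps
    return c
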
def mlp_model_func(ob_space, ac_space):
    return MLPModel(ob_space, ac_space, ob_filter=True, gaussian_fixed_var=True)

class Agent:
    """Interacts with and learns from the environment."""

    def __init__(self, state_size: int, action_size: int, seed: int):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.q_local = MLPModel(state_size, action_size, seed).to(device)
        self.q_target = MLPModel(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.q_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

    def step(self, state: np.ndarray, action: int, reward: float,
             next_state: np.ndarray, done: bool):
        """Save data and maybe train the model."""
        # Save experience in replay memory
        self.memory.add(state, action, reward, next_state, done)

        # Learn every UPDATE_EVERY time steps.
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:
            # If enough samples are available in memory, get a random subset and learn
            if len(self.memory) > BATCH_SIZE:
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, state: np.ndarray, eps: float = 0.) -> int:
        """Returns actions for given state as per current policy.

        Params
        ======
            state (array_like): current state
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.q_local.eval()
        with torch.no_grad():
            action_values = self.q_local(state)
        self.q_local.train()

        # Epsilon-greedy action selection
        if random.random() > eps:
            action = np.argmax(action_values.cpu().data.numpy())
        else:
            action = random.choice(np.arange(self.action_size))
        return action

    def learn(self, experiences: Tuple[Tensor, ...], gamma: float):
        """Update value parameters using given batch of experience tuples.

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tensors
            gamma (float): discount factor
        """
        # 64 x 8, 64 x 1, 64 x 1, 64 x 8, 64 x 1
        states, actions, rewards, next_states, dones = experiences

        q_current = self.q_local(states)                      # B x A = 64 x 4
        v_current = torch.gather(q_current, dim=1, index=actions)

        q_next = self.q_target(next_states)
        q_max, _ = q_next.max(dim=1)
        q_max = q_max.view(-1, 1)

        loss = (rewards + gamma * q_max * (1 - dones) - v_current) ** 2
        loss = loss.mean()

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # ------------------- update target network ------------------- #
        self.soft_update(TAU)

    def soft_update(self, tau: float):
        """Soft update model parameters: θ_target = τ*θ_local + (1 - τ)*θ_target.

        Params
        ======
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(self.q_target.parameters(),
                                             self.q_local.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)

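# Minimal sketch of the Q-network the Agent above assumes: an nn.Module built from
# (state_size, action_size, seed) that maps a batch of states to one Q-value per action.
# The class name and hidden layer sizes are illustrative assumptions, not the original model.
import torch
import torch.nn as nn
import torch.nn.functional as F


class MLPModelSketch(nn.Module):
    def __init__(self, state_size: int, action_size: int, seed: int, hidden: int = 64):
        super().__init__()
        torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, action_size)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return self.fc3(x)      # shape: (batch, action_size), one Q-value per action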