def generate_grid_data(path, N=30, M=30, num_env=10000, traj_per_env=5,
                       Pmove_succ=1.0, Pobs_succ=1.0):
    """
    :param path: path for data file. use separate folders for training and test data
    :param N: grid rows
    :param M: grid columns
    :param num_env: number of environments in the dataset (grids)
    :param traj_per_env: number of trajectories per environment (different initial state, goal, initial belief)
    :param Pmove_succ: probability of transition succeeding, otherwise stays in place
    :param Pobs_succ: probability of correct observation, independent in each direction
    """
    params = dotdict({
        'grid_n': N,
        'grid_m': M,
        'Pobst': 0.25,  # probability of obstacles in random grid
        'R_obst': -10,
        'R_goal': 20,
        'R_step': -0.1,
        'discount': 0.99,
        'Pmove_succ': Pmove_succ,
        'Pobs_succ': Pobs_succ,
        'num_action': 5,
        'moves': [[0, 1], [1, 0], [0, -1], [-1, 0], [0, 0]],  # right, down, left, up, stay
        'stayaction': 4,
        'num_obs': 16,
        'observe_directions': [[0, 1], [1, 0], [0, -1], [-1, 0]],
    })
    params['obs_len'] = len(params['observe_directions'])
    params['num_state'] = params['grid_n'] * params['grid_m']
    params['traj_limit'] = 4 * (params['grid_n'] + params['grid_m'])
    params['R_step'] = [params['R_step']] * params['num_action']

    # save params
    if not os.path.isdir(path):
        os.mkdir(path)
    # open in binary mode: pickle writes bytes
    pickle.dump(dict(params), open(os.path.join(path, "params.pickle"), 'wb'), -1)

    # randomize seeds; set to a fixed value to make random numbers deterministic
    np.random.seed()
    random.seed()

    # grid domain object
    domain = GridBase(params)

    # make database file
    db = GridBase.create_db(os.path.join(path, "data.hdf5"), params,
                            num_env, traj_per_env)

    for env_i in range(num_env):
        print("Generating env %d with %d trajectories" % (env_i, traj_per_env))
        domain.generate_trajectories(db, num_traj=traj_per_env)

    print("Done.")
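# Usage sketch (assumption: run as a script; folder names and dataset sizes
# below are illustrative, not prescribed by the original code). Note that
# os.mkdir creates only the last path component, so any parent directories
# must already exist.
if __name__ == '__main__':
    generate_grid_data('train/', N=30, M=30, num_env=10000, traj_per_env=5)
    generate_grid_data('test/', N=30, M=30, num_env=500, traj_per_env=1)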
def parse_args(arglist):
    parser = argparse.ArgumentParser(description='Run training on gridworld')
    parser.add_argument('path',
                        help='Path to data folder containing train and test subfolders')
    parser.add_argument('--logpath', default='./log/',
                        help='Path to save log and trained model')
    parser.add_argument('--loadmodel', nargs='*',
                        help='Load model weights from checkpoint')
    parser.add_argument('--eval_samples', type=int, default=100,
                        help='Number of samples to evaluate the learned policy on')
    parser.add_argument('--eval_repeats', type=int, default=1,
                        help='Repeat simulating policy for a given number of times. '
                             'Use 5 for stochastic domains')
    parser.add_argument('--batch_size', type=int, default=100,
                        help='Size of minibatches for training')
    parser.add_argument('--training_envs', type=float, default=0.9,
                        help='Proportion of training data used for training. '
                             'Remainder will be used for validation')
    parser.add_argument('--step_size', type=int, default=4,
                        help='Number of maximum steps for backpropagation through time')
    parser.add_argument('--lim_traj_len', type=int, default=100,
                        help='Clip trajectories to a maximum length')
    parser.add_argument('--includefailed', action='store_true',
                        help='Include unsuccessful demonstrations in the training '
                             'and validation set')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Initial learning rate')
    parser.add_argument('--patience_first', type=int, default=30,
                        help='Start decaying learning rate if no improvement '
                             'for a given number of steps')
    parser.add_argument('--patience_rest', type=int, default=5,
                        help='Patience after decay started')
    parser.add_argument('--decaystep', type=int, default=15,
                        help='Total number of learning rate decay steps')
    parser.add_argument('--epochs', type=int, default=1000,
                        help='Maximum number of epochs')
    parser.add_argument('--cache', nargs='*', default=['steps', 'envs', 'bs'],
                        help='Cache nodes from pytable dataset. Default: steps, envs, bs')
    parser.add_argument('-K', '--K', type=int, default=-1,
                        help='Number of iterations of value iteration in QMDPNet. '
                             'Computed from grid size if negative.')

    args = parser.parse_args(args=arglist)

    # load domain parameters
    params = dotdict(
        pickle.load(open(os.path.join(args.path, 'train/params.pickle'), 'rb')))

    # set default K
    if args.K < 0:
        args.K = 3 * params.grid_n

    # combine all parameters into a single dotdict
    for key in vars(args):
        params[key] = getattr(args, key)

    return params
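# Usage sketch (illustrative; the data path is an assumption and must contain
# a train/params.pickle produced by the data generator above):
# params = parse_args(['data/grid30/', '--batch_size', '50', '--epochs', '200'])
# print(params.grid_n, params.K, params.batch_size)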
import sys

import numpy as np
import torch
from utils.dotdict import dotdict
from AmazonNet import AmazonNet as annet
from utils.bar import Bar
import torch.optim as optim
from utils.AverageMeter import AverageMeter
import torch.nn.functional as f

sys.path.append('../../')

args = dotdict({
    'lr': 0.001,            # learning rate (step size)
    'dropout': 0.3,         # dropout rate
    'epochs': 10,           # training epochs per batch of new self-play data
    'batch_size': 64,
    'cuda': torch.cuda.is_available(),
    'num_channels': 512,    # number of channels
})


class NNet:
    """
    Neural network training wrapper.
    """

    def __init__(self, game):
        self.board_size = game.board_size
        self.nnet = annet(game, args)
        self.board_x, self.board_y = game.get_board_size()
        self.action_size = game.get_action_size()
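# Usage sketch (hypothetical; the game class name is assumed, not shown in
# this snippet). The wrapper expects a game object exposing board_size,
# get_board_size() and get_action_size().
# game = AmazonGame(board_size=10)
# nnet = NNet(game)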
def parse_logfile(filenames, source='real', model='lin1'):
    trajectories = []
    states = None
    actions = None
    linear_velocity_scaler = 0.5
    angular_velocity_scaler = 1.5707963267948966  # pi / 2

    if isinstance(filenames, str):
        filenames = [filenames]
    t = 0

    re_spin = re.compile(r"^([0-9]+): spin.*: ([0-9\-\.]+)")
    re_action = re.compile(
        r"^([0-9]+): .* POS ([0-9\-\.]+) ([0-9\-\.]+) --.*\] ([0-9\-\.]+) besti")
    re_sys = re.compile(
        r"\[real\]\[info\] action step: ([0-9]+) \| output: \[ *([0-9\-\.]+) +([0-9\-\.]+) *\]")
    re_sym_action1 = re.compile(
        r"^([0-9]+): .* POS ([0-9\-\.]+) ([0-9\-\.]+) --.*\] ([0-9\-\.]+) besti.* \| \[([0-9\-\.]+) ([0-9\-\.]+)\]")
    re_sym_action2 = re.compile(
        r"^([0-9]+): delay[0-9]+ ([0-9\-\.]+) ([0-9\-\.]+) ([0-9\-\.]+) act.* act ([0-9\-\.]+) ([0-9\-\.]+)")  # extract the real action
    re_sym_action3 = re.compile(
        r"^([0-9]+): delay[0-9]+ ([0-9\-\.]+) ([0-9\-\.]+) ([0-9\-\.]+) act ([0-9\-\.]+) ([0-9\-\.]+)")  # extract the intended action

    for filename in filenames:
        with open(filename, 'r') as file:
            for line_i in range(100000):
                line = file.readline()

                # Reset: a new episode begins; store the trajectory collected so far
                m_reset = re.match(r"^Resetting agent", line)
                if m_reset:
                    if states is not None:
                        trajectories.append((states[:t + 1], actions[:t + 1]))
                    states = np.ones((500, 3), float) * np.nan
                    actions = np.ones((500, 2), float) * np.nan
                    t = 0

                # Steps
                if source == 'sim':
                    m_sym_action = re_sym_action3.match(line)  # intended action
                    if m_sym_action is not None:
                        t, x, y, yaw, act_fwd, act_rot = m_sym_action.groups()
                        t = int(t)
                        assert np.all(np.isnan(states[t]))
                        states[t] = [float(x), float(y), np.deg2rad(float(yaw))]
                        assert np.all(np.isnan(actions[t]))
                        actions[t] = (float(act_fwd), float(act_rot))
                        # print(line, states[t])
                else:
                    m_spin = re_spin.match(line)
                    m_action = re_action.match(line)
                    m_sys = re_sys.match(line)
                    m_sym_action = re_sym_action1.match(line)
                    if m_spin is not None:
                        t, yaw = m_spin.groups()
                        t = int(t)
                        assert np.all(np.isnan(states[t]))
                        states[t, 2] = float(yaw)
                        print(line, states[t])
                    if m_sym_action is not None:
                        t, x, y, yaw, act_fwd, act_rot = m_sym_action.groups()
                        t = int(t)
                        assert np.all(np.isnan(states[t]))
                        states[t] = [float(x), float(y), np.deg2rad(float(yaw))]
                        assert np.all(np.isnan(actions[t]))
                        actions[t] = (float(act_fwd), float(act_rot))
                        # print(line, states[t])
                    else:
                        if m_action is not None:
                            t, x, y, yaw = m_action.groups()
                            t = int(t)
                            assert np.all(np.isnan(states[t]))
                            states[t] = [float(x), float(y), np.deg2rad(float(yaw))]
                            print(line, states[t])
                        if m_sys is not None:
                            assert m_spin is None and m_action is None
                            t, act_fwd, act_rot = m_sys.groups()
                            t = int(t) - 1
                            assert np.all(np.isnan(actions[t]))
                            actions[t] = (float(act_fwd), float(act_rot))
                            print(line, actions[t])

    print("done")

    clean_trajectories = []
    for states, actions in trajectories:
        if np.all(np.isnan(states)):
            continue
        if len(states) < 2:
            continue
        # traj = np.concatenate([states, actions], axis=-1)
        lin_vel = np.linalg.norm(states[1:, :2] - states[:-1, :2], axis=-1)
        ang_vel = states[1:, 2] - states[:-1, 2]
        ang_vel = (ang_vel + np.pi) % (2 * np.pi) - np.pi  # wrap to [-pi, pi)
        act_fwd = actions[:, 0]  # * linear_velocity_scaler * 0.1
        act_rot = actions[:, 1]  # * angular_velocity_scaler * 0.1
        traj = dotdict(dict(
            x=states[:, 0],
            y=states[:, 1],
            yaw=states[:, 2],
            lin_vel=lin_vel,
            ang_vel=ang_vel,
            act_fwd=act_fwd,
            act_rot=act_rot,
            trajlen=len(states),
        ))
        clean_trajectories.append(traj)
        # print(traj)

    # time_delay = 2
    # action_fwd_rescaler = 0.3
    # action_rot_rescaler = 0.5

    errors = []
    scalers = []
    for time_delay in range(5):
        lin_vel_list = []
        act_fwd_list = []
        ang_vel_list = []
        act_rot_list = []
        for traj in clean_trajectories:
            valid_part_start = np.min(np.flatnonzero(np.isfinite(traj.lin_vel)))
            # act_t is the reference velocity at t+delay. For one-step
            # prediction we want pairs act[i], vel[i+delay].
            lin_vel = traj.lin_vel[valid_part_start + time_delay:]
            act_fwd = traj.act_fwd[valid_part_start:traj.act_fwd.shape[0] - time_delay - 1]
            ang_vel = traj.ang_vel[valid_part_start + time_delay:]
            act_rot = traj.act_rot[valid_part_start:traj.act_rot.shape[0] - time_delay - 1]
            assert len(lin_vel) == len(act_fwd)
            assert len(ang_vel) == len(act_rot)
            assert len(ang_vel) == len(lin_vel)
            lin_vel_list.append(lin_vel)
            act_fwd_list.append(act_fwd)
            ang_vel_list.append(ang_vel)
            act_rot_list.append(act_rot)

        lin_vel = np.concatenate(lin_vel_list)
        act_fwd = np.concatenate(act_fwd_list)
        ang_vel = np.concatenate(ang_vel_list)
        act_rot = np.concatenate(act_rot_list)

        pred_func, sc, err = sysid(lin_vel, ang_vel, act_fwd, act_rot, model=model)
        scalers.append(sc)
        errors.append(err)

    # weight angular errors so they are comparable to linear errors
    ang_error_scaler = np.mean(np.abs(lin_vel)) / np.mean(np.abs(ang_vel))
    errors = errors * np.array([1., np.square(ang_error_scaler)])[None]
    time_delay = np.argmin(np.sum(errors, axis=-1))
    print(errors)
    print(time_delay, scalers[time_delay], errors[time_delay])
    # (3, (0.6540917264778654, 0.07866921965909571), array([42.36897857, 36.7125886]))
    best_scalers = scalers[time_delay]  # 0.6540917264778654, 0.07866921965909571
    fwd_scaler, rot_scaler = 0.6540917264778654, 0.07866921965909571

    # plot one-step predictions with the fitted time delay
    plt.close('all')
    for traj in clean_trajectories[:10]:
        # act_t is the reference velocity at t+delay. we want pairs act[i], vel[i+delay]
        act_fwd = np.pad(traj.act_fwd, [[time_delay, 0]], 'constant')  # zero-pad the beginning
        act_rot = np.pad(traj.act_rot, [[time_delay, 0]], 'constant')
        # drop the last actions: their effect was never observed because the episode terminated
        act_fwd = act_fwd[:-time_delay - 1]
        act_rot = act_rot[:-time_delay - 1]
        # lin_vel_tmo[t] = vel[t-1]
        lin_vel_tmo = np.pad(traj.lin_vel[:-1], [[1, 0]], 'constant')
        ang_vel_tmo = np.pad(traj.ang_vel[:-1], [[1, 0]], 'constant')
        pred_xy, pred_yaw = pred_func(best_scalers, lin_vel_tmo, act_fwd,
                                      ang_vel_tmo, act_rot)

        plt.figure()
        plt.plot(np.arange(len(traj.lin_vel)), np.zeros_like(traj.lin_vel),
                 color='black', marker='', linestyle='-', linewidth=1)
        plt.plot(np.arange(len(traj.lin_vel)), traj.lin_vel,
                 color='blue', marker='.', linestyle='-', linewidth=1.2)
        plt.plot(np.arange(len(pred_xy)), pred_xy,
                 color='green', marker='.', linestyle='-', linewidth=1.2)
        plt.plot(np.arange(len(act_fwd)), act_fwd * fwd_scaler,
                 color='red', marker='.', linestyle='-', linewidth=1.2)
        plt.ylim([-0.1, 1.5])

        plt.figure()
        plt.plot(np.arange(len(traj.ang_vel)), np.zeros_like(traj.ang_vel),
                 color='black', marker='', linestyle='-', linewidth=1)
        plt.plot(np.arange(len(traj.ang_vel)), np.rad2deg(traj.ang_vel),
                 color='blue', marker='.', linestyle='-', linewidth=1.2)
        plt.plot(np.arange(len(pred_yaw)), np.rad2deg(pred_yaw),
                 color='green', marker='.', linestyle='-', linewidth=1.2)
        plt.plot(np.arange(len(act_rot)), np.rad2deg(act_rot * rot_scaler),
                 color='red', marker='.', linestyle='-', linewidth=1.2)
        plt.ylim([-10, 10])

    plt.show()
    pdb.set_trace()
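# Usage sketch (log file names are illustrative assumptions): fit the
# actuation delay and the forward/rotation velocity scalers from recorded
# robot logs, then inspect the one-step prediction plots.
# parse_logfile(['logs/real_run0.log', 'logs/real_run1.log'], source='real', model='lin1')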
def __init__(
        self,
        model_path,
        model_param_path,
        update_freq,
        filter_tuning,
        imu_calib: Optional[ImuCalib] = None,
        force_cpu=False,
):
    config_from_network = dotdict({})
    with open(model_param_path) as json_file:
        data_json = json.load(json_file)
        config_from_network["imu_freq_net"] = data_json["imu_freq"]
        config_from_network["past_time"] = data_json["past_time"]
        config_from_network["window_time"] = data_json["window_time"]
        config_from_network["arch"] = data_json["arch"]

    # frequencies and sizes conversion
    if not (config_from_network.past_time * config_from_network.imu_freq_net).is_integer():
        raise ValueError(
            "past_time cannot be represented by an integer number of IMU samples.")
    if not (config_from_network.window_time * config_from_network.imu_freq_net).is_integer():
        raise ValueError(
            "window_time cannot be represented by an integer number of IMU samples.")

    # imu frequency as input to the network
    self.imu_freq_net = config_from_network.imu_freq_net
    self.past_data_size = int(
        config_from_network.past_time * config_from_network.imu_freq_net)
    self.disp_window_size = int(
        config_from_network.window_time * config_from_network.imu_freq_net)
    self.net_input_size = self.disp_window_size + self.past_data_size

    # EXAMPLE:
    # if using 200 samples with step size 10, inference at 20 Hz,
    # we update between clones separated by 19 = update_distance_num_clone - 1 other clones;
    # if using 400 samples with 200 past data and clone_every_n_netimu_sample 10, inference at 20 Hz,
    # we update between clones separated by 19 = update_distance_num_clone - 1 other clones
    if not (config_from_network.imu_freq_net / update_freq).is_integer():
        raise ValueError("imu_freq_net must be a multiple of update_freq.")
    if not (config_from_network.window_time * update_freq).is_integer():
        raise ValueError(
            "window_time cannot be represented by an integer number of updates."
        )
    self.update_freq = update_freq
    # network inference / filter update interval
    self.clone_every_n_netimu_sample = int(
        config_from_network.imu_freq_net / update_freq)
    # imu frequency must be a multiple of the update frequency
    assert config_from_network.imu_freq_net % update_freq == 0
    self.update_distance_num_clone = int(
        config_from_network.window_time * update_freq)

    # time
    self.dt_interp_us = int(1.0 / self.imu_freq_net * 1e6)
    # multiple of the interpolation interval
    self.dt_update_us = int(1.0 / self.update_freq * 1e6)

    # logging
    logging.info(
        f"Network input time: {config_from_network.past_time + config_from_network.window_time} "
        f"= {config_from_network.past_time} + {config_from_network.window_time} (s)")
    logging.info(
        f"Network input size: {self.net_input_size} "
        f"= {self.past_data_size} + {self.disp_window_size} (samples)")
    logging.info("IMU interpolation frequency: %s (Hz)" % self.imu_freq_net)
    logging.info("Measurement update frequency: %s (Hz)" % self.update_freq)
    logging.info("Filter update stride state number: %i" %
                 self.update_distance_num_clone)
    logging.info(
        f"Interpolating IMU measurements every {self.dt_interp_us} us for the network input")

    # IMU initial calibration
    self.icalib = imu_calib

    # MSCKF
    self.filter = ImuMSCKF(filter_tuning)

    net_config = {
        "in_dim": (self.past_data_size + self.disp_window_size) // 32 + 1
    }
    self.meas_source = MeasSourceNetwork(
        model_path, config_from_network["arch"], net_config, force_cpu)
    # self.meas_source = MeasSourceTorchScript(model_path, force_cpu)

    self.imu_buffer = ImuBuffer()

    # This callback is called at the first update, if set
    self.callback_first_update = None
    # This callback can be used to bypass the network as the measurement source
    self.debug_callback_get_meas = None

    # keep track of past timestamps and measurements
    self.last_t_us, self.last_acc, self.last_gyr = -1, None, None
    self.next_interp_t_us = None
    self.next_aug_t_us = None
    self.has_done_first_update = False
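# Worked example (illustrative numbers, not from the original source): with
# imu_freq_net = 200 Hz, past_time = 1 s, window_time = 1 s and
# update_freq = 20 Hz, the network consumes 200 + 200 = 400 samples, a state
# is cloned every 200 / 20 = 10 interpolated IMU samples, and each update
# spans window_time * update_freq = 20 clones (update_distance_num_clone);
# dt_interp_us = 5000 and dt_update_us = 50000.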
""" path, filename = os.path.split(fullpath) filename, ext = os.path.splitext(filename) sys.path.insert(0, path) module = importlib.import_module(filename, path) #importlib.reload(module) # Might be out of date del sys.path[0] return module userSettings = import_path(os.path.join('.', 'settings')) symbolSettings = None symbol = sys.argv[1] if len(sys.argv) > 1 else None if symbol: print("Importing symbol settings for %s..." % symbol) try: symbolSettings = import_path(os.path.join('..', 'settings-%s' % symbol)) except Exception as e: print("Unable to find settings-%s.py." % symbol) # Assemble settings. settings = {} settings.update(vars(baseSettings)) settings.update(vars(userSettings)) if symbolSettings: settings.update(vars(symbolSettings)) # Main export settings = dotdict.dotdict(settings)
def __init__(self, args, dataset):
    # initialize data IO
    self.input = DataIO()
    self.input.load_all(dataset, args)
    self.input.load_vio(dataset, args)

    # log file initialization
    outdir = os.path.join(args.out_dir, dataset)
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    outfile = os.path.join(outdir, args.out_filename)
    if os.path.exists(outfile):
        if not args.erase_old_log:
            logging.warning(f"{outfile} already exists, skipping")
            raise FileExistsError
        else:
            os.remove(outfile)
            logging.warning("previous log file erased")
    self.outfile = outfile
    self.f_state = open(outfile, "w")
    self.f_debug = open(os.path.join(outdir, "debug.txt"), "w")
    logging.info(f"writing to {outfile}")

    imu_calib = ImuCalib.from_attitude_file(dataset, args)
    filter_tuning = dotdict({
        "g_norm": args.g_norm,
        "sigma_na": args.sigma_na,
        "sigma_ng": args.sigma_ng,
        "ita_ba": args.ita_ba,
        "ita_bg": args.ita_bg,
        "init_attitude_sigma": args.init_attitude_sigma,  # rad
        "init_yaw_sigma": args.init_yaw_sigma,  # rad
        "init_vel_sigma": args.init_vel_sigma,  # m/s
        "init_pos_sigma": args.init_pos_sigma,  # m
        "init_bg_sigma": args.init_bg_sigma,  # rad/s
        "init_ba_sigma": args.init_ba_sigma,  # m/s^2
        "meascov_scale": args.meascov_scale,
        "use_const_cov": args.use_const_cov,
        "const_cov_val_x": args.const_cov_val_x,  # sigma^2
        "const_cov_val_y": args.const_cov_val_y,  # sigma^2
        "const_cov_val_z": args.const_cov_val_z,  # sigma^2
        "add_sim_meas_noise": args.add_sim_meas_noise,
        "sim_meas_cov_val": args.sim_meas_cov_val,
        "sim_meas_cov_val_z": args.sim_meas_cov_val_z,
        "mahalanobis_fail_scale": args.mahalanobis_fail_scale,
    })

    # ImuTracker object
    self.tracker = ImuTracker(
        model_path=args.model_path,
        model_param_path=args.model_param_path,
        update_freq=args.update_freq,
        filter_tuning=filter_tuning,
        imu_calib=imu_calib,
    )

    # output
    self.log_output_buffer = None
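# Usage sketch (hypothetical; the enclosing class name is not shown in this
# snippet, ImuTrackerRunner is an assumed name, and the dataset id is
# illustrative):
# runner = ImuTrackerRunner(args, dataset="seq_01")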
from utils.PrintBoard import PrintBoard

BLACK = -2
WHITE = 2
EMPTY = 0
ARROW = 1

# training-mode parameters
args = dotdict({
    'num_iter': 10,            # number of network training iterations
    'num_play_game': 20,       # play this many self-play games per NNet training round
    'max_len_queue': 200000,   # maximum length of the example deque
    'num_mcts_search': 5,      # number of MCTS simulations from a state down to a leaf node
    'max_batch_size': 20,      # maximum amount of data per NNet training step
    'Cpuct': 1,                # "temperature" constant in the upper confidence bound (PUCT)
    'arenaCompare': 40,
    'tempThreshold': 35,       # exploration temperature threshold
    'updateThreshold': 0.55,   # win-rate threshold for replacing the old network with the new one
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/models/', 'best.pth.tar'),
})


class TrainMode:
    """
    Self-play training loop.
    """

    def __init__(self, game, nnet):