def train(args):
    """Train the MTCNN sub-network selected by ``args.net`` on the WIDER dataset.

    Args:
        args: namespace with attributes ``cuda`` (GPU index), ``net``
            (one of "pnet" / "rnet" / "onet"), ``lr``, ``momentum``,
            and ``epoch`` (number of training epochs).

    Raises:
        Exception: if ``args.net`` is not one of the three known net types.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.set_device(args.cuda)
    else:
        device = torch.device("cpu")

    if args.net == "pnet":
        model = PNet(device)
    elif args.net == "rnet":
        # NOTE(review): RNet/ONet are constructed without `device`, unlike
        # PNet — confirm their constructors handle placement themselves.
        model = RNet()
    elif args.net == "onet":
        model = ONet()
    else:
        raise Exception("Net Type Error!")

    # NOTE(review): loss_func/optimizer are currently unused below — the
    # backward/step part of the training loop appears to be missing.
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), args.lr, args.momentum)

    # `data_path` / `anno_filename` are assumed to be module-level globals —
    # TODO confirm they are defined elsewhere in this file.
    transformed_data = WIDER_Dataset(
        data_path, anno_filename,
        transforms.Compose([Resize((12, 12)), Normalize(), To_Tensor()]))
    trainloader = DataLoader(transformed_data,
                             batch_size=1,
                             shuffle=True,
                             collate_fn=transformed_data.collate_fn,
                             num_workers=4,
                             pin_memory=True)

    # Fixed: the model must actually be moved to the target device
    # (this line was commented out in the original).
    model.to(device=device)

    for epoch in range(args.epoch):
        model.train()
        for i_batch, (images, boxes) in enumerate(trainloader):
            # Fixed: Tensor.type()/.to() are NOT in-place — the originals
            # discarded their return values, so nothing was converted or
            # moved. Rebind the results instead.
            images = images.to(device=device, dtype=torch.double)
            boxes[0] = boxes[0].to(device=device, dtype=torch.float)
            output = model(images)
            print(output.cpu())  # fixed: was `ptint`, a NameError
def main(_):
    """Entry point: build policy/value nets (optionally an AIRL discriminator),
    resume saved state if requested, then run training and a final test.

    Args:
        _: unused positional argument supplied by the app runner
           (presumably absl.app passing argv — TODO confirm).
    """
    device = get_device(FLAGS.use_gpu)
    print("Use device: {}".format(device))

    # Create the output directory tree used for saving models.
    data_dir = FLAGS.data_dir
    create_directory(data_dir)
    create_directory(os.path.join(data_dir, "models"))

    env = gym.make(FLAGS.env)

    # Policy network and state-value network for the PPO agent.
    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    print(p_net)
    print(v_net)
    p_net.to(device)
    v_net.to(device)

    optim_p = ralamb.Ralamb(p_net.parameters(), lr=FLAGS.lr, weight_decay=FLAGS.weight_decay)
    optim_v = ralamb.Ralamb(v_net.parameters(), lr=FLAGS.lr, weight_decay=FLAGS.weight_decay)
    agent = Agent(p_net, v_net, optim_p, optim_v, device)

    if FLAGS.use_discrim:
        # Load pre-recorded expert episodes for adversarial imitation (AIRL).
        # NOTE(review): pickle.load on this file assumes it is trusted data.
        expert_filename = os.path.join(FLAGS.data_dir, "expert_data", "taxi_expert.pkl")
        print("Load expert data: ", expert_filename)
        with open(expert_filename, "rb") as f:
            expert_traj = Trajectory()
            expert_epis = pickle.load(f)
        for epi in expert_epis:
            # next_obs = obs shifted left by one step, with the first
            # observation wrapped around to the end — TODO confirm the
            # wrap-around (rather than a terminal obs) is intended.
            epi["next_obs"] = np.append(epi["obs"][1:], epi["obs"][0])
            expert_traj.append(epi)
        expert_traj.to_tensor(device)

        # AIRL discriminator = pseudo-reward net + shaping value net,
        # optimized jointly with a single optimizer.
        pseudo_rew_net = VNet(env.observation_space, FLAGS.hid_num)
        shaping_val_net = VNet(env.observation_space, FLAGS.hid_num)
        print(pseudo_rew_net)
        print(shaping_val_net)
        pseudo_rew_net.to(device)
        shaping_val_net.to(device)
        optim_discrim = ralamb.Ralamb(
            list(pseudo_rew_net.parameters()) + list(shaping_val_net.parameters()),
            lr=FLAGS.lr,
            weight_decay=FLAGS.weight_decay,
        )
        discrim = Discriminator(pseudo_rew_net, shaping_val_net, optim_discrim, device)
    else:
        discrim = None
        expert_traj = None

    # Resume from a previously saved model checkpoint, if requested.
    max_rew = -1e6  # sentinel: lower than any achievable episode reward
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    discrim_filename_base = None
    if FLAGS.resume:
        print("Load last model")
        load_info = agent.load_model(model_filename_base, "last")
        if load_info:
            max_rew = load_info["max_rew"]
            print("Max reward: {0}".format(max_rew))
        else:
            print("Model file not found")
        if FLAGS.use_discrim:
            discrim_filename_base = os.path.join(
                FLAGS.data_dir, "models",
                "discrim_" + FLAGS.env + "_AIRL_H" + str(FLAGS.hid_num))
            discrim.load_model(discrim_filename_base, "last")

    train(env, agent, max_rew, model_filename_base, device, discrim,
          discrim_filename_base, expert_traj)
    test(env, agent, device)