Beispiel #1
0
def train(args):
    """Train one of the MTCNN-style nets (PNet/RNet/ONet) on the WIDER dataset.

    Args:
        args: parsed CLI namespace; reads args.cuda (GPU index),
            args.net ("pnet" | "rnet" | "onet"), args.lr, args.momentum,
            and args.epoch.

    Raises:
        Exception: if args.net is not one of the three known net types.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.set_device(args.cuda)
    else:
        device = torch.device("cpu")

    if args.net == "pnet":
        model = PNet(device)
    elif args.net == "rnet":
        model = RNet()
    elif args.net == "onet":
        model = ONet()
    else:
        raise Exception("Net Type Error!")

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), args.lr, args.momentum)

    # NOTE(review): data_path / anno_filename are assumed to be module-level
    # globals defined elsewhere in this file — confirm.
    transformed_data = WIDER_Dataset(
        data_path, anno_filename,
        transforms.Compose([Resize((12, 12)),
                            Normalize(),
                            To_Tensor()]))
    trainloader = DataLoader(transformed_data,
                             batch_size=1,
                             shuffle=True,
                             collate_fn=transformed_data.collate_fn,
                             num_workers=4,
                             pin_memory=True)

    # BUG FIX: the model must live on the same device as its inputs; this
    # call was previously commented out (nn.Module.to is in-place, so it is
    # safe even if the model is already on the target device).
    model.to(device=device)
    for epoch in range(args.epoch):
        model.train()
        for i_batch, (images, boxes) in enumerate(trainloader):
            # BUG FIX: Tensor.type() and Tensor.to() are out-of-place and
            # return new tensors; the originals discarded the results, so
            # the cast and the device move never took effect. Rebind them.
            images = images.type(torch.DoubleTensor)
            images = images.to(device=device)
            boxes[0] = boxes[0].to(device=device, dtype=torch.float)

            output = model(images)
            # BUG FIX: "ptint" -> "print" (was a NameError at runtime).
            print(output.cpu())
Beispiel #2
0
def main(_):
    """Build the policy/value networks and agent, optionally set up an AIRL
    discriminator with expert data, then run training and evaluation."""
    device = get_device(FLAGS.use_gpu)
    print("Use device: {}".format(device))

    # Make sure the output directories for saved models exist.
    data_dir = FLAGS.data_dir
    create_directory(data_dir)
    create_directory(os.path.join(data_dir, "models"))

    env = gym.make(FLAGS.env)

    # Policy / value networks and one optimizer each.
    policy_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    value_net = VNet(env.observation_space, FLAGS.hid_num)
    print(policy_net)
    print(value_net)
    policy_net.to(device)
    value_net.to(device)
    policy_opt = ralamb.Ralamb(policy_net.parameters(),
                               lr=FLAGS.lr,
                               weight_decay=FLAGS.weight_decay)
    value_opt = ralamb.Ralamb(value_net.parameters(),
                              lr=FLAGS.lr,
                              weight_decay=FLAGS.weight_decay)
    agent = Agent(policy_net, value_net, policy_opt, value_opt, device)

    discrim = None
    expert_traj = None
    if FLAGS.use_discrim:
        # Load expert demonstrations for the discriminator.
        expert_path = os.path.join(FLAGS.data_dir, "expert_data",
                                   "taxi_expert.pkl")
        print("Load expert data: ", expert_path)
        with open(expert_path, "rb") as fp:
            expert_traj = Trajectory()
            for episode in pickle.load(fp):
                # Next-observation sequence = obs shifted by one step,
                # wrapping the first obs around to the end.
                episode["next_obs"] = np.append(episode["obs"][1:],
                                                episode["obs"][0])
                expert_traj.append(episode)
            expert_traj.to_tensor(device)

        # Pseudo-reward and shaping-value networks share a single optimizer.
        reward_net = VNet(env.observation_space, FLAGS.hid_num)
        shaping_net = VNet(env.observation_space, FLAGS.hid_num)
        print(reward_net)
        print(shaping_net)
        reward_net.to(device)
        shaping_net.to(device)
        discrim_opt = ralamb.Ralamb(
            list(reward_net.parameters()) + list(shaping_net.parameters()),
            lr=FLAGS.lr,
            weight_decay=FLAGS.weight_decay,
        )
        discrim = Discriminator(reward_net, shaping_net, discrim_opt, device)

    # Optionally resume agent (and discriminator) from the last checkpoint.
    best_reward = -1e6
    model_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    discrim_base = None
    if FLAGS.resume:
        print("Load last model")
        checkpoint = agent.load_model(model_base, "last")
        if checkpoint:
            best_reward = checkpoint["max_rew"]
            print("Max reward: {0}".format(best_reward))
        else:
            print("Model file not found")

        if FLAGS.use_discrim:
            discrim_base = os.path.join(
                FLAGS.data_dir, "models",
                "discrim_" + FLAGS.env + "_AIRL_H" + str(FLAGS.hid_num))
            discrim.load_model(discrim_base, "last")

    train(env, agent, best_reward, model_base, device, discrim,
          discrim_base, expert_traj)
    test(env, agent, device)