for i in range(args.total_classes)
]

# Declaring train and test datasets
train_set = None
test_set = iDataset(args,
                    mean_image,
                    data_generators=[],
                    max_data_size=max_test_data_size,
                    job='test')

# Conditional variable, shared memory for synchronization
cond_var = mp.Condition()
train_counter = mp.Value("i", 0)
test_counter = mp.Value("i", 0)
dataQueue = mp.Queue()
all_done = mp.Event()
data_mgr = mp.Manager()
expanded_classes = data_mgr.list([None for i in range(args.test_freq)])

if args.resume:
    print("resuming model from %s-model.pth.tar" %
          os.path.splitext(args.outfile)[0])

    model = torch.load("%s-model.pth.tar" % os.path.splitext(args.outfile)[0],
                       map_location=lambda storage, loc: storage)
    model.device = train_device

    model.exemplar_means = []
    model.compute_means = True
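The fragment above only sets up the shared synchronization primitives; the code that uses them is not shown. As a rough, hypothetical sketch (the function names and make_batch are illustrative, not from the original source), a producer/consumer pair built on these objects could look like this:

def produce(dataQueue, cond_var, train_counter, all_done, make_batch):
    # Hypothetical producer: generate items until the main process sets
    # the all_done event, and advertise progress through the shared counter.
    while not all_done.is_set():
        dataQueue.put(make_batch())
        with cond_var:
            train_counter.value += 1
            cond_var.notify_all()

def consume(dataQueue, cond_var, train_counter, needed):
    # Hypothetical consumer: block until `needed` items have been produced,
    # then drain them from the queue.
    with cond_var:
        cond_var.wait_for(lambda: train_counter.value >= needed)
    return [dataQueue.get() for _ in range(needed)]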
Example #2
def main_eval(args, create_shared_model, init_agent):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        try:
            mp.set_start_method("spawn")
        except RuntimeError:
            pass

    model_to_open = args.load_model

    processes = []

    res_queue = mp.Queue()
    if args.model == "BaseModel" or args.model == "GCN":
        args.learned_loss = False
        args.num_steps = 50
        target = nonadaptivea3c_val
    else:
        args.learned_loss = True
        args.num_steps = 6
        target = savn_val

    rank = 0
    args.scene_types = ['living_room']
    for scene_type in args.scene_types:
        p = mp.Process(
            target=target,
            args=(
                rank,
                args,
                model_to_open,
                create_shared_model,
                init_agent,
                res_queue,
                250,
                scene_type,
            ),
        )
        p.start()
        processes.append(p)
        time.sleep(0.1)
        rank += 1

    count = 0
    end_count = 0
    train_scalars = ScalarMeanTracker()

    proc = len(args.scene_types)
    pbar = tqdm(total=250 * proc)

    try:
        while end_count < proc:
            train_result = res_queue.get()
            pbar.update(1)
            count += 1
            if "END" in train_result:
                end_count += 1
                continue
            train_scalars.add_scalars(train_result)

        tracked_means = train_scalars.pop_and_reset()

    finally:
        for p in processes:
            time.sleep(0.1)
            p.join()

    with open(args.results_json, "w") as fp:
        json.dump(tracked_means, fp, sort_keys=True, indent=4)
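For the loop in main_eval to finish, each worker (nonadaptivea3c_val or savn_val) has to push one scalar dict per evaluated episode and then a final dict containing the key "END". A minimal, hypothetical worker body illustrating that contract (parameter order copied from the mp.Process call above; the scalar keys and values are made up):

def dummy_val_worker(rank, args, model_to_open, create_shared_model,
                     init_agent, res_queue, max_count, scene_type):
    # Stand-in for the real evaluation workers: report per-episode scalars,
    # then tell main_eval this worker is done.
    for _ in range(max_count):
        res_queue.put({"ep_length": 10, "success": 1.0})  # illustrative values
    res_queue.put({"END": True})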
Example #3
                self.gnet.save_trans((s, a, r, s_next, done_flag))

                s = s_next
                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    self.gnet.train_net(gamma=self.gamma, ac_net=self.lnet)
                    if done:
                        self.res_queue.put(score)
                        Total_epoch += 1
                        print("Epoch:{}   score:{}".format(Total_epoch, score))
                        break
            self.res_queue.put(None)

if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [Worker(gnet, None, global_ep, global_ep_r, res_queue, i, gamma=GAMMA)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    res = []
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]

    import matplotlib.pyplot as plt
    plt.plot(res)
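Note that the script ends right after plt.plot(res); outside a notebook the figure is only displayed or written to disk with an explicit call, for example:

    plt.xlabel('episode')
    plt.ylabel('score')
    plt.show()  # or plt.savefig('scores.png')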
Example #4
                shared_state["targetQ"].load_state_dict(targetQ.state_dict())
                for i in range(max_id):
                    shared_state["update"][i] = True
            if not block:
                return 0
    except Exception as e:
        print(e)

if __name__ == '__main__':
    os.system('cls')

    vis.close()

    num_processes = 2

    shared_queue = mp.Queue()
    shared_state = dict()

    shared_state["mainQ"] = DQN(s_dim, a_dim, dev).share_memory()
    shared_state["targetQ"] = DQN(s_dim, a_dim, dev).share_memory()

    shared_state["update"] = mp.Array('i', [0 for i in range(num_processes)])
    #    shared_state["wait"] = mp.Array('i', [0 for i in range(num_processes)])
    shared_state["vis"] = mp.Value('i', 0)
    shared_state["wait"] = mp.Value('i', 0)
    shared_state["wait"].value = start_frame // 10

    #    for i in range(100):
    #        actor_process(0,num_frames,shared_state,shared_queue,False)
    #        actor_process(0,num_frames,shared_state,shared_queue,False)
    #        learner_process(1,num_frames,shared_state,shared_queue,False)
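The commented-out lines hint at actor_process and learner_process entry points. A hedged sketch of launching them as separate processes instead of calling them inline (argument order copied from the comments; num_frames and both functions are assumed to be defined in the omitted part of the file):

    procs = []
    for rank in range(num_processes):
        p = mp.Process(target=actor_process,
                       args=(rank, num_frames, shared_state, shared_queue, False))
        p.start()
        procs.append(p)
    learner = mp.Process(target=learner_process,
                         args=(num_processes, num_frames, shared_state, shared_queue, False))
    learner.start()
    procs.append(learner)
    for p in procs:
        p.join()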
Example #5
        f.write("model: " + model_name + "\n")

    print("using model {}".format(model_name))

    tgt_net = ptan.agent.TargetNet(net)

    tm_net = dqn_model.TMPredict(env.observation_space.spaces['image'].shape,
                                       env.observation_space.spaces['logic'].nvec,
                                       env.action_space.n).to(device)

    buffer = ptan.experience.ExperienceReplayBuffer(experience_source=None, buffer_size=params['replay_size'])
    # optimizer = optim.Adam(net.parameters(), lr=params['learning_rate'])
    optimizer_tm = optim.Adam(tm_net.parameters(), lr=params['learning_rate'])
    optimizer = optim.RMSprop(net.parameters(), lr=params['learning_rate'], momentum=0.95, eps=0.01)

    exp_queue = mp.Queue(maxsize=PLAY_STEPS * 2)

    if args.fsa:
        fsa_nvec = env.observation_space.spaces['logic'].nvec
        logic_dim = int(fsa_nvec.shape[0] / env.observation_space.spaces['image'].shape[0])
        fsa_nvec = fsa_nvec[-logic_dim:]
        play_proc = mp.Process(target=play_func,
                               args=(params, net, args.cuda, args.fsa, exp_queue,
                                     fsa_nvec))
    else:
        play_proc = mp.Process(target=play_func, args=(params, net, args.cuda, args.fsa, exp_queue))

    play_proc.start()

    frame_idx = 0
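The fragment stops right after starting the play process and initializing frame_idx. In this pattern the main process typically drains exp_queue into the replay buffer; a rough sketch (the None sentinel and the private buffer._add call follow ptan's multiprocessing examples, but are assumptions here):

    while play_proc.is_alive():
        frame_idx += 1
        exp = exp_queue.get()
        if exp is None:      # assumed sentinel meaning the play process finished
            break
        buffer._add(exp)     # append a single experience to the replay buffer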
Example #6
def main():
    setproctitle.setproctitle("Train/Test Manager")
    args = flag_parser.parse_arguments()

    if args.model == "BaseModel" or args.model == "GCN":
        args.learned_loss = False
        args.num_steps = 50
        target = nonadaptivea3c_val if args.eval else nonadaptivea3c_train
    else:
        args.learned_loss = True
        args.num_steps = 6
        target = savn_val if args.eval else savn_train

    create_shared_model = model_class(args.model)
    init_agent = agent_class(args.agent_type)
    optimizer_type = optimizer_class(args.optimizer)

    if args.eval:
        main_eval(args, create_shared_model, init_agent)
        return

    start_time = time.time()
    local_start_time_str = time.strftime("%Y-%m-%d_%H:%M:%S",
                                         time.localtime(start_time))
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.log_dir is not None:
        tb_log_dir = args.log_dir + "/" + args.title + "-" + local_start_time_str
        log_writer = SummaryWriter(log_dir=tb_log_dir)
    else:
        log_writer = SummaryWriter(comment=args.title)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method("spawn")

    shared_model = create_shared_model(args)

    train_total_ep = 0
    n_frames = 0

    if shared_model is not None:
        shared_model.share_memory()
        optimizer = optimizer_type(
            filter(lambda p: p.requires_grad, shared_model.parameters()), args)
        optimizer.share_memory()
        print(shared_model)
    else:
        assert (args.agent_type == "RandomNavigationAgent"
                ), "The model is None but agent is not random agent"
        optimizer = None

    processes = []

    print('Start Loading!')
    optimal_action_path = './data/AI2thor_Combine_Dataset/Optimal_Path_Combine.json'
    with open(optimal_action_path, 'r') as read_file:
        optimal_action_dict = json.load(read_file)
    manager = Manager()
    optimal_action = manager.dict()
    optimal_action.update(optimal_action_dict)
    glove_file_path = './data/AI2thor_Combine_Dataset/det_feature_512_train.hdf5'
    glove_file = hdf5_to_dict(glove_file_path)
    # det_gt_path = './data/AI2thor_Combine_Dataset/Instance_Detection_Combine.pkl'
    # with open(det_gt_path, 'rb') as read_file:
    #     det_gt = pickle.load(read_file)
    print('Loading Success!')

    end_flag = mp.Value(ctypes.c_bool, False)

    train_res_queue = mp.Queue()

    for rank in range(0, args.workers):
        p = mp.Process(
            target=target,
            args=(
                rank,
                args,
                create_shared_model,
                shared_model,
                init_agent,
                optimizer,
                train_res_queue,
                end_flag,
                glove_file,
                optimal_action,
                # det_gt,
            ),
        )
        p.start()
        processes.append(p)
        time.sleep(0.1)

    print("Train agents created.")

    train_thin = args.train_thin
    train_scalars = ScalarMeanTracker()

    # start_ep_time = time.time()

    try:
        while train_total_ep < args.max_ep:

            train_result = train_res_queue.get()
            train_scalars.add_scalars(train_result)
            train_total_ep += 1
            n_frames += train_result["ep_length"]
            # if train_total_ep % 10 == 0:
            #     print(n_frames / train_total_ep)
            #     print((time.time() - start_ep_time) / train_total_ep)
            if (train_total_ep % train_thin) == 0:
                log_writer.add_scalar("n_frames", n_frames, train_total_ep)
                tracked_means = train_scalars.pop_and_reset()
                for k in tracked_means:
                    log_writer.add_scalar(k + "/train", tracked_means[k],
                                          train_total_ep)

            if (train_total_ep % args.ep_save_freq) == 0:

                print(n_frames)
                if not os.path.exists(args.save_model_dir):
                    os.makedirs(args.save_model_dir)
                state_to_save = shared_model.state_dict()
                save_path = os.path.join(
                    args.save_model_dir,
                    "{0}_{1}_{2}_{3}.dat".format(args.title, n_frames,
                                                 train_total_ep,
                                                 local_start_time_str),
                )
                torch.save(state_to_save, save_path)

    finally:
        log_writer.close()
        end_flag.value = True
        for p in processes:
            time.sleep(0.1)
            p.join()
Example #7
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=1e-4,
                     betas=(0.92, 0.999))  # global optimizer
    global_ep, global_ep_r, res_queue = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue()

    # parallel training
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
        for i in range(mp.cpu_count())
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
    [w.join() for w in workers]
Example #8
                    if done:  # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue,
                               self.name, self.gnet, self.global_record)
                        break
                s = s_
                total_step += 1
        self.res_queue.put(None)


if __name__ == "__main__":
    gnet = Net(N_S, N_A)  # global network
    #gnet = torch.load("./data/model.pt")
    gnet.share_memory()  # share the global parameters in multiprocessing
    opt = SharedAdam(gnet.parameters(), lr=0.0001)  # global optimizer
    global_ep, global_ep_r, res_queue, global_record = mp.Value(
        'i', 0), mp.Value('d', 0.), mp.Queue(), mp.Value('d', -100.)

    # parallel training
    #workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i) for i in range(mp.cpu_count())]
    workers = [
        Worker(gnet, opt, global_ep, global_ep_r, res_queue, i, global_record)
        for i in range(6)
    ]
    [w.start() for w in workers]
    res = []  # record episode reward to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            break
Example #9
    def _test_empty_tensor_sharing(self, dtype, device):
        q = mp.Queue()
        empty = torch.tensor([], dtype=dtype, device=device)
        q.put(empty)
        out = q.get(timeout=1)
        self.assertEqual(out, empty)
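A hypothetical caller for this helper, exercising a couple of dtypes on CPU (the real test class and its device parametrization are not shown in this fragment):

    def test_empty_tensor_sharing(self):
        self._test_empty_tensor_sharing(torch.float32, torch.device('cpu'))
        self._test_empty_tensor_sharing(torch.int64, torch.device('cpu'))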
Example #10
    for i in range(population_size):
        agent = AACAgent(obs_space.shape[0], act_space.shape[0])
        hparams = Hparams(
            make_int_range(a_param),
            make_int_range(c_param),
            make_int_range(k_param),
            make_float_range(h_param),
            make_float_range(g_param),
        )
        member = Member(i, agent, hparams)
        population.append(member)

    # Moving replay buffer to shared memory allows us to share it among workers without copying.
    replay_buffer.share_memory_()
    # Separate queue is created for each member to ensure correct member is sent to each worker.
    member_queues = {m.id: mp.Queue() for m in population}
    # Queue for sharing collect experiences
    exp_queue = mp.Queue()

    # Events for synchronization. Two different events are used to avoid possible race conditions.
    # Specifically, we need to clear each event before reusing it, but we don't want to clear it too early.
    step_events = (mp.Event(), mp.Event())
    epoch_events = (mp.Event(), mp.Event())
    num_gpus = torch.cuda.device_count()

    # Initialize workers.
    workers = [
        Worker(
            i,
            make_env_function,
            max_episode_steps,
Example #11
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(
        tcn, device_ids=(range(torch.cuda.device_count())
                         ))  # Wrapper to distribute load on multiple GPUs
    attribute_classifier = DenseClassifier(num_classes=5).to(
        device)  # load labeling network

    triplet_builder = builder(args.n_views, \
        args.train_directory, args.labels_train_directory, IMAGE_SIZE, args, sample_size=32)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set,
                                                      args=(queue,
                                                            triplet_builder,
                                                            logger),
                                                      daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(list(tcn.parameters()) +
                          list(attribute_classifier.parameters()),
                          lr=args.lr_start,
                          momentum=0.9)
    # This will diminish the learning rate at the milestones.
    # 0.1, 0.01, 0.001
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 200, 500], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []

        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )
        for _ in range(0, ITERATE_OVER_TRIPLETS):

            for i, minibatch in enumerate(data_loader):

                frames = minibatch[0]
                captions = minibatch[1]
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                print(captions)
                print(len(data_loader))
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]
                anchor_output, unnormalized, mixed = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                # features = encoder(anchor_frames)
                loss_triplet = torch.clamp(args.margin + d_positive -
                                           d_negative,
                                           min=0.0).mean()

                label_outputs_1, label_outputs_2 = attribute_classifier(mixed)
                labels_1 = captions[:, 0]
                # labels_2 = captions[:, 1]
                loss_1 = criterion(label_outputs_1, labels_1)
                # loss_2 = criterion(label_outputs_2, labels_2)
                loss_language = loss_1  #+ loss_2

                # loss = loss_triplet + args.alpha * loss_language
                loss = loss_language
                # loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()
        trn_losses_.append(np.mean(losses))
        logger.info('train loss: %s', np.mean(losses))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(tcn,
                                                       attribute_classifier,
                                                       criterion, use_cuda,
                                                       args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder,
                  'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder,
                  'validation_accuracy_no_margin')
Example #12
        gamma=args.lr_critic_gamma)

    # training log
    logger = Logger.Logger(path=os.path.join(path_results_folder,
                                             "training.csv"),
                           column_names=[
                               "time", "iterations", "episodes",
                               "training reward", "validation reward",
                               "validation reward std",
                               "validation reward min",
                               "validation reward max", "loss policy",
                               "loss critic", "sigma"
                           ])

    # create trainer workers
    rollouts = mp.Queue()
    flag_close = mp.Value("i", 0)
    l1_locks = [mp.Lock() for i in range(args.num_workers)]
    l2_locks = [mp.Lock() for i in range(args.num_workers)]

    for lock in l1_locks:
        lock.acquire()

    processes = []
    for i in range(args.num_workers):
        print("(main) creating worker process number %d" % i)
        p = mp.Process(target=a2c_worker,
                       args=(i, l1_locks[i], l2_locks[i], flag_close, rollouts,
                             net_policy, net_critic, args))
        p.start()
        processes.append(p)
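The workers' l1 locks are acquired by the main process before the workers start, so each worker presumably blocks on its lock until the main loop releases it. A hypothetical round of that handshake (not the original training loop; the l2 locks, probably used for the reverse direction, are left out here):

    for i in range(args.num_workers):
        l1_locks[i].release()        # let worker i collect one rollout
    for _ in range(args.num_workers):
        rollout = rollouts.get()     # one rollout per worker
        # ... accumulate policy/critic losses from `rollout`, then step the optimizers ...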
Example #13
def main():
    print('Starting.')

    setproctitle.setproctitle('A3C Manager')
    args = flag_parser.parse_arguments()

    create_shared_model = model.Model
    init_agent = agent.A3CAgent
    optimizer_type = optimizer_class(args.optimizer)

    start_time = time.time()
    local_start_time_str = \
        time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime(start_time))

    # Seed sources of randomness.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if args.enable_logging:
        from tensorboardX import SummaryWriter
        log_dir =  'runs/' + args.title + '-' + local_start_time_str
        log_writer = SummaryWriter(log_dir=log_dir)

    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn', force=True)

    print('=> Creating the shared model and optimizer.')
    shared_model = create_shared_model(args)

    shared_model.share_memory()
    optimizer = optimizer_type(
        filter(lambda p: p.requires_grad, shared_model.parameters()), 
        args)
    optimizer.share_memory()

    if args.resume:
        shared_model.load_state_dict(torch.load('./models/last_model'))
    elif args.load_model != '':
        shared_model.load_state_dict(torch.load(args.load_model))


    print('=> Creating the agents.')
    processes = []

    end_flag = mp.Value(ctypes.c_bool, False)

    train_res_queue = mp.Queue()
    for rank in range(0, args.workers):
        p = mp.Process(target=train.train, args=(
            rank, args, create_shared_model, 
            shared_model, init_agent,
            optimizer, train_res_queue, end_flag))
        p.start()
        processes.append(p)
        print('* Agent created.')
        time.sleep(0.1)

    train_total_ep = 0
    n_frames = 0

    train_thin = args.train_thin
    train_scalars = ScalarMeanTracker()

    success_tracker = []
    
    try:
        while train_total_ep < args.num_train_episodes:
            train_result = train_res_queue.get()
            train_scalars.add_scalars(train_result)
            train_total_ep += 1
            n_frames += train_result["ep_length"]
            if train_total_ep % 100 == 0:
                torch.save(shared_model.state_dict(), './models/model_{}'.format(train_total_ep))
            if args.enable_logging and train_total_ep % train_thin == 0:
                log_writer.add_scalar("n_frames", n_frames, train_total_ep)
                tracked_means = train_scalars.pop_and_reset()
                for k in tracked_means:
                    log_writer.add_scalar(
                        k + "/train", tracked_means[k], train_total_ep
                    )
            success_tracker.append(train_result["success"])
            if len(success_tracker) > 100:
                success_tracker.pop(0)
            if len(success_tracker) >= 100 and sum(success_tracker) / len(success_tracker) > args.train_threshold:
                break
    finally:
        if args.enable_logging:
            log_writer.close()
        end_flag.value = True
        for p in processes:
            time.sleep(0.1)
            p.join()

    torch.save(shared_model.state_dict(), './models/last_model')
Example #14
    def __init__(self,
                 input_source,
                 detector,
                 cfg,
                 opt,
                 mode='image',
                 batchSize=1,
                 queueSize=128):
        self.cfg = cfg
        self.opt = opt
        self.mode = mode
        self.device = opt.device

        if mode == 'image':
            self.img_dir = opt.inputpath
            self.imglist = [
                os.path.join(self.img_dir,
                             im_name.rstrip('\n').rstrip('\r'))
                for im_name in input_source
            ]
            self.datalen = len(input_source)
        elif mode == 'video':
            stream = cv2.VideoCapture(input_source)
            assert stream.isOpened(), 'Cannot capture source'
            self.path = input_source
            self.datalen = int(stream.get(cv2.CAP_PROP_FRAME_COUNT))
            self.fourcc = int(stream.get(cv2.CAP_PROP_FOURCC))
            self.fps = stream.get(cv2.CAP_PROP_FPS)
            self.frameSize = (int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                              int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.videoinfo = {
                'fourcc': self.fourcc,
                'fps': self.fps,
                'frameSize': self.frameSize
            }
            stream.release()

        self.detector = detector
        self.batchSize = batchSize
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        self._input_size = cfg.DATA_PRESET.IMAGE_SIZE
        self._output_size = cfg.DATA_PRESET.HEATMAP_SIZE

        self._sigma = cfg.DATA_PRESET.SIGMA

        if cfg.DATA_PRESET.TYPE == 'simple':
            self.transformation = SimpleTransform(
                self,
                scale_factor=0,
                input_size=self._input_size,
                output_size=self._output_size,
                rot=0,
                sigma=self._sigma,
                train=False,
                add_dpg=False,
                gpu_device=self.device)

        # initialize the queue used to store data
        """
        image_queue: the buffer storing pre-processed images for object detection
        det_queue: the buffer storing human detection results
        pose_queue: the buffer storing post-processed cropped human image for pose estimation
        """
        if opt.sp:
            self._stopped = False
            self.image_queue = Queue(maxsize=queueSize)
            self.det_queue = Queue(maxsize=10 * queueSize)
            self.pose_queue = Queue(maxsize=10 * queueSize)
        else:
            self._stopped = mp.Value('b', False)
            self.image_queue = mp.Queue(maxsize=queueSize)
            self.det_queue = mp.Queue(maxsize=10 * queueSize)
            self.pose_queue = mp.Queue(maxsize=10 * queueSize)
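Because the constructor switches between queue.Queue and mp.Queue (and between a plain bool and an mp.Value for the stop flag) depending on opt.sp, later methods usually go through small helpers so both modes look the same. A sketch with illustrative method names, not necessarily those of the original class:

    def stopped(self):
        # mp.Value exposes .value; the single-process flag is a plain bool
        return self._stopped if self.opt.sp else self._stopped.value

    def wait_and_put(self, q, item):
        if not self.stopped():
            q.put(item)

    def wait_and_get(self, q):
        if not self.stopped():
            return q.get()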
Example #15
def test(cfg):
    if cfg.ckpt is not None:
        if not os.path.exists(cfg.ckpt):
            print('Invalid ckpt path:', cfg.ckpt)
            exit(1)
        ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)
        print(cfg.ckpt, 'loaded')
        loaded_cfg = ckpt['cfg'].__dict__

        del loaded_cfg['num_workers']
        del loaded_cfg['test_set']
        del loaded_cfg['log_dir']
        del loaded_cfg['prediction_file']
        del loaded_cfg['num_episodes']
        del loaded_cfg['use_pretrain']
        del loaded_cfg['memory_num']
        del loaded_cfg['memory_len']
        del loaded_cfg['prepro_dir']
        del loaded_cfg['debug']

        cfg.__dict__.update(loaded_cfg)
        cfg.model = cfg.model.upper()

        print('Merged Config')
        pprint(cfg.__dict__)

        os.makedirs(cfg.log_dir)

        model = create_a3c_model(cfg)
        model.load_state_dict(ckpt['model'])
    else:
        os.makedirs(cfg.log_dir)
        model = create_a3c_model(cfg)

        print(
            "LOAD pretrain parameter for BERT from ./pretrain/pytorch_model.bin..."
        )
        pretrain_param = torch.load('./pretrain/pytorch_model.bin',
                                    map_location=lambda storage, loc: storage)
        missing_keys = []
        unexpected_keys = []
        error_msgs = []
        new_pretrain_param = pretrain_param.copy()
        for k, v in pretrain_param.items():
            new_key = 'model.' + k
            new_pretrain_param[new_key] = v
            del new_pretrain_param[k]
        pretrain_param = new_pretrain_param.copy()

        metadata = getattr(pretrain_param, '_metadata', None)
        if metadata is not None:
            pretrain_param._metadata = metadata

        def load(module, prefix=''):
            local_metadata = {} if metadata is None else metadata.get(
                prefix[:-1], {})
            module._load_from_state_dict(pretrain_param, prefix,
                                         local_metadata, True, missing_keys,
                                         unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        load(model, prefix='')
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))

    tokenizer = BertTokenizer.from_pretrained(cfg.bert_model)

    env = Environment(cfg, cfg.test_set, tokenizer, shuffle=False)
    print(env.dataset.path, 'loaded')

    queue = mp.Queue()

    procs = []
    for i in range(cfg.num_workers):
        p = TestWorker(cfg, i, model, env, queue, tokenizer)
        if cfg.debug:
            p.run()
        else:
            p.start()
        procs.append(p)

    results = []
    for p in procs:
        while True:
            running = p.is_alive()
            if not queue.empty():
                result = queue.get()
                results.append(result)
            else:
                if not running:
                    break

    for p in procs:
        p.join()

    exact_list = []
    f1_list = []
    full_action = [0 for _ in range(cfg.memory_num)]
    full_solvable = []
    id_list = []
    for i in range(len(results)):
        id_list.append(results[i]['doc'])
        full_solvable.append(results[i]['solvable'])
        exact_list.append(results[i]['exact'])
        f1_list.append(results[i]['f1'])
        for j in range(cfg.memory_num):
            full_action[j] += results[i]['actions'][j]
    qa_list = list(
        set(['_'.join(doc_id.split('_')[:-1]) for doc_id in id_list]))
    answers = dict()
    for qa_id in qa_list:
        answers[qa_id] = ('', -100000000)

    for i in range(len(results)):
        qa_id = '_'.join(id_list[i].split('_')[:-1])
        score = results[i]['score']
        answer = results[i]['answer']

        if answers[qa_id][1] < score:
            answers[qa_id] = (answer, score)

    for qa_id in answers.keys():
        answers[qa_id] = answers[qa_id][0]

    key_list = list(set(answers.keys()))
    solvables = [[] for i in range(len(key_list))]
    for i in range(len(full_solvable)):
        id_ = '_'.join(id_list[i].split('_')[:-1])
        solv = full_solvable[i]
        idx = key_list.index(id_)
        solvables[idx].append(solv)

    for i in range(len(solvables)):
        if 1 in solvables[i]:
            solvables[i] = 1
        else:
            solvables[i] = 0

    with open(cfg.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)
    results = get_score_from_trivia(cfg, cfg.test_set)
    exact = results['exact_match']
    f1 = results['f1']

    total_action_num = 0
    for i in range(cfg.memory_num):
        total_action_num += full_action[i]
    avg_action = [0 for _ in range(cfg.memory_num)]
    for i in range(cfg.memory_num):
        avg_action[i] += full_action[i] / total_action_num
    print('All processes are finished.')
    print('ExactMatch: %.2f' % (sum(exact_list) / len(exact_list) * 100))
    print('F1score: %.2f' % (sum(f1_list) / len(f1_list) * 100))
    print()
    print('ExactMatch: %.2f' % (exact * 100))
    print('F1score: %.2f' % (f1 * 100))
    print()
    print('Solvables: %.2f' % (sum(full_solvable) / len(full_solvable) * 100))
    print('Non duplicated Solvables: %.2f' %
          (sum(solvables) / len(solvables) * 100))
    print()
    print('Total number of actions: %d' % (total_action_num))
    for i in range(cfg.memory_num):
        print('Action %d : %.2f' % (i, avg_action[i] * 100))
Example #16
if __name__ == "__main__":

    env = gym.make(ENV_NAME)
    #env.seed(2)
    MPS = 2 # meta population size
    meta_population = [Model(env.observation_space.shape[0],env.action_space.n, idx=i) for i in range(MPS)]

    # create arcive for models
    archive = []
    writer = SummaryWriter()
    iterations = 300 #1500 # max iterations to run

    delta_reward_buffer = deque(maxlen=10)  # buffer to store the reward gradients to see if rewards stay constant over a defined time horizon ~> local min
    W = 1

    params_queues = [mp.Queue(maxsize=1) for _ in range(PROCESSES_COUNT)]
    rewards_queue = mp.Queue(maxsize=ITERS_PER_UPDATE)
    workers = []

    for idx, params_queue in enumerate(params_queues):
        proc = mp.Process(target=worker_func, args=(idx, params_queue, rewards_queue, NOISE_STD))
        proc.start()
        workers.append(proc)

    print("All started!")
    step_idx = 0
    reward_history = []
    reward_max =[]
    reward_min = []
    reward_std = []
Example #17
import torch
from torch.optim import Adam
import torch.nn as nn
import time
from .atari import create_atari_env
from .models import Agent
from tqdm import tqdm
import torch.multiprocessing as mp

LEARNING_RATE = 1e-4
WORKERS = 4
JOB_BLOCK = 50
ACTOR_WEIGHT = 0.5
MAX_PLAY_STEPS = 20

train_progress_queue = mp.Queue(1000)


def play(env,
         agent,
         first_state,
         max_steps=20,
         render=False,
         action_code=(0, 2, 3)):
    done = False
    steps = 0
    state = first_state
    trajectory = {
        'states': [],
        'rewards': [],
        'actions_logprob': [],
Example #18
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.scale = loader.scale
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout
        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.index_queues = [
                multiprocessing.Queue() for _ in range(self.num_workers)
            ]
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}

            base_seed = torch.LongTensor(1).random_()[0]
            self.workers = [
                multiprocessing.Process(
                    target=_ms_loop,
                    args=(self.dataset, self.index_queues[i],
                          self.worker_result_queue, self.collate_fn,
                          self.scale, base_seed + i, self.worker_init_fn, i))
                for i in range(self.num_workers)
            ]

            if self.pin_memory or self.timeout > 0:
                self.data_queue = queue.Queue()
                if self.pin_memory:
                    maybe_device_id = torch.cuda.current_device()
                else:
                    # do not initialize cuda context if not necessary
                    maybe_device_id = None
                self.worker_manager_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(
                        self.worker_result_queue,
                        self.data_queue,
                        maybe_device_id,
                        self.done_event,
                    ))
                self.worker_manager_thread.daemon = True
                self.worker_manager_thread.start()
            else:
                self.data_queue = self.worker_result_queue

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
Example #19
def main():
    parser = argparse.ArgumentParser(description='Train Hyperbolic Embeddings')
    parser.add_argument('-checkpoint',
                        default='/tmp/hype_embeddings.pth',
                        help='Where to store the model checkpoint')
    parser.add_argument('-dset',
                        type=str,
                        required=True,
                        help='Dataset identifier')
    parser.add_argument('-dim',
                        type=int,
                        default=20,
                        help='Embedding dimension')
    parser.add_argument('-manifold',
                        type=str,
                        default='poincare',
                        choices=MANIFOLDS.keys(),
                        help='Embedding manifold')
    parser.add_argument('-lr', type=float, default=1000, help='Learning rate')
    parser.add_argument('-epochs',
                        type=int,
                        default=100,
                        help='Number of epochs')
    parser.add_argument('-batchsize', type=int, default=512, help='Batchsize')
    parser.add_argument('-negs',
                        type=int,
                        default=50,
                        help='Number of negatives')
    parser.add_argument('-burnin',
                        type=int,
                        default=20,
                        help='Epochs of burn in')
    parser.add_argument('-dampening',
                        type=float,
                        default=0.75,
                        help='Sample dampening during burnin')
    parser.add_argument('-ndproc',
                        type=int,
                        default=4,
                        help='Number of data loading processes')
    parser.add_argument('-eval_each',
                        type=int,
                        default=1,
                        help='Run evaluation every n-th epoch')
    parser.add_argument('-fresh',
                        action='store_true',
                        default=False,
                        help='Override checkpoint')
    parser.add_argument('-debug',
                        action='store_true',
                        default=False,
                        help='Print debugging output')
    parser.add_argument('-gpu',
                        default=0,
                        type=int,
                        help='Which GPU to run on (-1 for no gpu)')
    parser.add_argument('-sym',
                        action='store_true',
                        default=False,
                        help='Symmetrize dataset')
    parser.add_argument('-maxnorm',
                        '-no-maxnorm',
                        default='500000',
                        action=Unsettable,
                        type=int)
    parser.add_argument('-sparse',
                        default=False,
                        action='store_true',
                        help='Use sparse gradients for embedding table')
    parser.add_argument('-burnin_multiplier', default=0.01, type=float)
    parser.add_argument('-neg_multiplier', default=1.0, type=float)
    parser.add_argument('-quiet', action='store_true', default=False)
    parser.add_argument('-lr_type',
                        choices=['scale', 'constant'],
                        default='constant')
    parser.add_argument('-train_threads',
                        type=int,
                        default=1,
                        help='Number of threads to use in training')

    parser.add_argument('-logfolder',
                        type=str,
                        default='./log/',
                        help='Path of log folder with a back slash')

    opt = parser.parse_args()

    # set up debugging and logging
    log_file = opt.logfolder + "logging.txt"
    log_level = logging.DEBUG if opt.debug else logging.INFO
    log = logging.getLogger('lorentz')
    logging.basicConfig(level=log_level,
                        format='%(message)s',
                        filename=log_file)

    if opt.gpu >= 0 and opt.train_threads > 1:
        opt.gpu = -1
        log.warning(
            f'Specified hogwild training with GPU, defaulting to CPU...')

    # set default tensor type
    th.set_default_tensor_type('torch.DoubleTensor')
    # set device
    device = th.device(f'cuda:{opt.gpu}' if opt.gpu >= 0 else 'cpu')

    # select manifold to optimize on
    manifold = MANIFOLDS[opt.manifold](debug=opt.debug, max_norm=opt.maxnorm)
    opt.dim = manifold.dim(opt.dim)

    if 'csv' in opt.dset:
        log.info('Using edge list dataloader')
        idx, objects, weights = load_edge_list(opt.dset, opt.sym)
        model, data, model_name, conf = initialize(manifold,
                                                   opt,
                                                   idx,
                                                   objects,
                                                   weights,
                                                   sparse=opt.sparse)
    else:
        log.info('Using adjacency matrix dataloader')
        dset = load_adjacency_matrix(opt.dset, 'hdf5')
        log.info('Setting up dataset...')
        data = AdjacencyDataset(dset,
                                opt.negs,
                                opt.batchsize,
                                opt.ndproc,
                                opt.burnin > 0,
                                sample_dampening=opt.dampening)
        model = Embedding(data.N, opt.dim, manifold, sparse=opt.sparse)
        objects = dset['objects']

    # set burnin parameters
    data.neg_multiplier = opt.neg_multiplier
    train._lr_multiplier = opt.burnin_multiplier

    # Build config string for log
    log.info(f'json_conf: {json.dumps(vars(opt))}')

    if opt.lr_type == 'scale':
        opt.lr = opt.lr * opt.batchsize

    # setup optimizer
    optimizer = RiemannianSGD(model.optim_params(manifold), lr=opt.lr)

    # setup checkpoint
    checkpoint = LocalCheckpoint(opt.checkpoint,
                                 include_in_all={
                                     'conf': vars(opt),
                                     'objects': objects
                                 },
                                 start_fresh=opt.fresh)

    # get state from checkpoint
    state = checkpoint.initialize({'epoch': 0, 'model': model.state_dict()})
    model.load_state_dict(state['model'])
    opt.epoch_start = state['epoch']

    adj = {}
    for inputs, _ in data:
        for row in inputs:
            x = row[0].item()
            y = row[1].item()
            if x in adj:
                adj[x].add(y)
            else:
                adj[x] = {y}

    controlQ, logQ = mp.Queue(), mp.Queue()
    control_thread = mp.Process(target=async_eval,
                                args=(adj, controlQ, logQ, opt))
    control_thread.start()

    # control closure
    def control(model, epoch, elapsed, loss):
        """
        Control thread to evaluate embedding
        """
        lt = model.w_avg if hasattr(model, 'w_avg') else model.lt.weight.data
        manifold.normalize(lt)

        checkpoint.path = f'{opt.checkpoint}.{epoch}'
        checkpoint.save({
            'model': model.state_dict(),
            'embeddings': lt,
            'epoch': epoch,
            'manifold': opt.manifold,
        })

        controlQ.put((epoch, elapsed, loss, checkpoint.path))

        while not logQ.empty():
            lmsg, pth = logQ.get()
            shutil.move(pth, opt.checkpoint)
            log.info(f'json_stats: {json.dumps(lmsg)}')

    control.checkpoint = True
    model = model.to(device)
    if hasattr(model, 'w_avg'):
        model.w_avg = model.w_avg.to(device)
    if opt.train_threads > 1:
        threads = []
        model = model.share_memory()
        args = (device, model, data, optimizer, opt, log)
        kwargs = {'ctrl': control, 'progress': not opt.quiet}
        for i in range(opt.train_threads):
            kwargs['rank'] = i
            threads.append(
                mp.Process(target=train.train, args=args, kwargs=kwargs))
            threads[-1].start()
        [t.join() for t in threads]
    else:
        train.train(device,
                    model,
                    data,
                    optimizer,
                    opt,
                    log,
                    ctrl=control,
                    progress=not opt.quiet)
    controlQ.put(None)
    control_thread.join()
    while not logQ.empty():
        lmsg, pth = logQ.get()
        shutil.move(pth, opt.checkpoint)
        log.info(f'json_stats: {json.dumps(lmsg)}')
Example #20
    obs = share_memory(np.zeros(dimensions + env.observation_space.shape))
    actions = share_memory(np.zeros(dimensions + env.action_space.shape))
    logprobs = share_memory(np.zeros(dimensions))
    rewards = share_memory(np.zeros(dimensions))
    dones = share_memory(np.zeros(dimensions))
    values = share_memory(np.zeros(dimensions))
    traj_availables = share_memory(np.ones(dimensions))
    raise

    actor_processes = []
    data_processor_processes = []
    ctx = mp.get_context("forkserver")
    stats_queue = MpQueue()
    # stats_queue = mp.Queue(1000)
    rollouts_queue = mp.Queue(1000)
    data_process_queue = mp.Queue(1000)
    data_process_back_queues = []

    for i in range(args.num_rollout_workers):
        actor = mp.Process(
            target=act,
            args=[
                args, experiment_name, i, lock, stats_queue, device, obs,
                actions, logprobs, rewards, dones, values, traj_availables
            ],
        )
        actor.start()
        actor_processes.append(actor)

    # learner = ctx.Process(
Example #21
    def train(self):
        print('A3C-based stock trading system v0.0.0_1 (Pong)')

        mp.set_start_method('spawn')
        os.environ['OMP_NUM_THREADS'] = "1"
        '''
        parser = argparse.ArgumentParser()
        parser.add_argument("--cuda", default=False,
                            action="store_true", help="Enable cuda")
        parser.add_argument("-n", "--name", required=True,
                            help="Name of the run")
        args = parser.parse_args()
        '''
        device = 'cuda:0'
        run_name = 'a3c'

        env, env_val, env_tst = A3cApp.make_env()
        #env = A3cApp.make_env()
        print('shape: {0}; n: {1};'.format(env.observation_space.shape,
                                           env.action_space.n))
        net = A2cConv1dModel((1, env.observation_space.shape[0]),
                             env.action_space.n)  #.to(device)
        net.share_memory()
        optimizer = optim.Adam(net.parameters(),
                               lr=AppConfig.a3c_config['LEARNING_RATE'],
                               eps=1e-3)

        train_queue = mp.Queue(maxsize=AppConfig.a3c_config['PROCESSES_COUNT'])
        data_proc_list = []
        for proc_idx in range(AppConfig.a3c_config['PROCESSES_COUNT']):
            proc_name = f"-a3c-grad_pong_{run_name}#{proc_idx}"
            p_args = (proc_name, net, device, train_queue)
            data_proc = mp.Process(target=A3cApp.grads_func, args=p_args)
            data_proc.start()
            data_proc_list.append(data_proc)

        batch = []
        step_idx = 0
        grad_buffer = None
        try:
            while True:
                train_entry = train_queue.get()
                if train_entry is None:
                    break
                step_idx += 1
                if grad_buffer is None:
                    grad_buffer = train_entry
                else:
                    for tgt_grad, grad in zip(grad_buffer, train_entry):
                        tgt_grad += grad
                if step_idx % AppConfig.a3c_config['TRAIN_BATCH'] == 0:
                    net.zero_grad()  #yt
                    for param, grad in zip(net.parameters(), grad_buffer):
                        v1 = torch.FloatTensor(grad).to(device)
                        if param.grad is not None:
                            param.grad = torch.FloatTensor(grad).to(device)

                    nn_utils.clip_grad_norm_(net.parameters(),
                                             AppConfig.a3c_config['CLIP_GRAD'])
                    optimizer.step()
                    grad_buffer = None
        finally:
            for p in data_proc_list:
                p.terminate()
                p.join()
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    args = parser.parse_args()
    device = "cuda" if args.cuda else "cpu"

    env = make_env()
    net = common.AtariA2C(env.observation_space.shape,
                          env.action_space.n).to(device)
    net.share_memory()

    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    train_queue = mp.Queue(maxsize=PROCESSES_COUNT)
    data_proc_list = []
    for proc_idx in range(PROCESSES_COUNT):
        proc_name = f"-a3c-grad_pong_{args.name}#{proc_idx}"
        p_args = (proc_name, net, device, train_queue)
        data_proc = mp.Process(target=grads_func, args=p_args)
        data_proc.start()
        data_proc_list.append(data_proc)

    batch = []
    step_idx = 0
    grad_buffer = None

    try:
        while True:
            train_entry = train_queue.get()
Example #23
    td3_trainer = TD3_Trainer(replay_buffer, hidden_dim=hidden_dim, policy_target_update_interval=policy_target_update_interval, action_range=action_range)


    if args.train:
        td3_trainer.load_model(model_path)
        td3_trainer.q_net1.share_memory()
        td3_trainer.q_net2.share_memory()
        td3_trainer.target_q_net1.share_memory()
        td3_trainer.target_q_net2.share_memory()
        td3_trainer.policy_net.share_memory()
        td3_trainer.target_policy_net.share_memory()
        ShareParameters(td3_trainer.q_optimizer1)
        ShareParameters(td3_trainer.q_optimizer2)
        ShareParameters(td3_trainer.policy_optimizer)

        rewards_queue = mp.Queue()  # used to get rewards from all processes and plot the curve

        num_workers = 2  # or: mp.cpu_count()
        processes=[]
        rewards=[]

        for i in range(num_workers):
            process = Process(target=worker, args=(i, td3_trainer, rewards_queue, replay_buffer, max_episodes, max_steps, batch_size, explore_steps, \
            update_itr, explore_noise_scale, eval_noise_scale, reward_scale, DETERMINISTIC, hidden_dim, model_path))  # the args contain shared and not shared
            process.daemon=True  # all processes closed when the main stops
            processes.append(process)
Example #24
    def train(
            self,
            args: Namespace,
            env_builder: Callable[[], Env],
            algo: RLAlgo
        ) -> None:
        """
        Trains the algorithm on the environment given using the argument
        namespace as parameters.
        
        "args" must have the following attributes:
        {
            experiment_path (str): The path to save experiment results and
                models.
            render (bool): Render the environment.
            steps_per_episode (Optional[int]): The number of steps in each
                episode.
            silent (bool): Will run without standard output from agents.
            action_mask (Optional[Tuple[bool, ...]]): The action mask to mask or
                unmask.
            masked (Optional[bool]): If an action mask is given, should be True
                if the returned agent actions are already masked.
            default_action (Optional[Tuple[float, ...]]): If an action mask is
                given and going from masked -> unmasked, this should be the
                default values for the actions.
            decay (float): The gamma decay for the target Q-values.
            n_steps (int): The number of decay steps.
            num_agents (int): The number of agents to run concurrently, 0 is
                single process.
            model_sync_interval (int): The number of training steps between
                agent model syncs, if 0, all processes will share the same
                model.
            num_prefetch_batches (int): The number of batches to prefetch to the
                learner in distributed learning.
            local_batch_size (int): The number of experiences the agent sends at
                once in distributed learning.
            vectorized (bool): If the environment is vectorized.
            recurrent (bool),Make the network recurrent (using LSTM)
            play (bool): Runs the environment using the model instead of
                training.
            exploration (str, ["rnd", "munchausen"]): The type of exploration to
                use.
            episodes (int): The number of episodes to play for if playing.
            er_capacity (int): The capacity of the experience replay buffer.
            batch_size (int): The batch size of the training set.
            training_steps (int): The number of training steps to train for.
            start_size (int): The size of the replay buffer before training.
            er_alpha (float): The alpha value for PER.
            er_beta (float): The beta value for PER.
            er_beta_increment (float): The increment of the beta value on each
                sample for PER.
            er_epsilon (float): The epsilon value for PER.
            burn_in_length (int): If recurrent, the number of burn in samples
                for R2D2.
            sequence_length (int): If recurrent, the length of the sequence to
                train on.
            max_factor (int): If recurrent, factor of max priority to mean
                priority for R2D2.
        }

        Args:
            args: The namespace of arguments for training.
            env_builder: The nullary function to create the environment.
            algo: The algorithm to train.
        """
        logs_path = None
        save_path = None

        if args.experiment_path is not None:
            logs_path = Path(args.experiment_path, "logs")
            logs_path.mkdir(parents=True, exist_ok=True)
            logs_path = str(logs_path)

            save_path = Path(args.experiment_path, "models")
            save_path.mkdir(parents=True, exist_ok=True)
            save_path = str(save_path)

        # Create agent class
        agent_builder = partial(
            OffPolicyAgent, algo=algo, render=args.render, silent=args.silent
        )

        steps_per_episode = (
            args.steps_per_episode if "steps_per_episode" in args else None
        )

        agent_builder = compose(
            agent_builder,
            partial(TimeLimitAgent, max_steps=steps_per_episode)
        )

        if not args.play:
            # Experience replay
            # Won't increment in multiple processes to keep it consistent
            # across actors
            er_beta_increment = (
                args.er_beta_increment if args.num_agents == 0 else 0
            )

            if args.recurrent:
                experience_replay_func = partial(
                    TorchR2D2, alpha=args.er_alpha, beta=args.er_beta,
                    beta_increment=er_beta_increment, epsilon=args.er_epsilon,
                    max_factor=args.max_factor
                )
            else:
                experience_replay_func = partial(
                    TorchPER, alpha=args.er_alpha, beta=args.er_beta,
                    beta_increment=er_beta_increment, epsilon=args.er_epsilon
                )

            if args.num_agents > 0:
                recv_pipes = []
                send_pipes = []

                prestart_func = None

                if args.model_sync_interval == 0:
                    self._start_training(algo, args)
                    algo.share_memory()

                    recv_pipes = [None] * args.num_agents
                else:
                    prestart_func = partial(
                        self._start_training, algo=algo, args=args
                    )

                    # Force CPU for now to avoid re-instantiating cuda in
                    # subprocesses
                    algo.device = torch.device("cpu")
                    algo = algo.to(algo.device)

                    for i in range(args.num_agents):
                        param_pipe = mp.Pipe(duplex=False)

                        recv_pipes.append(param_pipe[0])
                        send_pipes.append(param_pipe[1])

                # Just needed to get the error/priority calculations
                dummy_experience_replay = experience_replay_func(capacity=1)

                # Must come before the other wrapper since there are infinite
                # recursion errors
                # TODO come up with a better way to implement wrappers
                agent_builder = compose(
                    agent_builder,
                    partial_iterator(
                        QueueAgent,
                        agent_id=(iter(range(args.num_agents)), True),
                        experience_replay=(dummy_experience_replay, False),
                        param_pipe=(iter(recv_pipes), True)
                    )
                )

        agent_builder = compose(
            agent_builder,
            partial(TorchRLAgent, batch_state=not args.vectorized)
        )
        
        if "action_mask" in args and args.action_mask:
            # TODO: Will have to add an action mask wrapper later
            if args.masked:
                agent_builder = compose(
                    agent_builder,
                    partial(
                        UnmaskedActionAgent, action_mask=args.action_mask,
                        default_action=args.default_action
                    )
                )

        agent_builder = compose(agent_builder, TorchOffPolicyAgent)

        if args.recurrent:
            agent_builder = compose(
                agent_builder, SequenceInputAgent, TorchRecurrentAgent
            )

        if args.play:
            algo = algo.to(args.device)
            algo.eval()

            agent_logger = (
                None if logs_path is None
                else TensorboardLogger(logs_path + "/play-agent")
            )

            agent = agent_builder(env=env_builder(), logger=agent_logger)
            agent.play(args.episodes)
        else:
            if args.exploration == "rnd":
                agent_builder = compose(agent_builder, IntrinsicRewardAgent)
            elif args.exploration == "munchausen":
                agent_builder = compose(
                    agent_builder, partial(MunchausenAgent, alpha=0.9)
                )

            algo.train()

            if args.recurrent:
                agent_builder = compose(
                    agent_builder,
                    partial(
                        ExperienceSequenceAgent,
                        sequence_length=(
                            args.burn_in_length + args.sequence_length
                        ),
                        overlap=args.burn_in_length
                    )
                )

            experience_replay = experience_replay_func(
                capacity=args.er_capacity
            )

            base_agent_logs_path = None
            if logs_path is not None:
                base_agent_logs_path = logs_path + "/train-agent"

            # Single process
            if args.num_agents == 0:
                self._start_training(algo, args)

                agent_logger = None
                if base_agent_logs_path is not None:
                    agent_logger = TensorboardLogger(base_agent_logs_path)

                agent = agent_builder(env=env_builder(), logger=agent_logger)

                agent.train(
                    args.episodes, 1, args.discount, args.n_steps,
                    experience_replay, args.batch_size, args.start_size,
                    save_path, args.save_interval
                )

            # Multiple processes
            else:
                done_event = mp.Event()

                # Number of agents + worker + learner
                queue_barrier = mp.Barrier(args.num_agents + 2)

                agent_queue = mp.Queue(
                    maxsize=args.num_prefetch_batches * args.num_agents * 4
                )
                sample_queue = mp.Queue(maxsize=args.num_prefetch_batches)
                priority_queue = mp.Queue(maxsize=args.num_prefetch_batches)

                learner_args = (dummy_experience_replay,)
                learner_train_args = (
                    algo, done_event, queue_barrier, args.training_steps,
                    sample_queue, priority_queue, send_pipes,
                    args.model_sync_interval, save_path, args.save_interval
                )

                worker = TorchApexWorker()
                worker_args = (
                    experience_replay, done_event, queue_barrier, agent_queue,
                    sample_queue, priority_queue, args.batch_size,
                    args.start_size
                )

                agent_builders = []
                agent_train_args = []
                agent_train_kwargs = []

                for i in range(args.num_agents):
                    agent_logger = None
                    if base_agent_logs_path is not None:
                        agent_logs_path = (
                            base_agent_logs_path + "-" + str(i + 1)
                        )
                        agent_logger = TensorboardLogger(agent_logs_path)

                    agent_builders.append(
                        partial(agent_builder, logger=agent_logger)
                    )

                    agent_train_args.append((
                        1, args.local_batch_size, args.discount, args.n_steps,
                        agent_queue, queue_barrier
                    ))
                    agent_train_kwargs.append({
                        "exit_condition": done_event.is_set
                    })

                runner = ApexRunner(done_event)
                runner.start(
                    learner_args, learner_train_args, worker, worker_args,
                    env_builder, agent_builders, agent_train_args,
                    agent_train_kwargs, prestart_func
                )
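
As a reference for the queue wiring above, here is a minimal, self-contained sketch (illustrative names only, not the APIs used in this example) of the Ape-X-style data flow: actor processes push experience into a shared queue, a replay worker batches it for the learner, and the learner sends updated priorities back.

import multiprocessing as mp
import queue


def actor(agent_queue, done_event):
    # Push (experience, initial_priority) pairs until training is finished.
    step = 0
    while not done_event.is_set() and step < 200:
        agent_queue.put(({"step": step}, 1.0))
        step += 1


def replay_worker(agent_queue, sample_queue, priority_queue, done_event, batch_size):
    # Accumulate experience and hand fixed-size batches to the learner.
    buffer = []
    while not done_event.is_set():
        try:
            experience, _priority = agent_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        buffer.append(experience)
        if len(buffer) % batch_size == 0:
            indices = list(range(len(buffer) - batch_size, len(buffer)))
            sample_queue.put((indices, [buffer[i] for i in indices]))
        while not priority_queue.empty():
            priority_queue.get()  # updated priorities would be applied here


def learner(sample_queue, priority_queue, done_event, training_steps):
    # Consume batches, "train", and send updated priorities back.
    for _ in range(training_steps):
        indices, _batch = sample_queue.get()
        priority_queue.put((indices, [1.0] * len(indices)))
    done_event.set()


if __name__ == "__main__":
    agent_q, sample_q, priority_q = mp.Queue(), mp.Queue(), mp.Queue()
    done = mp.Event()
    procs = [
        mp.Process(target=actor, args=(agent_q, done)),
        mp.Process(target=replay_worker, args=(agent_q, sample_q, priority_q, done, 8)),
        mp.Process(target=learner, args=(sample_q, priority_q, done, 5)),
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()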
Exemple #25
0
    def __init__(self, loader):
        super(_MultiProcessingDataLoaderIter, self).__init__(loader)

        assert self.num_workers > 0

        self.worker_init_fn = loader.worker_init_fn
        self.worker_queue_idx_cycle = itertools.cycle(range(self.num_workers))
        self.worker_result_queue = multiprocessing.Queue()
        self.worker_pids_set = False
        self.shutdown = False
        self.send_idx = 0  # idx of the next task to be sent to workers
        self.rcvd_idx = 0  # idx of the next task to be returned in __next__
        # information about data not yet yielded, i.e., tasks w/ indices in range [rcvd_idx, send_idx).
        # map: task idx => - (worker_id,)        if data isn't fetched (outstanding)
        #                  \ (worker_id, data)   if data is already fetched (out-of-order)
        self.task_info = {}
        self.tasks_outstanding = 0  # always equal to count(v for v in task_info.values() if len(v) == 1)
        self.workers_done_event = multiprocessing.Event()

        self.index_queues = []
        self.workers = []
        # A list of booleans representing whether each worker still has work to
        # do, i.e., not having exhausted its iterable dataset object. It always
        # contains all `True`s if not using an iterable-style dataset
        # (i.e., if kind != Iterable).
        self.workers_status = []
        for i in range(self.num_workers):
            index_queue = multiprocessing.Queue()
            # index_queue.cancel_join_thread()
            w = multiprocessing.Process(
                target=_utils.worker._worker_loop,
                args=(self.dataset_kind, self.dataset, index_queue,
                      self.worker_result_queue, self.workers_done_event,
                      self.auto_collation, self.collate_fn, self.drop_last,
                      self.base_seed + i, self.worker_init_fn, i,
                      self.num_workers))
            w.daemon = True
            # NB: Process.start() actually take some time as it needs to
            #     start a process and pass the arguments over via a pipe.
            #     Therefore, we only add a worker to self.workers list after
            #     it started, so that we do not call .join() if program dies
            #     before it starts, and __del__ tries to join but will get:
            #     AssertionError: can only join a started process.
            w.start()
            self.index_queues.append(index_queue)
            self.workers.append(w)
            self.workers_status.append(True)

        if self.pin_memory:
            self.pin_memory_thread_done_event = threading.Event()
            self.data_queue = queue.Queue()
            pin_memory_thread = threading.Thread(
                target=_utils.pin_memory._pin_memory_loop,
                args=(self.worker_result_queue, self.data_queue,
                      torch.cuda.current_device(),
                      self.pin_memory_thread_done_event))
            pin_memory_thread.daemon = True
            pin_memory_thread.start()
            # Similar to workers (see comment above), we only register
            # pin_memory_thread once it is started.
            self.pin_memory_thread = pin_memory_thread
        else:
            self.data_queue = self.worker_result_queue

        _utils.signal_handling._set_worker_pids(
            id(self), tuple(w.pid for w in self.workers))
        _utils.signal_handling._set_SIGCHLD_handler()
        self.worker_pids_set = True

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._try_put_index()
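
The constructor above primes the pipeline by queueing 2 * num_workers index batches before any result is read. A stripped-down sketch of the same worker/queue pattern (the real _utils.worker._worker_loop takes more arguments; this stand-in is only illustrative):

import multiprocessing
import queue


def simple_worker_loop(dataset, index_queue, result_queue, done_event):
    # Simplified stand-in for a DataLoader worker: fetch the requested
    # indices, "collate" them, and return them tagged with the task index.
    while not done_event.is_set():
        try:
            task_idx, batch_indices = index_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        batch = [dataset[i] for i in batch_indices]
        result_queue.put((task_idx, batch))


if __name__ == "__main__":
    dataset = list(range(100))
    index_queue = multiprocessing.Queue()
    result_queue = multiprocessing.Queue()
    done_event = multiprocessing.Event()

    w = multiprocessing.Process(
        target=simple_worker_loop,
        args=(dataset, index_queue, result_queue, done_event))
    w.daemon = True
    w.start()

    # Prime the loop the same way the iterator does: queue work before reading.
    for task_idx in range(2):
        index_queue.put((task_idx, list(range(task_idx * 4, task_idx * 4 + 4))))

    for _ in range(2):
        print(result_queue.get())

    done_event.set()
    w.join()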
Exemple #26
0
                        self.global_episode.value += 1
                        self.lock.release()
                        state_collections = torch.FloatTensor([[]])
                        action_collections = torch.FloatTensor([])
                        reward_collections = torch.FloatTensor([])
                        time.sleep(0.5)
                        break


if __name__ == '__main__':
    env = gym.make('CartPole-v1')
    LEARN = True
    NUMBER = int(mp.cpu_count() / 2)
    global_episode = mp.Value('i', 0)
    MAX_EPISODE = 30000
    queue = mp.Queue()
    lock = mp.Lock()

    policy = PolicyNet(n_state=2 * env.observation_space.shape[0],
                       n_action=env.action_space.n)
    policy.share_memory()

    agent = Agent(policy=policy,
                  n_state=2 * env.observation_space.shape[0],
                  n_action=env.action_space.n,
                  learn=LEARN,
                  queue=queue,
                  global_episode=global_episode)
    workers = [
        Worker(policy=policy,
               worker_id=i,
Exemple #27
0
def ppo_learn(replay_buffer,replay_buffer_reward,env,model,cov_matrix,model_optim):

    #work around the "too many open files" limit hit with multiprocessing; see
    #https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))

    np.random.seed(0)
    current_best_reward = float('-inf')
    global_iteration_counter = 0
    optimization_history_list = []

    while True:

        new_samples = []

        #multiprocessing
        q = mp.Queue(maxsize = C.max_worker)

        sample_counter = 0
        for iteration_index in range(0,int(C.max_new_episode/C.max_worker)+1):
            p_list = []
            for worker in range(0,C.max_worker):
                try:
                    if sample_counter < C.max_new_episode:
                        p = mp.Process(target = roll_out_once.roll_out_once,\
                                                    args = (q,env,model,cov_matrix))
                        p.start()
                        p_list.append(p)
                        sample_counter += 1
                    else:
                        raise Exception("Don't need to start a new process")
                except:
                    pass

            for j in range(len(p_list)):
                res = q.get()
                new_samples.append(res[0])

        # observation_list,action_list,log_prob_action_list,reward_list = \
        # roll_out_once.roll_out_once(env,model,cov_matrix)
        new_sample_reward = []
        for new_sample in new_samples:

            #drop old simulation experience
            if len(replay_buffer) > C.replay_buffer_size:
                drop_index = np.argmin(replay_buffer_reward)
                replay_buffer.pop(drop_index)
                replay_buffer_reward.pop(drop_index)

            #add the new simulation result to the replay buffer
            total_reward = np.sum(new_sample['reward_list'])
            replay_buffer_reward.append(total_reward)
            replay_buffer.append(new_sample)

            new_sample_reward.append(new_sample['reward_list'])

        global_iteration_counter += 1
        print('this is global iteration ',global_iteration_counter)
        print('the current reward is',np.mean(new_sample_reward))
        
        #record the optimization process
        optimization_history_list.append(np.mean(new_sample_reward))
        optimization_history = {}
        optimization_history['objective_history'] = optimization_history_list
        cwd = os.getcwd()
        #cwd = os.path.join(cwd, 'data_folder')
        parameter_file = 'optimization_history.json'
        cwd = os.path.join(cwd,parameter_file)
        with open(cwd, 'w') as statusFile:
            statusFile.write(jsonpickle.encode(optimization_history))

        if np.mean(new_sample_reward) > current_best_reward:
            current_best_reward = np.mean(new_sample_reward)
            #save the neural network model
            cwd = os.getcwd()
            parameter_file = 'pendulum_nn_trained_model.pt'
            cwd = os.path.join(cwd,parameter_file)
            torch.save(model.state_dict(),cwd)


        
        #we can update the model more than once because we are using off-line data
        for update_iteration in range(0,10):
            #sample experience from the replay buffer for training
            # new_replay_buffer_reward = []
            # for entry in replay_buffer_reward:
            #     new_replay_buffer_reward.append(np.log(entry))
            # sample_probability = (np.exp(new_replay_buffer_reward))/np.sum(np.exp(new_replay_buffer_reward)) #apply softmax to the total_reward list
            sampled_off_line_data = []
            for sample_counter in range(0,C.training_batch_size):
                #sampled_index = np.random.choice(np.arange(0, len(replay_buffer)), p=sample_probability.tolist())
                sampled_index = np.random.randint(0,len(replay_buffer))  #high bound is exclusive, so the last entry is reachable
                sampled_off_line_data.append(replay_buffer[sampled_index])

        
            #compute the loss and update model
            #total_loss = torch.tensor([0.0], requires_grad=True)
            total_loss = 0
            model.zero_grad()

            baseline_reward = 0
            #for sample_index in range(0,len(sampled_off_line_data)):
            #    off_line_data = sampled_off_line_data[sample_index]
            #    baseline_reward += np.sum(off_line_data['reward_list'])
            #baseline_reward = baseline_reward/len(sampled_off_line_data)

            for sample_index in range(0,len(sampled_off_line_data)):
                off_line_data = sampled_off_line_data[sample_index]

                actor_log_prob_mean = model(off_line_data['observation_list'])

                dist = MultivariateNormal(actor_log_prob_mean, cov_matrix)
                actor_log_prob = dist.log_prob(off_line_data['action_list'])

                #calculate the ratio for adjusting off-line data
                ratios = torch.exp(actor_log_prob - off_line_data['log_prob_action_list'])
                ratio = torch.prod(ratios)

                #vanilla policy gradient loss
                #vanilla_pg_loss = off_line_data['log_prob_action_list']*np.sum(off_line_data['reward_list'])
                vanilla_pg_loss = torch.sum(actor_log_prob)*(np.sum(off_line_data['reward_list'])-baseline_reward)

                #compute the ppo loss
                temp_loss1 = ratio*vanilla_pg_loss
                temp_loss2 = torch.clamp(ratio,1-C.ppo_clip,1+C.ppo_clip)*vanilla_pg_loss
                total_loss = total_loss - torch.min(temp_loss1,temp_loss2)

            total_loss = total_loss/len(sampled_off_line_data)

            #update the model
            model.zero_grad()
            total_loss.backward()
            model_optim.step()
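
For reference, a tiny worked example (made-up numbers, not taken from the code above) of the clipped surrogate computed in the inner loop: the trajectory importance ratio scales the policy-gradient term and is clamped to [1 - ppo_clip, 1 + ppo_clip] before taking the pessimistic minimum.

import torch

# Made-up numbers for a single trajectory:
old_log_prob = torch.tensor([-1.20, -0.90, -1.05])   # log-probs stored at roll-out time
new_log_prob = torch.tensor([-1.00, -1.10, -0.95])   # log-probs under the current policy
trajectory_return = 3.5                               # np.sum(reward_list) - baseline_reward
ppo_clip = 0.2                                        # plays the role of C.ppo_clip

ratio = torch.exp(new_log_prob.sum() - old_log_prob.sum())      # importance ratio of the trajectory
pg_term = new_log_prob.sum() * trajectory_return                # "vanilla" policy-gradient objective
clipped = torch.clamp(ratio, 1 - ppo_clip, 1 + ppo_clip) * pg_term
loss = -torch.min(ratio * pg_term, clipped)                     # maximize the surrogate => negate
print(float(loss))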
Exemple #28
0
    def _fire_process(self, dataloader, prefetch):
        self.queue = mp.Queue(prefetch)
        self.process = mp.Process(target=_prefetch_generator,
                                  args=(dataloader, self.queue,
                                        self._batchify))
        self.process.start()
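
The _prefetch_generator target is not shown in this example; a plausible counterpart (an assumption, not the actual implementation) iterates the dataloader inside the child process and pushes prepared batches into the bounded queue, which blocks once `prefetch` batches are waiting:

def _prefetch_generator(dataloader, out_queue, batchify):
    # Hypothetical body: runs in the child process started by _fire_process.
    # out_queue was created with mp.Queue(prefetch), so put() blocks once
    # `prefetch` batches are already queued, giving natural back-pressure.
    for batch in dataloader:
        out_queue.put(batchify(batch))
    out_queue.put(None)  # sentinel: tells the consumer the epoch is finished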
Exemple #29
0
    def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory and torch.cuda.is_available()
        self.timeout = loader.timeout

        self.sample_iter = iter(self.batch_sampler)

        base_seed = torch.LongTensor(1).random_().item()

        if self.num_workers > 0:
            self.worker_init_fn = loader.worker_init_fn
            self.worker_queue_idx = 0
            self.worker_result_queue = multiprocessing.Queue()
            self.batches_outstanding = 0
            self.worker_pids_set = False
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}
            self.done_event = multiprocessing.Event()

            self.index_queues = []
            self.workers = []
            for i in range(self.num_workers):
                index_queue = multiprocessing.Queue()
                w = multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, index_queue, self.worker_result_queue,
                          self.done_event, self.collate_fn, base_seed + i,
                          self.worker_init_fn, i))
                w.daemon = True  # ensure that the worker exits on process exit
                # Process.start() actually take some time as it needs to start a
                # process and pass the arguments over via a pipe. Therefore, we
                # only add a worker to self.workers list after it started, so
                # that we do not call .join() if program dies before it starts,
                # and __del__ tries to join it but will get:
                #     AssertionError: can only join a started process.
                w.start()
                self.index_queues.append(index_queue)
                self.workers.append(w)

            if self.pin_memory:
                self.data_queue = queue.Queue()
                self.pin_memory_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(self.worker_result_queue, self.data_queue,
                          self.done_event, self.pin_memory,
                          torch.cuda.current_device()))
                self.pin_memory_thread.daemon = True
                self.pin_memory_thread.start()
            else:
                self.data_queue = self.worker_result_queue

            _update_worker_pids(id(self), tuple(w.pid for w in self.workers))
            _set_SIGCHLD_handler()
            self.worker_pids_set = True

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
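
When pin_memory is enabled above, batches take a second hop through a thread that copies tensors into page-locked host memory. A simplified stand-in for that loop (illustrative only; the real _pin_memory_loop takes different arguments):

import queue as std_queue
import threading

import torch


def simple_pin_memory_loop(in_queue, out_queue, done_event):
    # Move each fetched batch into pinned (page-locked) memory so later
    # host-to-GPU copies can run asynchronously; skipped on CPU-only builds.
    while not done_event.is_set():
        try:
            idx, batch = in_queue.get(timeout=0.1)
        except std_queue.Empty:
            continue
        if torch.cuda.is_available() and isinstance(batch, torch.Tensor):
            batch = batch.pin_memory()
        out_queue.put((idx, batch))


if __name__ == "__main__":
    raw, pinned = std_queue.Queue(), std_queue.Queue()
    done = threading.Event()
    t = threading.Thread(target=simple_pin_memory_loop, args=(raw, pinned, done))
    t.daemon = True
    t.start()
    raw.put((0, torch.zeros(4)))
    print(pinned.get())   # (0, tensor([0., 0., 0., 0.])), pinned if CUDA is available
    done.set()
    t.join()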
Exemple #30
0
    def _test_gpt2_config_pp(self, tmpdir, mp_size, pp_size, mp_resize,
                             pp_resize):
        @distributed_test(world_size=pp_size * mp_size)
        def _run_baseline(inputs, tag, output, quit_event):
            reset_random()
            args_defaults = {
                'num_layers': 8,
                'hidden_size': 128,
                'num_attention_heads': 8,
                'max_position_embeddings': 128,
            }

            topo = self.get_topology(mp_size, pp_size, mp_size * pp_size)
            gpt2_pipe_model = GPT2ModelPipe(num_layers=8,
                                            num_stages=pp_size,
                                            mp_size=mp_size,
                                            args_others=args_defaults,
                                            topo=topo)
            model = self.get_deepspeed_model(gpt2_pipe_model, tmpdir)

            with torch.no_grad():
                inputs = [x.cuda() for x in inputs]
                if model.is_first_stage() or model.is_last_stage():
                    loader = RepeatingLoader([(inputs[0], 0)])
                    data_iter = iter(loader)
                else:
                    data_iter = None

                baseline = model.eval_batch(data_iter=data_iter,
                                            compute_loss=False,
                                            reduce_output=None)

                if baseline is not None:
                    # baseline should be [[hidden, True]]
                    assert len(baseline) == 1
                    assert len(baseline[0]) == 1
                    assert torch.is_tensor(baseline[0][0])
                    output.put(baseline[0][0].cpu())

                state_dict = {}
                state_dict['checkpoint_version'] = get_megatron_version()
                model.save_checkpoint(tmpdir, tag=tag, client_state=state_dict)
                quit_event.wait()

        @distributed_test(world_size=mp_resize * pp_resize)
        def _run_resize(inputs, tag, output, quit_event):
            reset_random()
            args_defaults = {
                'num_layers': 8,
                'hidden_size': 128,
                'num_attention_heads': 8,
                'max_position_embeddings': 128,
            }

            topo = self.get_topology(mp_resize, pp_resize,
                                     mp_resize * pp_resize)
            gpt2_pipe_model = GPT2ModelPipe(num_layers=8,
                                            num_stages=pp_resize,
                                            mp_size=mp_resize,
                                            args_others=args_defaults,
                                            topo=topo)
            model = self.get_deepspeed_model(gpt2_pipe_model, tmpdir)

            with torch.no_grad():
                model.load_checkpoint(tmpdir,
                                      tag=tag,
                                      load_optimizer_states=False,
                                      load_lr_scheduler_states=False)
                inputs = [x.cuda() for x in inputs]
                if model.is_first_stage() or model.is_last_stage():
                    loader = RepeatingLoader([(inputs[0], 0)])
                    data_iter = iter(loader)
                else:
                    data_iter = None

                test = model.eval_batch(data_iter=data_iter,
                                        compute_loss=False,
                                        reduce_output=None)

                if test is not None:
                    # test should be [[hidden, True]]
                    assert len(test) == 1
                    assert len(test[0]) == 1
                    assert torch.is_tensor(test[0][0])
                    output.put(test[0][0].cpu())

            quit_event.wait()

        def _verify(b_queue, t_queue, baseline_event, test_event):
            baseline = b_queue.get()
            baseline_event.set()

            test = t_queue.get()
            test_event.set()

            assert torch.allclose(
                baseline, test, atol=1e-03
            ), f"Baseline output {baseline} is not equal to save-then-load output {test}"

        tag = f'mp_{mp_size}to{mp_resize}_pp_{pp_size}to{pp_resize}'

        baseline = mp.Queue()
        test = mp.Queue()
        baseline_event = mp.Event()
        test_event = mp.Event()

        verify_process = mp.Process(target=_verify,
                                    args=(baseline, test, baseline_event,
                                          test_event))
        verify_process.start()

        inputs = self.get_inputs()
        _run_baseline(inputs, tag, baseline, baseline_event)
        _run_resize(inputs, tag, test, test_event)

        verify_process.join()
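
The _verify process above shows a common handshake: the producer puts its tensor on a queue and then waits on an event so it does not exit before the consumer has drained the queue. A toy version of the same pattern (illustrative names):

import multiprocessing as mp


def _toy_verify(result_queue, received_event):
    value = result_queue.get()   # block until the producer hands over a result
    received_event.set()         # let the producer know it is safe to exit
    assert value == 42


if __name__ == "__main__":
    q, received = mp.Queue(), mp.Event()
    p = mp.Process(target=_toy_verify, args=(q, received))
    p.start()
    q.put(42)         # producer side: publish the result...
    received.wait()   # ...and wait until the verifier has actually received it
    p.join()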