Ejemplo n.º 1
0
def make_embedding(tasks_to_accomplish, tasks_finished, gpu_idx, embedding_type_name) -> bool:
    """Encode queued sentence batches and push the embeddings to a result queue.

    A ``SentenceTransformer`` named ``embedding_type_name`` is loaded once, then
    tasks are taken from ``tasks_to_accomplish`` until it reports empty. Each
    task is iterated sentence by sentence, and for every sentence a tuple
    ``(embedding, sentence)`` is put on ``tasks_finished``, where ``embedding``
    is the encoded vector converted to a tuple.

    Args:
        tasks_to_accomplish: Queue whose items are iterables of sentences.
        tasks_finished: Queue that receives the ``(embedding, sentence)`` tuples.
        gpu_idx: Index of the CUDA device to select when CUDA is available.
        embedding_type_name: Model name handed to ``SentenceTransformer``.

    Returns:
        ``True`` once no more tasks can be fetched.
    """
    device = torch.device(f'cuda:{gpu_idx}' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        # Make the requested GPU the current one. This must be skipped on the
        # CPU fallback because set_device only accepts CUDA devices.
        torch.cuda.set_device(device)
    model = SentenceTransformer(embedding_type_name)

    while True:
        try:
            # get_nowait() raises Empty by itself, so no separate empty() check is needed.
            task = tasks_to_accomplish.get_nowait()
        except Empty:
            break

        results = []
        for sentence in tqdm(task):
            vector = tuple(model.encode([sentence])[0].tolist())
            results.append((vector, sentence))
        print(' {} : embedding making finished'.format(current_process().name))

        for item in results:
            print("{} put : {}".format(current_process().name, item[1]))
            tasks_finished.put(item)

        print(' {} : END pushing embed-sent {} data to finished_queue'.format(current_process().name, len(results)))
        # NOTE(review): short pause before polling again; purpose not visible here — confirm it is still needed.
        time.sleep(0.5)

    return True
Ejemplo n.º 2
0
def build_net(env, seeds):
    """Build a network from a seed sequence.

    The torch RNG is seeded with ``seeds[0]`` before a ``Net`` is constructed
    from the environment's observation shape and action count; every following
    seed is then applied through ``mutate_net`` (with ``copy_net=False``).

    Args:
        env: Environment exposing ``observation_space.shape`` and ``action_space.n``.
        seeds: Indexable sequence of seeds; the first one initialises the RNG.

    Returns:
        The network produced after the last mutation.
    """
    torch.manual_seed(seeds[0])
    obs_shape = env.observation_space.shape
    n_actions = env.action_space.n
    network = Net(obs_shape, n_actions)
    logger.debug("in build_net,current_process: %s,seeds:%s",
                 mp.current_process(), seeds)
    mutation_seeds = seeds[1:]
    for mutation_seed in mutation_seeds:
        print(mp.current_process(), mutation_seed)
        network = mutate_net(network, mutation_seed, copy_net=False)
    return network
Ejemplo n.º 3
0
def get_frames(path):
	"""Read every entry of a directory into a dict of resized image tensors.

	Entries are taken in the order ``os.listdir`` returns them; each one is read
	with ``skio.imread``, resized to ``(3,224,224)`` with ``tfm.resize`` and
	stored as a ``torch.Tensor`` under its position in that listing. Progress is
	printed for every 50th entry.

	Args:
		path: Directory to read (string, since it is concatenated into messages).

	Returns:
		dict mapping listing index -> tensor.
	"""
	print(mp.current_process().name + ' is getting frames at ' + path )
	entries = os.listdir(path)
	total = len(entries)
	frames = {}
	for idx, entry in enumerate(entries):
		if idx % 50 == 0:
			percent = '{:.2f}'.format((idx*1.0)/total * 100)
			print(mp.current_process().name + ' is ' + percent + '% done')
		image = skio.imread(os.path.join(path, entry))
		frames[idx] = torch.Tensor(tfm.resize(image, (3,224,224)))
	return frames
Ejemplo n.º 4
0
def my_func(x, d):
    """Mutate a shared mapping and report object identities along the way.

    Prints the current process and pid, then prints a snapshot of ``d`` (ids and
    values) before any change, after setting ``d['y']`` to 100, and after
    writing 10 into ``d['arr'][x:x+1000]``. The array is then summed 20 times
    with the results discarded.

    Args:
        x: Start offset of the slice to overwrite; also the return multiplier.
        d: Mapping with keys ``'y'`` and ``'arr'`` (``'arr'`` must support
            slicing and ``np.sum``).

    Returns:
        ``x`` multiplied by the sum of ``d['arr']``.
    """
    def _snapshot(label):
        # d[...] is looked up afresh every time so the printed ids reflect the current objects.
        print(label, mp.current_process(), id(d), id(d['y']), id(d['arr']), d['y'], d['arr'][x+500])

    print(mp.current_process())
    print('process id:', os.getpid())

    _snapshot('1:')
    d['y'] = 100
    _snapshot('2:')
    d['arr'][x:x+1000] = 10
    _snapshot('3:')

    repeats = 20
    while repeats:
        _ = np.sum(d['arr'])
        repeats -= 1
    total = np.sum(d['arr'])
    return x*total
Ejemplo n.º 5
0
def worker_func(input_queue, output_queue, top_parent_cache, device="cpu"):
    """Evaluate batches of networks received over a queue until told to stop.

    For every message taken from ``input_queue`` each seed record is turned
    into a network, evaluated, and the best ``PARENTS_COUNT`` results are sent
    back as ``OutputItem`` objects. A ``None`` message ends the worker. Any
    exception raised while handling a message is logged with its traceback and
    the worker moves on to the next message.

    Args:
        input_queue: Queue delivering iterables of seed records, or ``None``.
            A record longer than one element carries a state dict at index 1
            and the mutation seed as its last element; otherwise the record is
            handed to ``build_net``.
        output_queue: Queue receiving ``OutputItem(seeds, net, reward, steps)``
            where ``net`` is the network's ``state_dict()``.
        top_parent_cache: Not used by this function; kept for interface
            compatibility.
        device: Device the networks are moved to and evaluated on.
    """
    env = make_env()

    while True:
        try:
            parents = input_queue.get()
            if parents is None:
                break

            population = []
            for net_seeds in parents:
                if len(net_seeds) > 1:
                    logger.debug("current_process:inside1,%s,net_seeds:%s",
                                 mp.current_process(), net_seeds)
                    # Rebuild the parent from its transmitted weights, then mutate it.
                    net = Net(env.observation_space.shape, env.action_space.n)
                    net.load_state_dict(net_seeds[1])
                    net = mutate_net(net, net_seeds[-1], device).to(device)
                else:
                    net = build_net(env, net_seeds, device).to(device)
                    logger.debug("current_process:inside2,%s",
                                 mp.current_process())

                reward, steps = evaluate(env, net, device)
                population.append((net, net_seeds, reward, steps))

            # Highest reward first.
            population.sort(key=lambda p: p[2], reverse=True)

            for i in range(PARENTS_COUNT):
                net, net_seeds, reward, steps = population[i]
                logger.debug("current_process:inside3,%s, population[i][0],%s",
                             mp.current_process(), net)
                output_queue.put(
                    OutputItem(seeds=net_seeds,
                               net=net.state_dict(),
                               reward=reward,
                               steps=steps))
        except Exception as e:
            logger.debug("comme here")
            # Logger has no ``Exception`` attribute; ``exception`` logs the message plus traceback.
            logger.exception("Unexpected exception! %s", e)
Ejemplo n.º 6
0
def worker_func(input_queue, output_queue, device="cpu"):
    """Worker loop: evaluate an initial batch of seeds, then grouped children.

    The first message is an iterable of seeds; each one is built into a network,
    evaluated on a single environment and reported as ``(seed, reward, steps)``.
    Every later message is a list of seed sequences (or ``None`` to stop). The
    list is sorted and grouped by everything but the last element; one network
    is built per group and evaluated in batch on one environment per group
    member, growing the environment pool on demand.

    Args:
        input_queue: Queue delivering the messages described above.
        output_queue: Queue receiving ``(seeds, reward, steps)`` tuples.
        device: Device the networks are moved to and evaluated on.
    """
    envs = [make_env()]

    # First generation -- plain evaluation of the given seeds.
    for seed in input_queue.get():
        first_net = build_net(envs[0], seed).to(device)
        reward, steps = evaluate(envs[0], first_net, device)
        output_queue.put((seed, reward, steps))

    while True:
        parents = input_queue.get()
        print(mp.current_process(), parents)
        if parents is None:
            return
        # groupby only merges adjacent items, hence the sort.
        parents.sort()
        grouped = itertools.groupby(parents, key=lambda s: s[:-1])
        for parent_seeds, members in grouped:
            batch = list(members)
            children_seeds = [entry[-1] for entry in batch]
            net = build_net(envs[0], parent_seeds).to(device)
            batch_size = len(children_seeds)
            for _ in range(batch_size - len(envs)):
                envs.append(make_env())
            rewards, steps = evaluate_batch(envs[:batch_size], net, device)
            for result in zip(batch, rewards, steps):
                output_queue.put(result)
Ejemplo n.º 7
0
def train_worker(config, thr_incr):
    """Run one training job on the GPU assigned to the current worker process.

    Args:
        config (dict): dictionary containing configuration details. It is
            modified in place: ``gpu_ids`` is narrowed to this worker's GPU and
            ``command`` is set to ``"train"``.
        thr_incr (float): A threshold analysis is performed at the end of the training
            using the trained model and the validation sub-dataset to find the optimal binarization
            threshold. The specified value indicates the increment between 0 and 1 used during the
            ROC analysis (e.g. 0.1). Flag: ``-t``, ``--thr-increment``

    Returns:
        tuple: ``(path_output, best_training_dice, best_training_loss,
        best_validation_dice, best_validation_loss)``.

    Raises:
        ValueError: if the process name does not end in a number.
        Exception: whatever ``ivado.run_command`` raises, after logging it.
    """
    current = mp.current_process()
    # ID of process used to assign a GPU: all trailing digits of the process
    # name (not just the last character, which breaks with 10 or more workers).
    name = current.name
    ID = int(name[len(name.rstrip("0123456789")):]) - 1

    # Use GPU i from the array specified in the config file
    config["gpu_ids"] = [config["gpu_ids"][ID]]

    # Call ivado cmd_train
    try:
        # Save best validation score
        config["command"] = "train"
        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = \
            ivado.run_command(config, thr_increment=thr_incr)

    except Exception:
        logging.exception('Got exception on main handler')
        # The message needs a placeholder for the argument, otherwise logging
        # reports a formatting error instead of the exception type.
        logging.info("Unexpected error: %s", sys.exc_info()[0])
        raise

    # Save config file in output path; the context manager closes the handle.
    with open(config["path_output"] + "/config_file.json", "w") as config_copy:
        json.dump(config, config_copy, indent=4)

    return config["path_output"], best_training_dice, best_training_loss, best_validation_dice, \
        best_validation_loss
Ejemplo n.º 8
0
def kernel(act_net, train_queue, device, num_threads, env_seed):
    """Endless actor loop feeding sampled experience batches to ``train_queue``.

    Builds a private environment (with a pretrained autoencoder), a private
    replay buffer sized ``REPLAY_SIZE // num_threads`` and an agent around
    ``act_net``. After every played episode, once the buffer holds at least
    ``REPLAY_INITIAL // num_threads`` items, a batch of ``BATCH_SIZE`` is
    sampled and put on the queue together with the episode statistics and the
    process name. The function never returns.

    Args:
        act_net: Network handed to the agent.
        train_queue: Queue receiving ``(batch, exp_count, mean_episode_reward,
            mean_step_count, process_name)`` tuples.
        device: Device for the autoencoder, buffer and agent.
        num_threads: Number of parallel workers; used to split the buffer sizes.
        env_seed: Seed passed to the environment.
    """
    autoencoder = Autoencoder84(
        1, pretrained="./Design/Models/AE/Autoencoder84.dat")
    autoencoder = autoencoder.to(device).eval()

    # Individual environment: created in "goal" mode, then switched to "selfplay".
    env = sc.StageCreator(RES, ae=autoencoder, boundary=.8, mode="goal",
                          seed=env_seed)
    env.mode = "selfplay"

    # Individual experience replay.
    replay = common.ExperienceBuffer(buffer_size=REPLAY_SIZE // num_threads,
                                     device=device)
    agent = common.Agent_SAC_HER(act_net, env, replay, GAMMA,
                                 device=device, unroll_steps=UNROLL)
    worker_name = mp.current_process().name

    while True:
        exp_count = agent.play_episode(random=False)
        mean_episode_reward, mean_step_count = agent.get_mean_reward_and_steps()

        # Only start emitting batches once enough experience has been collected.
        if len(replay) >= REPLAY_INITIAL // num_threads:
            sampled = replay.sample(BATCH_SIZE)
            train_queue.put((sampled, exp_count, mean_episode_reward,
                             mean_step_count, worker_name))
    def __init__(self,
                 env,
                 actor: torch.nn.Module,
                 replay_buffer: SharedReplayBuffer,
                 scheduler: Scheduler,
                 argp,
                 logger: SummaryWriter = None):
        """Store the collaborators and sampling settings.

        Args:
            env: Environment object; stored as given.
            actor: Policy network; stored as given.
            replay_buffer: Shared buffer; stored as given.
            scheduler: Scheduler; stored as given.
            argp: Parsed arguments providing ``num_trajectories``,
                ``episode_length``, ``schedule_switch``, ``discount_factor``
                and ``num_workers``.
            logger: Optional summary writer.
        """
        # Collaborators
        self.env = env
        self.actor = actor
        self.replay_buffer = replay_buffer
        self.scheduler = scheduler

        # Settings copied from the argument namespace
        self.num_trajectories = argp.num_trajectories
        self.trajectory_length = argp.episode_length
        self.schedule_switch = argp.schedule_switch
        self.discount_factor = argp.discount_factor

        # Running product 0.99, 0.99**2, ... of length trajectory_length - 1.
        # NOTE(review): uses a literal 0.99 rather than self.discount_factor — confirm this is intended.
        base = torch.ones([self.trajectory_length - 1]) * 0.99
        self.discounts = torch.cumprod(base, dim=-1)

        self.log_every = 10

        self.logger = logger
        # With several workers, use the first component of the process identity
        # as id; otherwise fall back to 1.
        multi_worker = argp.num_workers > 1
        self.process_id = current_process()._identity[0] if multi_worker else 1
Ejemplo n.º 10
0
def worker_func(input_queue, output_queue, device="cpu"):
    """Worker loop that evaluates networks described by seed tuples.

    Each message from ``input_queue`` is an iterable of hashable seed sequences
    (or ``None`` to stop). For a sequence longer than one element the network
    of its prefix (all but the last seed) is looked up in a local cache and, if
    present, mutated with the last seed; in every other case the network is
    built from scratch. Each network is evaluated and reported as an
    ``OutputItem``; the networks of the message are merged into the cache
    afterwards.

    Args:
        input_queue: Queue delivering the messages described above.
        output_queue: Queue receiving ``OutputItem(seeds, reward, steps)``.
        device: Device freshly built networks are moved to and evaluation runs on.
    """
    env = make_env()
    # Maps seed sequence -> network.
    # NOTE(review): entries are only ever added, so this grows with every message — confirm that is intended.
    cache = {}

    while True:
        parents = input_queue.get()
        print(mp.current_process(), parents)
        if parents is None:
            break

        built_now = {}
        for net_seeds in parents:
            cached_parent = None
            if len(net_seeds) > 1:
                cached_parent = cache.get(net_seeds[:-1])

            if cached_parent is None:
                net = build_net(env, net_seeds).to(device)
            else:
                net = mutate_net(cached_parent, net_seeds[-1])

            built_now[net_seeds] = net
            reward, steps = evaluate(env, net, device)
            result = OutputItem(seeds=net_seeds, reward=reward, steps=steps)
            output_queue.put(result)
        cache.update(built_now)
Ejemplo n.º 11
0
def train_worker(config, thr_incr):
    """Run one training job on the GPU assigned to the current worker process.

    Args:
        config (dict): Configuration. Modified in place: ``config["gpu"]`` is
            replaced by the entry selected for this worker. Must also provide
            ``"log_directory"``.
        thr_incr: Forwarded to ``ivado.run_command`` as ``thr_increment``.

    Returns:
        tuple: ``(log_directory, best_training_dice, best_training_loss,
        best_validation_dice, best_validation_loss)``.

    Raises:
        ValueError: if the process name does not end in a number.
        Exception: whatever ``ivado.run_command`` raises, after logging it.
    """
    current = mp.current_process()
    # ID of process used to assign a GPU: all trailing digits of the process
    # name (not just the last character, which breaks with 10 or more workers).
    name = current.name
    ID = int(name[len(name.rstrip("0123456789")):]) - 1

    # Use GPU i from the array specified in the config file
    config["gpu"] = config["gpu"][ID]

    # Call ivado cmd_train
    try:
        # Save best validation score
        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = \
            ivado.run_command(config, thr_increment=thr_incr)

    except Exception:
        logging.exception('Got exception on main handler')
        print("Unexpected error:", sys.exc_info()[0])
        raise

    # Save config file in log directory; the context manager closes the handle.
    with open(config["log_directory"] + "/config_file.json", "w") as config_copy:
        json.dump(config, config_copy, indent=4)

    return config[
        "log_directory"], best_training_dice, best_training_loss, best_validation_dice, best_validation_loss
Ejemplo n.º 12
0
def fun(datum):
    """Fan ``datum`` out to a nested two-process pool.

    On first use in a process the module-level ``global_data`` is set to the
    string form of the current process. ``funfun`` is then applied to
    ``datum + k`` for ``k`` in 0..8 through ``imap_unordered``, so the order of
    the results is not guaranteed.

    Returns:
        dict with keys ``'global_data'`` and ``'results'``.
    """
    global global_data
    if global_data is None:
        global_data = str(multiprocessing.current_process())

    with multiprocessing.Pool(processes=2) as pool:
        inputs = [datum + offset for offset in range(9)]
        outcome = list(pool.imap_unordered(funfun, inputs))
    return {'global_data': global_data, 'results': outcome}
Ejemplo n.º 13
0
def bert_worker(i, artist, track, lyrics, get_embedding_fn, agg_type,
                tokenizer, models):
    """Embed one track's lyrics with the model belonging to this worker.

    The worker index is the number at the end of the current process name minus
    one; it selects the entry of ``models`` and the name of the progress file
    ``wid_<index>_done.txt`` to which one line per track is appended.

    Args:
        i: Running index of the track, only used in the progress message.
        artist: Artist name (string).
        track: Track name (string).
        lyrics: Lyrics passed to ``get_embedding_fn``.
        get_embedding_fn: Callable invoked as
            ``get_embedding_fn(tokenizer, agg_type, model, lyrics)``.
        agg_type: Forwarded to ``get_embedding_fn``.
        tokenizer: Forwarded to ``get_embedding_fn``.
        models: Sequence of models indexed by worker index.

    Returns:
        tuple: ``(artist, track, embedding)``.

    Raises:
        ValueError: if the process name does not end in a number.
    """
    # Use every trailing digit of the name; looking only at the last character
    # would map e.g. worker 12 to index 1.
    name = current_process().name
    wid = int(name[len(name.rstrip('0123456789')):]) - 1
    print(f'[processor {wid}] i={i:<6} {artist:<20} {track:>30}')
    embedding = get_embedding_fn(tokenizer, agg_type, models[wid], lyrics)
    with open(f'wid_{wid}_done.txt', 'a') as f:
        f.write(f'"{artist}","{track}","{embedding}"\n')
    return (artist, track, embedding)
    def run(self):
        """Play ``self.trainStep`` episodes, collecting n-step transitions.

        Every environment step is appended to a sliding window. Once the window
        holds more than ``self.nStepForward`` entries, the discounted sum of the
        first ``nStepForward`` rewards is stored together with the oldest
        state/action and the most recent next state. Whenever ``self.totalStep``
        is a multiple of ``self.updateGlobalFrequency`` and transitions are
        pending, they are handed to ``update_net_and_sync`` and cleared. At the
        end of each episode ``recordInfo`` is called; after the last episode
        ``None`` is put on ``self.resultQueue``.
        """
        window = []
        bufferState, bufferAction, bufferReward, bufferNextState = [], [], [], []
        for self.epIdx in range(self.trainStep):

            print(f"episode index:{self.epIdx} from{current_process().name}\n")
            state = self.env.reset()
            rewardSum = 0
            stepCount = 0

            # Start every episode with an empty n-step window.
            window.clear()

            done = False
            while not done:
                epsilon = self.epsilon_by_episode(
                    self.globalEpisodeCount.value)
                action = self.select_action(self.localNet, state, epsilon)
                nextState, reward, done, info = self.env.step(action)

                window.append((state, action, nextState, reward))

                # NOTE(review): fires at nStepForward + 1 entries while only the
                # first nStepForward rewards are summed — confirm '>' vs '>='.
                if len(window) > self.nStepForward:
                    nStepReturn = sum(
                        window[k][3] * (self.gamma**k)
                        for k in range(self.nStepForward))
                    oldestState, oldestAction, _, _ = window.pop(0)
                    bufferAction.append(oldestAction)
                    bufferState.append(oldestState)
                    bufferReward.append(nStepReturn)
                    bufferNextState.append(nextState)

                state = nextState
                rewardSum += reward

                syncDue = self.totalStep % self.updateGlobalFrequency == 0
                if syncDue and len(bufferAction) > 0:
                    # Update the global net and copy it back to the local net.
                    self.update_net_and_sync(bufferAction, bufferState,
                                             bufferReward, bufferNextState)
                    for pending in (bufferAction, bufferState,
                                    bufferReward, bufferNextState):
                        pending.clear()

                if done:
                    self.recordInfo(rewardSum, stepCount)

                stepCount += 1
                self.totalStep += 1
        self.resultQueue.put(None)
Ejemplo n.º 15
0
def train_parallel(args):
    """Pin this process to its CUDA device (if any) and start training.

    When CUDA is available and ``args.cpu`` is false, the device registered for
    the current pid in ``args.pid_to_cuda`` becomes the current CUDA device and
    the sole entry of ``args.devices``. The checkpoint path gets the first
    device appended as a suffix before ``train(args)`` is called.

    Returns:
        Whatever ``train(args)`` returns.
    """
    if torch.cuda.is_available() and not args.cpu:
        cuda_id = args.pid_to_cuda[mp.current_process().pid]
        torch.cuda.set_device(cuda_id)
        args.devices = [cuda_id]

    suffix = f".{args.devices[0]}"
    args.checkpoint_path = args.checkpoint_path + suffix
    return train(args)
Ejemplo n.º 16
0
def _initializer(*args):
    """
    Process initializer function that is called when mp.Pool is started.
    :param args:    arguments that are to be copied to the target process. This can be a tuple for convenience.
    """
    global process_initial_data
    process_id, process_initial_data = args

    assert 'OMP_NUM_THREADS' in os.environ
    torch.set_num_threads(int(os.environ['OMP_NUM_THREADS']))

    # manually set process name
    with process_id.get_lock():
        mp.current_process().name = 'PoolWorker-' + str(process_id.value)
        logger.info('pid=' + str(os.getpid()) + ' : ' +
                    mp.current_process().name)

        process_id.value += 1
Ejemplo n.º 17
0
    def run(self):
        """Play ``self.trainStep`` episodes, updating the networks after every step.

        Each episode runs for at most ``self.episodeLength`` steps. After every
        environment step the transition is passed to ``update_net_and_sync``
        (with the next state replaced by ``None`` on termination) and the
        discounted reward is accumulated. ``recordInfo`` is called once per
        episode; after the last episode ``None`` is put on ``self.resultQueue``.
        """
        torch.set_num_threads(1)
        for self.epIdx in range(self.trainStep):

            print(f"episode index:{self.epIdx} from{current_process().name}\n")
            state = self.env.reset()
            rewardSum = 0

            # Start every episode with an empty n-step buffer.
            self.nStepBuffer.clear()

            for stepCount in range(self.episodeLength):

                epsilon = self.epsilon_by_episode(self.globalEpisodeCount.value)
                action = self.select_action(self.localNet, state, epsilon)
                nextState, reward, done, info = self.env.step(action)

                if stepCount == 0:
                    print("at step 0: from " + current_process().name + "\n")
                    print(info)

                # A terminal transition has no successor state.
                nextState = None if done else nextState

                self.update_net_and_sync(state, action, nextState, reward)

                state = nextState
                rewardSum += reward * self.gamma ** stepCount

                self.totalStep += 1
                if done:
                    break

            self.recordInfo(rewardSum, stepCount)

        self.resultQueue.put(None)
Ejemplo n.º 18
0
def multi_inference(embedding_type_name, tasks_to_accomplish, tasks_finished,
                    gpu_idx, src_sentences, src_sentence_embeddings) -> bool:
    '''Run similarity inference for queued target sentences.

    param
        - embedding_type_name : str
            - kind of embedding provided by sentence transformers
        - tasks_to_accomplish : Queue
            - queue holding the tasks; each task is an iterable of sentences (str)
        - tasks_finished : Queue
            - queue that receives the inference results
        - gpu_idx
            - index of the CUDA device to select when CUDA is available
        - src_sentences : List[str]
            - list of source pool sentences
        - src_sentence_embeddings
            - list of the source pool sentence vectors
    desc
        - loads the sentence encoder for the given embedding_type_name
        - takes jobs from tasks_to_accomplish until it reports empty
        - calls predict() with the source pool for every target sentence
          (per the original notes, this finds the most similar source
          embedding and its mapped source sentence) and puts
          [target, prediction] on tasks_finished
    return
        - True once no more tasks can be fetched
    '''
    device = torch.device(
        f'cuda:{gpu_idx}' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        # Make the requested GPU the current one. This must be skipped on the
        # CPU fallback because set_device only accepts CUDA devices.
        torch.cuda.set_device(device)
    model = SentenceTransformer(embedding_type_name)

    levin = CustomLevin()
    while True:
        try:
            # get_nowait() raises Empty by itself, so no separate empty() check is needed.
            task = tasks_to_accomplish.get_nowait()
        except Empty:
            break

        # ret : [[target, prediction], ...] for the targets of this task
        ret = []
        for target in tqdm(task):
            sbert_inference = predict(model, levin, target, src_sentences,
                                      src_sentence_embeddings)
            ret.append([target, sbert_inference])
        for data in ret:
            tasks_finished.put(data)

        print(' {} : END pushing {} data to finished_queue'.format(
            current_process().name, len(ret)))
        # NOTE(review): short pause before polling again; purpose not visible here — confirm it is still needed.
        time.sleep(0.5)

    return True
Ejemplo n.º 19
0
def main(args):
    """Select the CUDA device for this process, seed the RNGs and train.

    When CUDA is available and ``args.cpu`` is false, the device registered for
    the current pid in ``args.pid_to_cuda`` becomes the current CUDA device.

    Returns:
        The result of ``build_task(args).train()``.
    """
    if torch.cuda.is_available() and not args.cpu:
        own_pid = mp.current_process().pid
        torch.cuda.set_device(args.pid_to_cuda[own_pid])

    set_random_seed(args.seed)

    return build_task(args).train()
Ejemplo n.º 20
0
def worker_func(input_queue, output_queue, device_w="cpu"):
    """Worker loop: breed, evaluate and report children of the given parents.

    Every message is indexable with the parent state dicts at position 0 and
    their selection weights at position 1. For each message
    ``SEEDS_PER_WORKER`` children are produced by picking a parent index at
    random (weighted), mutating that parent with a random seed and evaluating
    the result. The best ``PARENTS_COUNT`` children are reported as
    ``OutputItem`` objects; each item's ``steps`` field carries the total
    number of steps of the whole batch. A ``None`` message ends the worker.

    Args:
        input_queue: Queue delivering ``(parents, weights)`` messages or ``None``.
        output_queue: Queue receiving ``OutputItem(child_net, reward, steps)``.
        device_w: Device the children are moved to and evaluated on.
    """
    new_env = make_env()
    parent_list = list(range(PARENTS_COUNT))
    logger.debug("in work_func,current_process: {0},parent_list:{1}".format(
        mp.current_process(), parent_list))

    while True:
        get_item = input_queue.get()
        if get_item is None:
            # Shutdown sentinel; without this check None would fail on indexing.
            break
        parents_w = get_item[0]
        pro_list = get_item[1]

        batch_steps_w = 0
        child = []
        logger.debug(
            "in worker_func, current_process: {0},parents[0][0]:{1},len of parents:{2},pro_list:{3}"
            .format(mp.current_process(), parents_w[0]['fc.2.bias'],
                    len(parents_w), pro_list))

        # Normalise once per message so the weights sum to 1, as
        # np.random.choice requires; the weights do not change inside the loop.
        pro_list = np.array(pro_list)
        pro_list = pro_list / sum(pro_list)

        for _ in range(SEEDS_PER_WORKER):
            parent = np.random.choice(parent_list, p=pro_list)
            child_seed = np.random.randint(MAX_SEED)
            child_net = mutate_net(new_env, parents_w[parent],
                                   child_seed).to(device_w)
            reward, steps = evaluate(new_env, child_net, device_w)
            batch_steps_w += steps
            child.append((child_net.state_dict(), reward, steps))

        # Highest reward first.
        child.sort(key=lambda p: p[1], reverse=True)
        for i in range(PARENTS_COUNT):
            output_queue.put(
                OutputItem(child_net=child[i][0],
                           reward=child[i][1],
                           steps=batch_steps_w))
Ejemplo n.º 21
0
def get_batches(group, bsize=1):
	"""Yield zero-initialised batches of shape (bsize, 3, 224, 224) filled from ``group``.

	The keys of ``group`` are walked in steps of ``bsize``; slot ``j`` of a batch
	receives ``group[key]`` for the corresponding key. Slots of the last batch
	that have no key left stay zero, and a diagnostic line is printed for each
	such slot.

	Args:
		group: Mapping (anything with ``keys()`` and item access) whose values
			can be assigned into a (3, 224, 224) tensor slot.
		bsize: Number of items per batch.

	Yields:
		torch.Tensor of shape (bsize, 3, 224, 224).
	"""
	# Materialise the keys: a keys view cannot be indexed on Python 3.
	keys = list(group.keys())
	for i in range(0, len(keys), bsize):
		batch = torch.zeros(bsize, 3, 224, 224)
		for j in range(bsize):
			if i+j < len(keys):
				batch[j] = group[keys[i+j]]
			else:
				print(mp.current_process().name, i, j, i+j)
		yield batch
Ejemplo n.º 22
0
def kernel(alice, bob, train_queue, device, seed=None):
    """Self-play loop run by each worker; never returns.

    Plays self-play episodes forever and puts every result on ``train_queue``.
    After each run of ``REPORT_INTERV`` plain episodes, the next episode is sent
    together with the agent's mean rewards and the process name so the consumer
    can log them.
    """
    env = sc.StageCreator(RES, mode="selfplay", boundary=0, seed=seed)
    agent = SelfplayAgent(alice, bob, env, device=device, gamma=GAMMA)

    plain_sent = 0
    while True:
        episode = agent.selfplay_episode(random=True)
        if plain_sent < REPORT_INTERV:
            train_queue.put(episode)
            plain_sent += 1
            continue

        # Reporting episode: attach the mean rewards for logging.
        a_data, b_data = episode
        mean_r_a, mean_r_b = agent.get_selfplay_rewards()
        report = (a_data, b_data, mean_r_a, mean_r_b, mp.current_process().name)
        train_queue.put(report)
        plain_sent = 0
Ejemplo n.º 23
0
def runEnv(env):
    """Reset ``env`` and take up to four random actions, printing diagnostics.

    Prints the process name and the ids of the environment, its agent and its
    map, then performs random actions drawn from ``0 .. env.nbActions - 1``.
    Stops early as soon as a step reports ``done``.
    """
    env.reset()
    print(f"Hello, World! from {current_process().name}\n")
    print('agentID')
    print(id(env.agent))
    print('MapID')
    print(id(env.mapMat))
    print('envID')
    print(id(env))

    step_index = 0
    while step_index < 4:
        chosen = random.randint(0, env.nbActions - 1)
        _state, _reward, finished, info = env.step(chosen)
        print(f"Hello, World! from {current_process().name}\n")
        print(f'step {step_index}')
        print(info)

        if finished:
            print("great!!!!!!!!!!!!!!!!!!!")
            return
        step_index += 1
Ejemplo n.º 24
0
def kernel(alice, bob, train_queue, device):
    """Self-play loop run by each worker; never returns.

    Repeats forever: ten episodes are played and put on ``train_queue`` as they
    are, then one more episode is sent together with the agent's mean rewards
    and the process name.
    """
    stage = sc.StageCreator(target=False)
    screen_env = sc.ScreenOutput(RES, stage)
    agent = SelfplayAgent(alice, bob, screen_env, device=device, gamma=GAMMA)

    while True:
        remaining_plain = 10
        while remaining_plain:
            train_queue.put(agent.selfplay_episode())
            remaining_plain -= 1

        a_data, b_data = agent.selfplay_episode()
        mean_r_a, mean_r_b = agent.get_selfplay_rewards()
        report = (a_data, b_data, mean_r_a, mean_r_b, mp.current_process().name)
        train_queue.put(report)
Ejemplo n.º 25
0
def _init1(args):
    trainer, keep_on_gpu, cuda_pool = args
    if cuda_pool:
        procid = (current_process()._identity or [1])[0] - 1
        cuda_id = cuda_pool[procid]
    else:
        cuda_id = None
    if (cuda_id is not None
            and torch.device(trainer.device).type.startswith('cuda')):
        trainer.device = torch.device(f'cuda:{cuda_id}')
    trainer.init()
    if not keep_on_gpu:
        trainer.model = trainer.to(device='cpu')
    return trainer
Ejemplo n.º 26
0
def worker_func(obj, f):
    """Exercise ``obj`` inside a worker process while printing diagnostics.

    Prints the current process and its pid, calls ``obj.calc(f)`` and later
    ``obj.modify(f)`` (surrounded by ``obj.donothing()`` calls), and prints
    ``id(obj)`` and ``obj.x`` after each of the two. The two-second sleeps
    separate the phases in time.

    Args:
        obj: Object providing ``donothing()``, ``calc(f)``, ``modify(f)`` and
            an attribute ``x``.
        f: Value forwarded to ``obj.calc`` and ``obj.modify``.
    """
    print(mp.current_process())
    print('process id:', os.getpid())
    time.sleep(2)
    # 100 million random floats that are never read afterwards.
    # NOTE(review): presumably allocated to make per-process memory use visible — confirm.
    y = np.random.rand(int(100e6))
    obj.donothing()
    obj.calc(f)
    obj.donothing()
    print(id(obj), obj.x)
    time.sleep(2)
    obj.modify(f)
    print(id(obj), obj.x)
    obj.donothing()
    time.sleep(2)
Ejemplo n.º 27
0
def get_logger() -> logging.Logger:
    """Return the shared `logging.Logger` (to stderr, per the original notes).

    It can be called whenever we wish to log some message. Messages can get
    mixed-up
    (https://docs.python.org/3.6/library/multiprocessing.html#logging), but it
    works well in most cases.

    When `_new_logger()` reports that set-up is needed, the log formatter is
    installed; in the process named "MainProcess" `_new_logger` is additionally
    called with `logging.DEBUG` first.

    # Returns

    logger: the `logging.Logger` object
    """
    needs_setup = _new_logger()
    if not needs_setup:
        return _LOGGER

    in_main_process = mp.current_process().name == "MainProcess"
    if in_main_process:
        _new_logger(logging.DEBUG)
    _set_log_formatter()
    return _LOGGER
Ejemplo n.º 28
0
    def test_multiProcess(self):
        """Apply one optimizer step with all-ones gradients and print object ids.

        Prints the process name and the global policy net's state dict, sets
        every parameter's gradient to ones, steps the global optimizer, and
        finally prints the ids of the global net, the global optimizer and the
        local net.
        """
        print(f"Hello, World! from {current_process().name}\n")
        print(self.globalPolicyNet.state_dict())

        for param in self.globalPolicyNet.parameters():
            param.grad = torch.ones_like(param)
        self.globalOptimizer.step()

        labelled = (
            ('globalNetID:', self.globalPolicyNet),
            ('globalOptimizer:', self.globalOptimizer),
            ('localNetID:', self.localNet),
        )
        for label, target in labelled:
            print(label)
            print(id(target))
Ejemplo n.º 29
0
    def run(self) -> None:
        """Build the sacred experiment, run it, then terminate the process.

        Records the current process object in ``self._proc_id``, registers a
        main function on the experiment returned by
        ``self._build_sacred_experiment()``, runs the experiment and finally
        exits the interpreter immediately via ``os._exit``.
        """
        self._proc_id = current_process()

        ex = self._build_sacred_experiment()

        # NOTE(review): the function name and the parameter names below are
        # presumably matched by the experiment framework — do not rename
        # without checking.
        @ex.automain
        def main(_run, _config, _log):
            # Load config and logger
            config = self._set_seed(_config)

            # run the framework
            self.run_sacred_framework(_run, config, _log)

        ex.run()

        # Hard exit without normal interpreter shutdown.
        # NOTE(review): os.EX_OK is documented as Unix-only — confirm the target platforms.
        os._exit(os.EX_OK)
Ejemplo n.º 30
0
    def __init__(self, param_server, shared_replay_buffer, parser_args):
        """Set up environment, networks, losses and settings.

        Args:
            param_server: Stored as ``self.param_server``.
            shared_replay_buffer: Stored as ``self.shared_replay_buffer``.
            parser_args: Parsed arguments; the fields read here are
                ``num_worker``, ``logging``, ``init_std``, ``entropy_reg``,
                ``num_trajectories``, ``update_targnets_every``,
                ``learning_steps``, ``num_runs``, ``render``, ``log_interval``
                and ``num_grads``.
        """

        # First CUDA device when available, CPU otherwise.
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

        self.param_server = param_server

        # With several workers, use the first component of the process identity
        # as id; otherwise fall back to 1.
        if parser_args.num_worker > 1:
            self.pid = current_process()._identity[0]
        else:
            self.pid = 1

        self.logging = parser_args.logging
        self.logger = None
        if self.pid == 1 and self.logging:  # only create one logger
            self.logger = SummaryWriter()

        self.shared_replay_buffer = shared_replay_buffer
        self.env = gym.make("Swimmer-v2")
        # self.env = gym.make("Pendulum-v0")
        # self.env = gym.make("HalfCheetah-v2")
        # Network sizes come from the first dimension of the env's spaces.
        self.num_actions = self.env.action_space.shape[0]
        self.num_obs = self.env.observation_space.shape[0]
        self.actor = Actor(num_actions=self.num_actions,
                           num_obs=self.num_obs,
                           log_std_init=np.log(parser_args.init_std)).to(
                               self.device)
        self.critic = Critic(num_actions=self.num_actions,
                             num_obs=self.num_obs).to(self.device)

        # Target networks start as deep copies of the online networks and are frozen.
        self.target_actor = copy.deepcopy(self.actor).to(self.device)
        self.target_actor.freeze_net()
        self.target_critic = copy.deepcopy(self.critic).to(self.device)
        self.target_critic.freeze_net()

        self.actor_loss = ActorLoss(alpha=parser_args.entropy_reg)
        self.critic_loss = Retrace(num_actions=self.num_actions)

        # Settings copied from the argument namespace.
        self.num_trajectories = parser_args.num_trajectories
        self.update_targnets_every = parser_args.update_targnets_every
        self.learning_steps = parser_args.learning_steps
        self.num_runs = parser_args.num_runs
        self.render = parser_args.render
        self.log_every = parser_args.log_interval

        self.num_grads = parser_args.num_grads
        self.grad_ctr = 0
        self.cond = Condition()