Esempio n. 1
0
def worker2():
    """Publish one message on topic 'lets' and print one message from 'receive'.

    Returns None.
    """
    publisher = Pub('lets')
    subscriber = Sub('receive')
    publisher.put("I can send...")
    try:
        # NOTE(review): get() is called without a timeout, so this presumably
        # blocks until a message arrives and the TimeoutError branch may never
        # run - confirm against the Sub implementation in use.
        message = subscriber.get()
    except TimeoutError:
        print("I CANT")
    else:
        print("Let see what i get,", message)
Esempio n. 2
0
File: single.py Progetto: wx-b/rlbc
    def __init__(self, args, obs_running_stats=None):
        """Set up the environment and its channels, then enter the env loop.

        Args:
            args: mapping of settings; it is handed to ``parse_args`` and its
                ``'seed'`` entry is passed to ``create_env``.
            obs_running_stats: accepted but not used by this constructor.

        The constructor ends by calling ``env_loop()``, so it only returns
        when that call returns.
        """
        self.parse_args(args)
        torch.set_num_threads(1)

        # environment first, then the publisher, then the subscriber
        self.env = self.create_env(args['seed'])
        self.pub_out = Pub('observations')
        input_topic = 'env{}_input'.format(int(self.env_idx))
        self.sub_in = Sub(input_topic)

        # both step counters start from zero
        self.step_counter = 0
        self.step_counter_after_new_action = 0
        self.reset_env(reset_mime=False)

        # hand control over to the message-driven loop
        self.env_loop()
Esempio n. 3
0
def run_env(env_name, seed):
    """Serve one gym environment: answer every reset message with an observation.

    Args:
        env_name: id passed to ``gym.make``.
        seed: seed applied to the environment; also used to build the reset
            topic name and sent back with every published observation.

    Any exception raised while setting up or serving is caught and printed,
    after which the function returns None.
    """
    try:
        import gym
        import mime  # not referenced below; NOTE(review): presumably imported for its side effects - confirm
        environment = gym.make(env_name)
        environment.seed(seed)
        obs_publisher = Pub('observations')
        reset_topic = 'env{}_reset'.format(int(seed))
        reset_subscriber = Sub(reset_topic)
        print('seed {} ready'.format(seed))
        # one published observation per message received on the reset topic
        for counter in reset_subscriber:
            frames, scalars = get_frames_scalars(environment, counter)
            message = (frames, scalars, seed)
            obs_publisher.put(message)
    except Exception as error:
        print('Exeception: {}'.format(error))
Esempio n. 4
0
File: batch.py Progetto: wx-b/rlbc
 def _init_dask(self):
     """(Re)create the local dask cluster, client, publishers and subscriber.

     Existing cluster/client objects are closed and the old channel
     attributes are deleted before new ones are created. The method ends
     with a fixed 5 second sleep.
     """
     # --- tear down what a previous initialization left behind ---
     previous_cluster = self._cluster
     if previous_cluster is not None:
         print('WARNING: reinitiailizing dask')
         previous_cluster.close()
     if self._client is not None:
         self._client.close()
         del self._dask_futures
     for channel_attr in ('pub_out', 'sub_in'):
         if getattr(self, channel_attr) is not None:
             delattr(self, channel_attr)

     # --- build the new cluster and client ---
     # (silence_logs=0 can be passed to LocalCluster for debugging)
     self._cluster = LocalCluster(n_workers=self.num_processes,
                                  memory_limit=None)
     self._client = Client(self._cluster)

     # always define publishers first then subscribers
     publishers = []
     for env_idx in range(self.num_processes):
         publishers.append(Pub('env{}_input'.format(env_idx)))
     self._dask_futures = self._client_map()
     subscriber = Sub('observations')
     self.pub_out = publishers
     self.sub_in = subscriber

     # wait until all the peers are created
     time.sleep(5)
Esempio n. 5
0
def init_workers(env_name, num_processes):
    """Start a local dask cluster running one ``run_env`` task per process.

    Args:
        env_name: environment id forwarded to every ``run_env`` task.
        num_processes: number of dask workers; ``run_env`` is mapped over
            ``range(num_processes)``, which also supplies the seeds.

    Returns:
        Tuple ``(client, pubs_reset, sub_obs)``: the dask client, one
        publisher per ``'env{seed}_reset'`` topic, and the subscriber on
        ``'observations'``.
    """
    # Local import so the block does not depend on the file's import header.
    from dask.distributed import fire_and_forget

    cluster = LocalCluster(n_workers=num_processes)
    client = Client(cluster)
    # publishers are created before the tasks are submitted and before the
    # subscriber, as in the original ordering
    pubs_reset = [
        Pub('env{}_reset'.format(seed)) for seed in range(num_processes)
    ]
    futures = client.map(run_env, [env_name] * num_processes,
                         range(num_processes))
    # The futures are not returned to the caller. Dask may release work that
    # no client holds a future for, so ask the scheduler to keep these tasks
    # alive even after the local references are gone.
    fire_and_forget(futures)
    sub_obs = Sub('observations')
    # sleep while sub/pub is initialized
    time.sleep(5)
    return client, pubs_reset, sub_obs
Esempio n. 6
0
File: single.py Progetto: wx-b/rlbc
class SingleEnv:
    """One gym environment driven by messages received over Pub/Sub.

    Commands are read from the ``'env{env_idx}_input'`` topic and every
    resulting observation dict is published on ``'observations'`` together
    with the environment index.
    """

    def __init__(self, args, obs_running_stats=None):
        """Create the environment and channels, then enter ``env_loop``.

        Args:
            args: dict of settings, see ``parse_args``; ``args['seed']`` is
                passed to ``create_env``.
            obs_running_stats: accepted but not used.

        The constructor only returns when ``env_loop()`` returns.
        """
        self.parse_args(args)
        torch.set_num_threads(1)

        # create variables
        self.env = self.create_env(args['seed'])
        self.pub_out = Pub('observations')
        self.sub_in = Sub('env{}_input'.format(int(self.env_idx)))
        self.step_counter = 0
        self.step_counter_after_new_action = 0
        self.reset_env(reset_mime=False)

        # start the environment loop
        self.env_loop()

    def parse_args(self, args):
        """Copy the settings found in ``args`` onto the instance.

        Raises:
            NotImplementedError: if ``args['input_type']`` is neither
                ``'depth'`` nor ``'rgbd'``.
        """
        self.env_idx = args['env_idx']
        self.env_name = args['env_name']
        self.max_length = args['max_length']
        # only the environment with index 0 is ever rendered
        self.render = args['render'] and self.env_idx == 0
        self.action_keys = Actions.action_space_to_keys(
            args['bc_args']['action_space'])[0]
        if args['input_type'] == 'depth':
            self.channels = ('depth', )
        elif args['input_type'] == 'rgbd':
            self.channels = ('depth', 'rgb')
        else:
            raise NotImplementedError('Unknown input type = {}'.format(
                args['input_type']))
        self.augmentation = None
        self.augmentation_str = args['augmentation']
        self.use_expert_scripts = args['use_expert_scripts']
        if not self.use_expert_scripts:
            # timescales for skills (rlbc setup only)
            if isinstance(args['timescale'], list):
                self.skills_timescales = args['timescale']
            else:
                assert isinstance(args['timescale'], int)
                # the same timescale is used for every skill
                self.skills_timescales = (
                    [args['timescale']] * args['num_skills'])
        else:
            self.skills_timescales = None

        # gifs writing: enabled only when a non-empty 'gifdir' is supplied.
        # A missing, None or empty value disables it instead of failing in
        # os.path.join or writing into an unintended relative directory.
        self.gifdir = None
        self.gif_counter = 0
        self.obs_history = {}
        gifdir_root = args['gifdir'] if 'gifdir' in args else None
        if gifdir_root:
            self.gifdir = os.path.join(gifdir_root,
                                       '{:02d}'.format(self.env_idx))

    def env_loop(self):
        """Process incoming messages until the subscriber stops yielding.

        Each message is a dict whose ``'function'`` entry is ``'reset'``,
        ``'reset_after_crash'`` or ``'step'`` (the latter also carries
        ``'action'``).

        Raises:
            NotImplementedError: for any other ``'function'`` value.
        """
        for input_ in self.sub_in:
            self.step_counter += 1
            self.step_counter_after_new_action += 1

            function = input_['function']
            if function == 'reset':
                obs = self.reset_env()
                self.publish_obs(obs_dict={'observation': obs})
            elif function == 'reset_after_crash':
                obs = self.reset_env()
                # reported as a finished, unsuccessful episode
                crash_info = self.update_info({
                    'success': False,
                    'failure_message': 'Env crashed'
                })
                self.publish_obs(
                    obs_dict={
                        'observation': obs,
                        'reward': 0,
                        'done': True,
                        'info': crash_info
                    })
            elif function == 'step':
                action_applied = self.get_action_applied(input_['action'])
                obs, reward, done, info = self.env.step(action_applied)
                info = self.update_info(info)
                if done:
                    # fall back to reset_env's own defaults when the
                    # environment did not report these keys
                    obs = self.reset_env(
                        error_message=info.get('failure_message', ''),
                        success=info.get('success', False))
                self.publish_obs(
                    obs_dict={
                        'observation': obs,
                        'reward': reward,
                        'done': done,
                        'info': info
                    })
            else:
                raise NotImplementedError(
                    'function {} is not implemented'.format(function))

    def create_env(self, seed):
        """Build the gym environment, seeded with ``env_idx + seed``."""
        env = gym.make(self.env_name)
        env.seed(self.env_idx + seed)
        if self.max_length is not None:
            env._max_episode_steps = self.max_length
        if self.render:
            env.unwrapped.scene.renders(True)
        return env

    def reset_env(self, reset_mime=True, error_message='', success=False):
        """Reset the per-episode state and, optionally, the environment.

        Args:
            reset_mime: when true, ``self.env.reset()`` is called and its
                observation returned; when false, a new augmentation is
                sampled instead and None is returned.
            error_message: text printed with the reset notice when rendering.
            success: value recorded at the end of the skills history file.

        When gif writing is enabled, the recorded history is written to disk
        and cleared before anything else is reset.
        """
        step_counter_cached = self.step_counter
        step_counter_after_new_action_cached = (
            self.step_counter_after_new_action)
        self.step_counter = 0
        self.step_counter_after_new_action = 0
        self.prev_script = None
        self.need_master_action = True
        if self.gifdir:
            for obs_key, obs_list in self.obs_history.items():
                if obs_key != 'skills':
                    gif_name = os.path.join(
                        self.gifdir,
                        '{}_{}.mp4'.format(self.gif_counter, obs_key))
                    write_video(obs_list, gif_name)
                else:
                    # close the last skill entry with the number of steps
                    # it lasted, then append the episode outcome
                    obs_list[-1] = (obs_list[-1],
                                    step_counter_after_new_action_cached)
                    obs_list.append('Success = {}'.format(success))
                    json_name = os.path.join(
                        self.gifdir, '{}_skills.json'.format(self.gif_counter))
                    with open(json_name, 'w') as json_file:
                        json.dump(obs_list, json_file)
            if len(self.obs_history) > 0:
                self.gif_counter += 1
                self.obs_history = {}
        if reset_mime:
            obs = self.env.reset()
            if self.render:
                print('env {:02d} is reset after {} timesteps: {}'.format(
                    self.env_idx, step_counter_cached - 1, error_message))
            return obs
        # define new augmentation path at each reset
        # NOTE(review): this is only reached when reset_mime is False, i.e.
        # not on regular resets, which return above. Confirm whether the
        # augmentation is meant to be re-sampled on every reset.
        self.augmentation = Augmentation(self.augmentation_str)
        self.augmentation.sample_sequence(img_size=(240, 240))

    def update_info(self, info):
        """Add the step counters and master-action flag to ``info``.

        The dict is modified in place and returned. When a master action is
        needed, ``step_counter_after_new_action`` is reset to zero after its
        value has been recorded.
        """
        info['length'] = self.step_counter
        info['need_master_action'] = self.need_master_action
        info['length_after_new_action'] = self.step_counter_after_new_action
        if self.need_master_action:
            self.step_counter_after_new_action = 0
        return info

    def publish_obs(self, obs_dict):
        """Convert ``obs_dict['observation']`` and publish the dict.

        The observation entry is replaced in place by its tensor form; the
        published message is the tuple ``(obs_dict, env_idx)``.
        """
        obs_tensor = self.convert_obs(obs_dict['observation'])
        obs_dict['observation'] = obs_tensor
        self.pub_out.put((obs_dict, self.env_idx))

    def convert_obs(self, obs_dict):
        """Turn a raw observation dict into a tensor.

        If ``'Cam'`` is not in the environment name, every value except the
        one under ``'skill'`` is flattened and concatenated in sorted key
        order into one float tensor. Otherwise the image entries are passed
        to ``Frames.dict_to_tensor``, and recorded for gif writing if that
        is enabled.
        """
        if 'Cam' not in self.env_name:
            observation = np.array([])
            obs_sorted = OrderedDict(
                sorted(obs_dict.items(), key=lambda t: t[0]))
            for obs_key, obs_value in obs_sorted.items():
                if obs_key == 'skill':
                    continue
                if isinstance(obs_value, (int, float, np.generic)):
                    # np.generic covers NumPy scalars such as np.float32,
                    # which are zero-dimensional and cannot be concatenated
                    obs_value = [obs_value]
                elif isinstance(obs_value, np.ndarray):
                    obs_value = obs_value.flatten()
                elif (isinstance(obs_value, list) and obs_value
                      and isinstance(obs_value[0], np.ndarray)):
                    # an empty list skips this branch and simply adds
                    # nothing below
                    obs_value = np.concatenate(obs_value)
                observation = np.concatenate((observation, obs_value))
            obs_tensor = torch.tensor(observation).float()
        else:
            im_keys = ['depth', 'rgb', 'mask']
            obs_im = {}
            # keep one entry per image type; a key matches an image type
            # when it contains that type's name
            for key, value in obs_dict.items():
                for im_key in im_keys:
                    if im_key in key:
                        obs_im[im_key] = value
            obs_tensor = Frames.dict_to_tensor(
                frames=[obs_im],
                channels=self.channels,
                num_channels=Frames.sum_channels(self.channels),
                augmentation_str='',
                augmentation=self.augmentation)
            if self.gifdir:
                self.obs_history.setdefault('orig', []).append(
                    obs_im['depth'])
                # map the tensor values from [-1, 1] back to [0, 255]
                # NOTE(review): assumes dict_to_tensor normalizes to [-1, 1]
                obs_tensor_denormalized = (obs_tensor[0].numpy() + 1) / 2 * 255
                self.obs_history.setdefault('aug', []).append(
                    obs_tensor_denormalized)
        return obs_tensor

    def get_action_applied(self, action):
        """Return the action to apply and update ``need_master_action``.

        ``'skill'`` is popped from ``action``, so the caller's dict is
        modified. With expert scripts the returned action comes from
        ``get_script_action``; otherwise ``action`` itself is returned and a
        new master action is requested once the skill's timescale is
        reached.
        """
        skill = action.pop('skill')[0]
        if self.step_counter_after_new_action == 1:
            # first step after a new master action
            if self.gifdir:
                self.obs_history.setdefault('skills', []).append(int(skill))
            if self.render:
                print(
                    'env {:02d} got a new master action = {} (ts = {})'.format(
                        self.env_idx, skill, self.step_counter))
        if self.use_expert_scripts:
            action = self.get_script_action(skill)
        else:
            timescale = self.skills_timescales[skill]
            if self.step_counter_after_new_action >= timescale:
                if self.render:
                    print('env {:02d} needs a new master action (ts = {})'.
                          format(self.env_idx, self.step_counter))
                self.need_master_action = True
            else:
                self.need_master_action = False
        if self.gifdir and self.need_master_action:
            # pair the current skill with the number of steps it lasted
            self.obs_history['skills'][-1] = (
                self.obs_history['skills'][-1],
                self.step_counter_after_new_action)
        return action

    def get_script_action(self, skill):
        """Return the next scripted action for ``skill``.

        A new script is requested from the scene whenever ``skill`` differs
        from the previous call. When the script yields nothing more, the
        null action is returned and a new master action is requested.
        """
        if self.prev_script != skill.item():
            self.prev_script = skill.item()
            self.prev_action_chain = self.env.unwrapped.scene.script_subtask(
                skill)
        # NOTE(review): the chain is rebuilt on every call; this only
        # advances if prev_action_chain holds iterators - confirm.
        action_chain = itertools.chain(*self.prev_action_chain)
        action_applied = Actions.get_dict_null_action(self.action_keys)
        action_update = next(action_chain, None)
        if action_update is None:
            if self.render:
                print('env {:02d} needs a new master action (ts = {})'.format(
                    self.env_idx, self.step_counter))
            self.need_master_action = True
        else:
            self.need_master_action = False
            action_applied.update(
                Actions.filter_action(action_update, self.action_keys))
        if self.skills_timescales is not None:
            # when timescales are set they override the flag chosen above
            skill_timescale = self.skills_timescales[skill]
            self.need_master_action = (
                self.step_counter_after_new_action >= skill_timescale)
        return action_applied
Esempio n. 7
0
def coordinator(clf,e,n_minibatch,total_workers):
    """Run the coordinator side of the round/subround exchange with workers.

    One ``worker_f`` task is submitted for every entry of ``total_workers``
    except the first. The coordinator then repeatedly publishes the estimate
    ``E``, drives subrounds by publishing theta and counting increments, and
    updates ``E`` from the drifts the workers send back.

    Args:
        clf: classifier object forwarded to every ``worker_f`` task.
        e: value forwarded to ``worker_f`` and to ``f``.
        n_minibatch: value forwarded to ``worker_f``.
        total_workers: sequence of worker addresses; entry ``i + 1`` hosts
            the task submitted with name ``i``.

    Returns:
        Tuple ``(E, n_rounds, n_subs, k, time_stamb)``: the last estimate
        (None if the warm-up never completed), the number of rounds started,
        the number of subrounds in the last round, the last count of pending
        workers, and the elapsed seconds recorded at the last published
        result.
    """
    pub_results = Pub('results')
    pub_init = Pub('Initialize')
    pub_th = Pub('Theta')
    pub_endr = Pub('EndRound')
    pub_endsub = Pub('EndSubRound')
    sub_incr = Sub('Increment')
    sub_f = Sub('Fs')
    sub_x = Sub('Xs')

    # get increments from workers
    def get_incr():
        """Return one increment, or 0 if none arrives within 5 seconds."""
        try:
            incr=sub_incr.get(timeout=5)
            print("Coo Received increments...",incr)
            if incr<0: # works as a flag to let coordinator know that chunks are out
                print("Coo received notice of chunks ended...")
            return incr
        except TimeoutError:
            return 0

    # get fi's from all workers
    def get_fi(n_workers):
        """Collect up to ``n_workers`` values from 'Fs'; stop at the first timeout."""
        fis=[]
        print("try to get fis workers:",n_workers)
        for i in range(n_workers):
            try:
                fi=sub_f.get(timeout=5)
                print("Coo received",i+1,"fi")
                fis.append(fi)
            except TimeoutError:
                print('Fis Lost worker/workers num=',len(fis))
                break
        return fis

    # get xi's from all workers
    def get_xi(n_workers):
        """Collect up to ``n_workers`` drifts from 'Xs'; stop at the first timeout."""
        drifts=[]
        print("try to get xi workers:",n_workers)
        for i in range(n_workers):
            try:
                xi=sub_x.get(timeout=6)
                print("Coo received",i+1,"xi")
                drifts.append(xi)
            except TimeoutError:
                print('Lost worker/workers')
                break
        print("Num of workers",len(drifts))
        return drifts

    #____________________________Start coordinator_________________________________
    E=None
    # must be a list: len(fis) is taken after the subround loop even when
    # that loop never ran
    fis=[]
    e_y=0.01
    workers=[]
    time_stamb=0
    n_rounds=0

    print("Coo started ...")
    client= get_client()
    for i in range(len(total_workers)-1):
        workers.append(client.submit(worker_f,i,clf,n_minibatch,e,workers=total_workers[i+1]))

    time.sleep(1)
    flag=True #use this flag to finish future if chunks are out
    start_time=time.time()
    while flag:
        n_subs=0
        workers_status=[w.status for w in workers]
        k=workers_status.count('pending')
        print("NUMBER OF WORKERS...",k)
        if k==0:
            # nobody is left to talk to; continuing would divide by zero
            # when averaging the drifts or computing theta
            print("No workers left")
            break
        if E is None: #if E=0 we need to update E
            pub_init.put(None)
            print("Warmup...Coo Sended E=0...")
            drifts=get_xi(k) #get local drifts (Xi's)
            print("Coo received xi's...workers=",k)
            if len(drifts)==0:
                # every worker timed out: there is nothing to average
                break

            sum_xi=add_x(drifts)
            e1=sum_xi[0]/len(drifts)
            e2=sum_xi[1]/len(drifts)
            E=[e1,e2]
            pub_init.put(E)
            print("Coo Sended E")
        else:
            pub_init.put(E)
            print("Coo Sended E")
        n_rounds+=1

        y=k*f([[0],0],E,e)
        barrier=e_y*k*f([[0],0],E,e)

        #start of the round...
        print("START ROUND:",n_rounds," workers ",k)
        while y<=barrier:
            th=-y/(2*k)

            pub_th.put(th) #send theta
            print("Coo Sended theta")
            n_subs+=1
            print("START SUBROUND:",n_subs," workers ",k)
            c=0
            fis=[]

            #start of the subround...
            while c<k:
                incr=get_incr() #Get increments
                if incr<0: # works as a flag to let coordinator know that chunks are out
                    incr=0
                workers_status=[w.status for w in workers]
                k=workers_status.count('pending')
                if k==0:
                    flag=False
                c=c+incr
                #subrounds ended...

            pub_endsub.put(0) #let workers know that subrounds ended
            print("Coo Sended endofSub... num_workers",k)
            workers_status=[w.status for w in workers]
            k=workers_status.count('pending')
            fis=get_fi(k) #get F(Xi)'s from workers

            if len(fis)==0:
                pub_endr.put(0)
                break
            print("Coo Received fi's workers=",k)
            y=add_f(fis)
            print("y",y)
            if not flag: #if false chunks are out end future
                print("Coo Sended endofSub..")
                break

        #rounds ended...

        pub_endr.put(0) #let workers know that rounds ended

        print("Coo Sended endofround... num_workers",k)
        drifts=get_xi(len(fis)) #get local drifts (Xi's)
        print("len of drifts",len(drifts))
        print("Coo Received xi's workers=",k)
        if len(drifts)==0: break

        sum_xi=add_x(drifts)
        e1=E[0]+(sum_xi[0]/len(drifts)) #len(drifts)
        e2=E[1]+(sum_xi[1]/len(drifts)) #len(drifts)
        E=[e1,e2]
        time_stamb=time.time()-start_time
        pub_results.put([E,n_subs,k,time_stamb])
        if not flag:
            break
    print("Coo ended...")
    return E,n_rounds,n_subs,k,time_stamb
Esempio n. 8
0
def worker_f(name, clf, parts, e):
    """Run the worker side of the round/subround exchange with the coordinator.

    The worker trains ``clf`` on minibatches taken from its assigned chunks,
    publishes increments on 'Increment' during subrounds, the value of ``f``
    on 'Fs' at the end of each subround and its drift on 'Xs' at the end of
    each round.

    Args:
        name: identifier passed to ``load_chunks`` to select this worker's
            chunks.
        clf: classifier trained in place with ``partial_fit``.
        parts: value forwarded to ``get_minibatch``.
        e: value forwarded to ``f``.

    Returns:
        ``clf`` after training. The function returns when the chunks are
        exhausted or when no estimate arrives from the coordinator in time.
    """
    sub_init = Sub('Initialize')
    sub_th = Sub('Theta')
    sub_endr = Sub('EndRound')
    sub_endsub = Sub('EndSubRound')
    pub_incr = Pub('Increment')
    pub_f = Pub('Fs')
    pub_x = Pub('Xs')

    # get initial E value from coordinator
    def get_init():
        """Return the value read from 'Initialize', or False after 20 s."""
        w_id = get_worker().name
        try:
            print(w_id, "waits to receive E...")
            init = sub_init.get(timeout=20)
            print(w_id, "Received E")
            return init
        except TimeoutError:
            print(w_id, 'Error E not received')
            return False

    #get theta from cordinator
    def get_th():
        """Return the value read from 'Theta', or None after 1 s."""
        w_id = get_worker().name
        try:
            print(w_id, "waits to receive th...")
            th = sub_th.get(timeout=1)
            print(w_id, "Received theta")
            return th
        except TimeoutError:
            print(w_id, 'Theta aknowlegment not received')
            return None

    #get aknowlegment for continue or stop the rounds
    def get_endr():
        """Return the end-of-round message, or None if none came within 1 s."""
        try:
            endr = sub_endr.get(timeout=1)
            print(w_id, 'End of round received')
            return endr
        except TimeoutError:
            return None

    #get aknowlegment for continue or stop the subrounds
    def get_endsub():
        """Return the end-of-subround message, or None if none came within 1 s."""
        try:
            endsub = sub_endsub.get(timeout=1)
            print(w_id, 'End of subround received')
            return endsub
        except TimeoutError:
            return None

    def next_minibatch():
        """Return the next minibatch, loading further chunks as needed.

        Returns None once ``load_np`` reports that no chunk is left.
        """
        nonlocal X_chunk, y_chunk, count_chunks, minibatches
        batch = get_minibatch(X_chunk, y_chunk, minibatches, parts)
        # keep loading until a chunk yields a minibatch; a freshly loaded
        # chunk may itself yield none
        while batch is None:
            load = load_np(X_chunk_array, y_chunk_array, count_chunks)
            if load is None:
                return None
            X_chunk, y_chunk = load
            count_chunks += 1
            minibatches = 0
            batch = get_minibatch(X_chunk, y_chunk, minibatches, parts)
        minibatches += 1
        return batch

    #                       ____Start of worker____

    w_id = get_worker().name  #get worker id
    print("worker", w_id, "started...")
    flag = True
    Si = [0, 0]
    S_prev = [0, 0]
    Xi = [[0], 0]

    count_chunks = 0
    minibatches = 0

    #TAG chunks assigned and load first one
    X_chunk_array, y_chunk_array = load_chunks(
        name)  #get the array with the chunk names assigned to this worker

    X_chunk, y_chunk = load_np(X_chunk_array, y_chunk_array, count_chunks)
    count_chunks += 1

    while flag:  #while this flag stays true there are chunks
        E = get_init()  # get E from coordinator
        if E is False:
            pub_incr.put(-1)
            return clf

        if E is None:  #if E=0 compute Xi and return Xi to update E
            print(w_id, "Warmup....")
            batch = next_minibatch()
            if batch is None:
                print(w_id, "End of chunks")
                flag = False
                pub_incr.put(-1)
                break
            X, y = batch

            clf.partial_fit(X, y, np.unique(([0, 1])))
            Si = [clf.coef_[0], clf.intercept_[0]]
            Xi = [clf.coef_[0], clf.intercept_[0]]
            # publish only once exactly one subscriber is attached
            while len(pub_x.subscribers) != 1:
                time.sleep(0.01)
            pub_x.put(Xi)
            print(w_id, "Sended Xi")
            E = get_init()  # get E from coordinator
            if E is False:
                pub_incr.put(-1)
                break
        print(w_id, "Start of round")
        clf.coef_[0] = E[0]
        clf.intercept_[0] = E[1]
        S_prev[0] = np.array(list(E[0]))
        S_prev[1] = E[1]
        Xi = [[0], 0]
        #begin of round...
        #FIXME do not send message every time & check rounds and subrounds
        while get_endr() is None:

            ci = 0
            th = get_th()  #get theta
            if th is None:
                print("nonreceive")
                continue
            print(w_id, "Received start of subround")
            #begin of subround...
            while get_endsub() is None:
                zi = f(Xi, E, e)
                batch = next_minibatch()
                if batch is None:
                    print(w_id, "End of chunks")
                    flag = False
                    break
                X, y = batch
                clf.partial_fit(X, y, np.unique([0, 1]))
                Si[0] = clf.coef_[0]
                Si[1] = clf.intercept_[0]
                Xi = [Si[0] - S_prev[0], Si[1] - S_prev[1]]
                c_th = 0
                if th != 0:  #avoid division with 0 if th=0 c_th=0
                    c_th = (f(Xi, E, e) - zi) / th
                ci_new = max(ci, math.floor(c_th))
                if ci != ci_new:  #if we detect a difference send it to the coordinator
                    incr = ci_new - ci
                    pub_incr.put(incr)
                    ci = ci_new
                    print(w_id, "Sended...", incr)
            while len(pub_f.subscribers) != 1:
                time.sleep(0.01)
            pub_f.put(f(Xi, E, e))
            print(w_id, "Sended Fi")
            print(w_id, "End of subround")
            if not flag:
                break

            #end of subround...

        # an all-zero drift is not sent
        if all(v == 0 for v in Xi[0]):
            print(w_id, "ZERO XI")
        else:

            pub_x.put(Xi)  # send Xi
            print(w_id, "Sended Xi")
            Xi = [[0], 0]
        if not flag:
            break
    print(w_id, "Ended...")
    return clf