Example #1
0
class Server:
    """
    Contains all the information that is required
    to use a remote object (alone and within a grid).

    Attributes set by the constructor:

    1) self.service_id
    2) self.proxy
    3) self.host
    4) self.debug, self.debug_out -- debug flag and stream used by dp()
    5) self.time, self.jobs, self.acquired -- load balancing / logging

    terminate() is abstract here and has to be provided by subclasses.

    """

    def __init__(self, proxy, service_id, host, debug):
        """
        proxy      - proxy object standing for the remote instance
        service_id - id of the service this server belongs to
        host       - host description (its .name is used by __str__)
        debug      - debug flag, stored as self.debug

        """

        self.debug = debug
        self.debug_out = sys.stdout

        self.proxy = proxy
        self.service_id = service_id
        self.host = host

        ## register itself (server) in the proxy; the entry is written
        ## straight into the instance __dict__ instead of going through
        ## normal attribute assignment on the proxy

        self.proxy.__dict__['_server'] = self

        ## variables for load balancing

        self.time = Pipe(50)   ## Pipe is defined elsewhere; starts with one 0. entry
        self.time.put(0.)
        self.jobs = 0          ## (log) number of jobs done by the Server
        self.acquired = False  ## (log)

    def dp(self, str):
        """
        Write one debug line to self.debug_out and flush the stream.

        str - object to print (formatted with %s)

        """

        ## file.write() produces the same line as the former
        ## 'print >>stream, str' statement but is not tied to the
        ## Python 2 print statement (which fails at runtime on Python 3)
        self.debug_out.write('%s\n' % (str,))
        self.debug_out.flush()

    def terminate(self):
        """
        It terminates self.proxy if needed.
        Abstract: always raises NotImplementedError here.

        """
        raise NotImplementedError

    def __str__(self):
        """
        One-line summary: class name, busy flag, average of self.time,
        number of jobs, host name and proxy.

        """

        ## closing parenthesis added; the summary used to end with an
        ## unbalanced '(' opened after the class name
        s = '%s(busy=%s, <time>=%.2f, jobs=%d, host="%s", proxy="%s")' \
             % (self.__class__.__name__, self.acquired, average(self.time), self.jobs, \
                self.host.name, self.proxy)

        return s

    __repr__ = __str__
def randomSteps(env,steps,dqn):
    """
    Fill the replay memory of `dqn` by running `steps` environment steps.

    Every episode starts with `initial_no_op` NO_OP actions whose
    pre-processed frames (rgb2gray + downSample) are pushed onto a
    4-frame stack. After that, uniformly random actions are played and
    each transition (state, one-hot action, reward, next state,
    game-over flag) is handed to `dqn.storeExperience`.

    env   - environment offering reset() and step(action); step() is
            unpacked as (observation, reward, done, info)
    steps - total number of env.step() calls to perform
    dqn   - agent offering storeExperience(...) and a sized `replayMemory`

    Prints the elapsed time and the size of the replay memory; returns
    nothing.
    """
    t0 = time.time()
    env.reset()
    i = 0
    frame_stack = Pipe(4)
    # Drawn once per call (not per episode); the lower bound of 4 matches
    # the Pipe(4) size used for the frame stack.
    initial_no_op = np.random.randint(4, NO_OP_MAX)

    for _ in range(steps):
        if i < initial_no_op:
            # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
            frame_stack.push(downSample(rgb2gray(state)))
            i += 1
            episode_over = done
        else:
            state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            action = np.random.randint(0, len(ACTIONS))
            actionH = np.zeros(len(ACTIONS))  # one-hot encoding of the action
            actionH[action] = 1
            next_state, reward, game_over, info = env.step(action)

            frame_stack.push(downSample(rgb2gray(next_state)))

            next_state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # NOTE(review): `type` is whatever is bound to that name at
            # module level - presumably a NumPy dtype; confirm it is not
            # the builtin.
            dqn.storeExperience(
                state.astype(type),
                actionH,
                reward,
                next_state.astype(type),
                game_over)
            # The flag of THIS step decides about the reset; the old code
            # tested the stale `done` left over from the last no-op step.
            episode_over = game_over

        if episode_over:
            env.reset()
            i = 0
            # A fresh Pipe, not a list: the loop keeps calling
            # frame_stack.push() and reading frame_stack.items.
            frame_stack = Pipe(4)

    t1 = time.time()
    print("Fullfilling replay memory operation took:",t1-t0,)
    print('Size of replay memory %s bytes and has %s elements' % ((sys.getsizeof(dqn.replayMemory)),len(dqn.replayMemory)))
    # A bare `print` is a no-op expression on Python 3; print('') emits the
    # intended empty line under both print semantics.
    print('')
Example #3
0
    def __init__(self, proxy, service_id, host, debug):
        """
        Store the description of one remote object and reset the
        load-balancing statistics.

        proxy      - proxy object standing for the remote instance
        service_id - id of the service this server belongs to
        host       - host the remote instance lives on
        debug      - debug flag, stored as self.debug

        """

        self.debug, self.debug_out = debug, sys.stdout

        self.proxy, self.service_id, self.host = proxy, service_id, host

        ## back-reference from the proxy to this server, written
        ## straight into the proxy's instance __dict__
        self.proxy.__dict__['_server'] = self

        ## load balancing: history of timings (starts with a single 0.)
        timings = Pipe(50)
        timings.put(0.)
        self.time = timings

        ## (log) busy flag and number of jobs done by the Server
        self.acquired = False
        self.jobs = 0
    def run(self):

        self.removed = Pipe(100000)

        while not self.__stop:

            if os.path.exists(os.path.join(self.hosts[0].temp_path, 'quit')):

                [self.send(server.url, MSG_TERMINATE) for \
                 server in self.servers[service_id]]

                #self.halt()
                self.terminate()

                break

            recv = self.recv(-1, -1)

            if recv is None:
                continue

            tid, msg, data = recv

            if self.debug:
                template = 'received: tid=%d, msg=%d, type(data)=%s'

            if msg == MSG_INIT_DONE:

                for service_id, server_list in self.servers.items():

                    server = [s for s in server_list if s.url == tid]

                    if server:

                        if len(server) > 1:
                            raise 'Inconsistency'

                        if self.debug:
                            print server[0].host.name, 'ready.'

                continue

            elif msg == MSG_CALL_METHOD_RESULT:

                try:
                    key, value, elapsed = data
                except:
                    print data, len(data)

                if key in self.results:

                    result = self.results[key]

                    result.value = value
                    result.event.set()

                    self.add_time(result.proxy, elapsed)

                    if result.proxy._selfrelease:

                        if self.debug:
                            self.dp( 'FileBasedGrid.run: releasing server %s for the grid' \
                                     % result.proxy._server )

                        result.proxy._selfrelease = False
                        self._release_service(result.proxy)

                    del self.results[key]
                    self.removed.put(key)

                else:

                    print 'Result object, key %d, not known.' % key

                    if key in self.removed.pipe:
                        print 'Key has already been deleted.'

            elif msg == MSG_CLIENT_CRASHED:

                ## find service to which the crashed client belongs

                for service_id, server_list in self.servers.items():

                    crashed = [server for server in server_list \
                               if server.url == tid]

                    if crashed:
                        break

                else:
                    raise 'Inconsistency: TID %d not known.' % tid

                msg = 'Client on host "%s" inaccessible. Attempting shutdown...'
                print msg % crashed[0].host.name

                self.terminate(service_id)

            else:
                print 'Unknown message', tid, msg

        self.__stopped = self.__stop
class FileBasedGrid(AbstractGrid):

    def __init__(self, hosts, src_path, display, X11_delay, debug, verbose, nfs_care=False):

        AbstractGrid.__init__(self, hosts, src_path, display, X11_delay, debug, verbose, shared_temp_path=True)

        #copy all files in current dir to remote host.
        if self.debug: print "initialising"
        self.initialise(src_path, ['filebased_loader','ro',
                                   'FileBasedGrid','OrderedDict'])


        if self.debug: print "setting filebased_loader"
        self.set_loader(src_path, 'filebased_loader')

        if self.debug: print "creating communicator"
        self.create_communicator(nfs_care)

        self.results = {}
        self.key = 0

        self.__stop = False
        self.__stopped = False

        if self.debug:
            print 'FileBasedGrid created: tid = ', self.communicator.tid

    def set_debug(self, debug):
        """
        Switch debugging for the grid (through AbstractGrid.set_debug)
        and for its communicator.

        """

        AbstractGrid.set_debug(self, debug)

        communicator = self.communicator
        communicator.debug = debug

    def create_communicator(self, nfs_care):
        """
        Create self.communicator, a FileBasedCommunicator working in
        the temp path of the first host.

        nfs_care - passed on to FileBasedCommunicator

        """

        temp_path = self.hosts[0].temp_path

        self.communicator = FileBasedCommunicator(
            temp_path, debug = self.debug, nfs_care = nfs_care)

    def publish(self, instance):
        """
        for each host of self.hosts
            -launch a FileBasedRemoteObjectHandler through the filebased loader.
            - pickle the instance object and send it to the handler.
            setting an attribute of the proxy results in a message being sent to
            the concerned host.
            - create a FileBasedServed for the proxy
            - add it to the queue self.queues[sid]
                and to the list self.servers[sid]
                and set server.grid and server.proxy
        returns the service id associated to this instance.

        note from the authors
        In FileBasedGrid there is one to one correspondence between
        a Server and a Handler...

        """

        if self.debug:
            try:
                print "publishing instance %s" % \
                    instance.__class__.__name__
            except:
                print "publishing instance"

        if self.debug: print " creating sevice id"
        service_id = self.create_service_id(instance)

        for host in self.hosts:

            if self.debug: print " host ",host.name

            if self.debug: print "  creating proxy"
            proxy = self.create_proxy(instance, host, self.display, daemon = 1)

            if self.debug: print "  creating FileBasedServer"
            server = FileBasedServer(proxy, service_id, host, self.debug)

            if self.debug: print "  proxy._get_url()"
            server.url = proxy._get_url()

            if self.debug: print "  adding server"
            self.add_server(server)

            if self.display and self.X11_delay is not None:
                time.sleep(self.X11_delay)

        return service_id

    def create_proxy(self, instance, host, display = 0, daemon = 0):
        """
        (copied from Grid, called from AbstractISDGrid.create_server)

        """

        if self.debug: print "   creating handler"
        handler_tid = self.create_handler(instance, host, display, daemon)

        if self.debug: print "   creating proxy"
        proxy = FileBasedRemoteObject(instance, handler_tid, manager = self)

        if self.debug:
            print 'Connected: tid=%d' % handler_tid

        return proxy

    def create_handler(self, instance, host, display, daemon):

        d = {'object': instance,
             'daemon': daemon}

        handler_tid = self.spawn_handler(host, display)

        if self.debug:
            print 'Initialising service on host "%s"' % host.name

        self.send(handler_tid, MSG_INIT, d)

        if self.debug:
            print 'MSG_INIT sent.'

        return handler_tid

    def spawn_handler(self, host, display):

        handler_script = os.path.join(self.hosts[0].temp_path,
                                      self.loader_filename)

        init_data = {'niceness': host.niceness,
                     'display': display}

        argv = [handler_script]

        #add required init commands prior to launching anything else on the target host.
        if host.init_cmd != '':
            if host.init_cmd.rstrip().endswith(';'):
                command = host.init_cmd
            elif host.init_cmd.rstrip().endswith('!'):
                command = host.init_cmd.rstrip()[:-1]
            else:
                command = host.init_cmd + ';'
        else:
            command = ''

        if display:

            if type(display) is type(0):

                master_name = socket.gethostname()

                if host.name == master_name:
                    display = ':0.0'
                else:
                    display = master_name + ':0.0'

            command += 'xterm'

            argv = ['-title', host.name,
                    '-geometry', self.window_size,
                    '-hold',
                    '-e',
                    host.python, '-i'] + argv

            pipe = ''

        else:
            command += host.python
            pipe = '> /dev/null'

        if self.debug:
            print 'Spawning service on host "%s"' % host.name

        X_forward = display

        tid = self.communicator.spawn(host.name, command, argv,
                                      init_data, pipe, X_forward)

        if self.debug:
            print 'Service spawned: tid = ', tid

        return tid

    def recv(self, tid, msg):
        """
        Receive through the communicator; returns whatever
        communicator.recv(tid, msg) returns.

        """
        communicator = self.communicator
        return communicator.recv(tid, msg)

    def send(self, tid, msg, value = None):
        """
        Send message `msg` with the optional payload `value` to `tid`
        through the communicator. Nothing is returned.

        """
        communicator = self.communicator
        communicator.send(tid, msg, value)

    def create_result_object(self, proxy):
        """
        Results has to be temporarily stored in the Grid
        until their values are calculated

        """

        key = self.key
        self.key += 1

        if key in self.results:
            print 'Result object for key %d already exists.' % key

        #result = Result(tid, key, self)

        result = Result(proxy)

        result.key = key

        self.results[key] = result

        return result

    def run(self):

        self.removed = Pipe(100000)

        while not self.__stop:

            if os.path.exists(os.path.join(self.hosts[0].temp_path, 'quit')):

                [self.send(server.url, MSG_TERMINATE) for \
                 server in self.servers[service_id]]

                #self.halt()
                self.terminate()

                break

            recv = self.recv(-1, -1)

            if recv is None:
                continue

            tid, msg, data = recv

            if self.debug:
                template = 'received: tid=%d, msg=%d, type(data)=%s'

            if msg == MSG_INIT_DONE:

                for service_id, server_list in self.servers.items():

                    server = [s for s in server_list if s.url == tid]

                    if server:

                        if len(server) > 1:
                            raise 'Inconsistency'

                        if self.debug:
                            print server[0].host.name, 'ready.'

                continue

            elif msg == MSG_CALL_METHOD_RESULT:

                try:
                    key, value, elapsed = data
                except:
                    print data, len(data)

                if key in self.results:

                    result = self.results[key]

                    result.value = value
                    result.event.set()

                    self.add_time(result.proxy, elapsed)

                    if result.proxy._selfrelease:

                        if self.debug:
                            self.dp( 'FileBasedGrid.run: releasing server %s for the grid' \
                                     % result.proxy._server )

                        result.proxy._selfrelease = False
                        self._release_service(result.proxy)

                    del self.results[key]
                    self.removed.put(key)

                else:

                    print 'Result object, key %d, not known.' % key

                    if key in self.removed.pipe:
                        print 'Key has already been deleted.'

            elif msg == MSG_CLIENT_CRASHED:

                ## find service to which the crashed client belongs

                for service_id, server_list in self.servers.items():

                    crashed = [server for server in server_list \
                               if server.url == tid]

                    if crashed:
                        break

                else:
                    raise 'Inconsistency: TID %d not known.' % tid

                msg = 'Client on host "%s" inaccessible. Attempting shutdown...'
                print msg % crashed[0].host.name

                self.terminate(service_id)

            else:
                print 'Unknown message', tid, msg

        self.__stopped = self.__stop

    def ishalted(self):
        """
        Return the 'stopped' flag; it is False after construction and
        takes the value of the stop flag when run() leaves its loop.

        """
        halted = self.__stopped
        return halted

    def terminate(self, service_id = None):

        AbstractGrid.terminate(self, service_id)

        self.communicator.halt()
        self.__stop = True

        if self.debug: ## please keep debug statement!
            print 'FileBasedGrid: terminated.'

    def __del__(self):
        """
        Terminate the grid when the object is finalised.

        """
        shutdown = self.terminate
        shutdown()

    #### YS: a few additions

    def broadcast(self, sfo_id, funcname, *args, **kw):
        """
        Call method `funcname` with the same arguments on the proxy of
        every server of service `sfo_id`.

        returns the list of what the calls returned, in server order.

        """
        return [getattr(server.proxy, funcname)(*args, **kw)
                for server in self.servers[sfo_id]]

    def scatter(self, sfo_id, funcname, arglist, kwlist=None):
        """
        Call method `funcname` on the proxies of service `sfo_id`,
        giving every server its own arguments.

        sfo_id   - key into self.servers
        funcname - name of the proxy method to call
        arglist  - one entry per call: either a sequence of positional
                   arguments or a single value (entries are wrapped into
                   one-element lists if the FIRST entry has no __iter__)
        kwlist   - optional list of keyword dictionaries, one per call
                   (default: no keyword arguments)

        Servers, arglist and kwlist are paired with zip(), i.e. the
        shortest of the three decides how many calls are made.

        returns the list of what the calls returned; an empty arglist
        gives an empty list.

        """
        if kwlist is None:
            kwlist = [{} for _ in arglist]

        ## the length check keeps an empty arglist from failing with an
        ## IndexError on arglist[0]
        if len(arglist) > 0 and not hasattr(arglist[0], '__iter__'):
            arglist = [[arg] for arg in arglist]

        results = []
        for server, args, kw in zip(self.servers[sfo_id], arglist, kwlist):
            func = getattr(server.proxy, funcname)
            results.append(func(*args, **kw))
        return results

    def gather(self, results):
        """
        Collect the values of `results` by calling get() on every
        entry; returns them as a list in the same order.

        """
        return [pending.get() for pending in results]
    #Saving and loading networks
    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path and LOAD_NETWORK:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
        game = int(re.match('.*?([0-9]+)$', checkpoint.model_checkpoint_path).group(1))
    else:
        print("Could not find old network weights")

    sess.run(tf.global_variables_initializer())
    game = 0
    game_scores =[]
    initial_no_op = np.random.randint(4, NO_OP_MAX)
    i=0
    frame_stack=Pipe(4)
    score=0
    summary_writer = tf.summary.FileWriter('logs',sess.graph)
    summary_writer = tf.summary.FileWriter(LOG_DIRECTORY + RUN_STRING,
                                           sess.graph)
    print('Started training')
    for step in range(STEPS):
        if i < initial_no_op:
            # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
            greyObservation = rgb2gray(state)
            state = downSample(greyObservation)
            frame_stack.push(state)
            i+=1
        else: