Exemple #1
0
  def execute(self, dummy):
    """Randomly perturbs the wanted capacity and reschedules this action.

    The current `wants` of the resource is changed by a random relative
    amount scaled by `self.fraction`, clamped so it never drops below zero,
    stored back on the resource and published to the per-client gauge. The
    action then re-adds itself to the scheduler after `self.interval`.

    Args:
      dummy: unused; present so the scheduler can call execute() uniformly.
    """
    previous = self.resource.wants

    # random.random() lies in [0, 1), so the factor below lies in (-1, 1].
    relative_change = self.fraction * (1 - 2 * random.random())
    updated = previous + relative_change * previous

    # Wanted capacity is never allowed to go negative.
    updated = 0 if updated < 0 else updated

    self.resource.wants = updated
    message = '%s changing wants from %lf to %lf' % (
        self.client_id, previous, updated)
    logger.debug(message)

    # Run this action again one interval from now.
    scheduler.add_relative(self.interval, self)

    gauge_name = 'client.%s.wants' % self.client_id
    Gauge.get(gauge_name).set(updated)
Exemple #2
0
    def Discovery_RPC(self, request):
        """Handles a Discovery RPC and returns a DiscoveryResponse.

        The response names the current master (when one is known) and lists
        the safe capacity of every requested resource whose template has a
        safe capacity configured. The handling time is recorded in the
        'server.DiscoveryRPC.latency' gauge.

        Args:
          request: an initialized request carrying client_id and resource_id
            entries.

        Returns:
          The initialized DiscoveryResponse.
        """
        assert request.IsInitialized()

        latency = Gauge.get('server.DiscoveryRPC.latency')
        latency.start_timer()
        logger.info('%s handling Discovery RPC from %s' %
                    (self.server_id, request.client_id))
        reply = DiscoveryResponse()

        # Report the master if there is one; otherwise only count the
        # incomplete response and leave master_bns unset.
        current_master = self.job.get_master()

        if not current_master:
            Counter.get('server.incomplete_discovery_response').inc()
        else:
            reply.master_bns = current_master.get_server_id()

        # Add a safe capacity entry for each requested resource that has one
        # configured in its template.
        for resource_id in request.resource_id:
            template = global_config.find_resource_template(resource_id)

            if not template or not template.HasField('safe_capacity'):
                continue

            entry = reply.safe_capacity.add()
            entry.resource_id = resource_id
            entry.safe_capacity = template.safe_capacity

        assert reply.IsInitialized()

        latency.stop_timer()

        return reply
  def process_capacity_response(self, response):
    """Applies the per-resource capacity grants of a capacity response.

    For every entry in `response.resource` this:
      * logs and counts a shortfall when the granted capacity is lower than
        the sum of the outstanding leases for that resource,
      * copies the grant into the matching resource's `has` field, and
      * schedules a lease-expiry check at the grant's expiry time.

    Args:
      response: message whose `resource` entries each carry a `resource_id`
        and a `gets` lease (capacity, expiry_time).
    """
    for resp in response.resource:
      assert resp.gets.capacity >= 0

      resource = self.find_resource(resp.resource_id)
      n = sum_leases(resource)

      if resp.gets.capacity < n:
        logger.warning(
            '%s shortfall for %s: getting %lf, but has %lf outstanding leases' %
            (self.get_server_id(), resource.resource_id,
             resp.gets.capacity, n))
        Counter.get('server_capacity_shortfall').inc()
        Gauge.get('server.%s.shortfall' %
                  self.get_server_id()).set(resp.gets.capacity - n)

      resource.has.CopyFrom(resp.gets)

      # Schedules an action at the expiry time to clear out the lease.
      # The resource id is bound as a default argument so that each callback
      # keeps the id of *its own* resource; a plain closure over `resource`
      # would see whatever the loop variable holds when the callback finally
      # runs (the last resource of the response). It stays a lambda because
      # the scheduler presumably invokes lambda entries directly with no
      # arguments -- verify against the scheduler before changing its type.
      scheduler.add_absolute(
          resource.has.expiry_time,
          lambda resource_id=resource.resource_id:
          self._maybe_lease_expired(resource_id))
    def process_capacity_response(self, response):
        """Applies the per-resource capacity grants of a capacity response.

        For every entry in `response.resource` this:
          * logs and counts a shortfall when the granted capacity is lower
            than the sum of the outstanding leases for that resource,
          * copies the grant into the matching resource's `has` field, and
          * schedules a lease-expiry check at the grant's expiry time.

        Args:
          response: message whose `resource` entries each carry a
            `resource_id` and a `gets` lease (capacity, expiry_time).
        """
        for resp in response.resource:
            assert resp.gets.capacity >= 0

            resource = self.find_resource(resp.resource_id)
            n = sum_leases(resource)

            if resp.gets.capacity < n:
                logger.warning(
                    '%s shortfall for %s: getting %lf, but has %lf outstanding leases'
                    % (self.get_server_id(), resource.resource_id,
                       resp.gets.capacity, n))
                Counter.get('server_capacity_shortfall').inc()
                Gauge.get('server.%s.shortfall' %
                          self.get_server_id()).set(resp.gets.capacity - n)

            resource.has.CopyFrom(resp.gets)

            # Schedules an action at the expiry time to clear out the lease.
            # The resource id is bound as a default argument so that each
            # callback keeps the id of *its own* resource; a plain closure
            # over `resource` would see whatever the loop variable holds when
            # the callback finally runs (the last resource of the response).
            # It stays a lambda because the scheduler presumably invokes
            # lambda entries directly with no arguments -- verify against the
            # scheduler before changing its type.
            scheduler.add_absolute(
                resource.has.expiry_time,
                lambda resource_id=resource.resource_id:
                self._maybe_lease_expired(resource_id))
Exemple #5
0
    def loop(self, duration):
        """Runs the scheduler for `duration` units of clock time.

        Repeatedly advances the clock to the time of the first scheduled
        action (capped at the end time), executes everything scheduled for
        that timestamp and continues every thread whose timestamp is due.
        When the end time is reached the finalizers are run and the elapsed
        time measured by the 'scheduler.latency' gauge is logged.

        Args:
          duration: how far past the current clock time the loop should run.
        """
        gauge = Gauge.get('scheduler.latency')
        gauge.start_timer()
        until = duration + clock.get_time()

        # Runs for the specified amount of time.
        while clock.get_time() < until:
            # Figures out when the first scheduled action is and advances the
            # clock until then, but never past the end of the run.
            t = min(self._first_time(), until)
            clock.set_time(t)

            # If there are scheduled items to be done right now, do them.
            if t in self.schedule:
                # Makes a copy of the list of actions that we need to execute
                # and clears out the schedule for this timestamp. This will
                # allow actions to schedule something for the current time.
                actions = list(self.schedule[t])
                del self.schedule[t]

                # Executes all the actions we need to execute now. Lambdas
                # are called directly; any other target gets its execute()
                # method called with the argument stored next to it.
                for task in actions:
                    if isinstance(task[0], types.LambdaType):
                        task[0]()
                    else:
                        task[0].execute(task[1])

            # And continue any threads which need to continue.
            # NOTE(review): iteritems() is Python 2 only, and iterating the
            # live dict assumes update_thread() never adds or removes keys
            # of thread_map -- confirm before porting.
            for (thread, timestamp) in self.thread_map.iteritems():
                if timestamp <= t:
                    self.update_thread(thread, thread.thread_continue())

        # Stop the timer
        gauge.stop_timer()

        # Runs the finalizers.
        logger.info('Running finalizers')

        for target in self.finalizers:
            if isinstance(target, types.LambdaType):
                target()
            else:
                target.finalize()

        # Ends the simulation.
        logger.info('End of simulation. It took %lf seconds' %
                    gauge.get_value())
Exemple #6
0
  def loop(self, duration):
    """Runs the scheduler for `duration` units of clock time.

    Repeatedly advances the clock to the time of the first scheduled action
    (capped at the end time), executes everything scheduled for that
    timestamp and continues every thread whose timestamp is due. When the end
    time is reached the finalizers are run and the elapsed time measured by
    the 'scheduler.latency' gauge is logged.

    Args:
      duration: how far past the current clock time the loop should run.
    """
    gauge = Gauge.get('scheduler.latency')
    gauge.start_timer()
    until = duration + clock.get_time()

    # Runs for the specified amount of time.
    while clock.get_time() < until:
      # Figures out when the first scheduled action is and advances the clock
      # until then, but never past the end of the run.
      t = min(self._first_time(), until)
      clock.set_time(t)

      # If there are scheduled items to be done right now, do them.
      if t in self.schedule:
        # Makes a copy of the list of actions that we need to execute and
        # clears out the schedule for this timestamp. This will allow actions
        # to schedule something for the current time.
        actions = list(self.schedule[t])
        del self.schedule[t]

        # Executes all the actions we need to execute now. Lambdas are called
        # directly; any other target gets its execute() method called with
        # the argument stored next to it.
        for task in actions:
          if isinstance(task[0], types.LambdaType):
            task[0]()
          else:
            task[0].execute(task[1])

      # And continue any threads which need to continue.
      # NOTE(review): iteritems() is Python 2 only, and iterating the live
      # dict assumes update_thread() never adds or removes keys of
      # thread_map -- confirm before porting.
      for (thread, timestamp) in self.thread_map.iteritems():
        if timestamp <= t:
          self.update_thread(thread, thread.thread_continue())

    # Stop the timer
    gauge.stop_timer()

    # Runs the finalizers.
    logger.info('Running finalizers')

    for target in self.finalizers:
      if isinstance(target, types.LambdaType):
        target()
      else:
        target.finalize()

    # Ends the simulation.
    logger.info('End of simulation. It took %lf seconds' % gauge.get_value())
Exemple #7
0
  def Discovery_RPC(self, request):
    """Handles a Discovery RPC and returns a DiscoveryResponse.

    The response names the current master (when one is known) and lists the
    safe capacity of every requested resource whose template has a safe
    capacity configured. The handling time is recorded in the
    'server.DiscoveryRPC.latency' gauge.

    Args:
      request: an initialized request carrying client_id and resource_id
        entries.

    Returns:
      The initialized DiscoveryResponse.
    """
    assert request.IsInitialized()

    latency = Gauge.get('server.DiscoveryRPC.latency')
    latency.start_timer()
    logger.info(
        '%s handling Discovery RPC from %s' %
        (self.server_id, request.client_id))
    reply = DiscoveryResponse()

    # Report the master if there is one; otherwise only count the incomplete
    # response and leave master_bns unset.
    current_master = self.job.get_master()

    if not current_master:
      Counter.get('server.incomplete_discovery_response').inc()
    else:
      reply.master_bns = current_master.get_server_id()

    # Add a safe capacity entry for each requested resource that has one
    # configured in its template.
    for resource_id in request.resource_id:
      template = global_config.find_resource_template(resource_id)

      if not template or not template.HasField('safe_capacity'):
        continue

      entry = reply.safe_capacity.add()
      entry.resource_id = resource_id
      entry.safe_capacity = template.safe_capacity

    assert reply.IsInitialized()

    latency.stop_timer()

    return reply
Exemple #8
0
    def GetCapacity_RPC(self, request):
        """Handles a GetCapacity RPC and returns the capacity grants.

        The request is processed in two passes over request.resource: the
        first records what the client reports (priority, wants, has) in the
        server state, the second hands out capacity and builds the response.
        The handling time is recorded in the
        'server.GetCapacity_RPC.latency' gauge.

        Args:
          request: an initialized request carrying client_id and a list of
            per-resource sub-requests (resource_id, priority, wants and an
            optional has).

        Returns:
          None if this server is not the master; otherwise a
          GetCapacityResponse with one entry per requested resource, except
          for resources that were requested again within _kMinimumInterval
          of the previous request, which get no entry.
        """
        assert request.IsInitialized()
        assert self.state.is_initialized()

        # If this server is not the master it cannot handle this request.
        # The client should do a new Discovery.
        if not self.is_master():
            self.state.assert_clean()
            logger.info('%s getting a GetCapacity request when not master' %
                        self.server_id)
            Counter.get('server.GetCapacity_RPC.not_master').inc()

            return None

        timer = Gauge.get('server.GetCapacity_RPC.latency')
        timer.start_timer()
        logger.debug(request)
        # A single timestamp is used for the whole request so that all
        # interval checks and lease computations below agree.
        now = clock.get_time()

        # Cleanup the state. This removes resources and clients with expired
        # leases and such.
        self.state.cleanup()

        # A set of resources that we need to skip in step 2 (the actual
        # handing out of capacity).
        resources_to_skip = set()

        # First step: Go through the request and update the state with the
        # information from the request.
        for req in request.resource:
            # Finds the resource and the client state for this resource.
            (resource,
             cr) = self.state.find_client_resource(request.client_id,
                                                   req.resource_id)

            # If this resource does not exist we don't need to do anything
            # right now.
            if resource:
                assert cr

                # Checks whether the last request from this client was at least
                # _kMinimumInterval seconds ago.
                if cr.HasField(
                        'last_request_time'
                ) and now - cr.last_request_time < _kMinimumInterval:
                    logger.warning(
                        '%s GetCapacity request for resource %s within the %d second '
                        'threshold' %
                        (self.server_id, req.resource_id, _kMinimumInterval))
                    resources_to_skip.add(req.resource_id)
                else:
                    # Updates the state with the information in the request.
                    cr.last_request_time = now
                    cr.priority = req.priority
                    cr.wants = req.wants

                    # Mirrors the client's reported lease; a request without
                    # one clears whatever the state held before.
                    if req.HasField('has'):
                        cr.has.CopyFrom(req.has)
                    else:
                        cr.ClearField('has')

        # Creates a new response object in which we will insert the responses for
        # the resources contained in the request.
        response = GetCapacityResponse()

        # Step 2: Loop through all the individual resource requests in the request
        # and hand out capacity.
        for req in request.resource:
            # If this is a resource we need to skip, let's skip it.
            if req.resource_id in resources_to_skip:
                continue

            # Finds the resource and the client state for this resource.
            (resource,
             cr) = (self.state.find_client_resource(request.client_id,
                                                    req.resource_id))

            # Adds a response proto to the overall response.
            resp = response.response.add()
            resp.resource_id = req.resource_id

            # If this is an unknown resource just give the client whatever it
            # is asking for.
            if not resource:
                assert not cr

                logger.warning(
                    '%s GetCapacity request for unmanaged resource %s' %
                    (self.server_id, req.resource_id))
                resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
                resp.gets.capacity = req.wants
            else:
                # Sets the safe capacity in the response if there is one
                # configured for this resource.
                if resource.template.HasField('safe_capacity'):
                    resp.safe_capacity = resource.template.safe_capacity

                # Finds the algorithm implementation object for this resource.
                algo = AlgorithmImpl.create(resource.template,
                                            self.server_level)

                # If the resource is in learning mode we just return whatever the client
                # has now and create a default lease.
                if resource.learning_mode_expiry_time >= now:
                    # A client that reported no lease is treated as holding
                    # zero capacity.
                    if cr.HasField('has'):
                        has_now = cr.has.capacity
                    else:
                        has_now = 0

                    cr.has.CopyFrom(algo.create_lease(resource, has_now))
                    Counter.get('server.learning_mode_response').inc()
                else:
                    # Otherwise we just run the algorithm. This will update the
                    # client state object.
                    algo.run_client(resource, cr)
                    Counter.get('server.algorithm_runs').inc()

                # Copies the output from the algorithm run into the response.
                resp.gets.CopyFrom(cr.has)

            assert resp.IsInitialized()
            logger.info(
                '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
                % (self.server_id, request.client_id, req.resource_id,
                   req.wants, resp.gets.capacity, resp.gets.expiry_time - now,
                   resp.gets.refresh_interval))

        assert response.IsInitialized()

        timer.stop_timer()

        return response
Exemple #9
0
  def final_report(self):
    """Writes the collected data, counters and gauges as a CSV-style report.

    Output goes to `self.filename` when it is set and to standard output
    otherwise. The report consists of:
      * two header lines (entity names, then per-entity column names),
      * one row per timestamp in `self.data`, with columns for every client,
        every server job and every summary ever seen (empty cells where a
        timestamp has no data for an entity),
      * a table of all counters, and
      * a table of all gauges (count, min, average, max).

    Clients, server jobs and summaries are always emitted in sorted order so
    that both header lines and the data rows line up column by column.
    """
    if self.filename:
      fout = open(self.filename, 'w')
    else:
      fout = sys.stdout

    try:
      # Prints the first header line: the name of each entity, padded with
      # empty cells to span that entity's columns.
      print >>fout, ',',

      for client in sorted(self.all_clients):
        print >>fout, '"%s"' % client, ',,',

      print >>fout, ',',

      for server in sorted(self.all_server_jobs):
        print >>fout, '"%s"' % server, ',,,,',

      print >>fout, ',',

      for s in sorted(self.all_summaries):
        print >>fout, '"%s"' % s,

        # The 'clients' summary has two columns, all others have four.
        if s == 'clients':
          print >>fout, ',,',
        else:
          print >>fout, ',,,,',

      print >>fout

      # Prints the second header line: the column names per entity.
      print >>fout, '"Time",',

      for client in sorted(self.all_clients):
        print >>fout, '"wants", "has",',

      print >>fout, ',',

      for server in sorted(self.all_server_jobs):
        print >>fout, '"wants", "has", "leases", "outstanding",',

      print >>fout, ',',

      # Must use the same (sorted) order as the first header line and the
      # data rows, because the column count differs per summary.
      for s in sorted(self.all_summaries):
        if s == 'clients':
          print >>fout, '"total_wants", "total_has",',
        else:
          print >>fout, ('"total_wants", "total_has", "total_leases", '
                         '"total_outstanding",'),

      print >>fout

      # Goes through the data set in timestamp order.
      for time in sorted(self.data.keys()):
        print >>fout, time, ',',
        data = self.data[time]

        # Prints the reporting data for every client and server that we ever
        # saw. If we have no data for a timestamp we print nothing.
        for client in sorted(self.all_clients):
          if client in data:
            d = data[client]
            print >>fout, d.wants, ',', d.has, ',',
          else:
            print >>fout, ',,',

        print >>fout, ',',

        # Do the same for the servers.
        for server in sorted(self.all_server_jobs):
          if server in data:
            d = data[server]
            print >>fout, d.wants, ',', d.has, ',', d.leases, ',', \
                d.outstanding, ',',
          else:
            print >>fout, ',,,,',

        # Now for the summaries
        print >>fout, ',',

        data = self.summaries[time]

        for s in sorted(self.all_summaries):
          if s not in data:
            # Keep the columns aligned with empty cells.
            if s == 'clients':
              print >>fout, ',,',
            else:
              print >>fout, ',,,,',

            continue

          d = data[s]

          if s == 'clients':
            print >>fout, d.total_wants, ',', d.total_has, ',',
          else:
            print >>fout, d.total_wants, ',', d.total_has, ',', \
                d.total_leases, ',', d.total_outstanding, ',',

        print >>fout

      # Now we go and print the counters, sorted by name.
      print >>fout
      print >>fout, '"Name", "Value"'
      names = sorted(counter.get_name() for counter in Counter.all_counters())

      for name in names:
        counter = Counter.get(name)
        print >>fout, counter.get_name(), ',', counter.get_value()

      # And all the gauges, sorted by name.
      print >>fout
      print >>fout, '"Name", "N", "Min", "Average", "Max"'
      names = sorted(gauge.get_name() for gauge in Gauge.all_gauges())

      for name in names:
        gauge = Gauge.get(name)
        print >>fout, gauge.get_name(), ',', gauge.get_count(), ',', \
            gauge.get_min_value(), ',', gauge.get_average(), ',', \
            gauge.get_max_value()
    finally:
      # Closes the output file (but never sys.stdout), even when writing the
      # report failed half way.
      if self.filename:
        fout.close()

    if self.filename:
      logger.info('Report written to %s' % self.filename)
Exemple #10
0
  def GetCapacity_RPC(self, request):
    """Handles a GetCapacity RPC and returns the capacity grants.

    The request is processed in two passes over request.resource: the first
    records what the client reports (priority, wants, has) in the server
    state, the second hands out capacity and builds the response. The
    handling time is recorded in the 'server.GetCapacity_RPC.latency' gauge.

    Args:
      request: an initialized request carrying client_id and a list of
        per-resource sub-requests (resource_id, priority, wants and an
        optional has).

    Returns:
      None if this server is not the master; otherwise a GetCapacityResponse
      with one entry per requested resource, except for resources that were
      requested again within _kMinimumInterval of the previous request,
      which get no entry.
    """
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # If this server is not the master it cannot handle this request.
    # The client should do a new Discovery.
    if not self.is_master():
      self.state.assert_clean()
      logger.info('%s getting a GetCapacity request when not master' %
                  self.server_id)
      Counter.get('server.GetCapacity_RPC.not_master').inc()

      return None

    timer = Gauge.get('server.GetCapacity_RPC.latency')
    timer.start_timer()
    logger.debug(request)
    # A single timestamp is used for the whole request so that all interval
    # checks and lease computations below agree.
    now = clock.get_time()

    # Cleanup the state. This removes resources and clients with expired
    # leases and such.
    self.state.cleanup()

    # A set of resources that we need to skip in step 2 (the actual
    # handing out of capacity).
    resources_to_skip = set()

    # First step: Go through the request and update the state with the
    # information from the request.
    for req in request.resource:
      # Finds the resource and the client state for this resource.
      (resource, cr) = self.state.find_client_resource(
          request.client_id,
          req.resource_id)

      # If this resource does not exist we don't need to do anything
      # right now.
      if resource:
        assert cr

        # Checks whether the last request from this client was at least
        # _kMinimumInterval seconds ago.
        if cr.HasField('last_request_time') and now - cr.last_request_time < _kMinimumInterval:
          logger.warning(
              '%s GetCapacity request for resource %s within the %d second '
              'threshold' %
              (self.server_id, req.resource_id, _kMinimumInterval))
          resources_to_skip.add(req.resource_id)
        else:
          # Updates the state with the information in the request.
          cr.last_request_time = now
          cr.priority = req.priority
          cr.wants = req.wants

          # Mirrors the client's reported lease; a request without one
          # clears whatever the state held before.
          if req.HasField('has'):
            cr.has.CopyFrom(req.has)
          else:
            cr.ClearField('has')

    # Creates a new response object in which we will insert the responses for
    # the resources contained in the request.
    response = GetCapacityResponse()

    # Step 2: Loop through all the individual resource requests in the request
    # and hand out capacity.
    for req in request.resource:
      # If this is a resource we need to skip, let's skip it.
      if req.resource_id in resources_to_skip:
        continue

      # Finds the resource and the client state for this resource.
      (resource, cr) = (
          self.state.find_client_resource(
              request.client_id,
              req.resource_id))

      # Adds a response proto to the overall response.
      resp = response.response.add()
      resp.resource_id = req.resource_id

      # If this is an unknown resource just give the client whatever it
      # is asking for.
      if not resource:
        assert not cr

        logger.warning(
            '%s GetCapacity request for unmanaged resource %s' %
            (self.server_id, req.resource_id))
        resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
        resp.gets.capacity = req.wants
      else:
        # Sets the safe capacity in the response if there is one
        # configured for this resource.
        if resource.template.HasField('safe_capacity'):
          resp.safe_capacity = resource.template.safe_capacity

        # Finds the algorithm implementation object for this resource.
        algo = AlgorithmImpl.create(resource.template, self.server_level)

        # If the resource is in learning mode we just return whatever the client
        # has now and create a default lease.
        if resource.learning_mode_expiry_time >= now:
          # A client that reported no lease is treated as holding zero
          # capacity.
          if cr.HasField('has'):
            has_now = cr.has.capacity
          else:
            has_now = 0

          cr.has.CopyFrom(algo.create_lease(resource, has_now))
          Counter.get('server.learning_mode_response').inc()
        else:
          # Otherwise we just run the algorithm. This will update the
          # client state object.
          algo.run_client(resource, cr)
          Counter.get('server.algorithm_runs').inc()

        # Copies the output from the algorithm run into the response.
        resp.gets.CopyFrom(cr.has)

      assert resp.IsInitialized()
      logger.info(
          '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d' %
          (self.server_id, request.client_id, req.resource_id, req.wants,
           resp.gets.capacity, resp.gets.expiry_time - now,
           resp.gets.refresh_interval))

    assert response.IsInitialized()

    timer.stop_timer()

    return response