Example #1
import logging
import time
from datetime import datetime as dt

import redis
# RedisClient and OverlayNotAvailable are the project's Redis overlay client and
# its "overlay not available" exception; their import path is not shown in this excerpt.


def check_except(name):
    """Continuously ping the Redis overlay service `name`, tracking cumulative
    up/down time and logging a record whenever the status changes."""
    log = logging.getLogger(name)
    total_uptime = 0.
    total_downtime = 0.
    run_time = 0.
    counter = 0
    up = False
    uptxt = 'DOWN'
    ts = dt.now()
    log.info('START,%f,%f,%f', total_uptime, total_downtime, run_time)
    while True:
        ts = dt.now()
        time.sleep(.5)
        try:
            client = RedisClient(name)
            status = client.ping()
            if counter == 0:
                client.set('testcounter', 1)
            else:
                client.incr('testcounter')
            counter += 1
            testcounter = int(client.get('testcounter'))
            assert (counter == testcounter)
        except redis.RedisError as e:
            print(' REDIS ERROR ===>   ', type(e).__name__)
            status = False
        except OverlayNotAvailable as e:
            print(' OVERLAY not available')
            status = False
        delta = (dt.now() - ts).total_seconds()
        if status == up:
            run_time += delta
        else:
            print('STATUS Change from %s' % uptxt)
            log.info('%s,%f,%f,%f', uptxt, total_uptime, total_downtime,
                     run_time)
            run_time = 0.
        if status:
            uptxt = 'UP'
            total_uptime += delta
        else:
            uptxt = 'DOWN'
            total_downtime += delta
        print('%s,%f' % (uptxt, run_time))
        up = status
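
A minimal driver for the monitor above (not part of the source): the log file name, log format, and service name are illustrative assumptions.

if __name__ == '__main__':
    # Write the CSV-style records emitted by check_except to a log file.
    logging.basicConfig(filename='uptime.log', level=logging.INFO,
                        format='%(asctime)s,%(message)s')
    check_except('testservice')   # hypothetical overlay service name; loops until killed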
Example #2
  def wait_catalog(self):
    """Blocks current execution until the catalog service is available. If it
    is not available remotely, start up a local service.
    """
    start = dt.datetime.now()
    settings = systemsettings()
    while True:
      try:
        if self.catalog is None:
          self.catalog = RedisClient(settings.name)
        self.catalog.ping()
        break
      except OverlayNotAvailable as e:
        # No catalog overlay is reachable: start one locally and retry.
        self.start_local_catalog()
        self.catalog = None
      except redis.RedisError as e:
        # Transient Redis failure: drop the client and retry on the next pass.
        self.catalog = None

    delta = (dt.datetime.now() - start).total_seconds()
    if delta > 1:
      logging.info('CLIENT_DELAY,%f', delta)
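
A possible caller sketch (not from the source): CatalogConsumer below is a hypothetical class that owns this wait_catalog() method, and 'somekey' is a placeholder key.

  consumer = CatalogConsumer()              # hypothetical class owning wait_catalog()
  consumer.wait_catalog()                   # blocks until the catalog answers ping()
  value = consumer.catalog.get('somekey')   # catalog is now a live RedisClient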
Example #3
  #   return self.feal_list[trnum]


#############################


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('name', default='default')
  parser.add_argument('--centroid', action='store_true')
  args = parser.parse_args()

  confile = args.name + '.json'
  settings = systemsettings()
  settings.applyConfig(confile)
  catalog = RedisClient(args.name)

  # To recalculate PCA vectors from DEShaw (~30-40 mins at 10% of data), uncomment:
  # calcDEShaw_PCA(catalog)
  # sys.exit(0)

  if args.centroid:
    centroid_bootstrap(catalog)



# #================
# hcf = {k: np.array([np.array(feal[i]) for i in v['elm']]) for k,v in hc5.items()}
# fmean = {k: np.mean(v, axis=0) for k,v in hcf.items()}

# def find_hc(hclist, index):
Example #4
  def run(self):
    args = self.parser.parse_args()

    settings = systemsettings()
    self.experiment_number = settings.EXPERIMENT_NUMBER

    logging.info("APPLICATION:    %s", settings.APPL_LABEL)
    logging.info("WORKDIR:  %s", settings.WORKDIR)

    # Read in Slurm params  (TODO: Move to abstract slurm call)
    if self.job_id is None:
      self.job_id   = os.getenv('JOB_NAME')
    self.slurm_id = os.getenv('SLURM_JOB_ID')

    logging.debug('EnVars')

    for i in ['SBATCH_JOBID', 'SBATCH_JOB_NAME', 'SLURM_JOB_ID', 'SLURM_JOBID', 'SLURM_JOB_NAME']:
      logging.debug('    %s : %s', i, os.getenv(i))

    logging.info("JOB NAME :  %s", str(self.job_id))
    logging.info("SLURM JOB:  %s", str(self.slurm_id))

    if args.debug:
      logging.debug("DEBUGGING: %s", self.name)

    if args.single:
      logging.debug("Macrothread running in single exection Mode (only 1 manager will execute).")
      self.singleuse = True

    if args.init:
      sys.exit(0)

    # Both Worker & Manager need catalog to run; load it here and import schema
    retry = 3
    connected = False
    while retry > 0:
      retry -= 1
      logging.info('Trying to establish connection to the Catalog Service')
      try:
        self.catalog = RedisClient(settings.name)
        if self.catalog.isconnected and self.catalog.ping():
          logging.info('Catalog service is connected')
          connected = True
          break
        logging.info("Catalog service is not running. Trying to start the service now")
        self.start_local_catalog()
      except (redis.RedisError, OverlayNotAvailable) as e:
        self.catalog = None
        self.start_local_catalog()

    if not connected:
      # The catalog is unavailable: fail this thread and re-schedule it
      if args.workinput:
        relaunch_cmd = "python3 %s -c %s -w" % (self.fname, self.config, args.workinput)
      else:
        self.slurmParams['cpus-per-task'] = 1
        relaunch_cmd = "python3 %s -c %s" % (self.fname, self.config)

      self.slurmParams['job-name'] = self.job_id
      slurm.sbatch(taskid  = self.slurmParams['job-name'],
                   options = self.slurmParams,
                   modules = self.modules,
                   cmd     = relaunch_cmd)
      # NOTE: This should be handled in an exception (need to figure out which one)
      #  And then raise a custom OverlayConnectionError here
      return

    #  Load some self-bootstrapping meta-data (if not already loaded):
    mthread_key = 'macrothread:' + self.name
    if not self.catalog.exists(mthread_key):
      self.catalog.hmset(mthread_key, {'fname': self.fname})

    self.catalog.loadSchema()   # Should this be called from within the catalog module?

    # Load meta-data about registered macrothreads
    self.data['macrothread'] = {}
    for key in self.catalog.keys('macrothread'):
      mt_name = key.split(':')[1]
      self.data['macrothread'][mt_name] = self.catalog.hgetall(key)

    # Load current STATE from Catalog
    logging.info("Loading Thread State for from catalog:")

    # Load the standard set of simple params (init and simulation vals).
    # By default these are immutable; any vals that may change or update
    # during execution should be explicitly listed in _mut or _append.
    self.load(list(settings.state.keys()))
    self.load(list(settings.sim_params.keys()))

    # Load additional State values  
    self.load(self._mut, self._immut, self._append)

    if args.workinput:
      logging.debug("Running worker.")
      self.worker(args.workinput)
    else:
      self.manager()

    if self.localcatalogserver:
      logging.debug("This thread is running the catalog. Waiting on local service to terminate...")
      self.localcatalogserver.join()
      self.localcatalogserver = None
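
The connection block above (retry the catalog, start a local server on failure, and relaunch the job if every attempt fails) can be distilled into a standalone helper. This is only a sketch under the assumption that connect() and start_local() are caller-supplied callables; it is not a function from the project.

import logging

def connect_with_fallback(connect, start_local, retries=3):
  """Try connect() up to `retries` times; after each failed attempt, call
  start_local() and try again. Returns the client, or None if all attempts fail."""
  for attempt in range(1, retries + 1):
    try:
      client = connect()
      if client.ping():
        logging.info('Catalog service is connected')
        return client
    except Exception as e:
      logging.info('Connect attempt %d failed: %s', attempt, e)
    # No usable client yet: bring up a local service before the next attempt.
    start_local()
  return None

A caller that gets None back could then re-submit itself, as the sbatch branch above does.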