def __init__(self, name, port=6379, **kwargs):
    """Initialise service settings, idle-tracking counters and the metrics logger.

    Args:
        name: Application name. Used for the fallback ``<name>.json``
            configuration file, passed to ``OverlayService.__init__`` and
            embedded in the metrics logger and log-file names.
        port: Accepted for interface compatibility only; it is always
            replaced by ``config.CATALOG_PORT`` before the base initialiser
            is called.
        **kwargs: Forwarded unchanged to ``OverlayService.__init__``.

    Raises:
        KeyError: If the ``HOME`` environment variable is not set.
    """
    config = systemsettings()
    if not config.configured():
        # For now assume JSON file
        config.applyConfig(name + '.json')

    self.workdir = config.WORKDIR
    self.redis_conf_template = config.REDIS_CONF_TEMPLATE
    self.MONITOR_WAIT_DELAY = config.MONITOR_WAIT_DELAY
    self.CATALOG_IDLE_THETA = config.CATALOG_IDLE_THETA
    self.CATALOG_STARTUP_DELAY = config.CATALOG_STARTUP_DELAY

    # The configured catalog port overrides whatever the caller passed in.
    port = config.CATALOG_PORT

    OverlayService.__init__(self, name, port, **kwargs)
    self.connection = None

    # FOR IDLE REPORTING using command count
    self.ping_count = 0
    self.last_cmd_count = 0
    self.last_cmd_ts = dt.now()
    self.total_idle_time = 0.
    self.current_idle_time = 0.

    # FOR IDLE REPORTING and metric recording
    self.persist = True
    self.idle_report = 0
    self.idle_actual = 0
    # We consider the service "idle" if it receives no request for 1 min.
    self.IDLE_REPORT_THETA = 60

    # The command monitor used to track idle time
    self.cmd_mon = None
    self.cmd_mon_pid = None

    # External logger used to track metrics.
    logfile = os.path.join(os.environ['HOME'], 'ddc', 'results',
                           'redismon_%s.log' % name)
    self.monlog = logging.getLogger('redis_mon_%s' % name)
    self.monlog.setLevel(logging.INFO)
    self.monlog.propagate = False
    # logging.getLogger() returns the same logger object for the same name,
    # so only attach the file handler once; otherwise every additional
    # instance would duplicate each metric line and open another handle.
    if not self.monlog.handlers:
        os.makedirs(os.path.dirname(logfile), exist_ok=True)
        fh = logging.FileHandler(logfile)
        fh.setLevel(logging.INFO)
        fh.setFormatter(logging.Formatter('%(message)s'))
        self.monlog.addHandler(fh)
def __init__(self, name, port=6379, **kwargs):
    """Initialise service settings, idle-tracking counters and the metrics logger.

    Args:
        name: Application name. Used for the fallback ``<name>.json``
            configuration file, passed to ``OverlayService.__init__`` and
            embedded in the metrics logger and log-file names.
        port: Accepted for interface compatibility only; it is always
            replaced by ``config.CATALOG_PORT`` before the base initialiser
            is called.
        **kwargs: Forwarded unchanged to ``OverlayService.__init__``.

    Raises:
        KeyError: If the ``HOME`` environment variable is not set.
    """
    config = systemsettings()
    if not config.configured():
        # For now assume JSON file
        config.applyConfig(name + '.json')

    self.workdir = config.WORKDIR
    self.redis_conf_template = config.REDIS_CONF_TEMPLATE
    self.MONITOR_WAIT_DELAY = config.MONITOR_WAIT_DELAY
    self.CATALOG_IDLE_THETA = config.CATALOG_IDLE_THETA
    self.CATALOG_STARTUP_DELAY = config.CATALOG_STARTUP_DELAY

    # The configured catalog port overrides whatever the caller passed in.
    port = config.CATALOG_PORT

    OverlayService.__init__(self, name, port, **kwargs)
    self.connection = None

    # FOR IDLE REPORTING using command count
    self.ping_count = 0
    self.last_cmd_count = 0
    self.last_cmd_ts = dt.now()
    self.total_idle_time = 0.
    self.current_idle_time = 0.

    # FOR IDLE REPORTING and metric recording
    self.persist = True
    self.idle_report = 0
    self.idle_actual = 0
    # We consider the service "idle" if it receives no request for 1 min.
    self.IDLE_REPORT_THETA = 60

    # The command monitor used to track idle time
    self.cmd_mon = None
    self.cmd_mon_pid = None

    # External logger used to track metrics.
    logfile = os.path.join(os.environ['HOME'], 'ddc', 'results',
                           'redismon_%s.log' % name)
    self.monlog = logging.getLogger('redis_mon_%s' % name)
    self.monlog.setLevel(logging.INFO)
    self.monlog.propagate = False
    # logging.getLogger() returns the same logger object for the same name,
    # so only attach the file handler once; otherwise every additional
    # instance would duplicate each metric line and open another handle.
    if not self.monlog.handlers:
        os.makedirs(os.path.dirname(logfile), exist_ok=True)
        fh = logging.FileHandler(logfile)
        fh.setLevel(logging.INFO)
        fh.setFormatter(logging.Formatter('%(message)s'))
        self.monlog.addHandler(fh)
def prepare_service(self):
    """Prepare environment variables, directories and commands for Alluxio.

    Loads settings (falling back to ``<app name>.json``), creates a fresh
    temporary directory used as the RAM folder, exports the ``ALLUXIO_*``
    environment variables, makes sure the under-filesystem and HDD
    directories exist, runs ``alluxio format`` and records the launch and
    shutdown commands on the instance.

    When ``self._role`` is ``'SLAVE'`` the master address is taken from
    ``self.master`` and a worker is launched; otherwise the master address
    is ``localhost`` and a local deployment is launched.

    Raises:
        TypeError: If the ``HOME`` environment variable is not set
            (``os.path.join`` receives ``None``).
    """
    config = systemsettings()
    if not config.configured():
        # For now assume JSON file
        config.applyConfig(self._name_app + '.json')

    # Default settings
    home = os.getenv('HOME')
    alluxio_home = os.path.join(home, 'pkg', 'alluxio-1.0.0')
    self.workdir = config.WORKDIR
    self.ramdisk = tempfile.mkdtemp()
    self.hdd = '/tmp/alluxio/hdd'

    os.environ['ALLUXIO_HOME'] = alluxio_home
    if self._role == 'SLAVE':
        os.environ['ALLUXIO_MASTER_ADDRESS'] = self.master
        self.launchcmd = 'alluxio-start.sh worker Mount -f'
    else:
        os.environ['ALLUXIO_MASTER_ADDRESS'] = 'localhost'
        self.launchcmd = 'alluxio-start.sh local -f'

    os.environ['DEFAULT_LIBEXEC_DIR'] = os.path.join(alluxio_home, 'libexec')
    os.environ['ALLUXIO_RAM_FOLDER'] = self.ramdisk
    os.environ['ALLUXIO_UNDERFS_ADDRESS'] = config.ALLUXIO_UNDERFS
    os.environ['ALLUXIO_WORKER_MEMORY_SIZE'] = config.ALLUXIO_WORKER_MEM

    # makedirs(exist_ok=True) creates missing parent directories (plain
    # mkdir fails on '/tmp/alluxio/hdd' when '/tmp/alluxio' is absent) and
    # avoids the race between an existence check and the creation.
    os.makedirs(config.ALLUXIO_UNDERFS, exist_ok=True)
    os.makedirs(self.hdd, exist_ok=True)

    self.MONITOR_WAIT_DELAY = config.MONITOR_WAIT_DELAY
    self.CATALOG_IDLE_THETA = config.CATALOG_IDLE_THETA
    self.CATALOG_STARTUP_DELAY = config.CATALOG_STARTUP_DELAY

    # Echo the variables through a child shell to log what launched
    # commands will actually see.
    logging.debug("Checking ENV:")
    logging.debug('  ALLUXIO_HOME=%s', executecmd('echo $ALLUXIO_HOME'))
    logging.debug('  ALLUXIO_MASTER_ADDRESS=%s', executecmd('echo $ALLUXIO_MASTER_ADDRESS'))
    logging.debug('  ALLUXIO_RAM_FOLDER=%s', executecmd('echo $ALLUXIO_RAM_FOLDER'))

    executecmd('alluxio format')

    self.shutdowncmd = 'alluxio-stop.sh all'
def __init__(self, name):
    """Connect to the Redis master advertised in the service lock file.

    The lock file ``<name>_RedisService.lock`` is read as a comma-separated
    record with at least five fields; field 0 is the host, field 1 the
    port and field 3 an expiry timestamp. While the master reports that it
    is still loading, the connection attempt is retried every 5 seconds.

    Args:
        name: Application name used to derive the lock-file name.

    Raises:
        OverlayNotAvailable: If the lock file is missing, has fewer than
            five fields, or the Redis server cannot be reached.
    """
    # NOTE(review): the returned settings object is not used here; the call
    # is kept in case instantiating it has side effects - confirm.
    systemsettings()
    self._name_app = name
    self.lockfile = '%s_%s.lock' % (self._name_app, 'RedisService')
    self.isconnected = False
    self.host = None
    self.port = None
    self.pool = None

    connect_wait = 0  # seconds spent waiting for a loading master
    while True:
        # Start of this attempt; previously `start` was never defined, so
        # the BusyLoadingError handler raised NameError instead of retrying.
        start = dt.now()
        try:
            with open(self.lockfile, 'r') as conn:
                conn_string = conn.read().split(',')
            if len(conn_string) < 5:
                raise OverlayNotAvailable
            host = conn_string[0]
            port = conn_string[1]
            ttl = float(conn_string[3])
            if dt.now().timestamp() > ttl:
                logging.warning("Overlay Service Master has expired. Using this at risk!")

            self.pool = redis.ConnectionPool(host=host, port=port, db=0, decode_responses=True)
            redis.StrictRedis.__init__(self, connection_pool=self.pool, decode_responses=True)
            logging.info('Connection is open... Checking to see if I can connect')
            self.host = host
            self.port = port

            # If setting the client name succeeds, the DB is connected.
            # Otherwise the error is handled by the clauses below.
            self.client_setname(getUID())
            logging.info('[Redis Client] Connected as client to master at %s on port %s', host, port)
            self.isconnected = True
            break
        except FileNotFoundError:
            logging.warning('[Redis Client] No Lockfile Found. Service unavailable: %s', self.lockfile)
            raise OverlayNotAvailable
        except redis.ReadOnlyError:
            logging.warning('[Redis Client] Connecting as read only')
            self.isconnected = True
            break
        except redis.BusyLoadingError:
            logging.warning('[Redis Client] Current Master is starting up. Standing by.....')
            time.sleep(5)
            connect_wait += (dt.now() - start).total_seconds()
            continue
        except redis.RedisError:
            logging.warning('[Redis Client] Service is not running. Cannot get master from lockfile: %s', self.lockfile)
            raise OverlayNotAvailable
def sbatch(cls, taskid, options, modules, cmd, environ=None):
    """Build an inline Slurm batch script and submit it through ``sbatch``.

    Args:
        taskid: Identifier of the task; names the job output file
            ``<config.LOGDIR>/<taskid>.out``.
        options: Mapping of ``sbatch`` long-option names to values. A value
            of ``None`` emits a bare ``--<name>`` flag. Must contain a
            ``'job-name'`` entry.
        modules: Iterable of module names; each is emitted as a
            ``module load`` line.
        cmd: Shell command(s) placed at the end of the script.
        environ: Optional mapping of environment variables to export in
            the script. The mapping is copied, so neither the caller's
            dict nor a shared default is modified.

    Returns:
        The decoded combined stdout/stderr output of the ``sbatch`` command.

    Raises:
        KeyError: If ``options`` has no ``'job-name'`` entry.
    """
    logging.info("Slurm sbatch Job submitted for %s", taskid)

    config = systemsettings()

    # Work on a private copy: the previous mutable default ({}) was mutated
    # below, so JOB_NAME and earlier exports leaked between calls.
    exports = dict(environ) if environ else {}
    exports['JOB_NAME'] = options['job-name']

    script = ['#!/bin/bash -l\n\n#SBATCH\n']
    for key, value in options.items():
        if value is None:
            script.append('#SBATCH --%s\n' % key)
        else:
            script.append('#SBATCH --%s=%s\n' % (key, value))

    joboutput = "%s/%s.out" % (config.LOGDIR, str(taskid))
    script.append('#SBATCH --output=%s\n' % joboutput)

    script.extend('module load %s\n' % mod for mod in modules)
    script.extend('export %s=%s\n' % (key, value) for key, value in exports.items())

    script.append('echo ================================\n')
    script.append('echo JOB NAME:  %s\n' % options['job-name'])
    script.append('\n%s\n' % cmd)
    inline = ''.join(script)

    logging.debug("Inline SBATCH:------------->>")
    logging.debug(inline)
    logging.debug("<<-------  Batch Complete")

    # Launch job.
    # NOTE: the script reaches sbatch through an unquoted shell
    # here-document, so the submitting shell expands $VARS and backticks in
    # the script text before sbatch reads it.
    job = proc.Popen('sbatch <<EOF\n%sEOF' % inline,
                     shell=True, stdin=None,
                     stdout=proc.PIPE, stderr=proc.STDOUT)
    # stderr is merged into stdout, so only the first element is populated.
    stdout, _ = job.communicate()
    output = stdout.decode()

    logging.info("Slurm Batch Job Submitted. Output follows:")
    logging.info(output)
    return output
def __init__(self, name, uid=None):
    """Create a statistics collector with its own file and console logger.

    Args:
        name: Collector name; part of the logger name and of the log file
            name ``stat_<name>.log`` under ``default_log_loc``.
        uid: Optional identifier convertible with ``int()``. When given,
            it prefixes every log line; otherwise ``name`` is the prefix.

    Raises:
        ValueError: If ``uid`` cannot be converted to an integer.
    """
    # NOTE(review): the returned settings object is not used here; the call
    # is kept in case instantiating it has side effects - confirm.
    systemsettings()
    self.stat = OrderedDict()
    self.name = name
    self.uid = int('000000' if uid is None else uid)

    # Set up logging
    log = logging.getLogger('stat' + name + str(self.uid))
    log.setLevel(logging.INFO)
    log.propagate = False

    # logging.getLogger() hands back the same object for the same name, so
    # attach handlers only once to avoid duplicated output lines.
    if not log.handlers:
        # str() so that integer uids (accepted by int() above) no longer
        # raise TypeError when building the line prefix.
        prefix = name if uid is None else str(uid)
        fmt = logging.Formatter(prefix + ',%(message)s')

        fh = logging.FileHandler(default_log_loc + 'stat_' + name + '.log')
        fh.setLevel(logging.INFO)
        fh.setFormatter(fmt)

        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(fmt)

        log.addHandler(fh)
        log.addHandler(ch)

    self.log = log
def sbatch(cls, taskid, options, modules, cmd, environ=None):
    """Build an inline Slurm batch script and submit it through ``sbatch``.

    Args:
        taskid: Identifier of the task; names the job output file
            ``<config.LOGDIR>/<taskid>.out``.
        options: Mapping of ``sbatch`` long-option names to values. A value
            of ``None`` emits a bare ``--<name>`` flag. Must contain a
            ``'job-name'`` entry.
        modules: Iterable of module names; each is emitted as a
            ``module load`` line.
        cmd: Shell command(s) placed at the end of the script.
        environ: Optional mapping of environment variables to export in
            the script. The mapping is copied, so neither the caller's
            dict nor a shared default is modified.

    Returns:
        The decoded combined stdout/stderr output of the ``sbatch`` command.

    Raises:
        KeyError: If ``options`` has no ``'job-name'`` entry.
    """
    logging.info("Slurm sbatch Job submitted for %s", taskid)

    config = systemsettings()

    # Work on a private copy: the previous mutable default ({}) was mutated
    # below, so JOB_NAME and earlier exports leaked between calls.
    exports = dict(environ) if environ else {}
    exports['JOB_NAME'] = options['job-name']

    script = ['#!/bin/bash -l\n\n#SBATCH\n']
    for key, value in options.items():
        if value is None:
            script.append('#SBATCH --%s\n' % key)
        else:
            script.append('#SBATCH --%s=%s\n' % (key, value))

    joboutput = "%s/%s.out" % (config.LOGDIR, str(taskid))
    script.append('#SBATCH --output=%s\n' % joboutput)

    script.extend('module load %s\n' % mod for mod in modules)
    script.extend('export %s=%s\n' % (key, value) for key, value in exports.items())

    script.append('echo ================================\n')
    script.append('echo JOB NAME:  %s\n' % options['job-name'])
    script.append('\n%s\n' % cmd)
    inline = ''.join(script)

    logging.debug("Inline SBATCH:------------->>")
    logging.debug(inline)
    logging.debug("<<-------  Batch Complete")

    # Launch job.
    # NOTE: the script reaches sbatch through an unquoted shell
    # here-document, so the submitting shell expands $VARS and backticks in
    # the script text before sbatch reads it.
    job = proc.Popen('sbatch <<EOF\n%sEOF' % inline,
                     shell=True, stdin=None,
                     stdout=proc.PIPE, stderr=proc.STDOUT)
    # stderr is merged into stdout, so only the first element is populated.
    stdout, _ = job.communicate()
    output = stdout.decode()

    logging.info("Slurm Batch Job Submitted. Output follows:")
    logging.info(output)
    return output
def __init__(self, home=None, master=None):
    """Create a shell client bound to an Alluxio installation and master.

    Args:
        home: Alluxio installation directory. When given it is stored and
            exported as ``ALLUXIO_HOME``; otherwise the existing
            ``ALLUXIO_HOME`` environment variable is used.
        master: Master address. When omitted, it is taken from the first
            comma-separated field of ``<config.name>_AlluxioService.lock``.

    If no installation directory can be determined, or the lock file is
    needed but absent, an error is logged and initialisation stops early.
    """
    settings = systemsettings()

    # Resolve the installation directory: an explicit argument wins over
    # the environment; having neither is an error.
    if home is not None:
        self._home = home
        os.environ['ALLUXIO_HOME'] = home
    else:
        env_home = os.getenv('ALLUXIO_HOME')
        if env_home is None:
            logging.error('[AlluxioClient] ALLUXIO_HOME is not set or provided. Cannot create shell client')
            return
        self._home = env_home

    # Without an explicit master, look it up in the AlluxioService lock file.
    if master is None:
        lock_path = '%s_%s.lock' % (settings.name, 'AlluxioService')
        if not os.path.exists(lock_path):
            logging.error('[AlluxioClient] Alluxio service is not running.')
            return
        with open(lock_path, 'r') as handle:
            master = handle.read().split(',')[0]

    self.set_master(master)
def tear_down(self):
    """Delete the ramdisk directory and the configured under-filesystem directory.

    Both trees are removed with ``shutil.rmtree``; each removal is
    announced in the log with the service name.
    """
    settings = systemsettings()

    # First the ramdisk tree stored on this instance ...
    logging.info("[%s] Removing the ramdisk", self._name_svc)
    ramdisk_path = self.ramdisk
    shutil.rmtree(ramdisk_path)

    # ... then the under-filesystem tree named in the settings.
    logging.info("[%s] Removing the Under FS (on local /tmp)", self._name_svc)
    underfs_path = settings.ALLUXIO_UNDERFS
    shutil.rmtree(underfs_path)
pipe.set('allkeys', str(allkeys)) result = pipe.execute() print('Pipeline complete.') print(client.incr('foo')) print(client.get('foo')) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('name') parser.add_argument('--client', action='store_true') parser.add_argument('--stop', action='store_true') args = parser.parse_args() if args.client: testClient(args.name) sys.exit(0) settings = systemsettings() settings.applyConfig('%s.json' % args.name) server = RedisService(args.name) if args.stop: server.stop() else: server.start()
def fetcher(self):
    """Load requested trajectory files and insert their data into the cache.

    Opens a client connection to the local service, waits for it to come
    up, then blocks on the ``request:deshaw`` / ``request:sim`` lists. For
    each request the trajectory is loaded, keys taken from the
    ``cache:lru`` list are evicted while the cache lacks room, and every
    frame of coordinates is appended to the cache key as a pickled entry.

    This is designed to run in a separate thread. It returns when the
    termination flag is set, when the service never becomes reachable, or
    when the local connection is lost.

    TODO: Evictor (in here or separate)
    """
    config = systemsettings()

    # Capacity is set in GB in the settings; convert to bytes.
    capacity = config.CACHE_CAPACITY * (2**30)
    logging.info('[Fetcher] Fetcher is starting. Preparing capacity of %d GB', config.CACHE_CAPACITY)

    # Create client
    conn = redis.StrictRedis(host='localhost', port=self._port, decode_responses=True)

    # Wait until the service is up (and not terminating on launch)
    timeout = 60
    while True:
        try:
            logging.info('[Fetcher] Fetcher is waiting for the service to start... Conn=%s', (self.connection is not None))
            info = conn.info()
            if info is None:
                logging.info('[Fetcher] No commo with the local service. Wait and retry')
                time.sleep(1)
                timeout -= 1
                if timeout == 0:
                    logging.error('[Fetcher] Fetch never connected to the cache. Shutting down...')
                    self.terminationFlag.set()
                    break
            elif info['loading']:
                logging.info('[Fetcher] Service is loading')
                time.sleep(10)
            else:
                conn.client_setname("fetcher")
                break
        except redis.BusyLoadingError:
            logging.info('[Fetcher] Service is loading')
            time.sleep(10)
            continue
        except redis.RedisError:
            if timeout == 0:
                logging.error('[Fetcher] Fetch never connected to the cache. Shutting down...')
                self.terminationFlag.set()
                break
            logging.info('[Fetcher] Cannot connect to serice. Will keep trying for %s seconds', timeout)
            time.sleep(1)
            timeout -= 1
            continue

    # Exit if the service cannot be reached
    if not self.ping() or self.terminationFlag.is_set():
        logging.error('[Fetcher] Cannot connect to the server')
        return

    # NOTE(review): success is reported at ERROR level; kept as in the
    # original - confirm whether INFO was intended.
    logging.error('[Fetcher] Fetch found the service. Connecting as a client.')

    # The blocking pop times out periodically so that service termination
    # is noticed (MV -> setting).
    block_timeout = 60
    while not self.terminationFlag.is_set():

        # TODO: make this more dynamic
        try:
            request = conn.blpop(['request:deshaw', 'request:sim'], block_timeout)
        except redis.ConnectionError:
            logging.warning('[Fetcher] LOST local connection. Returning')
            return

        if request is None:
            logging.info('[Fetcher] Fetcher heartbeat.... timeout waiting on requests')
            continue

        logging.info('[Fetcher] Fetcher Got a request...')

        if request[0] == 'request:deshaw':
            fileno = int(request[1])
            key = 'deshaw:%02d' % fileno
            dcd = deshaw.getDEShawfilename_prot(fileno, fullpath=True)
            pdb = deshaw.PDB_PROT
        else:
            key = 'sim:%s' % request[1]
            job_path = os.path.join(config.JOBDIR, request[1])
            dcd = os.path.join(job_path, request[1] + '.dcd')
            # Build the topology path directly: replacing 'dcd' in the
            # whole path would also rewrite directory or job names that
            # happen to contain that substring.
            pdb = os.path.join(job_path, request[1] + '.pdb')

        if conn.exists(key):
            logging.info('File Load request for existing key: %s', key)
            continue

        logging.info('File Request!\n%-20s\n%-20s\n%-20s\n%-20s\n%-20s', request[0], request[1], dcd, pdb, key)
        logging.info('Loading Trajectory')

        if not (os.path.exists(dcd) and os.path.exists(pdb)):
            # Nothing can be loaded; without this guard the code below
            # would run with an undefined (or stale) trajectory.
            logging.warning('[Fetcher] Requested files not found: %s , %s', dcd, pdb)
            continue

        traj = md.load(dcd, top=pdb)
        needed = traj.xyz.nbytes

        # TODO: Check memory reporting:
        #   'used_memory'     -> actual memory used by REDIS
        #   'used_memory_rss' -> allocated memory (as reported by OS)
        used_mem = int(conn.info('memory')['used_memory'])
        available = capacity - used_mem
        logging.debug('Cache Capacity check:  Used= %6dMB    Capacity= %6dMB   Avail=%d bytes  Data=%d',
                      used_mem / (2**20), capacity / (2**20), available, needed)

        if available >= needed:
            logging.info('Cache has available capacity')

        while available < needed:
            logging.info('Cache is full. Evicting')
            lru_queue = conn.lrange('cache:lru', 0, -1)
            index = 1
            victim = None
            while True:
                # Since cache hits perform an LRU update, the evictor needs
                # to check the queue for any subsequent hits.
                if index > len(lru_queue):
                    logging.error("Eviction Error. The evictor can't find anything. We may need more capacity (overflow)")
                    break
                candidate = conn.lpop('cache:lru')
                if candidate is None:
                    # The queue was drained since the snapshot was taken.
                    logging.error("Eviction Error. The evictor can't find anything. We may need more capacity (overflow)")
                    break
                # Scan the remainder of the snapshot for newer requests of
                # the same key (the original scanned an undefined name).
                later_hits = lru_queue[index:].count(candidate)
                if later_hits:
                    # Found a more recent cache request; move on to the
                    # next file in the LRU queue.
                    index += later_hits
                    continue
                victim = candidate
                break

            if victim is None:
                # Nothing evictable: stop instead of looping forever and
                # insert the data over capacity.
                break

            # Evict this candidate (TODO: Separate the evictor to separate module(s))
            conn.delete(victim)
            used_mem = int(conn.info('memory')['used_memory'])
            available = capacity - used_mem
            logging.debug('Cache Capacity check:  Used= %6dMB    Capacity= %6dMB   Avail=%d bytes  Data=%d',
                          used_mem / (2**20), capacity / (2**20), available, needed)

        # Insert all xyz coords into the cache
        logging.info('[Cache] Insering %d points to %s', len(traj.xyz), key)
        pipe = conn.pipeline()
        for frame in traj.xyz:
            pipe.rpush(key, pickle.dumps(frame))
        pipe.execute()
        check = conn.llen(key)
        logging.info('[Cache] CHECK:  LLEN= %d   for KEY=%s', check, key)
def __init__(self, name):
    """Connect to the Redis master advertised in the service lock file.

    The lock file ``<name>_RedisService.lock`` is read as a comma-separated
    record with at least five fields; field 0 is the host, field 1 the
    port and field 3 an expiry timestamp. While the master reports that it
    is still loading, the connection attempt is retried every 5 seconds.

    Args:
        name: Application name used to derive the lock-file name.

    Raises:
        OverlayNotAvailable: If the lock file is missing, has fewer than
            five fields, or the Redis server cannot be reached.
    """
    # NOTE(review): the returned settings object is not used here; the call
    # is kept in case instantiating it has side effects - confirm.
    systemsettings()
    self._name_app = name
    self.lockfile = '%s_%s.lock' % (self._name_app, 'RedisService')
    self.isconnected = False
    self.host = None
    self.port = None
    self.pool = None

    connect_wait = 0  # seconds spent waiting for a loading master
    while True:
        # Start of this attempt; previously `start` was never defined, so
        # the BusyLoadingError handler raised NameError instead of retrying.
        start = dt.now()
        try:
            with open(self.lockfile, 'r') as conn:
                conn_string = conn.read().split(',')
            if len(conn_string) < 5:
                raise OverlayNotAvailable
            host = conn_string[0]
            port = conn_string[1]
            ttl = float(conn_string[3])
            if dt.now().timestamp() > ttl:
                logging.warning("Overlay Service Master has expired. Using this at risk!")

            self.pool = redis.ConnectionPool(host=host, port=port, db=0, decode_responses=True)
            redis.StrictRedis.__init__(self, connection_pool=self.pool, decode_responses=True)
            logging.info('Connection is open... Checking to see if I can connect')
            self.host = host
            self.port = port

            # If setting the client name succeeds, the DB is connected.
            # Otherwise the error is handled by the clauses below.
            self.client_setname(getUID())
            logging.info('[Redis Client] Connected as client to master at %s on port %s', host, port)
            self.isconnected = True
            break
        except FileNotFoundError:
            logging.warning('[Redis Client] No Lockfile Found. Service unavailable: %s', self.lockfile)
            raise OverlayNotAvailable
        except redis.ReadOnlyError:
            logging.warning('[Redis Client] Connecting as read only')
            self.isconnected = True
            break
        except redis.BusyLoadingError:
            logging.warning('[Redis Client] Current Master is starting up. Standing by.....')
            time.sleep(5)
            connect_wait += (dt.now() - start).total_seconds()
            continue
        except redis.RedisError:
            logging.warning('[Redis Client] Service is not running. Cannot get master from lockfile: %s', self.lockfile)
            raise OverlayNotAvailable
print('Running a long pipeline...') pipe = client.pipeline() for i in range(40000): allkeys = pipe.keys('*') pipe.set('allkeys', str(allkeys)) result = pipe.execute() print('Pipeline complete.') print(client.incr('foo')) print(client.get('foo')) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('name') parser.add_argument('--client', action='store_true') parser.add_argument('--stop', action='store_true') args = parser.parse_args() if args.client: testClient(args.name) sys.exit(0) settings = systemsettings() settings.applyConfig('%s.json' % args.name) server = RedisService(args.name) if args.stop: server.stop() else: server.start()