def test_init():
    directory = '/tmp/diskcache/index'
    mapping = {'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1}

    index = dc.Index(None, mapping)
    assert index == mapping
    rmdir(index.directory)
    del index

    rmdir(directory)
    index = dc.Index(directory, mapping)
    assert index.directory == directory
    assert index == mapping

    other = dc.Index(directory)
    assert other == index

    del index
    del other
    rmdir(directory)

    index = dc.Index(directory, mapping.items())
    assert index == mapping

    del index
    rmdir(directory)

    index = dc.Index(directory, a=5, b=4, c=3, d=2, e=1)
    assert index == mapping

def wrapper():
    index = dc.Index()
    try:
        func(index)
    except Exception:
        rmdir(index.directory)
        raise

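# The wrapper above reads like the inner function of a setup/teardown decorator
# for tests that take a fresh Index. A minimal, self-contained sketch of what
# the enclosing decorator might look like; the name `setup_and_teardown`, the
# use of functools.wraps, and shutil.rmtree (in place of the module's own
# rmdir helper) are assumptions, not part of the original code.
import functools as ft
import shutil

import diskcache as dc


def setup_and_teardown(func):
    @ft.wraps(func)
    def wrapper():
        index = dc.Index()  # with no directory given, a temporary one is created
        try:
            func(index)
        except Exception:
            shutil.rmtree(index.directory, ignore_errors=True)
            raise

    return wrapper
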
def __init__(self, secos_path: str):
    super().__init__()
    sys.path.append(secos_path)
    import decompound_server

    self.decompound = decompound_server.make_decompounder(
        [
            "decompound_server.py",
            f"{secos_path}data/denews70M_trigram__candidates",
            f"{secos_path}data/denews70M_trigram__WordCount",
            "50",
            "3",
            "3",
            "5",
            "3",
            "upper",
            "0.01",
            "2020",
        ]
    )
    self.disk_cache = diskcache.Index("secos_cache")
    self.cache = {}
    for key in self.disk_cache:
        self.cache[key] = self.disk_cache[key]

async def load(self, log=logToConsole):
    # Load any files that exist (or create missing required files)
    for label in os.listdir(self.dbDir):
        self.datasets[label] = {}
        labelDir = os.path.join(self.dbDir, label)
        for ctype in diskCacheIndices:
            cpath = os.path.join(labelDir, ctype + '.diskCacheIndex')
            if os.path.exists(cpath):
                await log('Loading %s %s...' % (label, ctype))
                self.datasets[label][ctype] = diskcache.Index(cpath)
            elif ctype in requiredDiskCacheIndices:
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cpath)
        for ptype in pickles:
            ppath = os.path.join(labelDir, ptype + '.pickle')
            if os.path.exists(ppath):
                await log('Loading %s %s...' % (label, ptype))
                if ptype == 'intervalIndexes':
                    await log('(may take a while if %s is large)' % label)
                self.datasets[label][ptype] = pickle.load(open(ppath, 'rb'))
            elif ptype in requiredPickleDicts:
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), ppath)
        for listType in requiredMetaLists:
            self.datasets[label]['meta'][listType] = self.datasets[label]['meta'].get(listType, [])

def test(status=False):
    if os.environ.get('TRAVIS') == 'true':
        return

    if os.environ.get('APPVEYOR') == 'True':
        return

    random.seed(SEED)
    index = dc.Index(enumerate(range(KEYS)))
    processes = []

    for count in range(8):
        process = mp.Process(target=stress, args=(SEED + count, index))
        process.start()
        processes.append(process)

    for value in it.count():
        time.sleep(1)

        if status:
            print('\r', value, 's', len(index), 'keys', ' ' * 20, end='')

        if all(not process.is_alive() for process in processes):
            break

    if status:
        print('')

    assert all(process.exitcode == 0 for process in processes)

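# The `stress` worker passed to mp.Process is not shown here. A minimal sketch
# of what such a worker might do, assuming each process simply hammers the
# shared Index with random reads and writes; the OPERATIONS constant and the
# exact mix of operations are illustrative, not the original worker.
def stress(seed, index):
    random.seed(seed)
    for _ in range(OPERATIONS):
        key = random.randrange(KEYS)
        if random.random() < 0.5:
            index[key] = random.randrange(KEYS)   # concurrent write
        else:
            index.get(key)                        # concurrent read
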
def __init__(self, server_url: str):
    super().__init__()
    self.server_url = server_url
    self.disk_cache = diskcache.Index("secos_cache")
    self.cache = {}
    for key in self.disk_cache:
        self.cache[key] = self.disk_cache[key]

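# Both constructors above warm an in-memory dict from a persistent
# diskcache.Index at startup. A hedged sketch of how such a two-level cache is
# typically consumed afterwards: read from memory first, compute on a miss,
# then write the result to both layers. The helper name `cached_lookup` and
# the `compute` callable are illustrative and not part of the original class.
import diskcache


def cached_lookup(memory_cache: dict, disk_cache: diskcache.Index, key, compute):
    """Return a cached value, computing and persisting it on a miss."""
    if key in memory_cache:
        return memory_cache[key]
    value = compute(key)
    memory_cache[key] = value   # fast, process-local copy
    disk_cache[key] = value     # persisted on disk by diskcache.Index
    return value
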
def createDataset(self, label):
    labelDir = os.path.join(self.dbDir, label)
    if label in self.datasets or os.path.exists(labelDir):
        self.purgeDataset(label)
    self.datasets[label] = {}
    os.makedirs(labelDir)
    for ctype in requiredDiskCacheIndices:
        cpath = os.path.join(labelDir, ctype + '.diskCacheIndex')
        self.datasets[label][ctype] = diskcache.Index(cpath)
    for ptype in requiredPickleDicts:
        self.datasets[label][ptype] = {}
    for listType in requiredMetaLists:
        self.datasets[label]['meta'][listType] = self.datasets[label]['meta'].get(listType, [])

def createDataset(self):
    datasetId = self.generateUniqueDatasetId()
    idDir = os.path.join(self.dbDir, datasetId)
    if datasetId in self or os.path.exists(idDir):
        del self[datasetId]
    self.datasets[datasetId] = {}
    os.makedirs(idDir)
    for ctype in requiredDiskCacheIndices:
        cpath = os.path.join(idDir, ctype + '.diskCacheIndex')
        self[datasetId][ctype] = diskcache.Index(cpath)
    for ptype in requiredPickleDicts:
        self[datasetId][ptype] = {}
    for key, defaultValue in defaultInfo.items():
        self[datasetId]['info'][key] = self[datasetId]['info'].get(key, deepcopy(defaultValue))
    self[datasetId]['info']['datasetId'] = datasetId
    return self[datasetId]

async def load(self, log=logToConsole):
    # Load any files that exist (or create missing required files)
    for datasetId in os.listdir(self.dbDir):
        self.datasets[datasetId] = {}
        idDir = os.path.join(self.dbDir, datasetId)
        for ctype in diskCacheIndices:
            cpath = os.path.join(idDir, ctype + '.diskCacheIndex')
            if os.path.exists(cpath):
                await log('Loading %s %s...' % (datasetId, ctype))
                self[datasetId][ctype] = diskcache.Index(cpath)
            elif ctype in requiredDiskCacheIndices:
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cpath)
        for ptype in pickles:
            ppath = os.path.join(idDir, ptype + '.pickle')
            if os.path.exists(ppath):
                await log('Loading %s %s...' % (datasetId, ptype))
                self[datasetId][ptype] = pickle.load(open(ppath, 'rb'))
            elif ptype in requiredPickleDicts:
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), ppath)
        for key, defaultValue in defaultInfo.items():
            self[datasetId]['info'][key] = self[datasetId]['info'].get(key, deepcopy(defaultValue))
        self[datasetId]['info']['datasetId'] = datasetId
        await log('Finished loading %s (%s)' % (datasetId, self[datasetId]['info']['label']))

            len(reg_q), list(reg_q)))
        logger.debug('{} node(s) in wait queue: {}'.format(
            len(wait_q), list(wait_q)))
        manage_incoming_nodes(node_q, reg_q, wait_q)
        if len(reg_q) > 0:
            drain_msg_queue(reg_q, pub_q, addr='127.0.0.1')
        logger.debug('{} node(s) in node queue: {}'.format(
            len(node_q), list(node_q)))
        logger.debug('{} node(s) in pub queue: {}'.format(
            len(pub_q), list(pub_q)))
        logger.debug('{} node(s) in active queue: {}'.format(
            len(cfg_q), list(cfg_q)))
    except Exception as exc:
        logger.error('peerstate exception was: {}'.format(exc))
        raise exc


cache = dc.Index(get_cachedir())
cfg_q = dc.Deque(directory=get_cachedir('cfg_queue'))
node_q = dc.Deque(directory=get_cachedir('node_queue'))
off_q = dc.Deque(directory=get_cachedir('off_queue'))
wdg_q = dc.Deque(directory=get_cachedir('wedge_queue'))
pub_q = dc.Deque(directory=get_cachedir('pub_queue'))
reg_q = dc.Deque(directory=get_cachedir('reg_queue'))
tmp_q = dc.Deque(directory=get_cachedir('tmp_queue'))
wait_q = dc.Deque(directory=get_cachedir('wait_queue'))

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

def index():
    index = dc.Index()
    yield index
    rmdir(index.directory)

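# The generator above is presumably registered as a pytest fixture (the
# @pytest.fixture decoration is implied but not shown): it yields a fresh
# temporary Index and removes its directory afterwards. A test consuming it
# might look like this sketch; the test name and keys are illustrative.
def test_getsetdel(index):
    index['a'] = 1
    assert index['a'] == 1
    del index['a']
    assert 'a' not in index
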
def do_scheduling():
    set_initial_role()
    network_cruft_cleaner()
    schedule.run_all(1, 'base-tasks')
    validate_role()
    node_role = NODE_SETTINGS['node_role']
    mode = NODE_SETTINGS['mode']
    if node_role is None and mode == 'peer':
        NODE_SETTINGS['use_localhost'] = True

    if mode == 'peer':
        if node_role is None:
            check_time = 33
            baseCheckJob = schedule.every(check_time).seconds
            baseCheckJob.do(run_net_check).tag('base-tasks', 'route-status')
        try:
            data = wait_for_moon(timeout=45)
        except Exception as exc:
            logger.error('ENODATA exception {}'.format(exc))
            put_state_msg('ERROR')
        try:
            handle_moon_data(data)
            put_state_msg('STARTING')
        except MemberNodeError as exc:
            logger.error('ENODATA exception {}'.format(exc))
            put_state_msg('ERROR')
        str_level = logging.getLevelName(logger.getEffectiveLevel())
        logger.debug('Current log level is: {}'.format(str_level))
        startup_handlers()
    else:
        if node_role == 'controller':
            netobj_q = dc.Deque(directory=get_cachedir('netobj_queue'))
            gen_netobj_queue(netobj_q)
            cache = dc.Index(get_cachedir())
            for key_str in ['peer', 'moon', 'mstate']:
                delete_cache_entry(cache, key_str)
        elif node_role == 'moon':
            cln_q = dc.Deque(directory=get_cachedir('clean_queue'))
            pub_q = dc.Deque(directory=get_cachedir('pub_queue'))
            schedule.every(37).seconds.do(run_cleanup_check, cln_q, pub_q).tag(
                'chk-tasks', 'cleanup')
            schedule.every(15).minutes.do(check_daemon_status).tag(
                'chk-tasks', 'responder')
            schedule.every(15).minutes.do(check_daemon_status,
                                          script='msg_subscriber.py').tag(
                'chk-tasks', 'subscriber')
            schedule.run_all(1, 'chk-tasks')
        elif mode == 'adhoc':
            logger.debug('Running in adhoc mode...')
            if NODE_SETTINGS['nwid']:
                logger.debug('ADHOC: found network {}'.format(
                    NODE_SETTINGS['nwid']))
                do_startup(NODE_SETTINGS['nwid'])
            else:
                logger.error('No network ID found in NODE_SETTINGS!!')
                logger.error('Have you created a network yet?')

    logger.debug('MODE: startup mode is {} and role is {}'.format(
        mode, node_role))
    logger.info(
        'You are running fpnd/node_tools version {}'.format(fpnd_version))

    while True:
        schedule.run_pending()
        time.sleep(1)

        self.date = datetime.strptime(response.headers["Date"],
                                      "%a, %d %b %Y %H:%M:%S %Z")


# Global objects which keep track of the wait time needed for the x_rate_policy.
# The lock should be used for accessing, waiting, and modifying the wait time.
# Asyncio gets mad if you create a lock in a different loop, so the locks are
# enumerated by loop. This will not work with multithreading/multiprocessing.
locks_by_policy: Dict[AbstractEventLoop, Dict[str, Lock]] = defaultdict(lambda: defaultdict(Lock))
tqdm_by_policy: Dict[str, tqdm] = dict()
wait_times_by_policy: diskcache.Index = diskcache.Index(
    os.path.join(__diskcache_path__, f"x_rate_response"))


class Waiter:
    policy: str
    tqdm: tqdm

    def __init__(self, policy: str):
        self.policy = policy
        loaded_wait = max(0, wait_times_by_policy[self.policy] - time.time())
        self.tqdm = tqdm(total=int(loaded_wait), desc=f"{policy}-wait")
        if loaded_wait > 0:
            logging.info(
                f"Found existing wait time of {loaded_wait:02f} for {policy}")

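# Because wait_times_by_policy is a diskcache.Index, rate-limit deadlines
# survive process restarts: Waiter subtracts time.time() from whatever value
# was stored. A hedged sketch of the writing side; the function name
# `record_wait` and the `retry_after` parameter are illustrative only.
def record_wait(policy: str, retry_after: float) -> None:
    # Store an absolute deadline so a later Waiter(policy) can resume the wait.
    wait_times_by_policy[policy] = time.time() + retry_after
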
def test():
    random.seed(SEED)
    mapping = co.OrderedDict(enumerate(range(KEYS)))
    index = dc.Index(enumerate(range(KEYS)))
    stress(mapping, index)
    assert mapping == index

async def processOtf2(self, label, file, storeEvents=False, log=logToConsole):
    self.addSourceFile(label, file.name, 'otf2')

    # Set up database files
    labelDir = os.path.join(self.dbDir, label)
    primitives = self.datasets[label]['primitives']
    intervals = self.datasets[label]['intervals'] = diskcache.Index(
        os.path.join(labelDir, 'intervals.diskCacheIndex'))
    intervalIndexes = self.datasets[label]['intervalIndexes'] = {
        'primitives': {},
        'locations': {},
        'both': {}
    }
    procMetrics = self.datasets[label]['procMetrics'] = diskcache.Index(
        os.path.join(labelDir, 'procMetrics.diskCacheIndex'))
    guids = self.datasets[label]['guids'] = diskcache.Index(
        os.path.join(labelDir, 'guids.diskCacheIndex'))
    self.datasets[label]['meta']['storedEvents'] = storeEvents
    if storeEvents:
        self.datasets[label]['events'] = diskcache.Index(
            os.path.join(labelDir, 'events.diskCacheIndex'))

    # Temporary counters / lists for sorting
    numEvents = 0
    self.sortedEventsByLocation = {}

    await log('Parsing OTF2 events (.=2500 events)')
    newR = seenR = 0
    currentEvent = None
    includedMetrics = 0
    skippedMetricsForMissingPrior = 0
    skippedMetricsForMismatch = 0

    async for line in file:
        eventLineMatch = eventLineParser.match(line)
        addAttrLineMatch = addAttrLineParser.match(line)
        metricLineMatch = metricLineParser.match(line)
        if currentEvent is None and eventLineMatch is None and metricLineMatch is None:
            # This is a blank / header line
            continue

        if metricLineMatch is not None:
            # This is a metric line
            location = metricLineMatch.group(1)
            timestamp = int(metricLineMatch.group(2))
            metricType = metricLineMatch.group(3)
            value = int(float(metricLineMatch.group(4)))

            if metricType.startswith('PAPI'):
                if currentEvent is None:
                    skippedMetricsForMissingPrior += 1
                elif currentEvent['Timestamp'] != timestamp or currentEvent['Location'] != location:  #pylint: disable=unsubscriptable-object
                    skippedMetricsForMismatch += 1
                else:
                    includedMetrics += 1
                    currentEvent['metrics'][metricType] = value  #pylint: disable=unsubscriptable-object
            else:
                # do the other meminfo status io parsing here
                if metricType not in procMetrics:
                    procMetrics[metricType] = {}
                if 'procMetricList' not in procMetrics:
                    procMetrics['procMetricList'] = []
                pm = procMetrics['procMetricList']
                pm.append(metricType)
                procMetrics['procMetricList'] = pm
                val = procMetrics[metricType]
                val[str(timestamp)] = {'Timestamp': timestamp, 'Value': value}
                procMetrics[metricType] = val
        elif eventLineMatch is not None:
            # This is the beginning of a new event; process the previous one
            if currentEvent is not None:
                counts = self.processEvent(label, currentEvent, str(numEvents))
                # Log that we've processed another event
                numEvents += 1
                if numEvents % 2500 == 0:
                    await log('.', end='')
                if numEvents % 100000 == 0:
                    await log('processed %i events' % numEvents)
                # Add to primitive / guid counts
                newR += counts[0]
                seenR += counts[1]
            currentEvent = {'metrics': {}}
            currentEvent['Event'] = eventLineMatch.group(1)
            currentEvent['Location'] = eventLineMatch.group(2)
            currentEvent['Timestamp'] = int(eventLineMatch.group(3))
            attrs = eventLineMatch.group(4)
            for attrMatch in re.finditer(attrParsers[currentEvent['Event']], attrs):
                currentEvent[attrMatch.group(1)] = attrMatch.group(2)
        else:
            # This line contains additional event attributes
            if currentEvent is None or addAttrLineMatch is None:
                print(currentEvent)
                print(addAttrLineMatch)
                print(line)
            assert currentEvent is not None and addAttrLineMatch is not None
            attrList = addAttrSplitter.split(addAttrLineMatch.group(1))
            for attrStr in attrList:
                attr = addAttrParser.match(attrStr)
                assert attr is not None
                currentEvent[attr.group(1)] = attr.group(2)  #pylint: disable=unsupported-assignment-operation

    # The last event will never have had a chance to be processed:
    if currentEvent is not None:
        counts = self.processEvent(label, currentEvent, str(numEvents))
        newR += counts[0]
        seenR += counts[1]
    await log('')
    await log('Finished processing %i events' % numEvents)
    await log('New primitives: %d, References to existing primitives: %d' % (newR, seenR))
    await log('Metrics included: %d; skipped for no prior ENTER: %d; skipped for mismatch: %d' %
              (includedMetrics, skippedMetricsForMissingPrior, skippedMetricsForMismatch))

    # Now that we've seen all the locations, store that list in our metadata
    locationNames = self.datasets[label]['meta']['locationNames'] = sorted(
        self.sortedEventsByLocation.keys())

    # Combine the sorted enter / leave events into intervals
    await log('Combining enter / leave events into intervals (.=2500 intervals)')
    numIntervals = mismatchedIntervals = 0
    for location, eventList in self.sortedEventsByLocation.items():
        lastEvent = None
        for _, event in eventList:
            assert event is not None
            if event['Event'] == 'ENTER':
                # Start an interval (don't output anything)
                if lastEvent is not None:
                    # TODO: factorial data used to trigger this... why?
                    await log('WARNING: omitting ENTER event without a following LEAVE event (%s)' %
                              lastEvent['name'])  #pylint: disable=unsubscriptable-object
                lastEvent = event
            elif event['Event'] == 'LEAVE':
                # Finish an interval
                if lastEvent is None:
                    # TODO: factorial data used to trigger this... why?
                    await log('WARNING: omitting LEAVE event without a prior ENTER event (%s)' %
                              event['name'])
                    continue
                intervalId = str(numIntervals)
                currentInterval = {'enter': {}, 'leave': {}, 'intervalId': intervalId}
                # Copy all of the attributes from the OTF2 events into the interval object. If the values
                # differ (or it's the timestamp), put them in nested enter / leave objects. Otherwise, put
                # them directly in the interval object
                for attr in set(event.keys()).union(lastEvent.keys()):
                    if attr not in event:
                        currentInterval['enter'][attr] = lastEvent[attr]  #pylint: disable=unsubscriptable-object
                    elif attr not in lastEvent:  #pylint: disable=E1135
                        currentInterval['leave'][attr] = event[attr]
                    elif attr != 'Timestamp' and event[attr] == lastEvent[attr]:  #pylint: disable=unsubscriptable-object
                        currentInterval[attr] = event[attr]
                    else:
                        currentInterval['enter'][attr] = lastEvent[attr]  #pylint: disable=unsubscriptable-object
                        currentInterval['leave'][attr] = event[attr]
                # Count whether the primitive attribute differed between enter / leave
                if 'Primitive' not in currentInterval:
                    mismatchedIntervals += 1
                intervals[intervalId] = currentInterval

                # Log that we've finished the interval
                numIntervals += 1
                if numIntervals % 2500 == 0:
                    await log('.', end='')
                if numIntervals % 100000 == 0:
                    await log('processed %i intervals' % numIntervals)
                lastEvent = None
        # Make sure there are no trailing ENTER events
        if lastEvent is not None:
            # TODO: fibonacci data triggers this... why?
            await log('WARNING: omitting trailing ENTER event (%s)' % lastEvent['Primitive'])
    del self.sortedEventsByLocation
    await log('')
    await log('Finished creating %i intervals; %i refer to mismatching primitives' %
              (numIntervals, mismatchedIntervals))

    # Now for indexing: we want per-location indexes, per-primitive indexes,
    # as well as both filters at the same time (we key by locations first)
    # TODO: these are all built in memory... should probably find a way to
    # make a diskcache-like version of IntervalTree:
    for location in locationNames:
        intervalIndexes['locations'][location] = IntervalTree()
        intervalIndexes['both'][location] = {}
    for primitive in primitives.keys():
        intervalIndexes['primitives'][primitive] = IntervalTree()
        for location in locationNames:
            intervalIndexes['both'][location][primitive] = IntervalTree()

    await log('Assembling interval indexes (.=2500 intervals)')
    count = 0

    async def intervalIterator():
        nonlocal count
        for intervalId, intervalObj in intervals.items():
            enter = intervalObj['enter']['Timestamp']
            # Need to add one because IntervalTree can't handle zero-length intervals
            # (and because IntervalTree is not inclusive of upper bounds in queries)
            leave = intervalObj['leave']['Timestamp'] + 1
            iv = Interval(enter, leave, intervalId)

            # Add the interval to the appropriate indexes (piggybacked off
            # the construction of the main index):
            location = intervalObj['Location']
            intervalIndexes['locations'][location].add(iv)
            if 'Primitive' in intervalObj:
                intervalIndexes['primitives'][intervalObj['Primitive']].add(iv)
                intervalIndexes['both'][location][intervalObj['Primitive']].add(iv)
            elif 'Primitive' in intervalObj['enter']:
                intervalIndexes['primitives'][intervalObj['enter']['Primitive']].add(iv)
                intervalIndexes['both'][location][intervalObj['enter']['Primitive']].add(iv)

            count += 1
            if count % 2500 == 0:
                await log('.', end='')
            if count % 100000 == 0:
                await log('processed %i intervals' % count)

            yield iv

    # Iterate through all intervals to construct the main index:
    intervalIndexes['main'] = IntervalTree([iv async for iv in intervalIterator()])

    # Store the domain of the data from the computed index as metadata
    self.datasets[label]['meta']['intervalDomain'] = [
        intervalIndexes['main'].top_node.begin,
        intervalIndexes['main'].top_node.end
    ]
    await log('')
    await log('Finished indexing %i intervals' % count)

    await log('Connecting intervals with the same GUID (.=2500 intervals)')
    intervalCount = missingCount = newLinks = seenLinks = 0
    for iv in intervalIndexes['main'].iterOverlap(endOrder=True):
        intervalId = iv.data
        intervalObj = intervals[intervalId]

        # Parent GUIDs refer to the one in the enter event, not the leave event
        guid = intervalObj.get('GUID', intervalObj['enter'].get('GUID', None))
        if guid is None:
            missingCount += 1
        else:
            if not guid in guids:
                guids[guid] = []
            guids[guid] = guids[guid] + [intervalId]

        # Connect to most recent interval with the parent GUID
        parentGuid = intervalObj.get('Parent GUID', intervalObj['enter'].get('Parent GUID', None))
        if parentGuid is not None and parentGuid in guids:
            foundPrior = False
            for parentIntervalId in reversed(guids[parentGuid]):
                parentInterval = intervals[parentIntervalId]
                if parentInterval['enter']['Timestamp'] <= intervalObj['enter']['Timestamp']:
                    foundPrior = True
                    intervalCount += 1
                    # Store metadata about the most recent interval
                    intervalObj['lastParentInterval'] = {
                        'id': parentIntervalId,
                        'location': parentInterval['Location'],
                        'endTimestamp': parentInterval['leave']['Timestamp']
                    }
                    # Because intervals is a diskcache, it needs a copy to know that something changed
                    intervals[intervalId] = intervalObj.copy()

                    # While we're here, note the parent-child link in the primitive graph
                    # (for now, only assume links from the parent's leave interval to the
                    # child's enter when primitive names are mismatched)
                    child = intervalObj.get('Primitive', intervalObj['enter'].get('Primitive', None))
                    parent = parentInterval.get('Primitive', intervalObj['leave'].get('Primitive', None))
                    if child is not None and parent is not None:
                        l = self.addPrimitiveChild(label, parent, child, 'otf2')[1]
                        newLinks += l
                        seenLinks += 1 if l == 0 else 0
                    break
            if not foundPrior:
                missingCount += 1
        else:
            missingCount += 1

        if (missingCount + intervalCount) % 2500 == 0:
            await log('.', end='')
        if (missingCount + intervalCount) % 100000 == 0:
            await log('processed %i intervals' % (missingCount + intervalCount))

    await log('Finished connecting intervals')
    await log('Interval links created: %i, Intervals without prior parent GUIDs: %i' %
              (intervalCount, missingCount))
    await log('New primitive links based on GUIDs: %d, Observed existing links: %d' %
              (newLinks, seenLinks))

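# The copy-and-reassign pattern used above (`intervals[intervalId] =
# intervalObj.copy()`) matters because diskcache.Index only persists values on
# assignment; mutating a nested dict that was read back from the index changes
# a deserialized copy, not the stored value. A minimal illustration (the path
# is illustrative):
import diskcache

idx = diskcache.Index('/tmp/example.diskCacheIndex')
idx['row'] = {'count': 0}

row = idx['row']
row['count'] += 1      # mutates the in-memory copy only
idx['row'] = row       # reassign so the change is actually written to disk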