Example #1
def test_init():
    directory = '/tmp/diskcache/index'
    mapping = {'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1}
    index = dc.Index(None, mapping)

    assert index == mapping

    rmdir(index.directory)
    del index

    rmdir(directory)
    index = dc.Index(directory, mapping)

    assert index.directory == directory
    assert index == mapping

    other = dc.Index(directory)

    assert other == index

    del index
    del other
    rmdir(directory)
    index = dc.Index(directory, mapping.items())

    assert index == mapping

    del index
    rmdir(directory)
    index = dc.Index(directory, a=5, b=4, c=3, d=2, e=1)

    assert index == mapping
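The test above exercises every supported constructor form of dc.Index. As a minimal sketch of the behavior it relies on (the /tmp path below is illustrative and assumed to start empty), two Index objects opened on the same directory share the same persisted data:

import diskcache as dc

demo_dir = '/tmp/diskcache/demo'
index = dc.Index(demo_dir, {'a': 5, 'b': 4})
index['c'] = 3                       # each write is persisted to demo_dir immediately

reopened = dc.Index(demo_dir)        # a second handle on the same directory
assert reopened == {'a': 5, 'b': 4, 'c': 3}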
Example #2
 def wrapper():
     index = dc.Index()
     try:
         func(index)
     except Exception:
         rmdir(index.directory)
         raise
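This fragment is the inner function of a test decorator; a plausible complete wrapper (the with_index name, functools.wraps, and the shutil-based rmdir helper are assumptions, only the try/except body appears above) could look like:

import functools
import shutil

import diskcache as dc

def rmdir(directory):
    shutil.rmtree(directory, ignore_errors=True)

def with_index(func):
    @functools.wraps(func)
    def wrapper():
        index = dc.Index()              # temporary directory chosen by diskcache
        try:
            func(index)
        except Exception:
            rmdir(index.directory)      # remove the temp directory, then re-raise
            raise
    return wrapper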
Example #3
    def __init__(self, secos_path: str):
        super().__init__()
        sys.path.append(secos_path)
        import decompound_server

        self.decompound = decompound_server.make_decompounder(
            [
                "decompound_server.py",
                f"{secos_path}data/denews70M_trigram__candidates",
                f"{secos_path}data/denews70M_trigram__WordCount",
                "50",
                "3",
                "3",
                "5",
                "3",
                "upper",
                "0.01",
                "2020",
            ]
        )

        self.disk_cache = diskcache.Index("secos_cache")
        self.cache = {}

        for key in self.disk_cache:
            self.cache[key] = self.disk_cache[key]
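The constructor above only warms the in-memory dict from the persistent Index; a hedged sketch of the lookup path that presumably pairs with it (the decompound_word method name and the single-argument call to self.decompound are assumptions, not shown in the source):

    def decompound_word(self, word: str) -> str:
        # Assumed read-through cache: check RAM first, then compute and persist
        # the result so a later process start can reuse it.
        if word not in self.cache:
            result = self.decompound(word)
            self.cache[word] = result
            self.disk_cache[word] = result   # written straight to the on-disk Index
        return self.cache[word]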
Example #4
 async def load(self, log=logToConsole):
     # Load any files that exist (or create missing required files)
     for label in os.listdir(self.dbDir):
         self.datasets[label] = {}
         labelDir = os.path.join(self.dbDir, label)
         for ctype in diskCacheIndices:
             cpath = os.path.join(labelDir, ctype + '.diskCacheIndex')
             if os.path.exists(cpath):
                 await log('Loading %s %s...' % (label, ctype))
                 self.datasets[label][ctype] = diskcache.Index(cpath)
             elif ctype in requiredDiskCacheIndices:
                 raise FileNotFoundError(errno.ENOENT,
                                         os.strerror(errno.ENOENT), cpath)
         for ptype in pickles:
             ppath = os.path.join(labelDir, ptype + '.pickle')
             if os.path.exists(ppath):
                 await log('Loading %s %s...' % (label, ptype))
                 if ptype == 'intervalIndexes':
                     await log('(may take a while if %s is large)' % label)
                 self.datasets[label][ptype] = pickle.load(open(
                     ppath, 'rb'))
             elif ptype in requiredPickleDicts:
                 raise FileNotFoundError(errno.ENOENT,
                                         os.strerror(errno.ENOENT), ppath)
         for listType in requiredMetaLists:
             self.datasets[label]['meta'][listType] = self.datasets[label][
                 'meta'].get(listType, [])
Example #5
def test(status=False):
    if os.environ.get('TRAVIS') == 'true':
        return

    if os.environ.get('APPVEYOR') == 'True':
        return

    random.seed(SEED)
    index = dc.Index(enumerate(range(KEYS)))
    processes = []

    for count in range(8):
        process = mp.Process(target=stress, args=(SEED + count, index))
        process.start()
        processes.append(process)

    for value in it.count():
        time.sleep(1)

        if status:
            print('\r', value, 's', len(index), 'keys', ' ' * 20, end='')

        if all(not process.is_alive() for process in processes):
            break

    if status:
        print('')

    assert all(process.exitcode == 0 for process in processes)
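The stress body is not shown above; a hypothetical sketch of such a worker (the real test likely does more): every process receives the same Index, and because the data lives in a SQLite file on disk, concurrent readers and writers in separate processes observe one consistent mapping.

def stress(seed, index):
    # Hypothetical worker: hammer the shared on-disk Index with random reads
    # and writes from this process.
    rnd = random.Random(seed)
    for _ in range(10000):
        key = rnd.randrange(KEYS)
        if rnd.random() < 0.5:
            index[key] = rnd.random()
        else:
            index.get(key)      # get() returns None for missing keys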
Example #6
    def __init__(self, server_url: str):
        super().__init__()
        self.server_url = server_url

        self.disk_cache = diskcache.Index("secos_cache")
        self.cache = {}

        for key in self.disk_cache:
            self.cache[key] = self.disk_cache[key]
Example #7
 def createDataset(self, label):
     labelDir = os.path.join(self.dbDir, label)
     if label in self.datasets or os.path.exists(labelDir):
         self.purgeDataset(label)
     self.datasets[label] = {}
     os.makedirs(labelDir)
     for ctype in requiredDiskCacheIndices:
         cpath = os.path.join(labelDir, ctype + '.diskCacheIndex')
         self.datasets[label][ctype] = diskcache.Index(cpath)
     for ptype in requiredPickleDicts:
         self.datasets[label][ptype] = {}
     for listType in requiredMetaLists:
         self.datasets[label]['meta'][listType] = self.datasets[label][
             'meta'].get(listType, [])
Example #8
 def createDataset(self):
     datasetId = self.generateUniqueDatasetId()
     idDir = os.path.join(self.dbDir, datasetId)
     if datasetId in self or os.path.exists(idDir):
         del self[datasetId]
     self.datasets[datasetId] = {}
     os.makedirs(idDir)
     for ctype in requiredDiskCacheIndices:
         cpath = os.path.join(idDir, ctype + '.diskCacheIndex')
         self[datasetId][ctype] = diskcache.Index(cpath)
     for ptype in requiredPickleDicts:
         self[datasetId][ptype] = {}
     for key, defaultValue in defaultInfo.items():
         self[datasetId]['info'][key] = self[datasetId]['info'].get(key, deepcopy(defaultValue))
     self[datasetId]['info']['datasetId'] = datasetId
     return self[datasetId]
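createDataset indexes the manager object itself (self[datasetId] rather than self.datasets[datasetId]), and del self[datasetId] implies a __delitem__ as well; a minimal mapping shim like the following is assumed but not shown in the excerpt:

 def __getitem__(self, datasetId):
     # Assumed delegation: treat the manager as a mapping over its datasets.
     return self.datasets[datasetId]

 def __contains__(self, datasetId):
     return datasetId in self.datasets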
Example #9
 async def load(self, log=logToConsole):
     # Load any files that exist (or create missing required files)
     for datasetId in os.listdir(self.dbDir):
         self.datasets[datasetId] = {}
         idDir = os.path.join(self.dbDir, datasetId)
         for ctype in diskCacheIndices:
             cpath = os.path.join(idDir, ctype + '.diskCacheIndex')
             if os.path.exists(cpath):
                 await log('Loading %s %s...' % (datasetId, ctype))
                 self[datasetId][ctype] = diskcache.Index(cpath)
             elif ctype in requiredDiskCacheIndices:
                 raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cpath)
         for ptype in pickles:
             ppath = os.path.join(idDir, ptype + '.pickle')
             if os.path.exists(ppath):
                 await log('Loading %s %s...' % (datasetId, ptype))
                 self[datasetId][ptype] = pickle.load(open(ppath, 'rb'))
             elif ptype in requiredPickleDicts:
                 raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), ppath)
         for key, defaultValue in defaultInfo.items():
             self[datasetId]['info'][key] = self[datasetId]['info'].get(key, deepcopy(defaultValue))
         self[datasetId]['info']['datasetId'] = datasetId
         await log('Finished loading %s (%s)' % (datasetId, self[datasetId]['info']['label']))
Example #10
                len(reg_q), list(reg_q)))
            logger.debug('{} node(s) in wait queue: {}'.format(
                len(wait_q), list(wait_q)))
            manage_incoming_nodes(node_q, reg_q, wait_q)
            if len(reg_q) > 0:
                drain_msg_queue(reg_q, pub_q, addr='127.0.0.1')

            logger.debug('{} node(s) in node queue: {}'.format(
                len(node_q), list(node_q)))
            logger.debug('{} node(s) in pub queue: {}'.format(
                len(pub_q), list(pub_q)))
            logger.debug('{} node(s) in active queue: {}'.format(
                len(cfg_q), list(cfg_q)))

        except Exception as exc:
            logger.error('peerstate exception was: {}'.format(exc))
            raise exc


cache = dc.Index(get_cachedir())
cfg_q = dc.Deque(directory=get_cachedir('cfg_queue'))
node_q = dc.Deque(directory=get_cachedir('node_queue'))
off_q = dc.Deque(directory=get_cachedir('off_queue'))
wdg_q = dc.Deque(directory=get_cachedir('wedge_queue'))
pub_q = dc.Deque(directory=get_cachedir('pub_queue'))
reg_q = dc.Deque(directory=get_cachedir('reg_queue'))
tmp_q = dc.Deque(directory=get_cachedir('tmp_queue'))
wait_q = dc.Deque(directory=get_cachedir('wait_queue'))
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
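A brief sketch of why these module-level objects can be shared by separate daemon scripts (the pattern is generic diskcache behavior, and the appended value is illustrative): a Deque or Index opened on the same directory from another process sees the same on-disk contents.

producer = dc.Deque(directory=get_cachedir('node_queue'))
producer.append('node-id-1234')          # persisted immediately

# in a different process or script
consumer = dc.Deque(directory=get_cachedir('node_queue'))
first = consumer.popleft()               # -> 'node-id-1234'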
Example #11
def index():
    index = dc.Index()
    yield index
    rmdir(index.directory)
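Assuming the generator above is registered with @pytest.fixture (the decorator is not shown), a test consumes it by naming the fixture as a parameter, for example:

def test_setdefault(index):
    assert index.setdefault('key', 'value') == 'value'
    assert index['key'] == 'value'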
Example #12
def do_scheduling():
    set_initial_role()
    network_cruft_cleaner()
    schedule.run_all(1, 'base-tasks')
    validate_role()
    node_role = NODE_SETTINGS['node_role']
    mode = NODE_SETTINGS['mode']
    if node_role is None and mode == 'peer':
        NODE_SETTINGS['use_localhost'] = True

    if mode == 'peer':
        if node_role is None:
            check_time = 33
            baseCheckJob = schedule.every(check_time).seconds
            baseCheckJob.do(run_net_check).tag('base-tasks', 'route-status')

            try:
                data = wait_for_moon(timeout=45)
            except Exception as exc:
                logger.error('ENODATA exception {}'.format(exc))
                put_state_msg('ERROR')

            try:
                handle_moon_data(data)
                put_state_msg('STARTING')
            except MemberNodeError as exc:
                logger.error('ENODATA exception {}'.format(exc))
                put_state_msg('ERROR')

            str_level = logging.getLevelName(logger.getEffectiveLevel())
            logger.debug('Current log level is: {}'.format(str_level))
            startup_handlers()

        else:
            if node_role == 'controller':
                netobj_q = dc.Deque(directory=get_cachedir('netobj_queue'))
                gen_netobj_queue(netobj_q)
                cache = dc.Index(get_cachedir())
                for key_str in ['peer', 'moon', 'mstate']:
                    delete_cache_entry(cache, key_str)

            elif node_role == 'moon':
                cln_q = dc.Deque(directory=get_cachedir('clean_queue'))
                pub_q = dc.Deque(directory=get_cachedir('pub_queue'))
                schedule.every(37).seconds.do(run_cleanup_check, cln_q,
                                              pub_q).tag(
                                                  'chk-tasks', 'cleanup')
                schedule.every(15).minutes.do(check_daemon_status).tag(
                    'chk-tasks', 'responder')

            schedule.every(15).minutes.do(check_daemon_status,
                                          script='msg_subscriber.py').tag(
                                              'chk-tasks', 'subscriber')
            schedule.run_all(1, 'chk-tasks')

    elif mode == 'adhoc':
        logger.debug('Running in adhoc mode...')
        if NODE_SETTINGS['nwid']:
            logger.debug('ADHOC: found network {}'.format(
                NODE_SETTINGS['nwid']))
            do_startup(NODE_SETTINGS['nwid'])
        else:
            logger.error('No network ID found in NODE_SETTINGS!!')
            logger.error('Have you created a network yet?')

    logger.debug('MODE: startup mode is {} and role is {}'.format(
        mode, node_role))
    logger.info(
        'You are running fpnd/node_tools version {}'.format(fpnd_version))

    while True:
        schedule.run_pending()
        time.sleep(1)
Example #13
        self.date = datetime.strptime(response.headers["Date"],
                                      "%a, %d %b %Y %H:%M:%S %Z")


# Global objects which keep track of the wait time needed for the x_rate_policy.
# The lock should be used for accessing, waiting, and modifying the wait time.
# Asyncio complains if a lock is used from a loop other than the one it was
# created in, so the locks are keyed by loop.
# This will not work with multithreading/multiprocessing.
locks_by_policy: Dict[AbstractEventLoop,
                      Dict[str, Lock]] = defaultdict(lambda: defaultdict(Lock))

tqdm_by_policy: Dict[str, tqdm] = dict()

wait_times_by_policy: diskcache.Index = diskcache.Index(
    os.path.join(__diskcache_path__, f"x_rate_response"))


class Waiter:
    policy: str
    tqdm: tqdm

    def __init__(self, policy: str):
        self.policy = policy
        loaded_wait = max(0, wait_times_by_policy[self.policy] - time.time())

        self.tqdm = tqdm(total=int(loaded_wait), desc=f"{policy}-wait")
        if loaded_wait > 0:
            logging.info(
                f"Found existing wait time of {loaded_wait:02f} for {policy}")
Example #14
def test():
    random.seed(SEED)
    mapping = co.OrderedDict(enumerate(range(KEYS)))
    index = dc.Index(enumerate(range(KEYS)))
    stress(mapping, index)
    assert mapping == index
Example #15
async def processOtf2(self, label, file, storeEvents=False, log=logToConsole):
    self.addSourceFile(label, file.name, 'otf2')

    # Set up database files
    labelDir = os.path.join(self.dbDir, label)
    primitives = self.datasets[label]['primitives']
    intervals = self.datasets[label]['intervals'] = diskcache.Index(
        os.path.join(labelDir, 'intervals.diskCacheIndex'))
    intervalIndexes = self.datasets[label]['intervalIndexes'] = {
        'primitives': {},
        'locations': {},
        'both': {}
    }
    procMetrics = self.datasets[label]['procMetrics'] = diskcache.Index(
        os.path.join(labelDir, 'procMetrics.diskCacheIndex'))
    guids = self.datasets[label]['guids'] = diskcache.Index(
        os.path.join(labelDir, 'guids.diskCacheIndex'))
    self.datasets[label]['meta']['storedEvents'] = storeEvents
    if storeEvents:
        self.datasets[label]['events'] = diskcache.Index(
            os.path.join(labelDir, 'events.diskCacheIndex'))

    # Temporary counters / lists for sorting
    numEvents = 0
    self.sortedEventsByLocation = {}
    await log('Parsing OTF2 events (.=2500 events)')
    newR = seenR = 0
    currentEvent = None
    includedMetrics = 0
    skippedMetricsForMissingPrior = 0
    skippedMetricsForMismatch = 0

    async for line in file:
        eventLineMatch = eventLineParser.match(line)
        addAttrLineMatch = addAttrLineParser.match(line)
        metricLineMatch = metricLineParser.match(line)
        if currentEvent is None and eventLineMatch is None and metricLineMatch is None:
            # This is a blank / header line
            continue

        if metricLineMatch is not None:
            # This is a metric line
            location = metricLineMatch.group(1)
            timestamp = int(metricLineMatch.group(2))
            metricType = metricLineMatch.group(3)
            value = int(float(metricLineMatch.group(4)))

            if metricType.startswith('PAPI'):
                if currentEvent is None:
                    skippedMetricsForMissingPrior += 1
                elif currentEvent['Timestamp'] != timestamp or currentEvent[
                        'Location'] != location:  #pylint: disable=unsubscriptable-object
                    skippedMetricsForMismatch += 1
                else:
                    includedMetrics += 1
                    currentEvent['metrics'][metricType] = value  #pylint: disable=unsubscriptable-object
            else:  # do the other meminfo status io parsing here
                if metricType not in procMetrics:
                    procMetrics[metricType] = {}
                    if 'procMetricList' not in procMetrics:
                        procMetrics['procMetricList'] = []
                    pm = procMetrics['procMetricList']
                    pm.append(metricType)
                    procMetrics['procMetricList'] = pm
                val = procMetrics[metricType]
                val[str(timestamp)] = {'Timestamp': timestamp, 'Value': value}
                procMetrics[metricType] = val
        elif eventLineMatch is not None:
            # This is the beginning of a new event; process the previous one
            if currentEvent is not None:
                counts = self.processEvent(label, currentEvent, str(numEvents))
                # Log that we've processed another event
                numEvents += 1
                if numEvents % 2500 == 0:
                    await log('.', end='')
                if numEvents % 100000 == 0:
                    await log('processed %i events' % numEvents)
                # Add to primitive / guid counts
                newR += counts[0]
                seenR += counts[1]
            currentEvent = {'metrics': {}}
            currentEvent['Event'] = eventLineMatch.group(1)
            currentEvent['Location'] = eventLineMatch.group(2)
            currentEvent['Timestamp'] = int(eventLineMatch.group(3))
            attrs = eventLineMatch.group(4)
            for attrMatch in re.finditer(attrParsers[currentEvent['Event']],
                                         attrs):
                currentEvent[attrMatch.group(1)] = attrMatch.group(2)
        else:
            # This line contains additional event attributes
            if currentEvent is None or addAttrLineMatch is None:
                print(currentEvent)
                print(addAttrLineMatch)
                print(line)
            assert currentEvent is not None and addAttrLineMatch is not None
            attrList = addAttrSplitter.split(addAttrLineMatch.group(1))
            for attrStr in attrList:
                attr = addAttrParser.match(attrStr)
                assert attr is not None
                currentEvent[attr.group(1)] = attr.group(2)  #pylint: disable=unsupported-assignment-operation
    # The last event will never have had a chance to be processed:
    if currentEvent is not None:
        counts = self.processEvent(label, currentEvent, str(numEvents))
        newR += counts[0]
        seenR += counts[1]
    await log('')
    await log('Finished processing %i events' % numEvents)
    await log('New primitives: %d, References to existing primitives: %d' %
              (newR, seenR))
    await log(
        'Metrics included: %d; skipped for no prior ENTER: %d; skipped for mismatch: %d'
        % (includedMetrics, skippedMetricsForMissingPrior,
           skippedMetricsForMismatch))

    # Now that we've seen all the locations, store that list in our metadata
    locationNames = self.datasets[label]['meta']['locationNames'] = sorted(
        self.sortedEventsByLocation.keys())

    # Combine the sorted enter / leave events into intervals
    await log(
        'Combining enter / leave events into intervals (.=2500 intervals)')
    numIntervals = mismatchedIntervals = 0
    for location, eventList in self.sortedEventsByLocation.items():
        lastEvent = None
        for _, event in eventList:
            assert event is not None
            if event['Event'] == 'ENTER':
                # Start an interval (don't output anything)
                if lastEvent is not None:
                    # TODO: factorial data used to trigger this... why?
                    await log(
                        'WARNING: omitting ENTER event without a following LEAVE event (%s)'
                        % lastEvent['name'])  #pylint: disable=unsubscriptable-object
                lastEvent = event
            elif event['Event'] == 'LEAVE':
                # Finish an interval
                if lastEvent is None:
                    # TODO: factorial data used to trigger this... why?
                    await log(
                        'WARNING: omitting LEAVE event without a prior ENTER event (%s)'
                        % event['name'])
                    continue
                intervalId = str(numIntervals)
                currentInterval = {
                    'enter': {},
                    'leave': {},
                    'intervalId': intervalId
                }
                # Copy all of the attributes from the OTF2 events into the interval object. If the values
                # differ (or it's the timestamp), put them in nested enter / leave objects. Otherwise, put
                # them directly in the interval object
                for attr in set(event.keys()).union(lastEvent.keys()):
                    if attr not in event:
                        currentInterval['enter'][attr] = lastEvent[attr]  #pylint: disable=unsubscriptable-object
                    elif attr not in lastEvent:  #pylint: disable=E1135
                        currentInterval['leave'][attr] = event[attr]
                    elif attr != 'Timestamp' and event[attr] == lastEvent[attr]:  #pylint: disable=unsubscriptable-object
                        currentInterval[attr] = event[attr]
                    else:
                        currentInterval['enter'][attr] = lastEvent[attr]  #pylint: disable=unsubscriptable-object
                        currentInterval['leave'][attr] = event[attr]
                # Count whether the primitive attribute differed between enter / leave
                if 'Primitive' not in currentInterval:
                    mismatchedIntervals += 1
                intervals[intervalId] = currentInterval

                # Log that we've finished another interval
                numIntervals += 1
                if numIntervals % 2500 == 0:
                    await log('.', end='')
                if numIntervals % 100000 == 0:
                    await log('processed %i intervals' % numIntervals)
                lastEvent = None
        # Make sure there are no trailing ENTER events
        if lastEvent is not None:
            # TODO: fibonacci data triggers this... why?
            await log('WARNING: omitting trailing ENTER event (%s)' %
                      lastEvent['Primitive'])
    del self.sortedEventsByLocation
    await log('')
    await log(
        'Finished creating %i intervals; %i refer to mismatching primitives' %
        (numIntervals, mismatchedIntervals))

    # Now for indexing: we want per-location indexes, per-primitive indexes,
    # as well as both filters at the same time (we key by locations first)
    # TODO: these are all built in memory... should probably find a way to
    # make a diskcache-like version of IntervalTree:
    for location in locationNames:
        intervalIndexes['locations'][location] = IntervalTree()
        intervalIndexes['both'][location] = {}
    for primitive in primitives.keys():
        intervalIndexes['primitives'][primitive] = IntervalTree()
        for location in locationNames:
            intervalIndexes['both'][location][primitive] = IntervalTree()

    await log('Assembling interval indexes (.=2500 intervals)')
    count = 0

    async def intervalIterator():
        nonlocal count
        for intervalId, intervalObj in intervals.items():
            enter = intervalObj['enter']['Timestamp']
            leave = intervalObj['leave']['Timestamp'] + 1
            # Need to add one because IntervalTree can't handle zero-length intervals
            # (and because IntervalTree is not inclusive of upper bounds in queries)

            iv = Interval(enter, leave, intervalId)

            # Add the interval to the appropriate indexes (piggybacked off
            # the construction of the main index):
            location = intervalObj['Location']
            intervalIndexes['locations'][location].add(iv)
            if 'Primitive' in intervalObj:
                intervalIndexes['primitives'][intervalObj['Primitive']].add(iv)
                intervalIndexes['both'][location][
                    intervalObj['Primitive']].add(iv)
            elif 'Primitive' in intervalObj['enter']:
                intervalIndexes['primitives'][intervalObj['enter']
                                              ['Primitive']].add(iv)
                intervalIndexes['both'][location][intervalObj['enter']
                                                  ['Primitive']].add(iv)

            count += 1
            if count % 2500 == 0:
                await log('.', end='')
            if count % 100000 == 0:
                await log('processed %i intervals' % count)

            yield iv

    # Iterate through all intervals to construct the main index:
    intervalIndexes['main'] = IntervalTree(
        [iv async for iv in intervalIterator()])

    # Store the domain of the data from the computed index as metadata
    self.datasets[label]['meta']['intervalDomain'] = [
        intervalIndexes['main'].top_node.begin,
        intervalIndexes['main'].top_node.end
    ]
    await log('')
    await log('Finished indexing %i intervals' % count)

    await log('Connecting intervals with the same GUID (.=2500 intervals)')
    intervalCount = missingCount = newLinks = seenLinks = 0
    for iv in intervalIndexes['main'].iterOverlap(endOrder=True):
        intervalId = iv.data
        intervalObj = intervals[intervalId]

        # Parent GUIDs refer to the one in the enter event, not the leave event
        guid = intervalObj.get('GUID', intervalObj['enter'].get('GUID', None))

        if guid is None:
            missingCount += 1
        else:
            if guid not in guids:
                guids[guid] = []
            guids[guid] = guids[guid] + [intervalId]

        # Connect to most recent interval with the parent GUID
        parentGuid = intervalObj.get(
            'Parent GUID', intervalObj['enter'].get('Parent GUID', None))

        if parentGuid is not None and parentGuid in guids:
            foundPrior = False
            for parentIntervalId in reversed(guids[parentGuid]):
                parentInterval = intervals[parentIntervalId]
                if parentInterval['enter']['Timestamp'] <= intervalObj[
                        'enter']['Timestamp']:
                    foundPrior = True
                    intervalCount += 1
                    # Store metadata about the most recent interval
                    intervalObj['lastParentInterval'] = {
                        'id': parentIntervalId,
                        'location': parentInterval['Location'],
                        'endTimestamp': parentInterval['leave']['Timestamp']
                    }
                    # Because intervals is a diskcache, it needs a copy to know that something changed
                    intervals[intervalId] = intervalObj.copy()

                    # While we're here, note the parent-child link in the primitive graph
                    # (for now, only assume links from the parent's leave interval to the
                    # child's enter when primitive names are mismatched)
                    child = intervalObj.get(
                        'Primitive',
                        intervalObj['enter'].get('Primitive', None))
                    parent = parentInterval.get(
                        'Primitive',
                        intervalObj['leave'].get('Primitive', None))
                    if child is not None and parent is not None:
                        l = self.addPrimitiveChild(label, parent, child,
                                                   'otf2')[1]
                        newLinks += l
                        seenLinks += 1 if l == 0 else 0
                    break
            if not foundPrior:
                missingCount += 1
        else:
            missingCount += 1

        if (missingCount + intervalCount) % 2500 == 0:
            await log('.', end='')
        if (missingCount + intervalCount) % 100000 == 0:
            await log('processed %i intervals' % (missingCount + intervalCount)
                      )

    await log('Finished connecting intervals')
    await log(
        'Interval links created: %i, Intervals without prior parent GUIDs: %i'
        % (intervalCount, missingCount))
    await log(
        'New primitive links based on GUIDs: %d, Observed existing links: %d' %
        (newLinks, seenLinks))
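A compact illustration of the read-modify-reassign pattern used above for procMetrics and intervals (the demo path is illustrative): values fetched from a diskcache.Index are plain Python objects, so mutating a nested dict or list in place is not persisted until the key is assigned again.

import diskcache

metrics = diskcache.Index('/tmp/procMetrics-demo')   # illustrative path
metrics['PAPI_TOT_CYC'] = {}

snapshot = metrics['PAPI_TOT_CYC']                   # a detached copy
snapshot['0'] = {'Timestamp': 0, 'Value': 42}        # mutates only the local copy
metrics['PAPI_TOT_CYC'] = snapshot                   # reassign to persist the change

assert '0' in metrics['PAPI_TOT_CYC']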