def test_irange():
    ss = SortedSet(load=7)

    assert [] == list(ss.irange())

    values = list(range(53))
    ss.update(values)

    for start in range(53):
        for end in range(start, 53):
            assert list(ss.irange(start, end)) == values[start:(end + 1)]
            assert list(ss.irange(start, end, reverse=True)) == values[start:(end + 1)][::-1]

    for start in range(53):
        for end in range(start, 53):
            assert list(range(start, end)) == list(ss.irange(start, end, (True, False)))

    for start in range(53):
        for end in range(start, 53):
            assert list(range(start + 1, end + 1)) == list(ss.irange(start, end, (False, True)))

    for start in range(53):
        for end in range(start, 53):
            assert list(range(start + 1, end)) == list(ss.irange(start, end, (False, False)))

    for start in range(53):
        assert list(range(start, 53)) == list(ss.irange(start))

    for end in range(53):
        assert list(range(0, end)) == list(ss.irange(None, end, (True, False)))

    assert values == list(ss.irange(inclusive=(False, False)))

    assert [] == list(ss.irange(53))
    assert values == list(ss.irange(None, 53, (True, False)))
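For reference, a minimal standalone sketch of the inclusive-bounds behaviour the test above exercises (assuming sortedcontainers is installed):

from sortedcontainers import SortedSet

ss = SortedSet([1, 2, 3, 4, 5])
list(ss.irange(2, 4))                            # [2, 3, 4]; bounds inclusive by default
list(ss.irange(2, 4, inclusive=(False, False)))  # [3]; both bounds exclusive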
Example #2
 def create(self, head_vars, affected_lines, primary_loc_keys, ircfg,
            address):
     self._address = hex(address) if address else "None"
     affected_exprs = {}
     # build a dependency graph over the IR CFG so we can recover the
     # expressions each affected line depends on
     dp = depgraph.DependencyGraph(ircfg, True)
     for block_loc_key in affected_lines:
         block = ircfg.blocks[block_loc_key]
         # expressions affecting this block, ordered by their string form
         cur_affected_exprs = SortedSet(key=lambda x: str(x))
         for line_nb in affected_lines[block_loc_key]:
             affected_assignments = block.assignblks[line_nb]
             for ind, (dst, src) in enumerate(affected_assignments.items()):
                 if type(src) not in [ExprInt, ExprMem]:
                     res = next(
                         dp.get(block_loc_key, {dst}, ind, {block_loc_key}))
                     cur_affected_exprs.update(
                         filter(lambda x: not is_bad_expr(x),
                                res.pending.keys()))
         affected_exprs[block_loc_key] = cur_affected_exprs
     loop = FlatteningLoop(list(head_vars), primary_loc_keys,
                           affected_lines, affected_exprs,
                           self.loc_db.add_location())
     upd = {}
     for i in loop.primary_loc_keys:
         if i in self._loc_key_to_loop:
             raise RuntimeError(
                 "Overlap of primary blocks of the flattening loops")
         upd[i] = loop
     self._loc_key_to_loop.update(upd)
     self.loops.append(loop)
     return loop
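The snippet above relies on SortedSet(key=...) to keep expressions ordered by their textual form. A minimal sketch of that behaviour, independent of miasm:

from sortedcontainers import SortedSet

exprs = SortedSet(key=str)   # order elements by their string representation
exprs.update([10, 2, 33])
list(exprs)                  # [10, 2, 33], since '10' < '2' < '33' as strings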
Example #3
def test_islice():
    ss = SortedSet(load=7)

    assert [] == list(ss.islice())

    values = list(range(53))
    ss.update(values)

    for start in range(53):
        for stop in range(53):
            assert list(ss.islice(start, stop)) == values[start:stop]

    for start in range(53):
        for stop in range(53):
            assert list(ss.islice(start, stop,
                                  reverse=True)) == values[start:stop][::-1]

    for start in range(53):
        assert list(ss.islice(start=start)) == values[start:]
        assert list(ss.islice(start=start,
                              reverse=True)) == values[start:][::-1]

    for stop in range(53):
        assert list(ss.islice(stop=stop)) == values[:stop]
        assert list(ss.islice(stop=stop, reverse=True)) == values[:stop][::-1]
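A minimal standalone sketch of islice, which slices by position rather than by value (assuming sortedcontainers is installed):

from sortedcontainers import SortedSet

ss = SortedSet('abcde')
list(ss.islice(1, 4))                # ['b', 'c', 'd'], same as list(ss)[1:4]
list(ss.islice(1, 4, reverse=True))  # ['d', 'c', 'b']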
Example #5
class Unit:
    __doc_id_count__ = 0

    @classmethod
    def genDocId(cls):
        cls.__doc_id_count__ += 1
        return cls.__doc_id_count__ - 1

    def __init__(self, file, unit_number):
        self.file = file
        self.unit_number = unit_number
        self.words = SortedSet([])
        self.docId = self.genDocId()

    def add(self, iterable):
        self.words.update(iterable)

    def keywords(self):
        return self.words

    def id(self):
        return self.docId

    def string(self):
        return "<" + str(self.docId) + ", " + self.file + ", unit " + str(
            self.unit_number) + ">"

    def __lt__(self, unit):
        return self.docId < unit.docId
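A hypothetical usage sketch of the Unit class above; in a fresh interpreter, docIds are handed out sequentially at construction time:

u1 = Unit('report.txt', 0)
u2 = Unit('report.txt', 1)
u1.add(['beta', 'alpha'])
assert (u1.id(), u2.id()) == (0, 1)
assert list(u1.keywords()) == ['alpha', 'beta']   # SortedSet keeps words ordered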
Example #6
    def _calcGeneSNPcorr(self, cr, gene, REF, useAll=False):

        if self._joint and self._MAP is not None:
            G = self._GENEID[gene]
            P = SortedSet(REF[str(cr)][1].irange(G[1] - self._window,
                                                 G[2] + self._window))

            if gene in self._MAP:
                P.update(
                    list(REF[str(cr)][0].getSNPsPos(
                        list(self._MAP[gene].keys()))))
                #P = list(set(P))

        elif self._MAP is None:
            G = self._GENEID[gene]

            P = REF[str(cr)][1].irange(G[1] - self._window,
                                       G[2] + self._window)
        else:
            if gene in self._MAP:
                P = set(REF[str(cr)][0].getSNPsPos(list(
                    self._MAP[gene].keys())))
                useAll = True
            else:
                P = []

        DATA = REF[str(cr)][0].get(list(P))

        filtered = {}

        #use = []
        #RID = []

        # Sort out
        for D in DATA:
            # Select
            if (D[0] in self._GWAS or useAll) and (D[1] > self._MAF) and (
                    D[0] not in filtered or D[1] < filtered[D[0]][0]):
                filtered[D[0]] = [D[1], D[2]]
                #use.append(D[2])
                #RID.append(s)

        # Calc corr
        RID = list(filtered.keys())
        use = []
        for i in range(0, len(RID)):
            use.append(filtered[RID[i]][1])

        use = np.array(use)

        if len(use) > 1:
            if self._useGPU:
                C = cp.asnumpy(cp.corrcoef(cp.asarray(use)))
            else:
                C = np.corrcoef(use)
        else:
            C = np.ones((1, 1))

        return C, np.array(RID)
Example #7
class _CoinDataSet(object):
    def __init__(self, init_from_file=True):
        self._market = coinmarketcap.Market()
        self._data = SortedSet()
        if init_from_file:
            for filename in os.listdir(STORAGE_DIR):
                with open(os.path.join(STORAGE_DIR, filename), 'r') as fp:
                    datapoint_list = json.load(
                        fp, object_hook=_CDPEncoder.decode_hook)
                    self._data.update(datapoint_list)

    def _DownloadNewDataPoint(self):
        cmc_dict = self._market.ticker(limit=0)
        data_to_store = {
            coin["symbol"]: coin["price_usd"]
            for coin in cmc_dict
        }
        self._data.add(_CoinDataPoint(timestamp(), data_to_store))
        self._DumpCurrentDayToFile()

    def _DumpAllToFile(self, filestr):
        data_to_dump = list(self._data)

        with open(filestr, 'w') as fp:
            json.dump(data_to_dump, fp, cls=_CDPEncoder)

    def _DumpCurrentDayToFile(self):
        # Midnight in unix time (system time zone)
        midnight = datetime.combine(date.today(), time.min)
        midnight_unix = int(midnight.timestamp())

        # All data since midnight.
        data_to_dump = list(self._data.irange(_CoinDataPoint(midnight_unix)))

        filestr = os.path.join(STORAGE_DIR,
                               midnight.strftime('%Y-%m-%d.coinjson'))
        with open(filestr, 'w') as fp:
            json.dump(data_to_dump, fp, cls=_CDPEncoder)

    def GetValue(self, symbol, time=None):
        try:
            if not time:
                return float(self._data[-1].coin_data[symbol.upper()])
            else:
                bisect_point = self._data.bisect(_CoinDataPoint(time))
                if bisect_point == 0:  # 'is 0' relied on int interning; use ==
                    return None
                return float(self._data[bisect_point -
                                        1].coin_data[symbol.upper()])
        except (IndexError, KeyError):
            return None

    def GetDayChange(self, symbol):
        currentVal = self.GetValue(symbol)
        yesterday_time = datetime.today() - timedelta(days=1)
        oldVal = self.GetValue(symbol, yesterday_time.timestamp())
        if oldVal is None:
            return None
        return 100 * ((currentVal - oldVal) / oldVal)
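GetValue above uses a common SortedSet pattern: bisect with a sentinel element, then take the entry just before the insertion point to find the newest datapoint at or before a timestamp. A standalone sketch with plain integers standing in for _CoinDataPoint:

from sortedcontainers import SortedSet

data = SortedSet([10, 20, 30])            # timestamps
i = data.bisect(25)                       # insertion point: 2
latest = data[i - 1] if i > 0 else None   # 20, the newest entry <= 25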
Example #8
    def fit_note(self, note):
        # TODO: possibly add scale notes to valid notes
        chord = self.midi_state.active_notes(self.chord_channel)
        # TODO: this currently maps to black AND white keys, MelodicFlow maps only to white keys.
        # This extends the range on the keyboard, but this solution should be more easily compatible
        # with generated output, as we don't have to transpose the black keys.
        # TODO: do not recompute if same chord as before (cache valid notes)
        if chord:
            middle_octave_chords = 4
            middle_octave_melody = 8

            # If the input note is too low, transpose it upwards
            # to apply the harmonisation, then transpose it back down.
            transposed_octave = 0
            while note < (middle_octave_melody * 12):
                note += 12
                transposed_octave += 1

            # Root C note of all octaves
            octaves = list(range(0, 127, 12))

            # normalize chord to C0, then generate transposed chords for every octave
            lowest, count = min(chord), -1
            while lowest >= 0:
                count += 1
                lowest -= 12
            mapped_over_range = [[e - (12 * count) + octave for e in chord]
                                 for octave in octaves]

            # get valid notes, split for positive and negative movement
            f_a = SortedSet([
                e for l in mapped_over_range[:middle_octave_chords] for e in l
            ])
            f_a.update(
                [e for l in major_notes[:middle_octave_chords] for e in l])
            f_b = SortedSet([
                e for l in mapped_over_range[middle_octave_chords:] for e in l
            ])
            f_b.update(
                [e for l in major_notes[middle_octave_chords:] for e in l])

            # get relative distance from played key to middle C of melody
            diff = note - octaves[middle_octave_melody]

            note -= transposed_octave * 12
            # clamp to valid note range
            diff = max(-len(f_a), min(diff, len(f_b) - 1))

            # jump to next valid note, either up or down
            if diff < 0:
                note = f_a[len(f_a) + diff]
            else:
                note = f_b[diff]

            # clamp note to valid MIDI note range
            note = max(0, min(note, 127))

        return note
Example #9
    def getOperators(self, *channels):
        operator_set = SortedSet()
        if channels:
            for channel in channels:
                operator_set.update(self._operators[channel])
        else:
            operator_set.update(self.operators)

        return operator_set
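A minimal sketch of the merge performed by getOperators, with a hypothetical _operators mapping:

from sortedcontainers import SortedSet

_operators = {0: ['c', 'a'], 1: ['b']}
merged = SortedSet()
for channel in (0, 1):
    merged.update(_operators[channel])
list(merged)   # ['a', 'b', 'c']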
Example #10
 def monotone_bfs(self, iu):
     seen = set([iu])
     layers = [SortedSet([iu])]
     while True:
         next_layer = SortedSet()
         for iv in layers[-1]:
             next_layer.update(set(self.in_neighbours[iv]) - seen)
         if len(next_layer) == 0:
             break
         seen.update(next_layer)
         layers.append(next_layer)
     return layers
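A hypothetical demo of monotone_bfs, assuming the function is reachable at module level and in_neighbours maps each node to its in-neighbours:

from sortedcontainers import SortedSet

class TinyGraph:
    in_neighbours = {1: [2, 3], 2: [4], 3: [4], 4: []}
    monotone_bfs = monotone_bfs   # reuse the method defined above

print(TinyGraph().monotone_bfs(1))
# [SortedSet([1]), SortedSet([2, 3]), SortedSet([4])]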
Example #11
class SortedSetKey:
    def __init__(self):
        self.dict = dict()
        self.sorted_set = SortedSet(key=self.get_key)

    def __getitem__(self, item):
        return self.sorted_set[item]

    def __len__(self):
        return len(self.sorted_set)

    def __str__(self):
        return str(self.sorted_set)

    def get_key(self, value):
        return self.dict[value]

    def get_reversed_list(self, index, count):
        return self[-1 - index:-1 - index - count:-1]

    def values(self):
        for value in self.sorted_set:
            yield value

    def clear(self):
        self.sorted_set.clear()
        self.dict.clear()

    def destroy(self):
        self.sorted_set = None

    def index(self, value):
        return self.sorted_set.index(value)

    def pop(self, index=-1):
        return self.sorted_set.pop(index)

    def add(self, value, rank):
        if value in self.sorted_set:
            self.sorted_set.remove(value)
        self.dict[value] = rank
        self.sorted_set.add(value)

    def remove(self, value):
        self.sorted_set.remove(value)
        del self.dict[value]

    def update(self, value_list, rank_list):
        self.sorted_set.difference_update(value_list)
        for i, value in enumerate(value_list):
            self.dict[value] = rank_list[i]
        self.sorted_set.update(value_list)
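A brief usage sketch: SortedSetKey orders values by externally supplied ranks kept in its dict:

s = SortedSetKey()
s.add('b', rank=2)
s.add('a', rank=1)
s.add('c', rank=3)
s[0], s[-1]                  # ('a', 'c')
s.get_reversed_list(0, 2)    # ['c', 'b']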
Example #12
    def preprocArgs():
        '''
            For each verb and associated set of questions,
                for each question, get known argument if not already processed
                get the lemma for that argument, get the list of ... NOT SURE
        '''
        for r, qs in USP.rel_qs.items():
            ignoredQs = set()

            for q in qs:
                if q.getArg() in USP.arg_cis:
                    continue

                cis = SortedSet()
                ts = q.getArg().split()
                isIgnored = False

                for f in ts:
                    if f in ['the','of','in']:
                        continue

                    if f not in USP.form_lemma:
                        isIgnored = True
                        break
                    else:
                        ls = USP.form_lemma[f]

                        for l in ls:
                            if l in USP.lemma_clustIdxs:
                                cis.update(USP.lemma_clustIdxs[l])

                if isIgnored:
                    ignoredQs.add(q)
                    continue

                if len(ts) >= 2:
                    hs = USP.form_lemma[ts[-1]]
                    ds = USP.form_lemma[ts[-2]]

                    for h in hs:
                        for d in ds:
                            if (h, d) in USP.headDep_clustIdxs:
                                cis.add(USP.headDep_clustIdxs[(h, d)])

                USP.arg_cis[q.getArg()] = cis

            qs = [x for x in qs if x not in ignoredQs]
            USP.rel_qs[r] = qs

        return None
Example #13
    def __init__(self, sentences):

        # self.sentences = sentences
        self.word2idx = {}
        self.idx2word = {}

        vocabulary = SortedSet()
        for phrase in sentences:
            vocabulary.update(phrase.split(' '))

        self.word2idx['<pad>'] = 0
        for index, word in enumerate(vocabulary):
            self.word2idx[word] = index + 1

        for word, index in self.word2idx.items():
            self.idx2word[index] = word
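A hedged usage sketch of the constructor above, assuming the enclosing class is named, say, Vocab:

v = Vocab(['b a', 'a c'])
v.word2idx   # {'<pad>': 0, 'a': 1, 'b': 2, 'c': 3}
v.idx2word   # {0: '<pad>', 1: 'a', 2: 'b', 3: 'c'}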
Example #14
def get_probe_contexts(
    probes: SortedSet,
    tokens: List[str],
    context_size: int,
    preserve_order: bool,
    min_num_contexts: int = 2,
) -> Tuple[Dict[str, Tuple[str]], SortedSet, SortedSet]:
    # get all probe contexts
    probe2contexts = SortedDict({p: [] for p in probes})
    contexts_in_order = get_sliding_windows(context_size, tokens)
    context_types = SortedSet()
    for n, context in enumerate(contexts_in_order[:-context_size]):
        next_context = contexts_in_order[n + 1]

        # todo this only works for LEFT contexts
        target = next_context[-1]
        if target == 'Monster_cookie':
            print(target)
        if target in probes:

            if preserve_order:
                probe2contexts[target].append(context)
                context_types.add(context)
            else:
                single_word_contexts = [(w, ) for w in context]
                probe2contexts[target].extend(single_word_contexts)
                context_types.update(single_word_contexts)

    # exclude entries with too few contexts
    excluded = []
    included = SortedSet()
    for probe, contexts in probe2contexts.items():
        if len(contexts) < min_num_contexts:
            excluded.append(probe)
        else:
            included.add(probe)
    for p in excluded:
        print(
            f'WARNING: Excluding "{p}" because it occurs {len(probe2contexts[p])} times'
        )
        del probe2contexts[p]

    return probe2contexts, context_types, included
Example #15
class _PortfolioSet(object):
    def __init__(self, user_id):
        self.user_id = user_id
        self._file = os.path.join(STORAGE_DIR, str(user_id))
        self._data = SortedSet()
        if os.path.exists(self._file):
            with open(self._file, 'r') as fp:
                datapoint_list = json.load(
                    fp, object_hook=_PortfolioEncoder.decode_hook)
                self._data.update(datapoint_list)

    def GetPortfolio(self, timestamp=None):
        rval = None
        try:
            if not timestamp:
                rval = self._data[-1]
            else:
                bisect_point = self._data.bisect(
                    _PortfolioAtTimestamp(0, timestamp))
                if bisect_point > 0:
                    rval = self._data[bisect_point - 1]
        except (IndexError, KeyError):
            pass

        # No portfolio at the specified time, return empty portfolio.
        if not rval:
            return _PortfolioAtTimestamp(self.user_id, timestamp
                                         or time.time())
        # We need to use copy.deepcopy here because the old data must be
        # kept intact. The timestamp is set as requested, so the copied portfolio
        # can simply be saved.
        rval = copy.deepcopy(rval)
        rval.timestamp = timestamp or time.time()
        return rval

    def AddPortfolio(self, portfolio):
        self._data.add(portfolio)

    def Save(self):
        data_to_dump = list(self._data)
        with open(self._file, 'w') as fp:
            json.dump(data_to_dump, fp, cls=_PortfolioEncoder)
Example #16
    def __work(self, w: str):
        dw, dm = self.__dw, self.__dm
        sw = str.startswith

        # collect the values of all keys in dw that start with prefix w
        lw = []
        for (k, v) in map(dw.peekitem, range(
                dw.bisect_left(w), len(self.__dw))):
            if not sw(k, w):
                break
            lw.append(v)

        # same scan over dm; entries are merged into one set ordered by .word
        lm = SortedSet(key=lambda x: x.word)
        for (k, v) in map(dm.peekitem, range(
                dm.bisect_left(w), len(self.__dm))):
            if not sw(k, w):
                break
            lm.update(v)

        lw.extend(lm)
        self.__curl = lw
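The same prefix scan in standalone form: bisect_left lands on the first key >= the prefix, and the walk stops at the first key that no longer starts with it (a minimal sketch):

from sortedcontainers import SortedDict

d = SortedDict({'car': 1, 'cart': 2, 'dog': 3})
prefix = 'car'
for i in range(d.bisect_left(prefix), len(d)):
    k, v = d.peekitem(i)
    if not k.startswith(prefix):
        break
    print(k, v)   # car 1, then cart 2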
Example #17
    def getTreeCis(ptId):
        cis = SortedSet()
        cis.add(USP.ptId_clustIdxStr[ptId][0])

        if ptId in USP.ptId_aciChdIds:
            for cids in USP.ptId_aciChdIds[ptId].values():
                for cid in cids:
                    if USP.ptId_parDep[cid] not in USP.allowedDeps:
                        continue

                    cis.update(USP.getTreeCis(cid))  # update() mutates in place and returns None

        return cis
Example #19
class History(object):
    def __init__(self, history=None, modification_history=None):
        # Dict var_name -> Timeline
        self.chunk_history = Timeline() if history is None else Timeline(
            history)
        if modification_history is None:
            self.modification_history = {}
            for c in self.chunk_history:
                for p in c.modifications:
                    if p not in self.modification_history:
                        self.modification_history[p] = Timeline()
                    self.modification_history[p].add(c)
                for p in c.dependencies:
                    if p not in self.modification_history:
                        raise Exception(
                            'Illegal sequence of operations was supplied! Referenced dependency {} does not exist at time {}'
                            .format(p, c.stamp))
                    self.modification_history[p][-1].dependents.add(c)
        else:
            self.modification_history = modification_history
        self.dirty_chunks = SortedSet()

    def __iter__(self):
        return iter(self.chunk_history)

    def __len__(self):
        return len(self.modification_history)

    def get_time_stamp(self, before=None, after=None):
        if before is not None:
            pos, succ = self.chunk_history.get_ceil(before) if type(
                before) != Chunk else self.chunk_history.get_ceil(before.stamp)
            return 0.5 * (succ.stamp + self.chunk_history[pos - 1].stamp
                          ) if pos > 0 else succ.stamp - 1
        elif after is not None:
            pos, succ = self.chunk_history.get_floor(after) if type(
                after) != Chunk else self.chunk_history.get_floor(after.stamp)
            return 0.5 * (succ.stamp +
                          self.chunk_history[pos + 1].stamp) if pos < len(
                              self.chunk_history) - 1 else succ.stamp + 1
        return self.chunk_history[-1].stamp + 1 if len(
            self.chunk_history) > 0 else 1

    @profile
    def _insert_modification(self, chunk, path):
        if path not in self.modification_history:
            self.modification_history[path] = Timeline()
        _, pred = self.modification_history[path].get_floor(chunk.stamp)
        if pred is not None:
            to_remove = set()
            for d in pred.dependents:
                # Fetch all dependents from predecessor which are going to depend on the new chunk
                # Save them as dependents and mark them as dirty
                if d.stamp > chunk.stamp:
                    dep_overlap_diff = d.dependencies.difference(
                        chunk.modifications)
                    # Is there at least one element overlap
                    if len(dep_overlap_diff) < len(d.dependencies):
                        chunk.dependents.add(d)
                        self.dirty_chunks.add(d)
                        # If there is no remaining overlap with pred anymore, remove d
                        if len(dep_overlap_diff.difference(
                                pred.modifications)) == len(dep_overlap_diff):
                            to_remove.add(d)
            pred.dependents -= to_remove
        self.modification_history[path].add(chunk)

    @profile
    def insert_chunk(self, chunk):
        for p in chunk.dependencies:
            if p not in self.modification_history:
                raise Exception(
                    'Chunk depends on attribute without history!\n Operation "{}" at {}\n Attribute: {}\n'
                    .format(chunk.operation.name, chunk.stamp, p))
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk at time {} executing "{}" depends on attributes with empty history! Attributes:\n  {}'
                    .format(
                        chunk.stamp, chunk.operation.name, '\n  '.join([
                            str(p) for p in chunk.dependencies
                            if p not in self.modification_history
                            or self.modification_history[p].get_floor(
                                chunk.stamp)[1] is None
                        ])))
            pred.dependents.add(chunk)

        for p in chunk.modifications:
            self._insert_modification(chunk, p)

        self.chunk_history.add(chunk)

    @profile
    def remove_chunk(self, chunk):
        for p in chunk.modifications:
            if self.modification_history[p][0] == chunk and len(
                    chunk.dependents) > 0 and max(
                        [p in c.dependencies for c in chunk.dependents]):
                raise Exception(
                    'Can not remove chunk at timestamp {} because it is the founding chunk in the history of {} and would create dangling dependencies.'
                    .format(chunk.stamp, p))

        for p in chunk.modifications:
            self.modification_history[p].discard(chunk)
            _, pred = self.modification_history[p].get_floor(chunk.stamp)
            # Copy dependents that depend on this variable to predecessor
            if pred is not None:
                pred.dependents.update(
                    {d
                     for d in chunk.dependents if p in d.dependencies})

        for p in chunk.dependencies:
            pos, pred = self.modification_history[p].get_floor(chunk.stamp)
            if pred is None:
                raise Exception(
                    'Chunk depends on attribute with empty history!')
            # It can happen that this chunk modifies the variable it depends on.
            # In this case it needs to be removed from the history and from
            if pred == chunk:
                pos -= 1
                pred = self.modification_history[p][pos]
            pred.dependents.discard(chunk)

        self.chunk_history.remove(chunk)
        self.dirty_chunks.update(chunk.dependents)

    @profile
    def replace_chunk(self, c_old, c_new):
        if c_old.stamp != c_new.stamp:
            raise Exception(
                'Can only replace chunk if stamps match. Stamps:\n Old: {:>8.3f}\n New: {:>8.3f}'
                .format(c_old.stamp, c_new.stamp))

        overlap = c_old.modifications.intersection(c_new.modifications)
        if len(overlap) != len(c_old.modifications):
            raise Exception(
                'Chunks can only be replaced by others with at least the same definition coverage. Missing variables:\n {}'
                .format('\n '.join(
                    sorted(c_old.modifications.difference(
                        c_new.modifications)))))

        new_deps = {
            p: self.modification_history[p].get_floor(c_new.stamp)[1]
            if p in self.modification_history else None
            for p in c_new.dependencies.difference(overlap)
        }
        if None in new_deps.values():
            raise Exception(
                'Replacement chunk at {} tries to depend on variables with insufficient histories. variables:\n {}'
                .format(c_new.stamp, '\n '.join(sorted(new_deps.keys()))))

        for p in overlap:
            pos, _ = self.modification_history[p].get_floor(c_old.stamp)
            # If we are already here, we might as well remove old and establish new deps
            if p in c_old.dependencies:
                self.modification_history[p][pos - 1].dependents.discard(c_old)
            if p in c_new.dependencies:
                self.modification_history[p][pos - 1].dependents.add(c_new)
            self.modification_history[p].remove(c_old)
            self.modification_history[p].add(c_new)

        c_new.dependents = c_old.dependents.copy()
        self.flag_dirty(*c_new.dependents)

        # Remove old, non-modified deps
        for p in c_old.dependencies.difference(overlap):
            self.modification_history[p].get_floor(
                c_old.stamp)[1].dependents.remove(c_old)

        # Insert additional modifications
        for p in c_new.modifications.difference(overlap):
            self._insert_modification(c_new, p)

        for c in new_deps.values():
            c.dependents.add(c_new)

        self.chunk_history.remove(c_old)
        self.chunk_history.add(c_new)

    def get_chunk_by_index(self, idx):
        return self.chunk_history[idx]

    def get_chunk(self, stamp):
        return self.get_chunk_pos(stamp)[0]

    def get_chunk_pos(self, stamp):
        pos, chunk = self.chunk_history.get_floor(stamp)
        return (chunk,
                pos) if chunk is None or chunk.stamp == stamp else (None, None)

    def flag_dirty(self, *chunks):
        self.dirty_chunks.update(chunks)

    def flag_clean(self, *chunks):
        for c in chunks:
            self.dirty_chunks.discard(c)

    def expand_dirty_set(self):
        active_set = set(self.dirty_chunks)
        while len(active_set) > 0:
            a = active_set.pop()
            u = a.dependents.difference(self.dirty_chunks)
            active_set.update(u)
            self.dirty_chunks.update(u)

    def get_dirty(self):
        return self.dirty_chunks.copy()

    def get_subhistory(self, time):
        if len(self.chunk_history) > 0 and self.chunk_history[0].stamp >= time:
            chunks = self.chunk_history[:self.chunk_history.get_floor(time
                                                                      )[0] + 1]
            mod_history = {
                p: Timeline(h[:h.get_floor(time)])
                for p, h in self.modification_history.items()
                if h[0].stamp >= time
            }
            return History(chunks, mod_history)
        return History()

    def get_history_of(self, *paths):
        out = set()
        remaining = set()
        for p in paths:
            if p in self.modification_history:
                remaining.update(self.modification_history[p])

        while len(remaining) > 0:
            chunk = remaining.pop()
            out.add(chunk)
            for p in chunk.dependencies:
                pos, dep = self.modification_history[p].get_floor(chunk.stamp)
                if dep == chunk:  # Catch if predecessor is chunk itself
                    dep = self.modification_history[p][pos - 1]
                if dep not in out:
                    remaining.add(dep)

        return Timeline(out)

    def str_history_of(self, p):
        if p not in self.modification_history:
            raise Exception('Path {} has no history.'.format(p))
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.modification_history[p]
        ])

    def str_history(self):
        return '\n'.join([
            '{:>8.3f} : {}'.format(chunk.stamp, str(chunk.op))
            for chunk in self.chunk_history
        ])

    def __eq__(self, other):
        if isinstance(other, History):
            return self.chunk_history == other.chunk_history
        return False
Example #20
    def in_place_stoplist(self, stoplist=None, freq=0):
        """ 
        Modifies the Corpus object in place, removing words in the stoplist
        and words of frequency <= `freq`.
        
        :param stoplist: The list of words to be removed.
        :type stoplist: list

        :param freq: A threshold where words of frequency <= 'freq' are
            removed. Default is 0.
        :type freq: integer, optional
            
        :returns: The corpus with words in the stoplist and words
            of frequency <= 'freq' removed.

        :See Also: :class:`Corpus`
        """
        from sortedcontainers import SortedSet, SortedList
        stop = SortedSet()

        if stoplist:
            for t in stoplist:
                if t in self.words_int:
                    stop.add(self.words_int[t])

        if freq:
            cfs = np.bincount(self.corpus)
            freq_stop = np.where(cfs <= freq)[0]
            stop.update(freq_stop)

        if not stop:
            # print 'Stop list is empty.'
            return self

        # print 'Removing stop words', datetime.now()
        f = np.vectorize(stop.__contains__)

        # print 'Rebuilding context data', datetime.now()
        context_data = []

        BASE = len(self.context_data) - 1

        # gathering list of new indices from narrowest tokenization
        def find_new_indexes(INTO, BASE=-1):
            locs = np.where(
                np.in1d(self.context_data[BASE]['idx'],
                        self.context_data[INTO]['idx']))[0]

            # creating a list of locations that are non-identical
            new_locs = np.array([
                loc for i, loc in enumerate(locs)
                if i + 1 == len(locs) or self.context_data[BASE]['idx'][
                    locs[i]] != self.context_data[BASE]['idx'][locs[i + 1]]
            ])

            # creating a search for locations that ARE identical
            idxs = np.insert(self.context_data[INTO]['idx'], [0, -1], [-1, -1])
            same_spots = np.where(np.equal(idxs[:-1], idxs[1:]))[0]

            # re-adding the identical locations
            really_new_locs = np.insert(new_locs, same_spots,
                                        new_locs[same_spots - 1])
            return really_new_locs

        # Calculate new base tokens
        tokens = self.view_contexts(self.context_types[BASE])
        new_corpus = []
        spans = []
        for t in tokens:
            new_t = t[np.logical_not(f(t))] if t.size else t

            # TODO: append to new_corpus as well
            spans.append(new_t.size if new_t.size else 0)
            if new_t.size:
                new_corpus.append(new_t)

        # Stopped all words from Corpus
        if not new_corpus:
            return Corpus([])

        new_base = self.context_data[BASE].copy()
        new_base['idx'] = np.cumsum(spans)

        context_data = []
        # calculate new tokenizations for every context_type
        for i in range(len(self.context_data)):
            if i == BASE:
                context_data.append(new_base)
            else:
                context = self.context_data[i].copy()
                context['idx'] = new_base['idx'][find_new_indexes(i, BASE)]
                context_data.append(context)

        del self.context_data
        self.context_data = context_data

        # print 'Rebuilding corpus and updating stop words', datetime.now()
        self.corpus = np.concatenate(new_corpus)
        #self.corpus[f(self.corpus)]
        self.stopped_words.update(self.words[stop])

        #print 'adjusting words list', datetime.now()
        new_words = np.delete(self.words, stop)

        # print 'rebuilding word dictionary', datetime.now()
        new_words_int = dict((word, i) for i, word in enumerate(new_words))
        old_to_new = dict(
            (self.words_int[word], i) for i, word in enumerate(new_words))

        #print "remapping corpus", datetime.now()
        f = np.vectorize(old_to_new.__getitem__)
        self.corpus[:] = f(self.corpus)

        #print 'storing new word dicts', datetime.now()
        self.words = new_words
        self.words_int = new_words_int

        return self
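A standalone sketch of the stop-word masking trick used above: np.vectorize over SortedSet.__contains__ yields a boolean membership mask:

import numpy as np
from sortedcontainers import SortedSet

stop = SortedSet([2, 5])
corpus = np.array([1, 2, 3, 5, 1])
f = np.vectorize(stop.__contains__)
corpus[np.logical_not(f(corpus))]   # array([1, 3, 1])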
Example #21
class ARG(object):
    '''
    Ancestral Recombination Graph
    '''
    def __init__(self):
        self.nodes = {}
        self.roots = bintrees.AVLTree()  # root indexes
        self.rec = bintrees.AVLTree()  # arg rec parents nodes
        self.coal = bintrees.AVLTree()  # arg CA parent node
        self.num_ancestral_recomb = 0
        self.num_nonancestral_recomb = 0
        self.branch_length = 0
        self.nextname = 1  # next node index
        self.available_names = SortedSet()

    def __iter__(self):
        '''iterate over nodes in the arg'''
        return iter(self.nodes)  # must return an iterator, not a list

    def __len__(self):
        '''number of nodes'''
        return len(self.nodes)

    def __getitem__(self, index):
        '''returns node by key: item'''
        return self.nodes[index]

    def __setitem__(self, index, node):
        '''adds a node to the ARG'''
        node.index = index
        self.add(node)

    def __contains__(self, index):
        '''if ARG contains node key '''
        return index in self.nodes

    def copy(self):
        '''return a copy of the ARG'''
        arg = ARG()
        for node in self.nodes.values():
            arg.nodes[node.index] = node.copy()
        # connect nodes
        for node in self.nodes.values():
            node2 = arg.__getitem__(node.index)
            if node.left_child != None:
                node2.left_child = arg.__getitem__(node.left_child.index)
                node2.right_child = arg.__getitem__(node.right_child.index)
            if node.left_parent != None:
                node2.left_parent = arg.__getitem__(node.left_parent.index)
                node2.right_parent = arg.__getitem__(node.right_parent.index)
        arg.roots = self.roots.copy()  # root indexes
        arg.rec = self.rec.copy()  # arg rec parents nodes
        arg.coal = self.coal.copy()  # arg CA parent node
        arg.num_ancestral_recomb = self.num_ancestral_recomb
        arg.num_nonancestral_recomb = self.num_nonancestral_recomb
        arg.branch_length = self.branch_length
        arg.nextname = self.nextname  # next node index
        arg.available_names = self.available_names.copy()
        return arg

    def equal(self, other):
        '''if self is equal with other (structural equality)
        TODO : complete this'''
        if self.__len__() != other.__len__():
            return False
        else:
            for node in self.nodes.values():
                if node.index not in other:
                    return False
                if not node.equal(other[node.index]):
                    return False
            return True

    def leaves(self, node=None):
        """
        Iterates over the leaves of the ARG.
        """
        if node is None:
            for node in self.nodes.values():
                if node.left_child == None:
                    yield node
        else:
            for node in self.preorder(node):
                if node.left_child == None:
                    yield node

    def preorder(self, node=None):
        """
        Iterates through nodes in preorder traversal.
        """
        visit = set()
        if node is None:
            node = self.__getitem__(self.roots.max_key())
        queue = [node]
        for node in queue:
            if node in visit:
                continue
            yield node
            visit.add(node)
            if node.left_child != None:
                queue.append(node.left_child)
                if node.left_child.index != node.right_child.index:
                    queue.append(node.right_child)

    def postorder(self, node=None):
        """
        Iterates through nodes in postorder traversal.
        """
        visit = collections.defaultdict(lambda: 0)
        queue = list(self.leaves(node))

        for node in queue:
            yield node
            if node.left_parent != None:
                visit[node.left_parent] += 1
                if node.left_parent.left_child.index != node.left_parent.right_child.index:
                    num_child = 2
                else:
                    num_child = 1
                # if all children have been visited then queue the parent
                if visit[node.left_parent] == num_child:
                    queue.append(node.left_parent)
                if node.right_parent.index != node.left_parent.index:
                    visit[node.right_parent] += 1
                    # if all children have been visited then queue the parent
                    if visit[node.right_parent] == num_child:
                        queue.append(node.right_parent)

    def set_roots(self):
        self.roots.clear()
        for node in self.nodes.values():
            if node.left_parent is None:
                self.roots[node.index] = node.index

    def get_times(self):
        '''return a sorted set of the ARG node.time'''
        times = SortedSet()
        for node in self.nodes.values():
            times.add(node.time)
        return times

    def get_higher_nodes(self, t):
        ''':return nodes.index of nodes with node.time >= t
        TODO: a more efficient search option
        '''
        return [key for key in self.nodes if self.nodes[key].time >= t]

    #==========================
    # node manipulation
    def alloc_segment(self,
                      left=None,
                      right=None,
                      node=None,
                      samples=None,
                      prev=None,
                      next=None):
        """
        alloc a new segment
        """
        s = Segment()
        s.left = left
        s.right = right
        s.node = node
        # avoid sharing a mutable default AVLTree between calls
        s.samples = bintrees.AVLTree() if samples is None else samples
        s.next = next
        s.prev = prev
        return s

    def alloc_node(self,
                   index=None,
                   time=None,
                   left_child=None,
                   right_child=None):
        """
        alloc a new Node
        """
        node = Node(index)
        node.time = time
        node.first_segment = None
        node.left_child = left_child
        node.right_child = right_child
        node.left_parent = None
        node.right_parent = None
        node.breakpoint = None
        node.snps = bintrees.AVLTree()
        return node

    def store_node(self, segment, node):
        '''store node with segments: segment'''
        x = segment
        if x is not None:
            while x.prev is not None:
                x = x.prev
            s = self.alloc_segment(x.left, x.right, node, x.samples.copy())
            node.first_segment = s
            x.node = node
            x = x.next
            while x is not None:
                s = self.alloc_segment(x.left, x.right, node, x.samples.copy(),
                                       s)
                s.prev.next = s
                x.node = node
                x = x.next
        else:  #
            node.first_segment = None
        self.nodes[node.index] = node

    def copy_node_segments(self, node):
        '''
        copy the segments of a node,
        in CA event or Rec events, we need to copy the first node
        in order to make changes on them
        '''
        x = node.first_segment
        if x is None:
            return None
        else:
            assert x.prev is None
            s = self.alloc_segment(x.left, x.right, node, x.samples.copy())
            x.node = node
            x = x.next
            while x is not None:
                s = self.alloc_segment(x.left, x.right, node, x.samples.copy(),
                                       s)
                s.prev.next = s
                x.node = node
                x = x.next
            return s

    def get_available_names(self):
        '''get free names from 0 to max(nodes)'''
        self.available_names = SortedSet()
        current_names = SortedSet(self.__iter__())
        counter = 0
        prev = current_names[0]
        while counter < len(current_names):
            if current_names[counter] != prev + 1:
                self.available_names.update(
                    range(prev + 1, current_names[counter]))
            prev = current_names[counter]
            counter += 1

    def new_name(self):
        '''returns a new name for a node'''
        if self.available_names:
            name = self.available_names.pop()
        else:
            name = self.nextname
            self.nextname += 1
        return name

    def add(self, node):
        ''' add a ready node to the ARG:
        '''
        self.nodes[node.index] = node
        return node

    def rename(self, oldindex, newindex):
        '''renames a node in the ARG'''
        node = self.nodes[oldindex]
        node.index = newindex
        del self.nodes[oldindex]
        self.nodes[newindex] = node

    def total_branch_length(self):
        '''the ARG total branch length'''
        total_material = 0
        for node in self.nodes.values():
            if node.left_parent is not None:
                age = node.left_parent.time - node.time
                seg = node.first_segment
                while seg is not None:
                    total_material += ((seg.right - seg.left) * age)
                    seg = seg.next
        return total_material

    #=======================
    #spr related

    def detach(self, node, sib):
        '''
        Detaches a specified coalescence node from the rest of the ARG
        '''
        # print("Detach()",node.index, "sib", sib.index, "p",node.left_parent.index)
        assert node.left_parent.index == node.right_parent.index
        parent = node.left_parent
        sib.left_parent = parent.left_parent
        sib.right_parent = parent.right_parent
        sib.breakpoint = parent.breakpoint
        grandparent = parent.left_parent
        if grandparent is not None:
            grandparent.update_child(parent, sib)
            grandparent = parent.right_parent
            grandparent.update_child(parent, sib)

    def reattach(self, u, v, t, new_names):
        # Reattaches node u above node v at time t; new_names is an AVLTree of
        # all new node indexes in a new ARG in MCMC
        assert t > v.time
        # assert v.left_parent == None or t < v.left_parent.time
        if u.left_parent is None:  # new_name
            new_name = self.new_name()
            new_names[new_name] = new_name
            # self.coal[new_name] = new_name # add the new CA parent to the ARG.coal
            parent = self.add(self.alloc_node(new_name))
            parent.left_child = u
            u.left_parent = parent
            u.right_parent = parent
        else:
            assert u.left_parent.index == u.right_parent.index
            parent = u.left_parent
        parent.time = t
        parent.breakpoint = v.breakpoint
        v.breakpoint = None
        parent.left_parent = v.left_parent
        grandparent = v.left_parent
        if grandparent is not None:
            grandparent.update_child(v, parent)
        parent.right_parent = v.right_parent
        grandparent = v.right_parent
        if grandparent is not None:
            grandparent.update_child(v, parent)
        v.left_parent = parent
        v.right_parent = parent
        if parent.left_child.index == u.index:
            parent.right_child = v
        else:
            parent.left_child = v
        return new_names

    def push_mutation_down(self, node, x):
        '''
        for a given node, push the mutation (at x) as far down as possible.
        Normally mutations should automatically stay at their
        lowest possible position. This might be useful for the initial ARG
        '''
        block = False
        while not block:
            node, block = node.push_snp_down(x)

    def push_all_mutations_down(self, node):
        '''push down all mutations on node as low as possible'''
        snp_keys = [k for k in node.snps]
        for x in snp_keys:
            self.push_mutation_down(node, x)
        # iter = len(node.snps)
        # i = 0
        #
        # while iter > 0:
        #     x = node.snps[i]
        #     self.push_mutation_down(node, x)
        #     iter -= 1
        #     if node.snps and len(node.snps) > i:
        #         if node.snps[i] == x:
        #             i += 1

    def find_tmrca(self, node, x):
        '''
        check the parent of node to see
        if it is mrca for site x
        '''
        if node.left_parent is None:
            block = True
            return node, block
        elif node.left_parent.index is not node.right_parent.index:
            assert node.left_parent.contains(x) + node.right_parent.contains(
                x) == 1
            block = False
            if node.left_parent.contains(x):
                return node.left_parent, block
            else:
                return node.right_parent, block
        elif node.left_parent.contains(x):
            block = False
            return node.left_parent, block
        else:  # it is mrca for x
            block = True
            return node.left_parent, block

    def tmrca(self, x):
        '''tmrca for site x
        1. start from a leaf
        2. follow the path of x until its mrca
        '''
        node = self.__getitem__(0)
        block = False
        while not block:
            node, block = self.find_tmrca(node, x)
        return node.time

    def total_tmrca(self, sequence_length):
        '''
        return the tmrca of all the sites in the ARG
        '''
        break_points = self.breakpoints(only_ancRec=True, set=True)
        break_points.add(0)
        break_points.add(sequence_length)
        tot_tmrca = np.zeros(int(sequence_length))
        count = 0
        while count < len(break_points) - 1:
            x_tmrca = self.tmrca(break_points[count])
            tot_tmrca[int(break_points[count]):int(break_points[count +
                                                                1])] = x_tmrca
            count += 1
        return tot_tmrca

    def mean_tmrca(self, sequence_length):
        '''return a value for tmrca of the ARG, which is the mean over all tmrcas'''
        break_points = self.breakpoints(only_ancRec=True, set=True)
        break_points.add(0)
        break_points.add(sequence_length)
        tmrca_list = []
        count = 0
        while count < len(break_points) - 1:
            x_tmrca = self.tmrca(break_points[count])
            tmrca_list.append(
                x_tmrca *
                (int(break_points[count + 1]) - int(break_points[count])))
            count += 1
        return np.mean(tmrca_list)

    def allele_age(self):
        ''':return a pd df with four columns:
            1. site: the genomic position of the SNP
            2. recent age: the most recent age for the allele
            3. mid age: the midpoint of node age and its parent (tree node) time
            4. latest age: the latest time (back in time) for the mutation
            The df is sorted based on site.
         '''
        #find the nodes with mutations
        snp_nodes = []  # nodes with len(snps) > 0
        for node in self.nodes.values():
            if node.snps:
                snp_nodes.append(node)
        # now for each node and find age for each mut
        age_df = pd.DataFrame(
            columns=["site", "recent age", "mid age", "latest age"])
        for node in snp_nodes:
            # num_branches = collections.defaultdict(list)
            node_time = node.time
            for x in node.snps:
                parent_age = node.tree_node_age(x, return_parent_time=True)
                age_df.loc[age_df.shape[0]] = [
                    x, node_time, (node_time + parent_age) / 2, parent_age
                ]
        age_df.sort_values(by=['site'], ascending=True, inplace=True)
        age_df.reset_index(inplace=True, drop=True)
        return age_df

    def invisible_recombs(self):
        '''return the proportion of invisible recombs '''
        invis_count = 0
        for node in self.nodes.values():
            if node.breakpoint != None and node.is_invisible_recomb():
                invis_count += 1
        return invis_count / (self.num_ancestral_recomb +
                              self.num_nonancestral_recomb)

    #@property

    def breakpoints(self, only_ancRec=False, set=True):
        '''
        :param only_ancRec: only ancestral rec with repetition
        :param set: if set, only unique positions are returned
        :param invisible: count the number of invisible recombs
        :return: either a list/set of all recombs
            or a list of anc rec that has repetition
        '''
        if set:
            br = SortedSet()
        else:
            br = SortedList()
        if not only_ancRec:
            for node in self.nodes.values():
                if node.breakpoint != None:
                    br.add(node.breakpoint)
        else:
            for node in self.nodes.values():
                if node.breakpoint != None and\
                        node.contains(node.breakpoint):#ancestral
                    br.add(node.breakpoint)
        return br

    #========== probabilites
    def log_likelihood(self, mutation_rate, data):
        '''
        log_likelihood of mutations on a given ARG up to a normalising constant
         that depends on the pattern of observed mutations, but not on the ARG
         or the mutation rate.
         Note: after SPR and before clean up we might have NAM lineages;
         this method takes this into account.
         :param m: is number of snps
         '''
        snp_nodes = []  # nodes with len(snps) > 0
        total_material = 0
        number_of_mutations = 0
        #get total matereial and nodes with snps
        for node in self.nodes.values():
            if node.first_segment != None:
                assert node.left_parent != None
                age = node.left_parent.time - node.time
                seg = node.first_segment
                assert seg.prev == None
                while seg is not None:
                    total_material += ((seg.right - seg.left) * age)
                    seg = seg.next
                if node.snps:
                    number_of_mutations += len(node.snps)
                    snp_nodes.append(node)
        self.branch_length = total_material
        # print("number_of_mutations", number_of_mutations, "m", len(data))
        assert number_of_mutations == len(data)  # num of snps
        if mutation_rate == 0:
            if number_of_mutations == 0:
                ret = 0
            else:
                ret = -float("inf")
        else:
            ret = number_of_mutations * math.log(total_material * mutation_rate) -\
                (total_material * mutation_rate)
        # now calc prob of having this particular mutation pattern
        for node in snp_nodes:
            # num_branches = collections.defaultdict(list)
            for x in node.snps:
                potential_branch_length = node.tree_node_age(x)
                ret += math.log(potential_branch_length / total_material)
            # # verify the mutation is on the correct spot
            verify_mutation_node(node, data)
        return ret

    def log_prior(self,
                  sample_size,
                  sequence_length,
                  recombination_rate,
                  Ne,
                  NAM=True,
                  new_roots=False,
                  kuhner=False):
        '''
        probability of the ARG under the coalescent with recombination.
        This is after a move and before clean up, so there might be some
        extra NAM lineages, which we ignore.
        :param NAM: non-ancestral-material node, i.e. whether NAM nodes are
            allowed. Note that after an SPR move and before the clean-up step
            there might be some NAM nodes in the ARG, which is OK; but after
            clean-up or in the initial ARG there should not be any.
        '''
        # order nodes by time
        #TODO: find an efficient way to order nodes
        ordered_nodes = [
            v for k, v in sorted(self.nodes.items(),
                                 key=lambda item: item[1].time)
        ]
        number_of_lineages = sample_size
        number_of_links = number_of_lineages * (sequence_length - 1)
        number_of_nodes = self.__len__()
        counter = sample_size
        time = 0
        ret = 0
        rec_count = 0
        coal_count = 0
        roots = bintrees.AVLTree()
        new_coal = bintrees.AVLTree()
        if kuhner:
            self.rec.clear()
        self.num_ancestral_recomb = 0
        self.num_nonancestral_recomb = 0
        while counter < number_of_nodes:
            node = ordered_nodes[counter]
            assert node.time >= time  # make sure it is ordered
            rate = (number_of_lineages * (number_of_lineages - 1) /
                    (4 * Ne)) + (number_of_links * (recombination_rate))
            # ret -= rate * (node.time - time)
            if node.left_child.index == node.right_child.index:  #rec
                assert node.left_child.first_segment != None
                assert node.left_child.left_parent.first_segment != None
                assert node.left_child.right_parent.first_segment != None
                ret -= rate * (node.time - time)
                gap = node.left_child.num_links()-\
                      (node.left_child.left_parent.num_links() +
                       node.left_child.right_parent.num_links())
                ret += math.log(recombination_rate)
                assert gap >= 1
                if gap == 1:
                    self.num_ancestral_recomb += 1
                else:
                    self.num_nonancestral_recomb += 1
                number_of_links -= gap
                number_of_lineages += 1
                if kuhner:  # add rec
                    self.rec[node.index] = node.index
                    self.rec[ordered_nodes[counter +
                                           1].index] = ordered_nodes[counter +
                                                                     1].index
                counter += 2
                time = node.time
                rec_count += 1
            elif node.left_child.first_segment != None and\
                        node.right_child.first_segment != None:
                ret -= rate * (node.time - time)
                ret -= math.log(2 * Ne)
                if node.first_segment == None:
                    node_numlink = 0
                    number_of_lineages -= 2
                    counter += 1
                    if new_roots:
                        roots[node.index] = node.index
                else:
                    node_numlink = node.num_links()
                    number_of_lineages -= 1
                    counter += 1
                lchild_numlink = node.left_child.num_links()
                rchild_numlink = node.right_child.num_links()
                number_of_links -= (lchild_numlink +
                                    rchild_numlink) - node_numlink
                time = node.time
                coal_count += 1
                if new_roots:
                    new_coal[node.index] = node.index
            else:
                counter += 1
            if not NAM:
                assert node.left_child.first_segment != None
                assert node.right_child.first_segment != None
        if new_roots:
            return ret, roots, new_coal
        else:
            return ret
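
    # Waiting-time sketch for the prior above: with k lineages and L links the
    # total event rate is k*(k-1)/(4*Ne) + L*r, so each inter-event gap dt
    # contributes -rate*dt, a coalescence adds -log(2*Ne), and a recombination
    # adds log(r). A toy evaluation (all numbers hypothetical):
    #
    #   import math
    #   k, L, Ne, r, dt = 5, 120, 1e4, 1e-8, 250.0
    #   rate = k * (k - 1) / (4 * Ne) + L * r
    #   log_gap = -rate * dt           # exponential waiting time
    #   log_coal = -math.log(2 * Ne)   # density for a coalescence event
    #   log_rec = math.log(r)          # density for a recombination event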

    def dump(self, path='.', file_name='arg.arg'):
        output = path + "/" + file_name
        with open(output, "wb") as file:
            pickle.dump(self, file)

    def load(self, path):
        with open(path, "rb") as file:
            return pickle.load(file)
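
    # Usage sketch (hypothetical paths; assumes `arg` is an instance of this
    # class and that the output directory exists):
    #
    #   arg.dump(path='.', file_name='arg.arg')
    #   restored = arg.load(path='./arg.arg')
    #
    # Note that load ignores `self` and returns whatever object was pickled.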

    def verify(self):
        '''
        verify the ARG:
        1. a node with a parent must have a segment
        2. a node with no parent must (a) be in roots and (b) have two
           distinct children
        3. node.parent_time > node.time
        4. arg name == node.index
        5. a recombination parent must have empty self.snps
        6. nodes with child = None must be leaves
        7. number of coalescences + recombinations + roots checks out
        8. seg.samples is not empty, seg.left < seg.right
        '''
        for node in self.nodes.values():
            assert self.nodes[node.index].index == node.index
            if node.left_parent is None:  #roots
                if node.first_segment is not None:
                    print("in verrify node is ", node.index)
                    self.print_state()
                assert node.first_segment == None
                assert node.index in self.roots
                assert node.breakpoint == None
                assert node.left_child.index != node.right_child.index
                assert node.right_parent == None
                assert node.index in self.coal
                assert node.time > node.left_child.time
                assert node.time > node.right_child.time
            else:  # rest
                assert node.first_segment != None
                assert node.first_segment.prev == None
                assert node.get_tail().next == None
                assert node.index not in self.roots
                assert node.left_parent.time > node.time
                if node.left_child is None:  #leaves
                    assert node.right_child is None
                    assert node.time == 0
                if node.left_parent.index != node.right_parent.index:
                    assert node.breakpoint != None
                    assert node.left_parent.left_child.index ==\
                           node.left_parent.right_child.index
                    assert node.right_parent.left_child.index ==\
                        node.right_parent.right_child.index
                    assert node.right_parent.left_child.index == node.index
                    assert not node.left_parent.snps
                    assert not node.right_parent.snps
                    assert node.left_parent.time == node.right_parent.time
                    assert node.left_parent.index in self.rec
                    assert node.right_parent.index in self.rec
                    if node.left_parent.first_segment.left > node.right_parent.first_segment.left:
                        print("in verify node", node.index)
                        print("node.left_parent", node.left_parent.index)
                        print("node.right_parent", node.right_parent.index)
                    assert node.left_parent.first_segment.left < node.right_parent.first_segment.left
                else:
                    assert node.left_parent.index in self.coal
                    assert node.left_parent.left_child.index !=\
                           node.left_parent.right_child.index
                    assert node.breakpoint == None
            if node.first_segment is not None:
                seg = node.first_segment
                assert seg.prev is None
                while seg is not None:
                    assert seg.samples
                    assert seg.left < seg.right
                    assert seg.node.index == node.index
                    seg = seg.next
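
    # The segment walk above is the recurring pattern in this class: segments
    # form a doubly linked list ordered left to right. A standalone sketch of
    # that invariant check, with a hypothetical minimal Segment type:
    #
    #   class Segment(object):
    #       def __init__(self, left, right, samples):
    #           self.left, self.right, self.samples = left, right, samples
    #           self.prev = self.next = None
    #
    #   def check_segments(first):
    #       seg = first
    #       assert seg is None or seg.prev is None
    #       while seg is not None:
    #           assert seg.samples            # non-empty sample set
    #           assert seg.left < seg.right   # non-degenerate interval
    #           seg = seg.next
    #
    #   a, b = Segment(0, 10, {0, 1}), Segment(10, 25, {1})
    #   a.next, b.prev = b, a
    #   check_segments(a)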

    def print_state(self):
        print("self.arg.coal", self.coal)
        print("self.arg.rec", self.rec)
        print("self.arg.roots", self.roots)
        print("node",
              "time",
              "left",
              "right",
              "l_chi",
              "r_chi",
              "l_par",
              "r_par",
              "l_bp",
              "snps",
              "fir_seg_sam",
              sep="\t")
        for j in self.nodes:
            node = self[j]
            if node.left_parent is not None or node.left_child is not None:
                s = node.first_segment
                if s is None:
                    print(j,
                          "%.5f" % node.time,
                          "root",
                          "root",
                          node.left_child.index,
                          node.right_child.index,
                          node.left_parent,
                          node.right_parent,
                          node.breakpoint,
                          node.snps,
                          None,
                          sep="\t")

                while s is not None:
                    l = s.left
                    r = s.right
                    if node.left_child is None:
                        print(j,
                              "%.5f" % node.time,
                              l,
                              r,
                              "Leaf",
                              "Leaf",
                              node.left_parent.index,
                              node.right_parent.index,
                              node.breakpoint,
                              node.snps,
                              s.samples,
                              sep="\t")  #
                    elif node.left_parent is None:
                        print(j,
                              "%.5f" % node.time,
                              l,
                              r,
                              node.left_child.index,
                              node.right_child.index,
                              "Root",
                              "Root",
                              node.breakpoint,
                              node.snps,
                              s.samples,
                              sep="\t")
                    else:
                        print(j,
                              "%.5f" % node.time,
                              l,
                              r,
                              node.left_child.index,
                              node.right_child.index,
                              node.left_parent.index,
                              node.right_parent.index,
                              node.breakpoint,
                              node.snps,
                              s.samples,
                              sep="\t")
                    s = s.next
Example #23
0
    def extend_function(self, sortedSet, a):
        SortedSet.update(sortedSet, a)
Example #24
0
    if state == '1:start':
        prev_len = len(available_res)
        (new_available, alloc) = take_first_resources(available_res,
                                                      job.nb_res)
        available_res = new_available
        job.resources = alloc

        if len(job.resources) != job.nb_res:
            raise Exception(
                'Invalid number of resources ({}, expected {})'.format(
                    job.resources, job.nb_res))
        if len(available_res) != prev_len - job.nb_res:
            raise Exception('Invalid number of available resources '
                            '({}, expected {})'.format(len(available_res),
                                                       prev_len - job.nb_res))
    elif state == '0:finish':
        available_res.update(job.resources)

##############
# Export CSV #
##############

writer = csv.DictWriter(args.outputCSV,
                        fieldnames=[
                            "job_id", "submission_time",
                            "requested_number_of_processors", "requested_time",
                            "success", "starting_time", "execution_time",
                            "finish_time", "waiting_time", "turnaround_time",
                            "stretch", "consumed_energy",
                            "allocated_processors"
                        ])
writer.writeheader()
Example #25
0
File: base.py Project: HANNATH/vsm
    def in_place_stoplist(self, stoplist=None, freq=0):
        """ 
        Changes a Corpus object with words in the stoplist removed and with 
        words of frequency <= `freq` removed.
        
        :param stoplist: The list of words to be removed.
        :type stoplist: list

        :param freq: A threshold where words of frequency <= 'freq' are
            removed. Default is 0.
        :type freq: integer, optional
            
        :returns: Copy of corpus with words in the stoplist and words
            of frequnecy <= 'freq' removed.

        :See Also: :class:`Corpus`
        """
        from sortedcontainers import SortedSet, SortedList
        stop = SortedSet()

        if stoplist:
            for t in stoplist:
                if t in self.words_int:
                    stop.add(self.words_int[t])

        if freq:
            cfs = np.bincount(self.corpus)
            freq_stop = np.where(cfs <= freq)[0]
            stop.update(freq_stop)


        if not stop:
            # print 'Stop list is empty.'
            return self
    
        # print 'Removing stop words', datetime.now()
        f = np.vectorize(stop.__contains__)

        # print 'Rebuilding context data', datetime.now()
        context_data = []

        BASE = len(self.context_data) - 1
        # gathering list of new indices from narrowest tokenization
        def find_new_indexes(INTO, BASE=-1):
            locs = np.where(np.in1d(self.context_data[BASE]['idx'], self.context_data[INTO]['idx']))[0]

            # creating a list of locations that are non-identical
            new_locs = np.array([loc for i, loc in enumerate(locs)
                                     if i+1 == len(locs) or self.context_data[BASE]['idx'][locs[i]] != self.context_data[BASE]['idx'][locs[i+1]]])

            # creating a search for locations that ARE identical
            idxs = np.insert(self.context_data[INTO]['idx'], [0,-1], [-1,-1])
            same_spots = np.where(np.equal(idxs[:-1], idxs[1:]))[0]

            # re-adding the identical locations
            really_new_locs = np.insert(new_locs, same_spots, new_locs[same_spots-1])
            return really_new_locs

        # Calculate new base tokens
        tokens = self.view_contexts(self.context_types[BASE])
        new_corpus = []
        spans = []
        for t in tokens:
            new_t = t[np.logical_not(f(t))] if t.size else t
            
            # TODO: append to new_corpus as well
            spans.append(new_t.size if new_t.size else 0)
            if new_t.size:
                new_corpus.append(new_t)

        # Stopped all words from Corpus
        if not new_corpus:
            return Corpus([])

        new_base = self.context_data[BASE].copy()
        new_base['idx'] = np.cumsum(spans)

        context_data = []
        # calculate new tokenizations for every context_type
        for i in range(len(self.context_data)):
            if i == BASE:
                context_data.append(new_base)
            else:
                context = self.context_data[i].copy()
                context['idx'] = new_base['idx'][find_new_indexes(i, BASE)]
                context_data.append(context)

        del self.context_data
        self.context_data = context_data

        # print 'Rebuilding corpus and updating stop words', datetime.now()
        self.corpus = np.concatenate(new_corpus)
        #self.corpus[f(self.corpus)]
        self.stopped_words.update(self.words[stop])

        #print 'adjusting words list', datetime.now()
        new_words = np.delete(self.words, stop)

        # print 'rebuilding word dictionary', datetime.now()
        new_words_int = dict((word, i) for i, word in enumerate(new_words))
        old_to_new = dict((self.words_int[word], i) for i, word in enumerate(new_words))

        #print "remapping corpus", datetime.now()
        f = np.vectorize(old_to_new.__getitem__)
        self.corpus[:] = f(self.corpus)

        #print 'storing new word dicts', datetime.now()
        self.words = new_words
        self.words_int = new_words_int

        return self
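
A quick standalone sketch of the masking idiom used above: np.vectorize over
set.__contains__ turns membership into a boolean mask (toy data; today
np.isin(corpus, list(stop), invert=True) would be the more direct spelling):

import numpy as np
from sortedcontainers import SortedSet

stop = SortedSet([2, 5])
corpus = np.array([0, 2, 3, 5, 5, 1])
f = np.vectorize(stop.__contains__)
kept = corpus[np.logical_not(f(corpus))]
print(kept)  # [0 3 1]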
Example #26
0
class Crawler:
    def __init__(self, platform):
        # setup configuration
        self.config = Config()
        term = self.config.types[platform]['term']
        extension = self.config.types[platform]['extension']

        self.language = self.config.types[platform]['language']
        self.platform = self.config.types[platform]['platform']

        # setup request handler
        self.requester = RequestHandler(term, extension)

        # setup data handler
        self.data = DataHandler()

        # configure crawler specifics
        self.size_range = SortedSet()
        self.size_range.update([0, 384001])  # stick to GitHub size restrictions
        self.initial_items = []
        print "Started GitHub crawler at {}".format(asctime(localtime(time())))

    def crawl(self):
        total_count = self.requester.get_total_count()
        target_count = total_count
        print "Crawler found {} items to store and fetch".format(total_count)
        item_count = 0

        current_item = 0
        next_item = 1

        start_time = int(time())

        # sort items differently to get more items
        # order_state 0 = default ordering (best match according to "score")
        # order_state 1 = last indexed
        # order_state 2 = first indexed
        order_state = 1

        # GitHub only provides 1000 items per request
        while item_count < total_count:
            print "Crawler looks in range {} to {} Byte".format(
                self.size_range[current_item],
                (self.size_range[next_item] - 1),
            )

            # We might get everything from just one request
            if (len(self.size_range) == 2) and (total_count < 1000):
                # excluding the lower and upper bound will use the items we got from our initial request
                lower = None
                upper = None
            # in case we need more than one request
            else:
                lower = self.size_range[current_item]
                upper = self.size_range[next_item]
                print "Setting lower and upper bound to {} and {}".format(lower, upper)

            # get items, request item count and incomplete status
            items, this_item_count, incomplete_items = self.requester.get_items(lower, upper, target_count, order_state)

            # update item count
            item_count += this_item_count
            print "Crawler got {} out of {} items".format(this_item_count, target_count)

            # write the items we got in this request to the DB
            new_items = 0
            updated_items = 0
            for item in items:
                self.data.update_owner_table(item)
                self.data.update_repository_table(item)
                local_path, download_url, content = self.requester.store_locally(
                        item["url"],
                        item["repository"]["id"],
                        item["path"]
                )
                new, updated = self.data.update_code_table(
                        item=item,
                        language=self.language,
                        platform=self.platform,
                        local_path=local_path,
                        download_url=download_url,
                        content=content,
                )
                if (new or updated) == 1:
                    self.requester.download(local_path, download_url)
                new_items += new
                updated_items += updated

            # update target count for new items
            target_count -= (new_items + updated_items)

            print "Crawler stored {} new items and updated {} items in the database".format(new_items, updated_items)

            # in case our results are incomplete or we have more than 1000 items we need to narrow down our search field
            if (incomplete_items or (this_item_count > 1000)) and ((next_item + 1) == len(self.size_range)):
                # get items with different ordering
                if order_state == 0:
                    order_state = 1
                elif order_state == 1:
                    order_state = 2
                elif order_state == 2:
                    new_boundaries = []
                    for i in xrange(len(self.size_range) - 1):
                        new_boundaries.append(int((self.size_range[i] + self.size_range[i + 1]) / 2) + 1)
                    self.size_range.update(new_boundaries)  # include the new boundary into our sorted list
                    print "Crawler introduced new boundaries: {}".format(self.size_range)

                    current_item = 0
                    next_item = 1

                    order_state = 1
            # jump to the next search area until we are at the end
            elif (next_item + 1) < len(self.size_range):
                current_item += 1
                next_item += 1

            timeout = start_time < (int(time()) - 60 * 60 * 8)  # 8-hour timeout

            if (target_count == 0) or timeout:
                print "Crawler is finished"
                if timeout:
                    print "Timeout after 8 hours"
                break
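
The loop above works around GitHub's 1,000-results-per-query cap by keeping the
size boundaries in a SortedSet and bisecting every window that still returns
too many items. A minimal sketch of that subdivision step, in Python 3 (the
boundary values are the crawler's own GitHub size limits; the rest is
illustrative):

from sortedcontainers import SortedSet

boundaries = SortedSet([0, 384001])

def split_ranges(boundaries):
    # insert a midpoint into every current [lo, hi) window
    mids = [(boundaries[i] + boundaries[i + 1]) // 2 + 1
            for i in range(len(boundaries) - 1)]
    boundaries.update(mids)

split_ranges(boundaries)
print(list(boundaries))  # [0, 192001, 384001]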
Example #27
0
def test_update():
    temp = SortedSet(range(0, 80))
    temp._reset(7)
    temp.update(range(80, 90), range(90, 100))
    assert all(temp[val] == val for val in range(100))
Example #28
0
class RequestHandler:
    def __init__(self, term, extension):
        # setup configuration
        self.config = Config()

        # setup GitHub OAuth
        self.auth = HTTPBasicAuth(self.config.github['user'],
                                  self.config.github['token'])

        # configure crawler specifics
        self.github_url = 'https://api.github.com/search/code?q='  # use the GitHub search API
        self.query = '{}+extension:{}'.format(
            term,
            extension)  # search for contract in files with extension .sol
        self.sort = '&sort='
        self.order = '&order='
        self.size_range = SortedSet()
        self.size_range.update([0,
                                384001])  # stick to GitHub size restrictions
        self.initial_items = []

    def rate_limit(self, request):
        limit = requests.get('https://api.github.com/rate_limit',
                             auth=self.auth)
        limit_json = limit.json()

        if request == 'search':
            remaining_search = limit_json["resources"]["search"]["remaining"]
            reset_time = limit_json["resources"]["search"]["reset"]

            if remaining_search == 0:
                # wait until we can do search requests again
                sleep_time = reset_time - int(time())
                print "Search limit reached. Waiting {} seconds".format(
                    sleep_time)
                sleep(sleep_time)
        elif request == 'core':
            remaining_download = limit_json["resources"]["core"]["remaining"]
            reset_time = limit_json["resources"]["core"]["reset"]

            if remaining_download == 0:
                # wait until we can do search requests again
                sleep_time = reset_time - int(time())
                print "Core limit is reached. Waiting {} seconds".format(
                    sleep_time)
                sleep(sleep_time)

    def search_github(self, lower, upper, order_state):
        self.rate_limit(request='search')
        if isinstance(lower, int) and isinstance(upper, int) and isinstance(
                order_state, int):
            base_url = self.github_url + self.query + "+size:>{}+size:<{}+size:{}".format(
                lower, upper, upper)
            if order_state == 1:
                url = base_url + self.sort + "indexed" + self.order + "desc"
            elif order_state == 2:
                url = base_url + self.sort + "indexed" + self.order + "asc"
            else:
                url = base_url

            print "Get contracts from {}".format(url)
            response = requests.get(url, auth=self.auth)
        else:
            response = requests.get(self.github_url + self.query,
                                    auth=self.auth)

        if response.status_code == 200:
            result = response.json()
        else:
            print "No valid GitHub credentials found."
            result = None

        return result

    def get_total_count(self):
        incomplete_results = True
        result = dict()

        # Get total number of files that contain search term
        while incomplete_results:
            print "Get total number of contracts from {}".format(
                self.github_url + self.query)
            try:
                result = self.search_github(None, None, None)
                incomplete_results = result["incomplete_results"]
            except TypeError:
                print "Could not search GitHub"
                break

        # in case we have fewer than 1000 results, store this to limit API calls
        self.initial_items = result["items"]
        total_count = result["total_count"]

        return total_count

    def get_items(self, lower, upper, target_count, order_state):
        items = self.initial_items
        this_item_count = len(items)
        incomplete_items = False

        try:
            result = self.search_github(lower, upper, order_state)
            items = result["items"]
            this_item_count = len(items)
            incomplete_items = True if (
                this_item_count < target_count) else False
        except TypeError:
            print "Could not search GitHub"

        return items, this_item_count, incomplete_items

    def get_download_url_content(self, url):
        self.rate_limit(request='core')

        # GitHub only gives you the download url when you request it for each file
        response = requests.get(url, auth=self.auth)
        if response.status_code == 200:
            result = response.json()
            download_url = result["download_url"]
            # This is the hash for the complete file line by line
            content_full = result["content"]
            # We want just one hash for the whole file for faster comparison of changes
            content = hashlib.md5(content_full).hexdigest()
        else:
            print "No valid GitHub credentials found."
            download_url = None
            content = None

        return download_url, content

    def store_locally(self, url, repository_id, remote_path):
        # get download url
        download_url, content = self.get_download_url_content(url)

        # create folder structure
        current_path = path.dirname(path.abspath(__file__))
        file_path = '{}/code-folder/{}/{}'.format(current_path, repository_id,
                                                  remote_path)
        local_path = file_path.rpartition("/")[0]

        if not path.exists(local_path):
            makedirs(local_path)

        return file_path, download_url, content

    def download(self, file_path, download_url):
        self.rate_limit(request='core')

        print "Downloading {}".format(file_path)
        response = requests.get(download_url, auth=self.auth)
        with open(file_path, 'wb') as out_file:
            out_file.write(response.content)
        del response
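
The rate_limit helper above sleeps until the API window resets. The same
computation in isolation (Python 3 here for brevity; the max() guard is an
addition, since the reset timestamp can already be in the past):

from time import time, sleep

def wait_for_reset(remaining, reset_time):
    # sleep until the rate-limit window resets
    if remaining == 0:
        sleep_time = max(0, reset_time - int(time()))
        print("Limit reached. Waiting {} seconds".format(sleep_time))
        sleep(sleep_time)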
Example #29
0
def stress_issubset(sst):
    that = SortedSet(sst)
    that.update(range(1000))
    assert sst.issubset(that)
Example #30
0
    def _calcMultiGeneSNPcorr(self, cr, genes, REF, wAlleles=True):

        filtered = {}

        use = []
        RID = []
        pos = []

        for gene in genes:
            DATA = []
            if self._joint and self._MAP is not None:
                G = self._GENEID[gene]
                P = SortedSet(REF[str(cr)][1].irange(G[1] - self._window,
                                                     G[2] + self._window))

                if gene in self._MAP:
                    P.update(
                        list(REF[str(cr)][0].getSNPsPos(self._MAP[gene][0])))
                    #P = list(set(P))

            elif self._MAP is None:
                G = self._GENEID[gene]

                P = REF[str(cr)][1].irange(G[1] - self._window,
                                           G[2] + self._window)
            else:
                if gene in self._MAP:
                    P = set(REF[str(cr)][0].getSNPsPos(self._MAP[gene][0]))
                else:
                    P = []

            DATA = REF[str(cr)][0].get(list(P))

            # Sort out
            for D in DATA:
                # Select
                if D[0] in self._GWAS and D[1] > self._MAF and (
                        D[0] not in filtered or filtered[D[0]][0] < D[1]) and (
                            not wAlleles or
                            (self._GWAS_alleles[D[0]][0] == D[3]
                             and self._GWAS_alleles[D[0]][1] == D[4])):
                    filtered[D[0]] = [D[1], D[2]]
                    #use.append(D[2])
                    #RID.append(s)

            pos.append(len(filtered))

        # Calc corr
        RID = list(filtered.keys())
        use = []
        for i in range(0, len(RID)):
            use.append(filtered[RID[i]][1])

        use = np.array(use)

        if len(use) > 1:
            if self._useGPU:
                C = cp.asnumpy(cp.corrcoef(cp.asarray(use)))
            else:
                C = np.corrcoef(use)
        else:
            C = np.ones((1, 1))

        return C, np.array(RID), pos
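
The SortedSet.irange calls above select all SNP positions inside a window
around a gene body. The same lookup on toy positions (window and coordinates
are made up):

from sortedcontainers import SortedSet

positions = SortedSet([101, 250, 980, 1500, 2200])
gene_start, gene_end, window = 900, 1600, 100

# all positions within `window` of the gene body, inclusive on both ends
hits = list(positions.irange(gene_start - window, gene_end + window))
print(hits)  # [980, 1500]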
def jogar(lista):
    global jogos, universo, valortotal, valorporjogador

    print
    print 'user game list:', lista
    print

    # iterate over the list of tuples
    for tupla in lista:

        # parse the script arguments, turning each string into a list,
        # appending the third element (shares) if the user did not pass it,
        # and unpacking into three variables
        t = list(literal_eval(tupla))
        if (len(t) < 3): t.append(1)
        print t
        n, d, c = t

        # Mega-Sena accepts at most 15 numbers per game
        if d > 15:
            print 'the number of picks cannot be greater than 15'
            print
            sys.exit()

        # compute prices
        ci, ct = preco(n, d)
        cb = bolao(ct, c)
        valortotal += ct

        print 'the pool has', len(universo), 'numbers available'
        print 'generating', n, 'game(s) of', d, 'numbers split into', c, 'share(s)'
        print 'cost', 'individual:', ci, '| total:', ct, '| pool share:', cb
        print

        # iterate over the requested number of games
        for j in range(0, n):

            # if the pool does not have enough numbers for a full game,
            # use the ones we still have,
            # reset the pool to the numbers 1 to 60,
            # and fill the missing picks with a random sample
            if len(universo) < d:
                print 'the pool has', len(
                    universo), 'numbers available but we need', d
                print 'adding numbers to the pool'
                print

                jogo = SortedSet(universo)
                universo = range(1, 61)

                while len(jogo) < d:
                    jogo.update(random.sample(universo, (d - len(jogo))))

            # if the pool has enough numbers, take a random sample
            else:
                jogo = SortedSet(random.sample(universo, d))

            # remove the used numbers from the pool
            universo = [e for e in universo if e not in jogo]

            # append the game to the list of games
            jogos.append(jogo)

    valorporjogador = valortotal / c
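
A Python 3 sketch of the same sampling-without-replacement idea on its own
(names and sizes are illustrative):

import random
from sortedcontainers import SortedSet

pool = list(range(1, 61))
game = SortedSet(random.sample(pool, 6))   # one sorted 6-number game
pool = [n for n in pool if n not in game]  # used numbers leave the pool
print(list(game), len(pool))               # 6 picks, 54 numbers left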
Example #34
0
    def update_keys(self, x):
        global X
        temp_lines = SortedSet()
        temp_lines.update(self.lines)
        self.lines = temp_lines
Example #35
0
class SchedSimulator(object):
    def __init__(self, app_list, pe_list):
        self.app_list = app_list
        # self.app_list = sorted(app_list, key=lambda _app: _app.priority)
        self.layer_list = [l for _app in app_list for l in _app.layer_list]
        self.layer_set = set(self.layer_list)
        self.gene2fit = {}

        # FIXME
        self.single_mode = False
        first_node = app_list[0].layer_list[0]
        if len(app_list) == 1 and first_node.get_index() != 0:
            self.single_mode = True

        self.num_layer = len(self.layer_list)
        self.throughput_thresh = 100

        self.draw_iteration = 1

        self.num_pe = len(pe_list)

        self.prio_step = len(self.layer_list)  # Total number of layers
        self.pe_list = pe_list

        # variables for CPU utilization constraint
        self.elapsed_time_per_pe = [0] * self.num_pe

        self._ready_queues = [PriorityQueue() for _ in range(self.num_pe)]
        self._rq_set = set()

    def _init_ready_queue(self):
        self._ready_queues = [PriorityQueue() for _ in range(self.num_pe)]
        self._rq_set = set()

    def _init_all_apps(self):
        # application initialization
        #   => Edge, Layer initialization
        for app in self.app_list:
            app.do_init()

    def do_init(self):
        self.iteration = [0] * self.num_layer
        self.response_time = [0.0] * len(self.app_list)

        self.pe_start_time = [-1 for _ in range(self.num_pe)]
        self.pe_end_time = [[-1] for _ in range(self.num_pe)]

        self._init_ready_queue()
        self._init_all_apps()

        self.timeline = SortedSet()  # FIXME: needs a better name (next_sim_time?)
        offset_set = set()
        for l in self.layer_list:
            if l.offset >= 0:
                offset_set.add(l.offset)
        self.timeline.update(list(offset_set))
        self.occupy_times = [0] * self.num_pe

    def find_runnable_layers(self, t):
        runnable_layers = []
        layer_list = self.layer_set - self._rq_set
        for app in self.app_list:
            for l in app.layer_list:
                if l.need_in_edge_check and l.need_out_edge_check \
                        and l.offset <= t and app.check_runnable(l, t):
                    runnable_layers.append(l)

        return runnable_layers

    def _enqueue(self, t):
        runnable_layers = self.find_runnable_layers(t)
        for l in runnable_layers:
            if self.single_mode:
                idx = l.get_app_index()
            else:
                idx = l.get_index()
            pe = l.pe.get_idx()
            # prio = l.get_priority() + l.iteration * self.prio_step
            prio = l.get_app_priority() * self.prio_step + l.iteration
            # print("Queue " + str(pe) + " insert : " + l.name + " prio : " + str(prio))
            self._ready_queues[pe].insert(prio, l)
            self._rq_set.add(l)

    def _set_pe_time(self, pe, iteration, start_time, end_time):
        if self.pe_start_time[pe] == -1:
            self.pe_start_time[pe] = start_time
        try:
            if self.pe_end_time[pe][iteration] < end_time:
                self.pe_end_time[pe][iteration] = end_time
        except IndexError:
            assert iteration == len(self.pe_end_time[pe])
            self.pe_end_time[pe].append(end_time)

    @staticmethod
    def _get_csts_and_objs(fitness, mapping):
        objs = []
        csts = []
        for idx, cst in enumerate(fitness.csts):
            if cst is not None:
                cst_value = cst.constraint_function(mapping)
                csts.append(cst_value)
            else:
                csts.append((0, ))

        for idx, obj in enumerate(fitness.objs):
            obj_value = obj.objective_function(mapping)
            objs.append(obj_value)
        return csts, objs

    def _draw_gantt(self, gantt, gantt_name, mapping, fitness):
        csts, objs = SchedSimulator._get_csts_and_objs(fitness, mapping)
        available_results = True
        for idx, (cst, value) in enumerate(zip(fitness.csts, csts)):
            if value[0] != 0:
                available_results = False
                break
        config.available_results = available_results

        if available_results:
            print("\nPE Mapping per layer: " + str(mapping))

            if len(objs) > config.num_of_app:
                print("\n\t[ Whole Objective ]")
            for idx, value in enumerate(objs):
                if idx >= config.num_of_app:
                    print(
                        "\t\tObjective function value [by Energy Consumption] :\t %d"
                        % value[0])

            if len(csts) > config.num_of_app:
                print("\n\t[ Whole Constraint ]")
            for idx, (cst, value) in enumerate(zip(fitness.csts, csts)):
                if idx >= config.num_of_app:
                    print(
                        "\t\tConstraint function value [by %s] :\t %.2f -> %.2f"
                        % (type(cst).__name__, value[-1], value[0]))

            objs_result = []
            for idx, app in enumerate(self.app_list):
                print("\n\t[ %s (Period: %d, Priority: %d) ]" %
                      (app.name, app.get_period(), app.get_priority()))
                print("\t\tObjective function value [by %s]:\t%.2f" %
                      (config.app_to_obj_dict[idx], objs[idx][0]))
                if config.app_to_cst_dict[idx] != 'None':
                    print("\t\tConstraint function value [by %s]:\t%.2f" %
                          (config.app_to_cst_dict[idx], csts[idx][-1]))
                config.objs_result_by_app[idx].append(round(objs[idx][0], 2))
                objs_result.append(round(objs[idx][0], 2))

            config.file_name = "{}{}_{}_{}_{}_{}_{}_{}_{}_{}_{}".format(
                config.save_path + "/" + config.name, str(config.sched_method),
                str(config.hyper_parameter), str(config.processor),
                str(config.priority), str(config.period),
                str(config.cpu_config), str(config.objs), str(objs_result),
                str(config.csts), config.analyzer)
            gantt.file_name = config.file_name + "#{}.png".format(
                config.gantt_chart_idx)
            gantt.draw_gantt_chart()

    def _pop_and_get_layer_info(self, q):
        _, l = q.pop()  # pop layer from _ready_queues
        self._rq_set.remove(l)
        if self.single_mode:
            layer_idx = l.get_app_index()
        else:
            layer_idx = l.get_index()
        pe = l.pe.get_idx()
        app = l.get_app()
        return l, layer_idx, pe, app

    def _update_timeline(self, l, time):
        timeline = self.timeline
        if l.offset >= 0:
            # print("ID: {} Name: {} Time: {} {} update".format(id(l), l.name, l.offset, l.offset + l.get_period()))
            # timeline.add(l.offset)
            # timeline.add((l.iteration + 1) * l.get_period())
            timeline.add(l.get_period() + l.offset)
            l.set_offset(l.get_period() + l.offset)
            # print l.get_period()
        timeline.add(time)

    def do_simulation(self,
                      mapping,
                      iterations=(0, 1),
                      draw_gantt=False,
                      gantt_name="test.png",
                      fitness=None):
        if draw_gantt:
            pe_names = [pe.name for pe in self.pe_list]
            gantt = GanttChart(gantt_name, pe_names)

        sim_iteration = iterations[0]
        end_iteration = iterations[1]
        timeline = self.timeline
        occupy_times = self.occupy_times
        sched = Schedule(self.num_pe)
        # Start scheduling simulation
        while sim_iteration < end_iteration:
            t = timeline.pop(0)
            self._enqueue(t)

            # Check every PE's ready_queue(Priority queue)
            for pe_idx, q in enumerate(self._ready_queues):
                if occupy_times[pe_idx] > t or q.size() == 0:
                    continue

                l, layer_idx, pe, app = self._pop_and_get_layer_info(q)

                execution_time, transition_time, transition_time_list = app.do_layer(
                    l, pe, t)
                end_time = t + execution_time
                occupy_times[pe_idx] = end_time + transition_time

                # Update iteration's end time
                self._set_pe_time(pe, l.iteration, t, occupy_times[pe_idx])

                self.iteration[layer_idx] = self.iteration[layer_idx] + 1

                # XXX: Fix for Gantt chart bug (SqueezeNet transition time issue)
                # XXX: Option 4 chosen.
                # 1. Original
                # self._update_timeline(l, occupy_times[pe_idx])

                # 2. was only possible in single app scheduling
                # self._update_timeline(l, occupy_times[pe_idx])
                # for time in transition_time_list:
                #     self._update_timeline(l, time)

                # 3. multiple app scheduling possible, but '[]' happens in transition_time_list
                # for time in transition_time_list:
                #     self._update_timeline(l, time)

                # 4. Final implementation
                if transition_time_list == []:
                    self._update_timeline(l, occupy_times[pe_idx])
                else:
                    for time in transition_time_list:
                        self._update_timeline(l, time)

                l.increase_iter()

                if l.iteration <= 1:
                    time_tuple = (pe, l, t, end_time, transition_time)
                    sched.add_sched(time_tuple)

                # FIXME What is second condition?
                if draw_gantt and occupy_times[
                        pe_idx] != t and l.iteration <= self.draw_iteration:
                    time_tuple = (l.get_name(), self.pe_list[pe].name, t,
                                  end_time, transition_time)
                    gantt.add_task(time_tuple)

                self.elapsed_time_per_pe[pe_idx] += (end_time - t)

                if l.is_end_node and l.iteration == 1:
                    self.response_time[self.app_list.index(app)] = end_time

            inc_sim_iteration = True
            for n in self.iteration:
                if n < end_iteration:
                    inc_sim_iteration = False
            if inc_sim_iteration:
                sim_iteration += 1

        if draw_gantt:
            self._draw_gantt(gantt, gantt_name, mapping, fitness)

        return sched

    def get_response_time(self, app):
        return self.response_time[self.app_list.index(app)]
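
The timeline above is a SortedSet used as an event queue: wakeup times are
added as they are discovered, duplicates collapse automatically, and pop(0)
always yields the earliest pending simulation time. In isolation (toy times):

from sortedcontainers import SortedSet

timeline = SortedSet([0.0])
timeline.add(5.0)
timeline.add(5.0)   # duplicate wakeup collapses into one entry
timeline.add(2.5)

while timeline:
    t = timeline.pop(0)   # earliest pending time
    print("sim time", t)  # 0.0, then 2.5, then 5.0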
Example #36
0
from sortedcontainers import SortedSet

# startNodes, visited, allNodes, prereqs and graph are assumed to have been
# built from the input earlier in the script.
# All nodes adjacent to our current subgraph
adjacents = SortedSet(startNodes)

answer = []
startNode = adjacents[0]
visited.add(startNode)

while visited != allNodes:
    # The next node we visit is the first in the alphabet whose prereqs have
    # all been visited. We represent this by checking each candidate node's
    # prereqs and seeing if it is a subset of the visited set.
    idx = 0
    while not (SortedSet(prereqs[adjacents[idx]]) <= visited):
        idx += 1

    nextNode = adjacents.pop(idx)
    visited.add(nextNode)

    adjacents.update(graph[nextNode])
    answer.append(nextNode)

answer = ''.join(answer)

with open('part1output.txt', 'w') as answerFile:
    answerFile.write(answer)

assert len(answer) == len(visited)

print(answer)
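
A runnable toy instance of the same lexicographic topological sort (the graph
below is made up; C unlocks A and F, and so on):

from sortedcontainers import SortedSet

graph = {'C': ['A', 'F'], 'A': ['B', 'D'], 'B': ['E'],
         'D': ['E'], 'F': ['E'], 'E': []}
prereqs = {'A': ['C'], 'B': ['A'], 'C': [], 'D': ['A'],
           'E': ['B', 'D', 'F'], 'F': ['C']}
allNodes = set(graph)
startNodes = [n for n in graph if not prereqs[n]]

visited = set()
adjacents = SortedSet(startNodes)
answer = []
while visited != allNodes:
    idx = 0
    while not (SortedSet(prereqs[adjacents[idx]]) <= visited):
        idx += 1
    nextNode = adjacents.pop(idx)
    visited.add(nextNode)
    adjacents.update(graph[nextNode])
    answer.append(nextNode)

print(''.join(answer))  # CABDFE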