def test_clear():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert len(temp) == 26
    assert list(temp.items()) == mapping
    temp.clear()
    assert len(temp) == 0
Example #2
 def __init__(self, my_book):
     super().__init__(None)
     # Save reference to the book
     self.my_book = my_book
     # Save reference to the metamanager
     self.metamgr = my_book.get_meta_manager()
     # Save reference to the edited document
     self.document = my_book.get_edit_model()
     # Save reference to a speller, which will be the default
     # at this point.
     self.speller = my_book.get_speller()
     # The vocabulary list as a sorted dict.
     self.vocab = SortedDict()
     # Key and Values views on the vocab list for indexing by table row.
     self.vocab_kview = self.vocab.keys()
     self.vocab_vview = self.vocab.values()
     # The count of available words based on the latest sort
     self.active_word_count = 0
     # The good- and bad-words sets and the scannos set.
     self.good_words = set()
     self.bad_words = set()
     self.scannos = set()
     # A dict of words that use an alt-dict tag. The key is a word and the
     # value is the alt-dict tag string.
     self.alt_tags = SortedDict()
     # Cached sort vectors, see get_sort_vector()
     self.sort_up_vectors = [None, None, None]
     self.sort_down_vectors = [None, None, None]
     self.sort_key_funcs = [None, None, None]
     # Register metadata readers and writers.
     self.metamgr.register(C.MD_GW, self.good_read, self.good_save)
     self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save)
     self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save)
     self.metamgr.register(C.MD_VL, self.word_read, self.word_save)
Example #3
    def predict(self, X):
        y = np.zeros(len(X))
        for i, x in enumerate(X): # test points
            sd = SortedDict() # distance -> class
            for j, xt in enumerate(self.X): # training points
                d = np.linalg.norm(x - xt)
                if len(sd) < self.k:
                    sd[d] = self.y[j]
                else:
                    last = sd.keys()[-1]  # largest distance currently kept
                    if d < last:
                        del sd[last]
                        sd[d] = self.y[j]

            # vote
            votes = {}
            for v in sd.values():
                votes[v] = votes.get(v, 0) + 1
            max_votes = 0
            max_votes_class = -1
            for v, count in votes.items():
                if count > max_votes:
                    max_votes = count
                    max_votes_class = v
            y[i] = max_votes_class
        return y
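The loop above keeps only the k nearest neighbours seen so far by evicting the largest key. A minimal standalone sketch of that bounded-SortedDict trick, with toy data and assuming distinct distances (equal distances would overwrite an entry):

from sortedcontainers import SortedDict

def k_smallest(distances, labels, k):
    sd = SortedDict()  # distance -> label, capped at k entries
    for d, lab in zip(distances, labels):
        if len(sd) < k:
            sd[d] = lab
        elif d < sd.keys()[-1]:
            del sd[sd.keys()[-1]]  # evict the current largest distance
            sd[d] = lab
    return list(sd.values())

print(k_smallest([3.0, 1.0, 2.5, 0.7], ['a', 'b', 'a', 'b'], 2))  # ['b', 'b']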
Example #4
 def get_sort_vector( self, col, order, key_func = None, filter_func = None ) :
     if filter_func : # is not None,
         # create a sort vector from scratch, filtered
         getter_func = self._make_key_getter( col )
         sorted_dict = SortedDict( key_func )
         for j in range( len( self.vocab ) ) :
             if filter_func( self.vocab_kview[j], self.vocab_vview[j][1] ) :
                 k = getter_func( j )
                 sorted_dict[ k ] = j
         vector = sorted_dict.values()
         if order != Qt.AscendingOrder :
             vector = [j for j in reversed( vector ) ]
     else : # no filter_func, try to reuse a cached vector
         vector = self.sort_up_vectors[ col ]
         if not vector or key_func is not self.sort_key_funcs[ col ] :
             # there is no ascending vector for this column, or there
             # is one but it was made with a different key_func.
             getter_func = self._make_key_getter( col )
             sorted_dict = SortedDict( key_func )
             for j in range( len( self.vocab ) ) :
                 k = getter_func( j )
                 sorted_dict[ k ] = j
             vector = self.sort_up_vectors[ col ] = sorted_dict.values()
             self.sort_key_funcs[ col ] = key_func
         if order != Qt.AscendingOrder :
             # what is wanted is a descending order vector, do we have one?
             if self.sort_down_vectors[ col ] is None :
                 # no, so create one from the asc. vector we now have
                 self.sort_down_vectors[ col ] = [ j for j in reversed( vector ) ]
             # yes we do (now)
             vector = self.sort_down_vectors[ col ]
     # one way or another, vector is a sort vector
     # note the actual word count available through that vector
     self.active_word_count = len(vector)
     return vector
Example #5
File: testFormat.py Project: NLeSC/ShiCo
 def setUpClass(self):
     self._vocab = SortedDict({
         '1950_1959': [('w1', 1.0), ('w2', 1.0)],
         '1951_1960': [('w3', 1.0), ('w4', 1.0)],
         '1952_1961': [('w5', 1.0), ('w6', 1.0)],
         '1953_1962': [('w7', 1.0), ('w8', 1.0)]
     })
     self._links = SortedDict({
         '1950_1959': {'w1': [('w1', 0.0), ('w2', 1.0)]},
         '1951_1960': {'w3': [('w3', 0.0), ('w4', 1.0)]},
         '1952_1961': {'w5': [('w5', 0.0), ('w6', 1.0)]},
         '1953_1962': {'w7': [('w7', 0.0), ('w8', 1.0)]}
     })
     self._aggVocab = SortedDict({
         '1954': [('w1', 1.0), ('w2', 1.0)],
         '1955': [('w3', 1.0), ('w4', 1.0)],
         '1956': [('w5', 1.0), ('w6', 1.0)],
         '1957': [('w7', 1.0), ('w8', 1.0)]
     })
     self._aggPeriods = SortedDict({
         '1954': ['1950_1959'],
         '1955': ['1951_1960'],
         '1956': ['1952_1961'],
         '1957': ['1953_1962']
     })
def test_pickle():
    import pickle
    alpha = SortedDict(negate, zip(range(10000), range(10000)))
    alpha._reset(500)
    beta = pickle.loads(pickle.dumps(alpha))
    assert alpha == beta
    assert alpha._key == beta._key
def test_valuesview():
    if hexversion < 0x02070000: return

    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = get_valuesview(temp)

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    that = dict(mapping)
    that_values = get_valuesview(that)

    values = get_valuesview(SortedDict(mapping[:2]))
    assert repr(values) == "SortedDict_values([0, 1])"
Example #8
 def sort( self, col, order ) :
     self.active_sort_vector = []
     if 0 == len(self.message_tuples) : # nothing to display
         return
     self.layoutAboutToBeChanged.emit([],QAbstractItemModel.VerticalSortHint)
     # treat columns 0 and 1 the same
     if col : # is 1 or 2
         col -= 1 # make it 0 or 1
     # we need an ascending vector in all cases.
     vector = self.sort_vectors_ascending[ col ]
     if vector is None : # we need to create the ascending vector
         sorted_dict = SortedDict()
         for j in range( len( self.message_tuples ) ) :
             line_col_msg_tuple = self.message_tuples[ j ]
             if col : # is 1, meaning sort on messages
                 key = line_col_msg_tuple[2]+line_col_msg_tuple[0]
             else : # col is 0, sort on line#+col#
                 key = line_col_msg_tuple[0]+line_col_msg_tuple[1]
             key += str(j) # ensure uniqueness
             sorted_dict[key] = j
         vector = self.sort_vectors_ascending[ col ] = sorted_dict.values()
     # vector now has an ascending sort vector which is cached..
     if order == Qt.DescendingOrder : # ..but we need the descending one
         if self.sort_vectors_descending[ col ] is None : # we need to make it
             self.sort_vectors_descending[ col ] = [ j for j in reversed( vector ) ]
         vector = self.sort_vectors_descending[ col ]
     self.active_sort_vector = vector
     self.layoutChanged.emit([],QAbstractItemModel.VerticalSortHint)
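The key += str(j) suffix matters because SortedDict keys must be unique: two rows with equal sort keys would otherwise collapse into one entry. The idiom in isolation, with hypothetical row data:

from sortedcontainers import SortedDict

rows = ['b', 'a', 'b']  # two rows share the sort key 'b'
sorted_dict = SortedDict()
for j, key in enumerate(rows):
    sorted_dict[key + str(j)] = j  # 'b0', 'a1', 'b2' remain distinct
print(list(sorted_dict.values()))  # [1, 0, 2] -- an ascending sort vector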
def test_irange():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(7, mapping)
    for start in range(26):
        for stop in range(start + 1, 26):
            result = list(string.ascii_lowercase[start:stop])
            assert list(temp.irange(result[0], result[-1])) == result
Example #10
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> user_id

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,),
                       "", [], lambda: len(queue))

        for queue_name in [
            "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
            "edus", "device_messages", "pos_time",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)
Example #11
class PositionMapping:

    __slots__ = ('_pos', '_posmap')

    DUPLICATION_CHECK = True

    def __init__(self):
        self._pos = 0
        self._posmap = SortedDict()

    def items(self):
        return self._posmap.items()

    #
    # Properties
    #

    @property
    def pos(self):
        return self._pos

    @pos.setter
    def pos(self, v):
        self._pos = v

    #
    # Public methods
    #

    def add_mapping(self, start_pos, length, obj):
        # duplication check
        if self.DUPLICATION_CHECK:
            try:
                pre = next(self._posmap.irange(maximum=start_pos, reverse=True))
                if start_pos in self._posmap[pre]:
                    raise ValueError("New mapping is overlapping with an existing element.")
            except StopIteration:
                pass

        self._posmap[start_pos] = PositionMappingElement(start_pos, length, obj)

    def tick_pos(self, delta):
        self._pos += delta

    def get_node(self, pos):
        element = self.get_element(pos)
        if element is None:
            return None
        return element.obj

    def get_element(self, pos):
        try:
            pre = next(self._posmap.irange(maximum=pos, reverse=True))
        except StopIteration:
            return None

        element = self._posmap[pre]
        if pos in element:
            return element
        return None
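get_element depends on a floor-key search: the greatest start_pos less than or equal to pos. A minimal sketch of that irange idiom on its own:

from sortedcontainers import SortedDict

def floor_key(sd, pos):
    # greatest key <= pos, or None if every key is larger
    try:
        return next(sd.irange(maximum=pos, reverse=True))
    except StopIteration:
        return None

sd = SortedDict({0: 'a', 10: 'b', 25: 'c'})
print(floor_key(sd, 17))  # 10
print(floor_key(sd, -1))  # None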
Example #12
def signal_crosses(short_moving_averages, long_moving_averages):
    short_moving_averages = SortedDict(short_moving_averages)
    long_moving_averages = SortedDict(long_moving_averages)

    short_len = len(short_moving_averages.values())
    long_len  = len(long_moving_averages.values())

    if(short_len != long_len):
        print "[Error] signal_crosses: inputs must be same size"
        return {}

    signal_crosses = {}
    last_diff_dir = 0
    for date, short_average in short_moving_averages.items():
        long_average = long_moving_averages[date]
        diff = short_average - long_average

        if(last_diff_dir == 0):
            signal_crosses[date] = HOLD
            if(diff != 0):
                last_diff_dir = sign(diff)
            continue

        if(sign(diff) != last_diff_dir):
            signal_crosses[date] = BUY if last_diff_dir < 0 else SELL
            last_diff_dir = -last_diff_dir
        else:
            signal_crosses[date] = HOLD

    return SortedDict(signal_crosses)
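A hypothetical walk-through (integer dates and the module's BUY/SELL/HOLD constants and sign() helper assumed): the first nonzero difference only sets the direction, and each later sign flip emits a signal.

short_ma = {1: 1.0, 2: 1.2, 3: 0.9}  # date -> short moving average
long_ma = {1: 1.1, 2: 1.1, 3: 1.1}   # date -> long moving average
crosses = signal_crosses(short_ma, long_ma)
# day 1: diff < 0, direction initialised -> HOLD
# day 2: short crosses above long -> BUY
# day 3: short crosses back below -> SELL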
def test_copy():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    dup = temp.copy()
    assert len(temp) == 26
    dup.clear()
    assert len(temp) == 26
Example #14
def _adaptiveAggregation(V, n, yIntervals, weightF, param, freq):
    '''Apply adaptive aggregation algorithm to the given vocabulary.
    Algorithm 2 from paper.
    '''
    # Initialize returned parameters
    finalVocabs = SortedDict()
    periodGroups = SortedDict()

    # Select weighting function
    f = _selectWeightingFunction(weightF, param)

    # Iterate over time frames
    for t in _arrangeIntervals(V, yIntervals, freq):
        mu_t = getRangeMiddle(t[0], t[-1])
        V_prime = SortedDict({tx: V[tx] for tx in t})

        score = defaultdict(float)
        for years_v, words_v in V_prime.items():
            mu_v = getRangeMiddle(years_v)
            fvt = f(mu_v, mu_t)
            for word, score_wv in words_v:
                score[word] += fvt * score_wv

        # Top n terms w sorted by score_w
        scoreList = [(k, v) for k, v in score.items()]
        scoreList = sorted(scoreList, key=lambda pair: pair[1], reverse=True)
        topN = scoreList[:n]

        finalVocabs[str(int(mu_t))] = topN
        periodGroups[str(int(mu_t))] = t
    return finalVocabs, periodGroups
Example #15
class PrioritizedIntensity(object):
    _MIN_VALUE = 0.005

    def __init__(self):
        self._values = SortedDict()

    def set(self, value, priority=100):
        value = float(value)
        if value < self._MIN_VALUE and priority in self._values:
            del self._values[priority]
        else:
            self._values[priority] = value

    def eval(self):
        if not self._values:
            return 0.0
        return self._values[self._values.iloc[-1]]

    def top_priority(self):
        if not self._values:
            return 0
        return self._values.keys()[len(self._values) - 1]

    def reset(self):
        self._values.clear()
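A short usage sketch; eval() returns the value stored under the highest priority. Note that iloc is the legacy sortedcontainers v1 accessor; v2 code would use self._values.keys()[-1] instead:

pi = PrioritizedIntensity()
pi.set(0.3, priority=50)
pi.set(0.9, priority=200)
print(pi.eval())  # 0.9 -- value at the highest priority
pi.set(0.001, priority=200)  # below _MIN_VALUE, so priority 200 is removed
print(pi.eval())  # 0.3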
Example #16
    def dist_player_stats(cls, stats, strength=False):
        """Order the stats to create distribution
        previously 'hand strength'"""
        # logger.info('distributing the player stats')
        dist = SortedDict(pos, {0: 'f'})
        p = 0
        for o in ['f', 's', 'l', 'k', 'c', 'b', 'r', 'a']:
            if o in stats:
                dist[p] = o
                p += max(0.01, stats[o])
                dist[p - 0.001] = o
        dist[1] = 'a'
        logger.info(f'dist = {dist}')

        logger.debug(f'strength? {strength}')
        if strength is False:
            return dist

        r = ''
        logger.debug(f'dist = {type(dist)}')
        for _ in range(20):
            p = _ * 5 / 100
            i_pos = dist.bisect_key_left(p)
            logger.debug(f'i_pos {i_pos} / {len(dist)}')
            k = dist.iloc[i_pos]
            v = dist[k]
            r += v.upper() if (1 - strength) <= p <= 1 else v.lower()
            logger.debug(f'bisected {v} from {k} at {r}%')
        logger.debug(f'dist_stats {r}')
        return r
def test_islice():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(7, mapping)

    for start in range(30):
        for stop in range(30):
            assert list(temp.islice(start, stop)) == list(string.ascii_lowercase[start:stop])
def test_stress(repeat=1000):
    sdict = SortedDict((val, -val) for val in range(1000))

    for rpt in range(repeat):
        action = random.choice(actions)
        action(sdict)

        try:
            sdict._check()
        except AssertionError:
            print(action)
            raise

        start_len = len(sdict)

        while len(sdict) < 500:
            key = random.randrange(0, 2000)
            sdict[key] = -key

        while len(sdict) > 2000:
            key = random.randrange(0, 2000)
            if key in sdict:
                del sdict[key]

        if start_len != len(sdict):
            sdict._check()
def test_irange_key():
    temp = SortedDict(modulo, 7, ((val, val) for val in range(100)))
    values = sorted(range(100), key=modulo)

    for start in range(10):
        for stop in range(start, 10):
            result = list(temp.irange_key(start, stop))
            assert result == values[(start * 10):((stop + 1) * 10)]
Example #20
 def __init__(self):
     self.median = 0
     self.highMarker = TxGraph.WINDOW_SIZE
     self.lowMarker = 1
     self.txMap = SortedDict() #sorted by unix epoch (timestamp)
     self.edgeMap = SortedDict() #sorted by edge name
     self.nodeMap = SortedDict() #sorted by node name
     self.degreeList = SortedList() #sorted by degree
def test_update():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict()
    temp.update()
    temp.update(mapping)
    temp.update(dict(mapping))
    temp.update(mapping[5:7])
    assert list(temp.items()) == mapping
Example #22
 def __init__(self, product_id='BTC-USD', log_to=None):
     super(OrderBook, self).__init__(products=product_id)
     self._asks = SortedDict()
     self._bids = SortedDict()
     self._client = PublicClient()
     self._sequence = -1
     self._log_to = log_to
     if self._log_to:
         assert hasattr(self._log_to, 'write')
     self._current_ticker = None
Example #23
File: itertools2.py Project: goulu/Goulib
def occurences(iterable):
    """ count number of occurences of each item in a finite iterable
    
    :param iterable: finite iterable
    :return: dict of int count indexed by item
    """
    from sortedcontainers import SortedDict
    occur=SortedDict()
    for x in iterable:
        occur[x]=occur.get(x, 0) + 1
    return occur
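For example (repr shown for sortedcontainers v2):

print(occurences('abracadabra'))
# SortedDict({'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r': 2})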
Example #24
def arrayRDP(arr, epsilon=0.0, n=None):
    """
    This is a slightly modified version of the _aRDP function, that accepts
    as arguments the tolerance in the distance and the maximum number of points
    the algorithm can select.
    **Note:** The results of this algorithm should be identical to the _aRDP
    function if the *n* parameter is not specified. In that case, the
    performance is slightly worse, although the asymptotic complexity is the
    same. For this reason, this function internally delegates the solution to
    that function if the *n* parameter is missing.

    Parameters
    ----------
    arr:
        Array of values of consecutive points.
    epsilon:
        Maximum difference allowed in the simplification process.
    n:
        Maximum number of points of the resulting simplified array.

    Returns
    -------
    out:
        Array of indices of the selected points.
    """
    if n is None:
        return _aRDP(arr, epsilon)
    if epsilon <= 0.0:
        raise ValueError('Epsilon must be > 0.0')
    n = n or len(arr)
    if n < 3:
        return arr
    fragments = SortedDict()
    #We store the distances as negative values due to the default order of
    #sorteddict
    dist, idx = max_vdist(arr, 0, len(arr) - 1)
    fragments[(-dist, idx)] = (0, len(arr) - 1)
    while len(fragments) < n-1:
        (dist, idx), (first, last) = fragments.popitem(last=False)
        if -dist <= epsilon:
            #We have to put again the last item to prevent loss
            fragments[(dist, idx)] = (first, last)
            break
        else:
            #We have to break the fragment in the selected index
            dist, newidx = max_vdist(arr, first, idx)
            fragments[(-dist, newidx)] = (first, idx)
            dist, newidx = max_vdist(arr, idx, last)
            fragments[(-dist, newidx)] = (idx, last)
    #Now we have to get all the indices in the keys of the fragments in order.
    result = SortedList(i[0] for i in fragments.values())
    result.add(len(arr) - 1)
    return np.array(result)
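Negating the distances turns the SortedDict into a max-priority queue: popping the first item always yields the pending fragment with the largest distance. The idiom in isolation (popitem(0) is the index form used by sortedcontainers v2, equivalent to the popitem(last=False) call above):

from sortedcontainers import SortedDict

pq = SortedDict()
for dist, payload in [(3.2, 'x'), (7.1, 'y'), (0.5, 'z')]:
    pq[-dist] = payload
key, payload = pq.popitem(0)  # most negative key == largest distance
print(-key, payload)  # 7.1 y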
class TwoDimensionTable:
    def __init__(self):
        self.__rows = SortedDict()

    @classmethod
    def from_json(cls, input_file):
        with open(input_file, 'r') as json_f:
            json_data = json.load(json_f)

        table = TwoDimensionTable()

        for d in json_data:
            speedup = d['speedup_mine_over_sol']
            input_size = d['input_size']
            proc_count = d['proc_count']
            table.add(row=proc_count, col=input_size, val=speedup)

        return table

    def add(self, row, col, val):
        if row not in self.__rows:
            self.__add_new_row(row)

        for r, c in self.__rows.items():
            if r == row:
                c[col] = val
            elif col not in c:
                c[col] = None

    def row_items(self):
        return self.__rows.items()

    def __add_new_row(self, row_name):
        self.__rows[row_name] = SortedDict()
        for r, c in self.__rows.items():
            if r != row_name:
                for c_title, _ in c.items():
                    self.__rows[row_name][c_title] = None
                break

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        table = ['rows ' + u'\u2193' + ' columns ' + u'\u2192']
        for r_title, col in self.row_items():
            row = [str(r_title), '->']
            for c_title, val in col.items():
                row.append('({0}, {1})'.format(c_title, val))

            table.append(' '.join(row))

        return '\n'.join(table)
Example #26
def compute_pagerank(urls, inlinks, outlinks, b=.85, iters=20):
    """ Return a dictionary mapping each url to its PageRank.
    The formula is R(u) = (1 - b)/N + b * sum_{w in B_u} R(w) / |F_w|

    Initialize all scores to 1.0.

    Params:
      urls.......SortedList of urls (names)
      inlinks....SortedDict mapping url to list of in links (backlinks)
      outlinks...Sorteddict mapping url to list of outlinks
    Returns:
      A SortedDict mapping url to its final PageRank value (float)

    >>> urls = SortedList(['a', 'b', 'c'])
    >>> inlinks = SortedDict({'a': ['c'], 'b': set(['a']), 'c': set(['a', 'b'])})
    >>> outlinks = SortedDict({'a': ['b', 'c'], 'b': set(['c']), 'c': set(['a'])})
    >>> sorted(compute_pagerank(urls, inlinks, outlinks, b=.5, iters=0).items())
    [('a', 1.0), ('b', 1.0), ('c', 1.0)]
    >>> iter1 = compute_pagerank(urls, inlinks, outlinks, b=.5, iters=1)
    >>> iter1['a']  # doctest:+ELLIPSIS
    0.6666...
    >>> iter1['b']  # doctest:+ELLIPSIS
    0.333...
    """
    # R(u) = (1 - b)/N + b * sum(R(w) / |outlinks(w)| for w in inlinks of u)

    Ru = SortedDict()
    Rv = SortedDict()
    
    size = len(urls)
    
    # Initialize to 1.0 all URL's
    for k in urls:
        Ru.setdefault(k, 1.0)
    
    # Page Rank definition
    for i in range(iters):
        
        for url in urls:
            try:
                Ru[url] = ((1-b)/size) + b * sum([Ru[x]/len(outlinks[x]) for x in inlinks[url] if len(outlinks[x])])
            except:
                pass
    
    return Ru
def test_setitem():
    temp = SortedDict()

    for pos, key in enumerate(string.ascii_lowercase):
        temp[key] = pos
        temp._check()

    assert len(temp) == 26

    for pos, key in enumerate(string.ascii_lowercase):
        temp[key] = pos
        temp._check()

    assert len(temp) == 26
Example #28
def createTermIndex():
    sortTepDic = SortedDict()
    # Structure for each term
    #   sortTepDic['term']=({'DocId1':['Pos1','Pos2'],'DocId2':['Pos1','Pos2']},'termFreq','DocFreq')

    for root, dirs, files in os.walk(Contants.DATA_DIRECTORY_NAME, topdown=True):
        for name in files:
            file_name = os.path.join(root, name)
            #         'r' when the file will only be read
            #         'w' for only writing (an existing file with the same name will be erased)
            #         'a' opens the file for appending; any data written to the file is automatically added to the end.
            #         'r+' opens the file for both reading and writing.

            mode = "r"
            file_object = open(file_name, mode)
            DocId = os.path.split(file_name)[1]

            wordPos = 0
            for word in file_object.read().split():

                wordPos = wordPos + 1  # increment word location
                lamma = applyFilters(word)

                if lamma:
                    if lamma not in sortTepDic:
                        sortTepDic[lamma] = [{DocId: [wordPos]}, 1, 1]  # add a new term

                    else:

                        sortTepDic[lamma][1] = sortTepDic[lamma][1] + 1  # increment the term frequency

                        if DocId in sortTepDic[lamma][0]:
                            sortTepDic[lamma][0][DocId].append(
                                wordPos
                            )  # add new word position for the existing document
                        else:
                            sortTepDic[lamma][0][DocId] = [wordPos]  # add a new document ID and the word position
                            sortTepDic[lamma][2] = sortTepDic[lamma][2] + 1  # increment the document frequency

    # convert lists to tuples
    for key in sortTepDic.keys():
        for DocId in sortTepDic[key][0]:
            sortTepDic[key][0][DocId] = tuple(sortTepDic[key][0][DocId])
        sortTepDic[key] = tuple(sortTepDic[key])

    Data.write_dataStruct_to_file(Contants.WORD_INDEX_FILE_NAME, sortTepDic)
    createLexicon(sortTepDic)
    createPostingList(sortTepDic)
Example #29
 def __init__(self):
     self.price_map = SortedDict() # Dictionary containing price : OrderList object
     self.prices = self.price_map.keys()
     self.order_map = {} # Dictionary containing order_id : Order object
     self.volume = 0 # Contains total quantity from all Orders in tree
     self.num_orders = 0 # Contains count of Orders in tree
     self.depth = 0 # Number of different prices in tree (http://en.wikipedia.org/wiki/Order_book_(trading)#Book_depth)
Example #30
class ProductReport(object):

    """Read overview page of one job group and generate a report for the product."""

    def __init__(self, browser, job_group_url, root_url, args):
        """Construct a product report object with options."""
        self.args = args
        self.job_group_url = job_group_url
        self.group = job_group_url.split('/')[-1]
        current_url, previous_url = get_build_urls_to_compare(browser, job_group_url, args.builds, args.against_reviewed, args.running_threshold)
        # read last finished
        current_details = browser.get_soup(current_url)
        previous_details = browser.get_soup(previous_url)
        for details in current_details, previous_details:
            assert sum(int(badge.text) for badge in details.find_all(class_='badge')) > 0, \
                "invalid page with no test results found, make sure you specified valid builds (leading zero missing?)"
        current_summary = parse_summary(current_details)
        previous_summary = parse_summary(previous_details)

        changes = {k: v - previous_summary.get(k, 0) for k, v in iteritems(current_summary) if k != 'none' and k != 'incomplete'}
        log.info("Changes since last build:\n\t%s" % '\n\t'.join("%s: %s" % (k, v) for k, v in iteritems(changes)))

        self.build = get_build_nr(current_url)
        self.ref_build = get_build_nr(previous_url)

        # for each architecture iterate over all
        cur_archs, prev_archs = (set(arch.text for arch in details.find_all('th', id=re.compile('flavor_'))) for details in [current_details, previous_details])
        archs = cur_archs
        if args.arch:
            assert args.arch in cur_archs, "Selected arch {} was not found in test results {}".format(args.arch, cur_archs)
            archs = [args.arch]
        self.missing_archs = sorted(prev_archs - cur_archs)
        if self.missing_archs:
            log.info("%s missing completely from current run: %s" %
                     (pluralize(len(self.missing_archs), "architecture is", "architectures are"), ', '.join(self.missing_archs)))

        # create arch reports
        self.reports = SortedDict()
        progress_browser = progress_browser_factory(args) if args.query_issue_status else None
        bugzilla_browser = bugzilla_browser_factory(args) if args.query_issue_status else None
        for arch in sorted(archs):
            results = get_arch_state_results(arch, current_details, previous_details, args.output_state_results)
            self.reports[arch] = ArchReport(arch, results, args, root_url, progress_browser, bugzilla_browser, browser)

    def __str__(self):
        """Return report for product."""
        now_str = datetime.datetime.now().strftime('%Y-%m-%d - %H:%M')
        missing_archs_str = '\n * **Missing architectures**: %s' % ', '.join(self.missing_archs) if self.missing_archs else ''

        build_str = self.build
        if self.args.verbose_test and self.args.verbose_test > 1:
            build_str += ' (reference %s)' % self.ref_build

        openqa_review_report_product = openqa_review_report_product_template.substitute({
            'now': now_str,
            'build': build_str,
            'common_issues': common_issues(missing_archs_str, self.args.show_empty),
            'arch_report': '<hr>'.join(map(str, self.reports.values()))
        })
        return openqa_review_report_product
Example #31
def calculateTrust():
    if request.headers['Content-Type'] == 'application/json':
        d = json.dumps(request.json)
    d = json.loads(d)
    vector = []
    n = d['nodes']  #getting the number of nodes

    for i in range(0, n):
        temp = np.random.randint(0, 10, n)
        vector.append(temp)
    cosinedictionary = {}

    for i in range(0, n):
        for j in range(i + 1, n):
            res = np.dot(vector[i], vector[j])
            cosinedictionary[res] = [i, j]

    #print(cosinedictionary)

    numberofuntrystablenodes = np.random.randint(1, n - (n % 3))
    trustfactor = {}

    nodecombinationdict = {}

    def remove_duplicatenodecombination():
        for key, value in cosinedictionary.items():
            if value not in nodecombinationdict.values():
                nodecombinationdict[key] = value

    remove_duplicatenodecombination()

    def calculate_similarity():
        for i in range(0, n):
            sum = 0
            for key, value in nodecombinationdict.items():
                if int(value[0]) == i or int(value[1]) == i:
                    sum = sum + key

            trustfactor[sum] = i

    calculate_similarity()

    result = SortedDict(trustfactor)

    factorsum = 0
    for key, value in result.items():
        factorsum = factorsum + key

    #calculating the final trust factor of all the nodes and printing it
    trustvaluedictres = {}
    for key, value in result.items():
        trustvaluedictres[key / factorsum] = value

    print(trustvaluedictres)

    count = 0
    l = []
    for key, value in trustvaluedictres.items():
        if count != numberofuntrystablenodes:
            print("untrustable node ", value, " Trustfactor :", key)
            a = "untrustable node " + str(value) + " Trustfactor :" + str(key)
            l.append(a)
            count = count + 1
        else:
            break
    return jsonify(l)
Example #32
class OrderBook:
    def __init__(self, enable_qos=True, enable_statistics=True):
        self.bid_order_book = SortedDict()
        self.ask_order_book = SortedDict()
        self.snapshot_received = False
        self._cond_len = 1000
        self._mid_price_cond = deque(maxlen=self._cond_len)
        self._bid_ask_cond = deque(maxlen=self._cond_len)
        self.best_adjusted_bid = None
        self.best_adjusted_ask = None
        self.mid_price = None
        self.qos = 1.0  # 1.0: perfect synchronised stream, <0.9 degraded stream.
        self.ups = NumUpdatesPerSeconds()
        self.enable_qos = enable_qos
        self.enable_statistics = enable_statistics

    @property
    def best_bid(self):
        if self.best_adjusted_bid is not None:
            return self.best_adjusted_bid
        try:
            i = -1
            while True:
                val = self.bid_order_book.peekitem(i)
                if val[1] != 0:
                    return val[0]
                i -= 1
        except IndexError:
            return None

    @property
    def best_ask(self):
        if self.best_adjusted_ask is not None:
            return self.best_adjusted_ask
        try:
            i = 0
            while True:
                val = self.ask_order_book.peekitem(i)
                if val[1] != 0:
                    return val[0]
                i += 1
        except IndexError:
            return None

    def updates_per_second(self):
        return self.ups.rate

    def liquidity_for(self, quantity: float):
        if not self.snapshot_received:
            return 0, 0, 0, 0
        bid_total_value = 0
        bid_total_quantity = 0
        level_price = 0
        for i in range(-1, -len(self.bid_order_book), -1):
            level_price, liquidity = self.bid_order_book.peekitem(i)
            if bid_total_quantity < quantity:
                bid_total_value += level_price * liquidity
                bid_total_quantity += liquidity
            else:
                break
        bid_average_price = int(bid_total_value / bid_total_quantity)
        bid_lowest_price = level_price

        ask_total_value = 0
        ask_total_quantity = 0
        for i in range(len(self.ask_order_book)):
            level_price, liquidity = self.ask_order_book.peekitem(i)
            if ask_total_quantity < quantity:
                ask_total_value += level_price * liquidity
                ask_total_quantity += liquidity
            else:
                break
        ask_average_price = int(ask_total_value / ask_total_quantity)
        ask_highest_price = level_price
        return bid_average_price, ask_average_price, bid_lowest_price, ask_highest_price

    def snapshot_update(self, snapshot):
        if self.enable_statistics:
            self.ups.count()
        self.best_adjusted_bid = None
        self.best_adjusted_ask = None
        self.mid_price = snapshot['mid_price']
        self.bid_order_book = SortedDict()
        self.ask_order_book = SortedDict()
        for bid in snapshot['bids']:
            self._single_book_update(bid['price'], bid['size'], is_bid=True)
        for ask in snapshot['asks']:
            self._single_book_update(ask['price'], ask['size'], is_bid=False)
        assert self.best_bid <= snapshot['mid_price'] <= self.best_ask
        self.snapshot_received = True

    def _single_book_update(self, price, size, is_bid=True):
        if self.enable_statistics:
            self.ups.count()
        price = int(price)
        book = self.bid_order_book if is_bid else self.ask_order_book
        book[price] = size

    def book_update(self, update: dict):
        self.best_adjusted_bid = None
        self.best_adjusted_ask = None
        self.mid_price = update['mid_price']
        for bid in update['bids']:
            self._single_book_update(bid['price'], bid['size'], is_bid=True)
        for ask in update['asks']:
            self._single_book_update(ask['price'], ask['size'], is_bid=False)

        if self.enable_qos:
            self._mid_price_cond.append(
                self.best_bid <= update["mid_price"] <= self.best_ask)
            self._bid_ask_cond.append(self.best_bid <= self.best_ask)

        # It should never happen in practice.
        # But sometimes the messages don't arrive sequentially.
        if self.best_bid >= update["mid_price"]:
            self.best_adjusted_bid = update["mid_price"] - 1
        if update["mid_price"] >= self.best_ask:
            self.best_adjusted_ask = update["mid_price"] + 1
        if self.best_bid >= self.best_ask:
            self.best_adjusted_bid = update["mid_price"] - 1
        if self.best_ask <= self.best_bid:
            self.best_adjusted_ask = update["mid_price"] + 1
        assert self.best_bid < self.best_ask
        assert self.best_bid <= update["mid_price"] <= self.best_ask
        if self.enable_qos and len(self._mid_price_cond) == self._cond_len:
            self.qos = (0.5 * np.mean(self._bid_ask_cond) +
                        0.5 * np.mean(self._mid_price_cond))
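best_bid and best_ask scan peekitem from the ends of each book, skipping the zero-size levels that updates leave behind. The same pattern in isolation:

from sortedcontainers import SortedDict

def best_nonzero_bid(book):
    # highest price with nonzero size, or None
    for i in range(-1, -len(book) - 1, -1):
        price, size = book.peekitem(i)
        if size != 0:
            return price
    return None

bids = SortedDict({99: 0.0, 100: 2.0, 101: 0.0})  # price -> size
print(best_nonzero_bid(bids))  # 100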
Example #33
File: utils.py Project: mohamad007/dswd
def loadjson(filename):
	with open(filename + '.json') as f:
		data = json.load(f)
		if filename == "suggestion" or filename == 'todo':
			data = SortedDict(data)
	return data
Example #34
write_input(sys.argv[1], sys.argv[2])


def print_menu():
    """This script is used to stores user's favorite NHL teams. The user
	passes in their name and favorite team, which then get stored in a dictionary
	that can be used for analysis of the entries."""
    print("1. Print Fans")
    print("2. Add Fan")
    print("3. View Team Fan Counts")
    print("4. Graph Team Fan Counts")
    print("5. Quit")


# Create dictionary with key = Names, value = user_name
nhlpicks = SortedDict()
nhlpicks['Tristan'] = 'San Jose Sharks'
nhlpicks['Daniel'] = 'Colorado Avalanche'
nhlpicks['Robby'] = 'San Jose Sharks'
nhlpicks['Robb'] = 'Colorado Avalanche'
nhlpicks['CJ'] = 'Pittsburgh Penguins'
nhlpicks['Katie'] = 'Chicago Blackhawks'

nhlteams = [
    "Carolina Hurricanes", "Columbus Blue Jackets", "Colorado Avalanche",
    "San Jose Sharks", "New Jersey Devils", "New York Islanders",
    "New York Rangers", "Philadelphia Flyers", "Pittsburgh Penguins",
    "Washington Capitals", "Boston Bruins", "Buffalo Sabres",
    "Detroit Red Wings", "Florida Panthers", "Montreal Canadiens",
    "Ottawa Senators", "Tampa Bay Lightning", "Toronto Maple Leafs",
    "Chicago Blackhawks", "Dallas Stars", "Minnesota Wild",
Example #35
class Numbers:
    def __init__(self, reddit, settings) -> None:
        """
        This class deals with the key number handling.
        """
        self.reddit = reddit
        self.settings = settings

        # Define the numbers container (this is number to username).
        # This is also using an automatically sorted dictionary for efficiency.
        self.numbers = SortedDict({})

        # Load the numbers from Reddit.
        self.load_numbers()

        # Handle the current max number.
        self.current_max_number = 0
        self.set_max_number()

        # Load the subclasses.
        self.search = self.search(self)
        self.generation = self.generation(self)
        self.assignment = self.assignment(self)

        self.checks = NumberChecks()

    def load_numbers(self) -> None:
        """
        Loads the numbers from Reddit.
        """
        for flair in self.reddit.subreddit(
                self.settings.reddit.subreddit).flair(limit=None):
            try:
                number = int("".join([
                    char for char in flair["flair_text"].lower().lstrip("#")
                    if char.isnumeric() or char == "-"
                ]))
                self.numbers[number] = flair["user"].name
            except Exception:
                pass
        print(f"Loaded {len(self.numbers)} numbers.")

    def set_max_number(self) -> None:
        """
        Sets the current max number based on the configuration.
        """
        if self.settings.reddit.assignment.numbers["static_max"] is False:
            numbers_assigned = len(list(self.numbers.keys()))
            self.current_max_number = numbers_assigned + len(
                self.settings.reddit.assignment.numbers["blacklist"]
            ) + self.settings.reddit.assignment.numbers["max"]
        else:
            self.current_max_number = self.settings.reddit.assignment.numbers[
                "max"]
        print(
            f"Set max number to {self.settings.reddit.assignment.numbers['max']}"
        )

    class search:
        def __init__(self, parent) -> None:
            self.parent = parent

        def num_to_user(self, number) -> str:
            """
            Gets the username of the user who has a specific number (if any).
            :param number: The number to search for.
            :return: The user who has it or None.
            """
            try:
                return self.parent.numbers[number]
            except Exception:
                return None

        def user_to_num(self, username) -> int:
            """
            Gets the number of a specific user.
            :param username: The username to find the number of.
            :return: The number of that user (or None if they don't have one).
            """
            try:
                return int(
                    list(self.parent.numbers.keys())[[
                        user_with_number.lower()
                        for user_with_number in self.parent.numbers.values()
                    ].index(username.lower())])
            except Exception:
                return None

    class generation:
        def __init__(self, parent) -> None:
            self.parent = parent

        def get_random_number(self) -> int:
            """
            Generates a randomly available number.
            :return: The random number.
            """
            random_number = randint(
                self.parent.settings.reddit.assignment.numbers["min"],
                self.parent.current_max_number)
            if random_number in (
                    list(self.parent.numbers.keys()) +
                    self.parent.settings.reddit.assignment.blacklisted_numbers
            ):
                return self.get_random_number()
            else:
                return random_number

        def get_random_user(self) -> tuple:
            """
            Gets a user with a number.
            :return: A random user and their number.
            """
            return choice(list(self.parent.numbers.items()))

    class assignment:
        def __init__(self, parent) -> None:
            self.parent = parent

        def assign_number(self, username: str, number: int = None) -> int:
            """
            Assign a number to a user.
            :param number: Optional. The number to assign the user.
            :return: The number assigned.
            """
            number = self.parent.generation.get_random_number(
            ) if number is None else number

            # Remove a previous number (if the user had one)
            old_number = self.parent.search.user_to_num(username)
            if old_number is not None:
                del self.parent.numbers[old_number]

            # Assign the user a flair.
            self.parent.reddit.subreddit(
                self.parent.settings.reddit.subreddit
            ).flair.set(
                username,
                self.parent.settings.reddit.assignment.flair["text"].format(
                    number),
                flair_template_id=(
                    self.parent.settings.reddit.assignment.flair["template_id"]
                    if
                    self.parent.settings.reddit.assignment.flair["template_id"]
                    else None),
            )

            # Add the user to the numbers dictionary.
            self.parent.numbers[number] = username

            # Approve on the relevant subreddits.
            self.approve_number_subreddits(username, number)

            # Increase the max possible number.
            self.parent.current_max_number += 1

            # Print a success message.
            print(
                f"Succesfully set a user's number. (u/{username} as #{number})"
            )

            return number

        def approve_number_subreddits(self, username: str,
                                      number: int) -> None:
            """
            Approves a user on all the subreddits relevant to their number.
            """
            subreddits = [f"NUM{get_number_nation(number)}"]
            if self.parent.checks.is_descendant_of_3(number):
                subreddits.append("descendantsof3")
            if self.parent.checks.is_seven_seas(number):
                subreddits.append("SevenSeasFaction")
            if self.parent.checks.is_prime_number(number):
                subreddits.append("the_primes")

            # Approve the user on the subreddits.
            for subreddit in subreddits:
                try:
                    self.parent.reddit.subreddit(subreddit).contributor.add(
                        username)
                except Exception:
                    pass

    @property
    def statistics(self) -> dict:
        """
        Gets some statistics on the currently assigned numbers.
        :return: The calculated statistics.
        """
        number_list = self.numbers.keys()
        stats = {
            "numbers_given": 0,  # Amount of Numbers Given
            "sum_of_numbers": 0,  # The sum of all the numbers.
            "lowest_positive": 0,  # The lowest positive number.
            "mean": 0,  # The mean of all the numbers.
            "median": 0,  # The median of all the numbers.
            "evens": 0,  # The amount of even numbers.
            "odds": 0,  # The amount of odd numbers.
            "below_500": 0,  # The amount of numbers below 500.
            "below_1000": 0,  # The amount of numbers below below 1000.
            "below_2500": 0,  # The amount of numbers below 2500.
        }

        # Find the lowest positive number.
        for number in number_list:
            if number >= 1:
                stats["lowest_positive"] = number
                break

        # Gather some statistics by looking through the numbers.
        for number in number_list:
            # Odd or Even
            if number % 2 == 0:
                stats["evens"] += 1
            else:
                stats["odds"] += 1

            # Below 500, 1000 or 2500
            if number <= 500:
                stats["below_500"] += 1
            if number <= 1000:
                stats["below_1000"] += 1
            if number <= 2500:
                stats["below_2500"] += 1

        # Work out these statistics.
        stats["numbers_given"] = len(number_list)
        stats["sum"] = sum(number_list)
        stats["mean"] = stats["sum"] / stats["numbers_given"]
        stats["median"] = median(number_list)

        # Switch these to percentages.
        stats["below_500"] = "{0:.2f}%".format(
            float((stats["below_500"] / stats["numbers_given"]) * 100))
        stats["below_1000"] = "{0:.2f}%".format(
            float((stats["below_1000"] / stats["numbers_given"]) * 100))
        stats["below_2500"] = "{0:.2f}%".format(
            float((stats["below_2500"] / stats["numbers_given"]) * 100))

        # Get the highest and lowest numbers.
        stats["highest"] = number_list[-1]
        stats["lowest"] = number_list[0]

        # Format these stats for the message.
        stats["highest_info"] = "#{number} (u/{username})".format(
            number=stats["highest"],
            username=self.search.num_to_user(stats["highest"]),
        )
        stats["lowest_positive_info"] = "#{number} (u/{username})".format(
            number=stats["lowest_positive"],
            username=self.search.num_to_user(stats["lowest_positive"]),
        )
        stats["lowest_info"] = "#{number} (u/{username})".format(
            number=stats["lowest"],
            username=self.search.num_to_user(stats["lowest"]),
        )

        return stats

    def __str__(self) -> str:
        return str(self.numbers)

    def __repr__(self) -> str:
        return repr(self.numbers)
Example #36
    def entrenamiento(self, dataset, datosTrain):

        # Load all the class data of the dataset from the data matrix
        clasesTrain = dataset.extraeDatos(datosTrain)
        self.numClases = clasesTrain[:, -1]

        # Count the occurrences of each class to later compute its prior probability
        counter = Counter(self.numClases)

        # Compute the probability of each class and store it in a dictionary ordered
        # by the number assigned to each class in the dictionary
        self.dictPrioris = {}
        for k in counter:
            k = int(k)
            counter[k] = counter[k] / len(self.numClases)
            self.dictPrioris[k] = counter[k]

        # Sort the dictionary so the entries match the order in which we extract the data from the dataset
        self.dictPrioris = SortedDict(self.dictPrioris)

        # Compute the training probability tables. For each attribute we have to
        # count its occurrences in each class.
        # Create a list of matrices where we store all the data obtained from the test data
        self.posteriori = np.zeros(len(dataset.nombreAtributos) - 1,
                                   dtype=object)

        # Iterate over all the data in the matrix, without reaching the class column
        for i in range(len(dataset.nombreAtributos) - 1):

            # If the attribute is nominal, count every occurrence for P(D|H)
            if dataset.nominalAtributos[i] == True:

                # Create a matrix of size X: number of values of the attribute, Y: number of classes
                post = np.zeros(
                    (len(dataset.listaDicts[i]), len(dataset.listaDicts[-1])))

                # Count all the data we need from the training set to build the training matrix
                for c in range(len(dataset.listaDicts[-1])):
                    datosEnt = dataset.extraeDatos(datosTrain)
                    dat = datosEnt[:, i]
                    repes = Counter(dat[datosEnt[:, -1] == c])
                    for r in repes:
                        post[int(r), c] = repes[r]
                    if self.laplace == True:
                        self.posteriori[i] = post + 1
                    else:
                        self.posteriori[i] = post

            # If the attribute is continuous, obtain the mean and standard deviation per class
            else:

                # Create a matrix of X: the mean and standard deviation, Y: number of classes
                post = np.zeros((2, len(dataset.listaDicts[-1])))

                # Obtain the mean and standard deviation of each class from the training data
                for c in range(len(dataset.listaDicts[-1])):
                    datosEnt = dataset.extraeDatos(datosTrain)
                    dat = datosEnt[:, i]
                    datos = dat[datosEnt[:, -1] == c]
                    post[0][c] = np.mean(datos)
                    post[1][c] = np.std(datos)
                self.posteriori[i] = post

        # Compute the posterior values from all the tables above
        for i in range(len(dataset.listaDicts) - 1):
            if dataset.nominalAtributos[i] == True:
                self.posteriori[i] /= sum(self.posteriori[i])
Example #37
def similarity_matrix(theta=None, M_T=10, M_O=10):
        """
        Compute the similarity matrix for a corpus, given 
        the distribution of topics therein.

        Arguments:
                @theta: The document-topic distribution from LDA
                @M_T  : The number of topics to consider
                @M_O  : The number of other documents (opinions) to consider 
        Return:
                NxN sparse matrix in CSR format (see function body for
                explanation).
        Notes:
                This needs to be cleaned up bigtime, it's a waste.
        """

        LOG.info("Building similarity matrix M_T="+str(M_T)+" M_O="+str(M_O));

        # doc_id => M_T highest-probability topic_ids 
        DOC_TOPICS = {};

        # topic_id => M_O highest-probability doc_ids 
        TOPIC_DOCS = {};

        # For each document in the corpus 
        for doc_id in range(len(theta)): 

                topics = topicmodel.format_theta(theta[doc_id]);

                DOC_TOPICS[doc_id] = [];

                for i in range(len(topics)):

                        top_id = topics[i][0];
                        top_pr = topics[i][1];

                        #
                        # Build the collection of highest-probability
                        # documents for each topic.
                        #
                        if top_id not in TOPIC_DOCS:
                                TOPIC_DOCS[top_id] = SortedDict();
                                TOPIC_DOCS[top_id][top_pr] = doc_id;

                        # Don't bother attempting to insert if the probability
                        # is less than the lowest already in the collection.
                        elif top_pr >= TOPIC_DOCS[top_id].peekitem(0)[0]:

                                if top_pr not in TOPIC_DOCS[top_id]:
                                        TOPIC_DOCS[top_id][top_pr] = doc_id;
                                else:
                                        # If two docs have an equal probability 
                                        # of expressing the topic, then which 
                                        # should we favor? We can only choose a 
                                        # certain number. Ignore for now.
                                        LOG.warn("Equal probabilities.");

                                if len(TOPIC_DOCS[top_id]) > M_O:
                                        # Remember, dict is sorted, so this 
                                        # will only discard the least one.
                                        TOPIC_DOCS[top_id].popitem(last=False);
                                        
                        #
                        # Build the collection of highest-probability 
                        # topics for each document.
                        #
                        if i < M_O:
                                DOC_TOPICS[doc_id].append(top_id);

        #
        # Build this matrix thing to join docs to "similar" docs
        #
        MATRIX = {};

        for doc_id in DOC_TOPICS:

                MATRIX[doc_id] = [];

                for i in range(len(DOC_TOPICS[doc_id])):
                        topic_id = DOC_TOPICS[doc_id][i];

                        MATRIX[doc_id].append(TOPIC_DOCS[topic_id].values());

        #
        # Build dictionary to count occurrences. 
        #
        W = {};

        for doc_id_A in DOC_TOPICS:
                W[doc_id_A] = {};

                # Count occurrences of doc_id_B in matrix of doc_id_A 
                for i in range(len(MATRIX[doc_id_A])):
                        for j in range(len(MATRIX[doc_id_A][i])):

                                doc_id_B = MATRIX[doc_id_A][i][j];

                                if doc_id_B not in W[doc_id_A]:
                                        W[doc_id_A][doc_id_B] = 1;
                                else:
                                        W[doc_id_A][doc_id_B] += 1;
        #
        # Build the similarity matrix
        #
        # FIXME: Why dok?
        T_sim = scipy.sparse.dok_matrix((len(theta), len(theta)), dtype=float);

        for a in W:
                total = 0;
                for b in W[a]:
                        if W[a][b] > 0:
                                total += W[a][b];
                        
                for b in W[a]:
                        if W[a][b] > 0:
                                T_sim[a,b] = float(W[a][b])/total;
                
        return T_sim.tocsr();
# irange(minimum=None, maximum=None, inclusive=(True, True), reverse=False)
rangeList = list(sl.irange(10, 14, inclusive=[True, False]))
print(rangeList)

print(sl.index(10))  # 3
# print(sl.index(-99)) # Throw Error

s2 = SortedList([1, 7, 7, 7, 7, 10, 11, 13, 14])
print(f"left most idx: {s2.bisect_left(7)}")
print(f"right most idx: {s2.bisect_right(7)}")
print(f"out of boundary < min, idx={s2.bisect_left(-100)}")
print(f"out of boundary > max, len={len(s2)}, idx={s2.bisect_left(100)}")

# %% SortedDict
sd = SortedDict()
sd["c"] = 3
sd["a"] = 1
sd["b"] = 2
del sd["a"]
print(sd)

sd.pop("c")  # return and remove
sd.popitem("b")  # return key-value pair and remove
print(sd)  # print {}

sd2 = SortedDict({1: "a", 2: "b", 4: "c"})
idx = sd2.bisect_left(3)
print(idx)
print(sd2.peekitem(index=idx))
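SortedDict also exposes irange over its keys, with the same semantics as SortedList.irange:

print(list(sd2.irange(2, 4)))  # [2, 4]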
 def __init__(self):
     # Two sorted maps (TreeMap equivalents)
     self.timeToPrice = SortedDict()  # time -> price, keys kept sorted
     self.priceToTimes = SortedDict()  # price -> set of timestamps at that price, keys kept sorted
Example #40
 def __init__(self):
     self.mp = SortedDict()
Example #41
class OrderBook(WebsocketClient):
    def __init__(self,
                 url="wss://ws-feed.pro.coinbase.com",
                 product_id='BTC-USD',
                 api_key="",
                 api_secret="",
                 api_passphrase="",
                 channels=None,
                 log_to=None):

        super(OrderBook, self).__init__(url=url, products=product_id,
            api_key=api_key, api_secret=api_secret,
            api_passphrase=api_passphrase, channels=channels)
        self._asks = SortedDict()
        self._bids = SortedDict()
        self._client = PublicClient()
        self._sequence = -1
        self._log_to = log_to
        if self._log_to:
            assert hasattr(self._log_to, 'write')
        self._current_ticker = None

    @property
    def product_id(self):
        ''' Currently OrderBook only supports a single product even though it is stored as a list of products. '''
        return self.products[0]

    def on_open(self):
        self._sequence = -1
        #print("-- Subscribed to OrderBook! --\n")

    def on_close(self):
        self.is_done = False
        #print("\n-- OrderBook Socket Closed! --")

    def reset_book(self):
        self._asks = SortedDict()
        self._bids = SortedDict()
        res = self._client.get_product_order_book(product_id=self.product_id, level=3)
        for bid in res['bids']:
            self.add({
                'id': bid[2],
                'side': 'buy',
                'price': Decimal(bid[0]),
                'size': Decimal(bid[1])
            })
        for ask in res['asks']:
            self.add({
                'id': ask[2],
                'side': 'sell',
                'price': Decimal(ask[0]),
                'size': Decimal(ask[1])
            })
        self._sequence = res['sequence']

    def on_message(self, message):
        if self._log_to:
            pickle.dump(message, self._log_to)

        sequence = message.get('sequence', -1)
        if self._sequence == -1:
            self.reset_book()
            return
        if sequence <= self._sequence:
            # ignore older messages (e.g. before order book initialization from getProductOrderBook)
            return
        elif sequence > self._sequence + 1:
            self.on_sequence_gap(self._sequence, sequence)
            return

        msg_type = message['type']
        if msg_type == 'open':
            self.add(message)
        elif msg_type == 'done' and 'price' in message:
            self.remove(message)
        elif msg_type == 'match':
            self.match(message)
            self._current_ticker = message
        elif msg_type == 'change':
            self.change(message)

        self._sequence = sequence

    def on_sequence_gap(self, gap_start, gap_end):
        self.reset_book()
        print('Error: messages missing ({} - {}). Re-initializing book at sequence {}.'.format(
            gap_start, gap_end, self._sequence))


    def add(self, order):
        order = {
            'id': order.get('order_id') or order['id'],
            'side': order['side'],
            'price': Decimal(order['price']),
            'size': Decimal(order.get('size') or order['remaining_size'])
        }
        if order['side'] == 'buy':
            bids = self.get_bids(order['price'])
            if bids is None:
                bids = [order]
            else:
                bids.append(order)
            self.set_bids(order['price'], bids)
        else:
            asks = self.get_asks(order['price'])
            if asks is None:
                asks = [order]
            else:
                asks.append(order)
            self.set_asks(order['price'], asks)

    def remove(self, order):
        price = Decimal(order['price'])
        if order['side'] == 'buy':
            bids = self.get_bids(price)
            if bids is not None:
                bids = [o for o in bids if o['id'] != order['order_id']]
                if len(bids) > 0:
                    self.set_bids(price, bids)
                else:
                    self.remove_bids(price)
        else:
            asks = self.get_asks(price)
            if asks is not None:
                asks = [o for o in asks if o['id'] != order['order_id']]
                if len(asks) > 0:
                    self.set_asks(price, asks)
                else:
                    self.remove_asks(price)

    def match(self, order):
        size = Decimal(order['size'])
        price = Decimal(order['price'])

        if order['side'] == 'buy':
            bids = self.get_bids(price)
            if not bids:
                return
            assert bids[0]['id'] == order['maker_order_id']
            if bids[0]['size'] == size:
                self.set_bids(price, bids[1:])
            else:
                bids[0]['size'] -= size
                self.set_bids(price, bids)
        else:
            asks = self.get_asks(price)
            if not asks:
                return
            assert asks[0]['id'] == order['maker_order_id']
            if asks[0]['size'] == size:
                self.set_asks(price, asks[1:])
            else:
                asks[0]['size'] -= size
                self.set_asks(price, asks)

    def change(self, order):
        try:
            new_size = Decimal(order['new_size'])
        except KeyError:
            return

        try:
            price = Decimal(order['price'])
        except KeyError:
            return

        if order['side'] == 'buy':
            bids = self.get_bids(price)
            if bids is None or not any(o['id'] == order['order_id'] for o in bids):
                return
            index = [b['id'] for b in bids].index(order['order_id'])
            bids[index]['size'] = new_size
            self.set_bids(price, bids)
        else:
            asks = self.get_asks(price)
            if asks is None or not any(o['id'] == order['order_id'] for o in asks):
                return
            index = [a['id'] for a in asks].index(order['order_id'])
            asks[index]['size'] = new_size
            self.set_asks(price, asks)

    def get_current_ticker(self):
        return self._current_ticker

    def get_current_book(self):
        result = {
            'sequence': self._sequence,
            'asks': [],
            'bids': [],
        }
        for ask in self._asks:
            try:
                # There can be a race condition here, where a price point is removed
                # between these two ops
                this_ask = self._asks[ask]
            except KeyError:
                continue
            for order in this_ask:
                result['asks'].append([order['price'], order['size'], order['id']])
        for bid in self._bids:
            try:
                # There can be a race condition here, where a price point is removed
                # between these two ops
                this_bid = self._bids[bid]
            except KeyError:
                continue

            for order in this_bid:
                result['bids'].append([order['price'], order['size'], order['id']])
        return result

    def get_ask(self):
        return self._asks.peekitem(0)[0]

    def get_nasks(self, n):
        return [self.get_asks(self._asks.peekitem(i)[0]) for i in range(n+1)]

    def get_asks(self, price):
        return self._asks.get(price)

    def remove_asks(self, price):
        del self._asks[price]

    def set_asks(self, price, asks):
        self._asks[price] = asks

    def get_bid(self):
        return self._bids.peekitem(-1)[0]

    def get_nbids(self, n):
        return [self.get_bids(self._bids.peekitem(-(i+1))[0]) for i in range(n+1)]

    def get_bids(self, price):
        return self._bids.get(price)

    def remove_bids(self, price):
        del self._bids[price]

    def set_bids(self, price, bids):
        self._bids[price] = bids
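
# A minimal usage sketch (an addition, not from the original project), assuming
# the cbpro-style WebsocketClient base class provides start() and close():
if __name__ == '__main__':
    import time

    book = OrderBook(product_id='BTC-USD')
    book.start()  # open the websocket feed; the book rebuilds from a snapshot
    time.sleep(10)
    print('best bid:', book.get_bid(), 'best ask:', book.get_ask())
    book.close()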
Example #42
File: region_data.py Project: zyt755/angr
class RegionMap:
    """
    Mostly used in SimAbstractMemory, RegionMap stores a series of mappings between concrete memory address ranges and
    memory regions, like stack frames and heap regions.
    """
    def __init__(self, is_stack):
        """
        Constructor

        :param is_stack:    Whether this is a region map for stack frames or not. Different strategies apply for stack
                            regions.
        """
        self.is_stack = is_stack

        # A sorted list, which maps stack addresses to region IDs
        self._address_to_region_id = SortedDict()
        # A dict, which maps region IDs to memory address ranges
        self._region_id_to_address = {}

    #
    # Properties
    #

    def __repr__(self):
        return "RegionMap<%s>" % ("S" if self.is_stack else "H")

    @property
    def is_empty(self):
        return len(self._address_to_region_id) == 0

    @property
    def stack_base(self):
        if not self.is_stack:
            raise SimRegionMapError(
                'Calling "stack_base" on a non-stack region map.')

        return next(self._address_to_region_id.irange(reverse=True))

    @property
    def region_ids(self):
        return self._region_id_to_address.keys()

    #
    # Public methods
    #

    @SimStatePlugin.memo
    def copy(self, memo):  # pylint: disable=unused-argument
        r = RegionMap(is_stack=self.is_stack)

        # A shallow copy should be enough, since we never modify any RegionDescriptor object in-place
        r._address_to_region_id = self._address_to_region_id.copy()
        r._region_id_to_address = self._region_id_to_address.copy()

        return r

    def map(self, absolute_address, region_id, related_function_address=None):
        """
        Add a mapping between an absolute address and a region ID. If this is a stack region map, all stack regions
        beyond (lower than) this newly added region will be discarded.

        :param absolute_address:            An absolute memory address.
        :param region_id:                   ID of the memory region.
        :param related_function_address:    A related function address, mostly used for stack regions.
        """

        if self.is_stack:
            # Sanity check
            if not region_id.startswith('stack_'):
                raise SimRegionMapError(
                    'Received a non-stack memory ID "%s" in a stack region map'
                    % region_id)

            # Remove all stack regions that are lower than the one to add
            while True:
                try:
                    addr = next(
                        self._address_to_region_id.irange(
                            maximum=absolute_address, reverse=True))
                    descriptor = self._address_to_region_id[addr]
                    # Remove this mapping
                    del self._address_to_region_id[addr]
                    # Remove this region ID from the other mapping
                    del self._region_id_to_address[descriptor.region_id]
                except StopIteration:
                    break

        else:
            if absolute_address in self._address_to_region_id:
                descriptor = self._address_to_region_id[absolute_address]
                # Remove this mapping
                del self._address_to_region_id[absolute_address]
                del self._region_id_to_address[descriptor.region_id]

        # Add this new region mapping
        desc = RegionDescriptor(
            region_id,
            absolute_address,
            related_function_address=related_function_address)

        self._address_to_region_id[absolute_address] = desc
        self._region_id_to_address[region_id] = desc

    def unmap_by_address(self, absolute_address):
        """
        Removes a mapping based on its absolute address.

        :param absolute_address: An absolute address
        """

        desc = self._address_to_region_id[absolute_address]
        del self._address_to_region_id[absolute_address]
        del self._region_id_to_address[desc.region_id]

    def absolutize(self, region_id, relative_address):
        """
        Convert a relative address in some memory region to an absolute address.

        :param region_id:           The memory region ID
        :param relative_address:    The relative memory offset in that memory region
        :return:                    An absolute address if converted, or an exception is raised when region id does not
                                    exist.
        """

        if region_id == 'global':
            # The global region always bases 0
            return relative_address

        if region_id not in self._region_id_to_address:
            raise SimRegionMapError('Non-existent region ID "%s"' % region_id)

        base_address = self._region_id_to_address[region_id].base_address
        return base_address + relative_address

    def relativize(self, absolute_address, target_region_id=None):
        """
        Convert an absolute address to the memory offset in a memory region.

        Note that if an address that belongs to a heap region is passed into a stack region map, it will be converted
        to an offset within the closest stack frame, and vice versa for passing a stack address to a heap region map.
        Therefore you should only pass in addresses that belong to the same category (stack or non-stack) as this
        region map.

        :param absolute_address:    An absolute memory address
        :return:                    A tuple of the closest region ID, the relative offset, and the related function
                                    address.
        """

        if target_region_id is None:
            if self.is_stack:
                # Get the base address of the stack frame it belongs to
                base_address = next(
                    self._address_to_region_id.irange(minimum=absolute_address,
                                                      reverse=False))

            else:
                try:
                    base_address = next(
                        self._address_to_region_id.irange(
                            maximum=absolute_address, reverse=True))

                except StopIteration:
                    # Not found. It belongs to the global region then.
                    return 'global', absolute_address, None

            descriptor = self._address_to_region_id[base_address]

        else:
            if target_region_id == 'global':
                # Just return the absolute address
                return 'global', absolute_address, None

            if target_region_id not in self._region_id_to_address:
                raise SimRegionMapError(
                    'Trying to relativize to a non-existent region "%s"' %
                    target_region_id)

            descriptor = self._region_id_to_address[target_region_id]
            base_address = descriptor.base_address

        return descriptor.region_id, absolute_address - base_address, descriptor.related_function_address
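
# A hypothetical usage sketch (an addition, not from angr itself; the addresses
# and region IDs are made up, and map() relies on the surrounding
# RegionDescriptor plumbing):
#
#   rm = RegionMap(is_stack=True)
#   rm.map(0x7fff0000, 'stack_main', related_function_address=0x400000)
#   rm.absolutize('stack_main', -0x10)   # -> 0x7ffefff0
#   rm.relativize(0x7ffefff0)            # -> ('stack_main', -0x10, 0x400000)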
Example #43
 def __init__(self):
     self._data = SortedDict()  # type: SortedDict[K, deque[V]]
Example #44
    def cleaner(self):
        self.log.debug('Cleaner started')
        clean_flag = False
        store_file_list = None

        if int(self.cfg['cleaner_store_max_gb']) != 0:
            store_file_list = glob2.glob(
                os.path.join(self.cfg['cap_dir'], '**'))
            self.log.debug('Found files: %s' % store_file_list)

            store_file_total_size_bytes = 0
            for store_file in store_file_list:
                store_file_total_size_bytes += os.path.getsize(store_file)

            store_file_total_size_gigabytes = 1.0 * store_file_total_size_bytes / 1024 / 1024 / 1024
            self.log.debug('Store files size, Gb: %f' %
                           store_file_total_size_gigabytes)

            if store_file_total_size_gigabytes > float(
                    self.cfg['cleaner_store_max_gb']):
                self.log.info(
                    'Current store size / Configured max store size, Gb: %.3f/%.3f'
                    % (store_file_total_size_gigabytes,
                       self.cfg['cleaner_store_max_gb']))
                clean_flag = True

        if int(self.cfg['cleaner_store_keep_free_gb']) != 0:
            store_stat = os.statvfs(self.cfg['cap_dir'])
            store_free_gb = 1.0 * store_stat.f_bavail * store_stat.f_frsize / 1024 / 1024 / 1024
            self.log.debug('Store free space, Gb: %f' % store_free_gb)

            if store_free_gb < float(self.cfg['cleaner_store_keep_free_gb']):
                self.log.info(
                    'Current store free space / Configured keep store free space, Gb: %.3f/%.3f'
                    % (store_free_gb, self.cfg['cleaner_store_keep_free_gb']))
                clean_flag = True

        if clean_flag:
            self.log.info('Clean is active')

            if store_file_list is None:
                store_file_list = glob2.glob(
                    os.path.join(self.cfg['cap_dir'], '**'))

            store_file_list_sorted = SortedDict()
            for store_file in store_file_list:
                store_file_list_sorted.update(
                    {os.path.getmtime(store_file): store_file})
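            # Note (added): files are keyed by mtime here, so two files that
            # share a modification timestamp would overwrite each other.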

            self.log.debug(
                'Sorted files list (with last modification date): %s' %
                store_file_list_sorted)
            self.log.debug(
                'Sorted files list (by last modification date): %s' %
                store_file_list_sorted.values())

            removes = 0
            for file_name in store_file_list_sorted.values():
                if os.path.isfile(file_name):
                    file_size = os.path.getsize(file_name)
                    self.log.info('Remove file: ' + file_name)
                    os.remove(file_name)

                    if file_size > int(
                            self.cfg['cleaner_force_remove_file_less_bytes']):
                        removes += 1
                    else:
                        self.log.warn(
                            'Removed "%s" file with the "%s" bytes size' %
                            (file_name, file_size))

                    if removes == int(self.cfg['cleaner_max_removes_per_run']):
                        self.log.debug('Max removes reached: ' +
                                       self.cfg['cleaner_max_removes_per_run'])
                        break

        self.log.debug('Cleaner finished')
Example #45
 def _clear(self):
     self.best_price = [MIN_PRICE, MAX_PRICE]
     self.price_levels = [SortedDict(), SortedDict()]
     self.time = 0
     self.events = []
Example #46
def load_gtf(
	gtf_path
):

	# open gtf file
	try:
		gtf_file = open(gtf_path, 'r')
	except IOError as err:
		print (
			'!error: '
			'load_gtf(): '
			'cannot open gtf file: '+gtf_path
		)
		print(err)
		return False

	# initialize dict data structure
	gtf = {}
	genes = {}
	try:
		while True:
			line = next(gtf_file).strip()
			line_elems = re.split(r'\t', line)
			contig = line_elems[0]
			start = int(line_elems[3])
			stop = int(line_elems[4])
			strand = line_elems[6]
			frame = int(line_elems[7])
			properties = line_elems[8]

			regex = re.compile(r"gene_id \"(.*?)\"; transcript_id \"(.*?)\"; exon_number \"(.*?)\";")
			matches = regex.search(properties).groups()
			gene_id = matches[0]
			transcript_id = matches[1]
			exon_number = matches[2]

			if contig not in gtf:
				gtf[contig] = SortedDict()

			gtf[contig][start] = {
				'stop': stop,
				'gene_id': gene_id,
				'transcript_id': transcript_id,
				'exon_number': exon_number,
				'frame': frame
			}

			# keep track of gene start/stop
			if gene_id not in genes:
				genes[gene_id] = {
					'start': start,
					'stop': stop,
					'contig': contig,
					'strand': strand
				}
			else:
				if start < genes[gene_id]['start']:
					genes[gene_id]['start'] = start
				if stop > genes[gene_id]['stop']:
					genes[gene_id]['stop'] = stop

	except StopIteration:
		gtf_file.close()

	# create sorted dict for gene start/stop
	genes_sorted = {}
	for gene_id in genes:
		contig = genes[gene_id]['contig']
		if contig not in genes_sorted:
			genes_sorted[contig] = SortedDict()
		genes_sorted[contig][genes[gene_id]['start']] = {
			'gene_id': gene_id,
			'stop': genes[gene_id]['stop']
		}

	return (gtf, genes_sorted, genes)
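
# A hedged sketch (an addition, not from the original script) of why the
# SortedDicts help: the gene covering a position is found by walking back to
# the nearest gene start with irange.
def gene_at(genes_sorted, contig, pos):
	try:
		start = next(genes_sorted[contig].irange(maximum=pos, reverse=True))
	except (KeyError, StopIteration):
		return None
	entry = genes_sorted[contig][start]
	return entry['gene_id'] if pos <= entry['stop'] else None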
Example #47
    def test_on_l2_updates(self):
        client = BcexClient(symbols=["BTC-USD"])

        msg = {
            "seqnum":
            2,
            "event":
            "snapshot",
            "channel":
            "l2",
            "symbol":
            "BTC-USD",
            "bids": [
                {
                    "px": 8723.45,
                    "qty": 1.45,
                    "num": 2
                },
                {
                    "px": 8124.45,
                    "qty": 123.45,
                    "num": 1
                },
            ],
            "asks": [
                {
                    "px": 8730.0,
                    "qty": 1.55,
                    "num": 2
                },
                {
                    "px": 8904.45,
                    "qty": 13.66,
                    "num": 2
                },
            ],
        }
        client._on_l2_updates(msg)

        assert client.l2_book["BTC-USD"] == {
            "bids": SortedDict({
                8124.45: 123.45,
                8723.45: 1.45
            }),
            "asks": SortedDict({
                8730.0: 1.55,
                8904.45: 13.66
            }),
        }

        msg = {
            "seqnum": 3,
            "event": "updated",
            "channel": "l2",
            "symbol": "BTC-USD",
            "bids": [{
                "px": 8723.45,
                "qty": 1.1,
                "num": 1
            }],
            "asks": [],
        }

        client._on_l2_updates(msg)

        assert client.l2_book["BTC-USD"] == {
            "bids": SortedDict({
                8124.45: 123.45,
                8723.45: 1.1
            }),
            "asks": SortedDict({
                8730.0: 1.55,
                8904.45: 13.66
            }),
        }

        msg = {
            "seqnum": 3,
            "event": "updated",
            "channel": "l2",
            "symbol": "BTC-USD",
            "bids": [{
                "px": 8124.45,
                "qty": 0,
                "num": 1
            }],
            "asks": [],
        }

        client._on_l2_updates(msg)
        assert client.l2_book["BTC-USD"] == {
            "bids": SortedDict({8723.45: 1.1}),
            "asks": SortedDict({
                8730.0: 1.55,
                8904.45: 13.66
            }),
        }

        msg = {
            "seqnum": 2,
            "event": "snapshot",
            "channel": "l2",
            "symbol": "BTC-USD",
            "bids": [{
                "px": 8723.45,
                "qty": 1.45,
                "num": 2
            }],
            "asks": [{
                "px": 8730.0,
                "qty": 1.55,
                "num": 2
            }],
        }
        client._on_l2_updates(msg)

        assert client.l2_book["BTC-USD"] == {
            "bids": SortedDict({8723.45: 1.45}),
            "asks": SortedDict({8730.0: 1.55}),
        }
Example #48
File: overhead.py Project: CTSRD-CHERI/cmt
# realloc(81d360600<6390>, 512) = 81d3b9800<6391>   # 256 bytes, allocated 1 lines ago, prev 81d360300, gap 512, next 81d363800, gap 12544, pages [8508256-8508256], 0 fewer pages
# realloc(81d3b9800<6391>, 1024) = 81d3bbc00<6392>  # 512 bytes, allocated 1 lines ago, prev 81d3b8590, gap 4708, next 81d3ba200, gap 2048, pages [8508345-8508345], 1 fewer pages
# free(81d220b28<6306>)                             # 6 bytes, allocated 87 lines ago, prev 81d220b20, gap 1, next 81d220b30, gap 2, pages [8507936-8507936], 0 fewer pages
# free(81d3b83b0<6307>)                             # 9 bytes, allocated 87 lines ago, prev 81d3b83a0, gap 4, next 81d3b83c0, gap 7, pages [8508344-8508344], 0 fewer pages
# free(81d3b83c0<6308>)                             # 10 bytes, allocated 87 lines ago, prev 81d3b83a0, gap 20, next 81d3b83d0, gap 6, pages [8508344-8508344], 0 fewer pages
# free(81d3b83d0<6309>)                             # 16 bytes, allocated 87 lines ago, prev 81d3b83a0, gap 36, next 81d3b83e0, gap 0, pages [8508344-8508344], 0 fewer pages
# free(81d3bd100<6310>)                             # 18 bytes, allocated 87 lines ago, prev 81d3bd0e0, gap 0, next 81d3bd120, gap 14, pages [8508349-8508349], 0 fewer pages
# free(81d3bd120<6311>)                             # 17 bytes, allocated 87 lines ago, prev 81d3bd0e0, gap 32, next 81d3bd140, gap 15, pages [8508349-8508349], 0 fewer pages
#

import re
import sys
from sortedcontainers import SortedDict

# Line number of the malloc(), indexed by pointer.
allocated = SortedDict()

# Size of the malloc(), indexed by pointer.
sizes = SortedDict()


def parse_ptr(l):
    return int(re.split("[<>]+", l)[0], 16)


def atop(ptr):
    return ptr >> 12


def ptoa(page):
    return page << 12
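
# A hedged sketch (an addition; the trace format follows the sample lines in
# the comment block above, and `lineno` is a hypothetical line counter):
def record_malloc(line, lineno):
    # e.g. line = "malloc(512) = 81d3b9800<6391>"
    ptr = parse_ptr(line.split("= ")[1])
    size = int(re.split("[()]+", line)[1])
    allocated[ptr] = lineno  # line number of the malloc(), by pointer
    sizes[ptr] = size        # size of the malloc(), by pointer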
Example #49
class ClasificadorNaiveBayes(Clasificador):
    def __init__(self, laplace):
        self.laplace = laplace
        self.lista_fpr = []
        self.lista_tpr = []

    def entrenamiento(self, dataset, datosTrain):

        # Load the class column for the training rows from the dataset's data matrix
        clasesTrain = dataset.extraeDatos(datosTrain)
        self.numClases = clasesTrain[:, -1]

        # Count the occurrences of each class so we can compute its prior probability
        counter = Counter(self.numClases)

        # Compute each class probability and store it in a dict keyed by the
        # number assigned to each class in the dataset's dictionary
        self.dictPrioris = {}
        for k in counter:
            k = int(k)
            counter[k] = counter[k] / len(self.numClases)
            self.dictPrioris[k] = counter[k]

        # Sort the dict so the priors follow the same order in which we extract data from the dataset
        self.dictPrioris = SortedDict(self.dictPrioris)

        # Build the probability tables from training. For each attribute we have
        # to count its occurrences within each class.
        # We create a list of matrices to store everything obtained from the training data.
        self.posteriori = np.zeros(len(dataset.nombreAtributos) - 1,
                                   dtype=object)

        # Iterate over every attribute column of the matrix, excluding the class
        for i in range(len(dataset.nombreAtributos) - 1):

            # For nominal attributes, count every occurrence to estimate P(D|H)
            if dataset.nominalAtributos[i] == True:

                # Build a matrix with one row per value of this attribute and one column per class
                post = np.zeros(
                    (len(dataset.listaDicts[i]), len(dataset.listaDicts[-1])))

                # Count the relevant values in the training data to build the training matrix
                for c in range(len(dataset.listaDicts[-1])):
                    datosEnt = dataset.extraeDatos(datosTrain)
                    dat = datosEnt[:, i]
                    repes = Counter(dat[datosEnt[:, -1] == c])
                    for r in repes:
                        post[int(r), c] = repes[r]
                    if self.laplace == True:
                        self.posteriori[i] = post + 1
                    else:
                        self.posteriori[i] = post

            # For continuous attributes, compute the per-class mean and standard deviation
            else:

                # Build a matrix with two rows (mean, standard deviation) and one column per class
                post = np.zeros((2, len(dataset.listaDicts[-1])))

                # Compute the mean and standard deviation of each class from the training data
                for c in range(len(dataset.listaDicts[-1])):
                    datosEnt = dataset.extraeDatos(datosTrain)
                    dat = datosEnt[:, i]
                    datos = dat[datosEnt[:, -1] == c]
                    post[0][c] = np.mean(datos)
                    post[1][c] = np.std(datos)
                self.posteriori[i] = post

        # Normalize all the count tables above into posterior probabilities
        for i in range(len(dataset.listaDicts) - 1):
            if dataset.nominalAtributos[i] == True:
                self.posteriori[i] /= sum(self.posteriori[i])

    def clasifica(self, dataset, datosTest):
        acum_probs = 1
        self.prediccion = []
        datTest = dataset.extraeDatos(datosTest)

        # Now evaluate the class probabilities using the values obtained in training.
        # Iterate over every row of the test data matrix.
        for dato in datTest:
            mapa = []
            # Use the prior of each class to compute its probability
            for clase in range(len(self.dictPrioris)):
                listaVerosimilitudes = []
                # Look up each posterior value from our training, i.e. P(D|H)
                for atributo in range(len(self.posteriori)):
                    if dataset.nominalAtributos[atributo] == True:
                        prob = self.posteriori[atributo][int(
                            dato[atributo])][clase]
                        listaVerosimilitudes.append(prob)

                    # Probability for the continuous attributes
                    else:
                        # Apply the normal (Gaussian) distribution formula
                        exp1 = 1 / (self.posteriori[atributo][1][clase] *
                                    math.sqrt(2 * math.pi))
                        exp2 = np.power((dato[atributo] -
                                         self.posteriori[atributo][0][clase]),
                                        2)
                        exp3 = np.power(self.posteriori[atributo][1][clase], 2)
                        exp4 = exp2 / exp3
                        exp4 = math.exp((-1 / 2) * exp4)
                        prob = exp1 * exp4
                        listaVerosimilitudes.append(prob)

                for verosimilitud in listaVerosimilitudes:
                    acum_probs *= verosimilitud
                acum_probs *= self.dictPrioris.get(clase)
                mapa.append(acum_probs)
                acum_probs = 1

            # Take the highest-probability class and store it in our list of predictions
            self.prediccion.append(np.argmax(mapa))

        # Return the list with our classifier's predictions
        return self.prediccion
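
# For reference (an addition, assuming `math` is imported as the class above
# requires): the continuous-attribute likelihood computed via exp1..exp4 is
# just the normal density N(x; mu, sigma).
def normal_pdf(x, mu, sigma):
    return math.exp(-((x - mu) ** 2) / (2 * sigma ** 2)) / (sigma * math.sqrt(2 * math.pi))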
Example #50
 def __init__(self, name: str):
     self.name: str = name
     self.__collections: SortedDict[str, MyMongoCollection] = SortedDict()
Example #51
 def __init__(self):
     self.sorted_map = SortedDict()
Example #52
class google:
    def __init__(self):
        self.SCOPES = ['https://www.googleapis.com/auth/calendar']
        self.calendars = []
        self.calendar_names = {}
        self.realevents = SortedDict({})
        self.isfree = False
        self.creds = {}
        self.tag = "google"

    def logger(self, msg, type="info", colour="none"):
        mainlogger().logger(self.tag, msg, type, colour)

    def timecleaner(self, time):
        if "+" in time:
            time = time[::-1].replace(":", "")[::-1]
            time = datetime.datetime.strptime(time, "%Y-%m-%dT%H%M%S%z")
        else:
            time = datetime.datetime.strptime(time, "%Y-%m-%d")
            time = pytz.timezone("Europe/Amsterdam").localize(time)
        return time

    def timedifference(self, duration):
        days, seconds = duration.days, duration.seconds
        hours = seconds // 3600
        minutes = (seconds % 3600) // 60
        seconds = seconds % 60
        timelist = []
        if days != 0:
            timelist.append(days)
        if hours != 0:
            timelist.append(hours)
        if minutes != 0:
            timelist.append(minutes)
        if seconds != 0:
            timelist.append(seconds)
            #pass
        return timelist

    def requestusercode(self, client_id):
        data = {
            'client_id': client_id,
            'scope': 'https://www.googleapis.com/auth/calendar'
        }
        url = 'https://accounts.google.com/o/oauth2/device/code'
        response = json.loads(requests.post(url, data=data).text)
        return response

    def poll(self, device_code, interval):
        data = {
            'client_id': client_id,
            'client_secret': client_secret,
            'code': device_code,
            'grant_type': "http://oauth.net/grant_type/device/1.0"
        }
        url = 'https://oauth2.googleapis.com/token'
        while True:
            response = json.loads(requests.post(url, data=data).text)
            if "access_token" in response.keys():
                # figure out when the token expires:
                expiredate = datetime.datetime.now() + datetime.timedelta(
                    seconds=response["expires_in"])
                # fill creds with information to figure out whether the access token is expired
                olddict = {}
                with open("components/google/credentials.json", "w") as f:
                    olddict["access_token"] = response["access_token"]
                    olddict["refresh_token"] = response["refresh_token"]
                    olddict["expires_at"] = str(expiredate)
                    tmpdict = {"installed": olddict}
                    f.write(json.dumps(tmpdict))
                return olddict["access_token"]
            else:
                self.logger("not yet..", "debug", "yellow")
                sleep(interval)

    def refreshtoken(self, creddict):
        refresh_token = creddict["refresh_token"]
        data = {
            'client_id': client_id,
            'client_secret': client_secret,
            'refresh_token': refresh_token,
            'grant_type': "refresh_token"
        }
        url = 'https://oauth2.googleapis.com/token'
        response = json.loads(requests.post(url, data=data).text)
        expiredate = datetime.datetime.now() + datetime.timedelta(
            seconds=response["expires_in"])
        with open("components/google/credentials.json", "w") as f:
            creddict["access_token"] = response["access_token"]
            creddict["expires_at"] = str(expiredate)
            tmpdict = {"installed": creddict}
            f.write(json.dumps(tmpdict))
        return creddict["access_token"]

    def getcreds(self):
        """Shows basic usage of the Google Calendar API.
        Prints the start and name of the next 10 events on the user's calendar.
        """
        creds = os.path.exists("components/google/credentials.json")
        #creddict = json.loads(open("components/google/credentials.json").read())["installed"]

        if creds:
            self.logger("creds found!", colour="yellow")
            creddict = json.loads(
                open("components/google/credentials.json").read())["installed"]
            now = datetime.datetime.now()
            expiredate = datetime.datetime.strptime(creddict["expires_at"],
                                                    "%Y-%m-%d %H:%M:%S.%f")
            if now < expiredate:
                self.logger("Getting from credentials.json", "debug", "yellow")
                return creddict["access_token"]
            else:
                self.logger("creds need to be refreshed!", "debug", "yellow")
                creds = self.refreshtoken(creddict)
                return creds
        # If there are no (valid) credentials available, let the user log in.
        if not creds:
            self.logger("No creds found!", "debug", "red")
            response = self.requestusercode(client_id)
            self.logger("Please go to {} and enter this code: {}".format(
                response["verification_url"], response["user_code"]),
                        colour="blue")
            device_code, interval = response["device_code"], response[
                "interval"]
            creds = self.poll(device_code, interval)
            self.logger("Got access token, ready to rumble.", colour="green")
            return creds
            #creds = flow.run_local_server(port=0)
            # Save the credentials for the next run

    def main(self, numberofevents=1):
        creds = self.getcreds()
        service = Service(creds)

        # Call the Calendar API
        now = datetime.datetime.utcnow().isoformat(
        ) + 'Z'  # 'Z' indicates UTC time
        #print("getting calendars")
        page_token = None
        while True:
            calendar_list = service.calendarList()
            #sys.exit()
            for calendar_list_entry in calendar_list['items']:
                id = calendar_list_entry["id"]
                name = calendar_list_entry['summary']
                #print("calendar \"{}\" has id: {}".format(name ,id))
                self.calendars.append(id)
                self.calendar_names[id] = name
            page_token = calendar_list.get('nextPageToken')
            if not page_token:
                break
        #print('Getting the upcoming 10 events')

        for calendar_id in self.calendars:
            events_result = service.eventList(calendarId=calendar_id,
                                              timeMin=now,
                                              maxResults=10,
                                              singleEvents=True,
                                              orderBy='startTime')
            events = events_result.get('items', [])

            if not events:
                #print('No upcoming events found for calendar {}'.format(self.calendar_names[calendar_id]))
                pass
            for index, event in enumerate(events):  #[:numberofevents + 1]):

                start = event['start'].get('dateTime',
                                           event['start'].get('date'))
                end = event['end'].get('dateTime', event['end'].get('date'))
                try:
                    nextstart = events[index + 1]['start'].get(
                        'dateTime', events[index + 1]['start'].get('date'))
                    nextstart = self.timecleaner(nextstart)
                    event["nextstart"] = nextstart  # when the next event starts
                except Exception as e:
                    pass
                start = self.timecleaner(start)
                end = self.timecleaner(end)
                event["start"] = start
                event["end"] = end
                #event["until_next"] = timelist # so you know how long it takes until the next event. not implemented on flutter yet.
                self.realevents[start] = event
                #print(start, event['summary'])

        now = pytz.timezone("Europe/Amsterdam").localize(
            datetime.datetime.now())
        #self.isfree = not (start < now < end)
        sortedlist = list(self.realevents.keys())
        for time in sortedlist:
            eventsummary = self.realevents[time]["summary"]

        return sortedlist, self.realevents
Example #53
uc_dict = SortedDict({
    "SNEK": "0x1f40d",  ## Error
    ## qwertz
    "ACUTE": "0x00b4",
    ## QWERTZ
    "INV_EXCL": "0x00a1",
    "INV_QST": "0x00bf",
    ## numpad
    "SUP1": "0x00b9",
    "SUP2": "0x00b2",
    "SUP3": "0x00b3",
    "SUP_MIN": "0x207b",
    "ROOT": "0x221a",
    "DEGREE": "0x00b0",
    "MINUTE": "0x02b9",
    "SECOND": "0x02ba",
    "CURRENCY": "0x00a4",
    "CENT": "0x00a2",
    "YEN": "0x00a5",
    "POUND": "0x00a3",
    "EURO": "0x20ac",
    "DIVISION": "0x00f7",
    "MICRO": "0x00b5",
    "OHM": "0x2126",
    "BITCOIN": "0x20bf",
    "SECTION": "0x00a7",
    ## NAV_TYPO
    "SQT1": "0x203a",
    "SQT2": "0x201a",
    "SQT3": "0x2018",
    "SQT4": "0x2019",
    "SQT5": "0x2039",
    "SUP_A": "0x00aa",
    "SUP_O": "0x00ba",
    "TMARK": "0x2122",
    "COPYR": "0x00a9",
    "REGTM": "0x00ae",
    "DOTS": "0x2026",
    "CROSS": "0x2020",
    "BULLET": "0x2022",
    "EMDASH": "0x2014",
    "PERMIL": "0x2030",
    "DQT1": "0x00bb",
    "DQT2": "0x201e",
    "DQT3": "0x201c",
    "DQT4": "0x201d",
    "DQT5": "0x00ab",
    ## lang
    "ae": "0x00e4",
    "oe": "0x00f6",
    "ue": "0x00fc",
    "sz": "0x00df",
    "n_tilde": "0x00f1",
    ## LANG
    "AE": "0x00c4",
    "OE": "0x00d6",
    "UE": "0x00dc",
    "SZ": "0x1e9e",
    "N_TILDE": "0x00d1",
    ## greek
    "alpha": "0x03b1",
    "beta": "0x03b2",
    "gamma": "0x03b3",
    "delta": "0x03b4",
    "epsilon": "0x03b5",
    "zeta": "0x03b6",
    "eta": "0x03b7",
    "theta": "0x03b8",
    "iota": "0x03b9",
    "kappa": "0x03ba",
    "lambda": "0x03bb",
    "my": "0x03bc",
    "ny": "0x03bd",
    "xi": "0x03be",
    "omikron": "0x03bf",
    "pi": "0x03c0",
    "rho": "0x03c1",
    "sigma": "0x03c3",
    "tau": "0x03c4",
    "ypsilon": "0x03c5",
    "phi": "0x03c6",
    "chi": "0x03c7",
    "psi": "0x03c8",
    "omega": "0x03c9",
    # variants
    "beta2": "0x03d0",
    "epsi2": "0x03f5",
    "theta2": "0x03d1",
    "kappa2": "0x03f0",
    "pi2": "0x03d6",
    "rho2": "0x03f1",
    "sigma2": "0x03c2",
    "sigma3": "0x03f2",
    "phi2": "0x03d5",
    # non-standard
    "digamma": "0x03dd",
    "stigma": "0x03db",
    "heta": "0x0371",
    "san": "0x03fb",
    "koppa": "0x03d9",
    "sampi": "0x0373",
    "scho": "0x03f8",
    ## GREEK
    "ALPHA": "0x0391",
    "BETA": "0x0392",
    "GAMMA": "0x0393",
    "DELTA": "0x0394",
    "EPSILON": "0x0395",
    "ZETA": "0x0396",
    "ETA": "0x0397",
    "THETA": "0x0398",
    "IOTA": "0x0399",
    "KAPPA": "0x039a",
    "LAMBDA": "0x039b",
    "MY": "0x039c",
    "NY": "0x039d",
    "XI": "0x039e",
    "OMIKRON": "0x039f",
    "PI": "0x03a0",
    "RHO": "0x03a1",
    "SIGMA": "0x03a3",
    "TAU": "0x03a4",
    "YPSILON": "0x03a5",
    "PHI": "0x03a6",
    "CHI": "0x03a7",
    "PSI": "0x03a8",
    "OMEGA": "0x03a9",
    # variants
    "YPSI2": "0x03d2",
    # non-standard
    "DIGAMMA": "0x03dc",
    "STIGMA": "0x03da",
    "HETA": "0x0370",
    "SAN": "0x03fa",
    "KOPPA": "0x03d8",
    "SAMPI": "0x0372",
    "SCHO": "0x03f7",
    ## symbol
    "SHOGI_WD": "0x26c9",
    "FLAG_W": "0x2690",
    "LETTER": "0x2709",
    "PHONE": "0x2706",
    "INFO": "0x1f6c8",
    "REWIND": "0x23ee",
    "PLAY": "0x23ef",
    "FORWARD": "0x23ed",
    "STOP": "0x25fc",
    "EJECT": "0x23cf",
    "SHOGI_W": "0x2616",
    "FEMALE": "0x2640",
    "MALE": "0x2642",
    "NO": "0x2717",
    "YES": "0x2713",
    "FLAT": "0x266d",
    "NATURAL": "0x266e",
    "SHARP": "0x266f",
    "COMMON_T": "0x1d134",
    "ALLA_BR": "0x1d135",
    "PAWN_W": "0x2659",
    "DIAMS_W": "0x2662",
    "HEARTS_W": "0x2661",
    "SPADES_W": "0x2664",
    "CLUBS_W": "0x2667",
    "WHOLE_N": "0x1d15d",
    "HALF_N": "0x1d15e",
    "QUART_N": "0x1d15f",
    "EIGHTH_N": "0x1d160",
    "SIXT_N": "0x1d161",
    "ROOK_W": "0x2656",
    "KNIGHT_W": "0x2658",
    "BISHOP_W": "0x2657",
    "QUEEN_W": "0x2655",
    "KING_W": "0x2654",
    "C_CLEF": "0x1d121",
    "G_CLEF": "0x1d11e",
    "F_CLEF": "0x1d122",
    "PEDAL": "0x1d1ae",
    "PEDAL_UP": "0x1d1af",
    ## SYMBOL
    "SHOGI_BD": "0x26ca",
    "FLAG_B": "0x2691",
    "SPEAKER_UP": "0x1f50a",
    "SPEAKER_TG": "0x1f507",
    "SPEAKER_DN": "0x1f509",
    "SHOGI_B": "0x2617",
    "KEY": "0x1f511",
    "LOCK": "0x1f512",
    "LOCK_OPEN": "0x1f513",
    "STAFF": "0x1d11a",
    "BARLINE": "0x1d100",
    "BARLINE_D": "0x1d101",
    "BARLINE_FIN": "0x1d102",
    "CODA": "0x1d10c",
    "PAWN_B": "0x265f",
    "DIAMS_B": "0x2666",
    "HEARTS_B": "0x2665",
    "SPADES_B": "0x2660",
    "CLUBS_B": "0x2663",
    "WHOLE_P": "0x1d13b",
    "HALF_P": "0x1d13c",
    "QUART_P": "0x1d13d",
    "EIGHTH_P": "0x1d13e",
    "SIXT_P": "0x1d13f",
    "ROOK_B": "0x265c",
    "KNIGHT_B": "0x265e",
    "BISHOP_B": "0x265d",
    "QUEEN_B": "0x265b",
    "KING_B": "0x265a",
    "REPEAT_S": "0x1d106",
    "REPEAT_E": "0x1d107",
    "SEGNO": "0x1d10b",
    "DALSEGNO": "0x1d109",
    "DACAPO": "0x1d10a",
    ## math
    "EXISTS": "0x2203",
    "FORALL": "0x2200",
    "EQUIV": "0x2261",
    "CORRESP": "0x2259",
    "INEQUAL": "0x2260",
    "DEFINE": "0x2254",
    "TO": "0x27f6",
    "MAPSTO": "0x27fc",
    "SETMINUS": "0x2216",
    "QED": "0x220e",
    "OR_": "0x22c1",
    "AND_": "0x22c0",
    "OR": "0x2228",
    "AND": "0x2227",
    "NOT": "0x00ac",
    "BICOND": "0x21d4",
    "IMPL_REV": "0x21d0",
    "IMPL": "0x21d2",
    "TOP": "0x22a4",
    "BOTTOM": "0x22a5",
    "SUM_": "0x2211",
    "PROD_": "0x220f",
    "MDOT": "0x22c5",
    "NOTIN": "0x2209",
    "IN": "0x2208",
    "LS_EQ": "0x2264",
    "GR_EQ": "0x2265",
    "INFTY": "0x221e",
    "UNION_": "0x22c3",
    "INTERS_": "0x22c2",
    "UNION": "0x222a",
    "INTERS": "0x2229",
    "NOTSUBS": "0x2288",
    "PR_SUBSET": "0x2282",
    "SUBSET": "0x2286",
    "SUBSET_REV": "0x2287",
    "EMPTYSET": "0x2205",
    ## MATHSCRIPT
    "LEFT_CEIL": "0x2308",
    "REAL_P": "0x211c",
    "NABLA": "0x2207",
    "RIGHT_CEIL": "0x2309",
    "LEFT_FLOOR": "0x230a",
    "BRA": "0x27e8",
    "IMAG_P": "0x2111",
    "KET": "0x27e9",
    "RIGHT_FLOOR": "0x230b",
    "Q_SET": "0x211a",
    "e_FUN": "0x0065",
    "R_SET": "0x211d",
    "Z_SET": "0x2124",
    "U_SET": "0x1d54c",
    "i_UNIT": "0x0069",
    "BIG_O": "0x1d4aa",
    "POWERSET": "0x1d4ab",
    "A_SET": "0x1d538",
    "INTEGRAL": "0x222b",
    "PARTIAL": "0x2202",
    "F_SET": "0x1d53d",
    "H_SET": "0x210d",
    "K_SET": "0x1d542",
    "LENGTH": "0x2113",
    "ALEPH": "0x2135",
    "BETH": "0x2136",
    "C_SET": "0x2102",
    "B_SET": "0x1d539",
    "N_SET": "0x2115",
    "INDICATOR": "0x1d7cf",
    "FOURIER": "0x2131",
    "HAMILTON": "0x210b",
    "LAPLACE": "0x2112",
    ## MATHEXT
    "RIGHT_TACK": "0x22a2",
    "MODELS": "0x22a7",
    "AB_VEC": "0x20d7",
    "AB_LINE": "0x0305",
    "AB_ARC": "0x0361",
    "LESS_LESS": "0x226a",
    "PRED": "0x227a",
    "EMBED": "0x21aa",
    "SUCC": "0x227b",
    "GREAT_GREAT": "0x226b",
    "TRIANGLE": "0x25b3",
    "SQUARE": "0x25a1",
    "CIRCLE": "0x25cb",
    "DOT_OP": "0x2299",
    "PLUS_OP": "0x2295",
    "MINUS_OP": "0x2296",
    "ROUGHLY": "0x2248",
    "ISOMORPH": "0x2245",
    "LTIMES": "0x22c9",
    "RTIMES": "0x22ca",
    "BOWTIE": "0x22c8",
    "M_ANGLE": "0x2221",
    "ANGLE": "0x2220",
    "CIRC_MID": "0x2218",
    "AST_MID": "0x2217",
    "TIMES": "0x00d7",
    "PLUS_MINUS": "0x00b1",
    "MINUS_PLUS": "0x2213",
    "LEFT_OUTER": "0x27d5",
    "RIGHT_OUTER": "0x27d6",
    "FULL_OUTER": "0x27d7",
    "SQ_LS": "0x228f",
    "SQ_LS_EQ": "0x2291",
    "PROP_TO": "0x221d",
    "PARALLEL": "0x2225",
    "NOT_PARA": "0x2226",
    "TIMES_OP": "0x2297",
    "NOT_DIV": "0x2224"
})
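
# A small usage sketch (an addition): the values are hex code points, so any
# symbol can be recovered with chr(int(..., 16)).
print(chr(int(uc_dict["EURO"], 16)))  # €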
Example #54
class FederationRemoteSendQueue(object):
    """A drop in replacement for FederationSender"""
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> list[user_id]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = SortedDict()

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name, ), "",
                [], lambda: len(queue))

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "device_messages",
                "pos_time",
                "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = set(user_id for uids in self.presence_changed.values()
                           for user_id in uids)

            keys = self.presence_destinations.keys()
            i = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_destinations[key]

            user_ids.update(
                user_id for user_id, _ in self.presence_destinations.values())

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def build_and_send_edu(self, destination, edu_type, content, key=None):
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_read_receipt(self, receipt):
        """As per FederationSender

        Args:
            receipt (synapse.types.ReadReceipt):
        """
        # nothing to do here: the replication listener will handle it.
        pass

    def send_presence(self, states):
        """As per FederationSender

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so lets always just
        # filter here just in case.
        local_states = list(
            filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update(
            {state.user_id: state
             for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_presence_to_destinations(self, states, destinations):
        """As per FederationSender

        Args:
            states (list[UserPresenceState])
            destinations (list[str])
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map.update(
                {state.user_id: state
                 for state in states})
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per FederationSender"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, token):
        self._clear_queue_before_pos(token)

    def get_replication_rows(self,
                             from_token,
                             to_token,
                             limit,
                             federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so lets delete everything its
        # acknowledged its seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(state=self.presence_map[user_id], )))

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((pos,
                         PresenceDestinationsRow(
                             state=self.presence_map[user_id],
                             destinations=list(dests),
                         )))

        # Fetch changed keyed EDUs
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here: a Python dict
        # comprehension keeps the last value for a duplicate key, so each
        # (destination, key) pair ends up mapped to its latest stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos,
                         KeyedEduRow(
                             key=edu_key,
                             edu=self.keyed_edu[(destination, edu_key)],
                         )))

        # Fetch changed EDUs
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(destination=destination, )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
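
The windowed reads above lean on two properties worth seeing in isolation: `SortedDict.bisect_right` turns a stream token into a slice index, and a dict comprehension over that slice keeps only the last stream position for a repeated key. A minimal, self-contained sketch with toy tokens and keys (not Synapse types):

from sortedcontainers import SortedDict

# stream position -> (destination, edu_key), mirroring keyed_edu_changed
changed = SortedDict({1: ('hs1', 'k1'), 2: ('hs2', 'k1'), 5: ('hs1', 'k1')})

from_token, to_token = 1, 5  # exclusive lower bound, inclusive upper bound
i = changed.bisect_right(from_token)
j = changed.bisect_right(to_token) + 1

# Last write wins: ('hs1', 'k1') appears at positions 1 and 5; only 5 survives.
latest = {v: k for k, v in changed.items()[i:j]}
print(latest)  # {('hs2', 'k1'): 2, ('hs1', 'k1'): 5}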
Example #55
0
""" ""
from sortedcontainers import SortedDict


def print_menu():
    print('1. Print Users')
    print('2. Add a User')
    print('3. Remove a User')
    print('4. Lookup a User')
    print('5. Quit')
    print()


# Create dictionary with key = Names, value = user_name
usernames = SortedDict()
usernames['Summer'] = 'summerela'
usernames['William'] = 'GoofyFish'
usernames['Steven'] = 'LoLCat'
usernames['Zara'] = 'zanyZara'
usernames['Renato'] = 'songDude'

# variable to store the menu choice
menu_choice = 0

# display your menu
print_menu()

# as long as the menu choice isn't "quit" get user options
while menu_choice != 5:
    # get menu choice from user (the source is truncated here; the handling
    # below is a minimal completion consistent with the menu above)
    menu_choice = int(input('Type in a number (1-5): '))
    print()
    if menu_choice == 1:
        print('Users:')
        for name, user_name in usernames.items():
            print('{}: {}'.format(name, user_name))
    elif menu_choice == 2:
        name = input('Name to add: ')
        usernames[name] = input('Username: ')
    elif menu_choice == 3:
        name = input('Name to remove: ')
        if name in usernames:
            del usernames[name]
        else:
            print('{} was not found'.format(name))
    elif menu_choice == 4:
        name = input('Name to look up: ')
        print(usernames.get(name, '{} was not found'.format(name)))
Example #56
0
class Mentor(Mozillian):
    def __init__(self, row):
        # Originally passed explicitly: row['Email Address'],
        # row['Organizational level (i.e. P3, M2, etc.)'], row['Organization'],
        # row['Participant full name'], row['Manager email']
        super().__init__(row)
        self.submitted_dt = row['Timestamp']
        self.time_availability_to_set(row['Time Availability'])
        self.expertise_to_set(row['Areas of expertise'])
        # if the outside_org value is missing, default to the unbiased value 2
        self.outside_org = safe_cast(
            row['Interest in mentoring or learning from someone outside your own organization?'],
            int, 2)
        assert self.outside_org in [1, 2, 3], (
            self.get_id() + " has invalid outside_org value " +
            str(self.outside_org))
        self.requests = row['Any particular requests?']
        self.identify_as = row['Do you identify yourself as']
        self.welcome_email_dt = row['Sent Wecome Email (date)']
        self.notify_manager_dt = row['Sent Manager Approval Email (date)']
        self.manager_approved_dt = row['Manager Approved (date)']
        # by default mentors have a max of two mentees
        self.mentee_limit = safe_cast(row['Mentee Limit'], int, 2)

    def time_availability_to_set(self, availability):
        """ Store the times the person is available as a set """
        self.availability_set = {x.strip() for x in availability.split(',')}

    def expertise_to_set(self, expertise):
        """ Store the person's areas of expertise as a set """
        self.expertise_set = {x.strip() for x in expertise.split(',')}

    def get_submitted_dt(self) -> datetime:
        return self.submitted_dt

    def get_expertise(self) -> set:
        return self.expertise_set

    def get_times_available(self) -> set:
        return self.availability_set

    def get_mentee_limit(self) -> int:
        return self.mentee_limit

    def set_mentee_limit(self, mentee_limit: int):
        self.mentee_limit = mentee_limit

    def get_outside_org_score(self, learner) -> int:
        # outside org 1=prefer, 3=rather not
        assert learner.org is not None, \
            "Learner " + learner.get_id() + " has invalid org value " + learner.track
        if self.outside_org == 1:  # prefers someone outside their org
            return 1 if self.org == learner.org else 3
        if self.outside_org == 3:  # would rather stay inside their org
            return 3 if self.org == learner.org else 1
        return 2  # outside_org == 2 is neutral: same score either way

    def calc_score(self, learner, is_requested):
        # get count of overlapping times
        available_times = len(
            self.availability_set.intersection(learner.get_times_available()))

        # If constraints are satisfied, calculate preference rankings for learners based on feature vector
        score = 0

        # add bias for requested mentor
        if is_requested:
            score = score + 50

        if available_times > 0:
            # match expertise to interest, max score of 7
            # need to account for "Other:" somehow
            score = score + len(
                self.expertise_set.intersection(learner.get_interests()))

            score = score + self.get_outside_org_score(learner)

        return score

    def set_preferences(self, learners):
        self.preferences = SortedDict(neg)  # keys (scores) iterate highest-first

        for learner in learners:
            # Filter on constraints
            # cannot match to themselves
            if self.get_id() == learner.get_id():
                continue
            # mentor-learner should not be in the same management reporting chain - will need ppl dataset info
            # for now just check that learner's manager = mentor
            if self.get_manager_email() == learner.get_id():
                continue

            # unless manager says "no", manager approved column has no impact

            # get count of overlapping times
            available_times = len(
                self.availability_set.intersection(
                    learner.get_times_available()))

            # If constraints are satisfied, calculate preference rankings for learners based on feature vector
            score = 0
            if available_times > 0:

                # match expertise to interest, max score of 7
                # need to account for "Other:" somehow
                score = score + len(
                    self.expertise_set.intersection(learner.get_interests()))
                #print("interests intersection score:" + str(score))

                # outside org 1=prefer, 3=rather not
                # add 1 if prefer and orgs not the same
                if self.outside_org == 1 and self.org != learner.org:
                    #print("outside org add 1: " + mentor_org)
                    score = score + 1
                # add 1 if rather not and orgs are the same
                if self.outside_org == 3 and self.org == learner.org:
                    #print("outside org add 1: " + mentor_org)
                    score = score + 1

                # so far ranks range is [0,8]

                # be careful matching those in relationship/married/dating/familial - How??

                # option to constrain mentor org level > learner org level?

                # if score is the same, order by date_submitted? no i think this is used in the apposite & global draw
                #print(mentor.get_id() + ": " + str(score))

                if score > 0:
                    self.preferences.setdefault(score, []).append(learner)

    def set_preferences_subscribed(self,
                                   subscribed_learners: dict,
                                   requested_learner=""):
        self.preferences = SortedDict(neg)  # keys (scores) iterate highest-first
        self.learner_score = {}

        for subscribed_learner_id, subscribed_learner in subscribed_learners.items():
            # Filter on constraints
            # cannot match to themselves
            if self.get_id() == subscribed_learner_id:
                continue
            # mentor-learner should not be in the same management reporting chain - will need ppl dataset info
            # for now just check that learner's manager = mentor
            if (self.get_manager_email() == subscribed_learner_id
                    or subscribed_learner.get_manager_email() == self.get_id()):
                continue

            # unless manager says "no", manager approved column has no impact

            # get count of overlapping times
            available_times = len(
                self.availability_set.intersection(
                    subscribed_learner.get_times_available()))

            # If constraints are satisfied, calculate preference rankings for learners based on feature vector
            score = 0

            # add bias for requested mentor
            if subscribed_learner_id == requested_learner:
                print(self.get_id() + ' requested learner: ' +
                      requested_learner)
                score = score + 50

            if available_times > 0:

                # match expertise to interest, max score of 7
                # need to account for "Other:" somehow
                score = score + len(
                    self.expertise_set.intersection(
                        subscribed_learner.get_interests()))
                #print("interests intersection score:" + str(score))

                score = score + self.get_outside_org_score(subscribed_learner)

                # so far ranks range is [2,13]

                # be careful matching those in relationship/married/dating/familial - How??

                # option to constrain mentor org level > learner org level? do levels translate across M/P?

                # if score is the same, order by date_submitted? no i think this is used in the apposite & global draw
                #print(mentor.get_id() + ": " + str(score))

            self.preferences.setdefault(score, []).append(subscribed_learner)

            self.learner_score[subscribed_learner_id] = score

    def get_preferences(self) -> SortedDict:
        return self.preferences

    def get_ranked_learners(self) -> list:
        ranked_learners = []
        # add weights to those scores that are the same?
        for value in self.preferences.values():
            ranked_learners.extend([x.get_id() for x in value])

        #for score, learners_list in self.preferences.items():
        #  print("score: " + str(score) + ", learners: " + ','.join([x.get_id() for x in learners_list]))

        return ranked_learners

    def get_learner_score(self) -> dict:
        return self.learner_score

    def get_learner_rank(self, learner, is_requested) -> int:
        #scores = [score for subscribed_learner_id, score in self.learner_score.items() if subscribed_learner_id == learner.get_id()]
        return self.calc_score(learner, is_requested)
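
`SortedDict(neg)` orders keys by their negation, so iterating `preferences.values()` walks scores from highest to lowest, which is what `get_ranked_learners` relies on. A toy sketch of the same pattern, with plain strings standing in for learner objects (`neg` is `operator.neg`, which the original module presumably imports):

from operator import neg
from sortedcontainers import SortedDict

preferences = SortedDict(neg)  # keys iterate in descending order
for score, learner in [(3, 'ann'), (7, 'bo'), (3, 'cy')]:
    preferences.setdefault(score, []).append(learner)

ranked = [name for bucket in preferences.values() for name in bucket]
print(ranked)  # ['bo', 'ann', 'cy'] -- highest score first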
Example #57
0
def dead_screen(score_table, resent_result):
    print(resent_result)
    pygame.init()
    size = (500, 500)
    sc = pygame.display.set_mode(size)
    pygame.display.set_caption("Snake")
    img = pygame.image.load("snake image.jpg")
    pygame.display.set_icon(img)

    gameover_surf = pygame.image.load("game over.jpg")
    gameover_surf = pygame.transform.scale(gameover_surf, (600, 300))
    x_image = 260
    y_image = 100
    gameover_rect = gameover_surf.get_rect(center=(x_image, y_image))
    sc.blit(gameover_surf, gameover_rect)

    font = pygame.font.Font('freesansbold.ttf', 32)
    text = font.render('SCORE TABLE', False, (255, 255, 255))
    textRect = text.get_rect()
    textRect.center = (260, 200)
    sc.blit(text, textRect)

    # score table
    global name_win
    score_table.update({resent_result: name_win})
    score_table = SortedDict(score_table)
    print(score_table)

    font = pygame.font.Font('freesansbold.ttf', 20)
    # Render the top three scores; SortedDict keeps keys ascending, so the
    # highest scores sit at the end. Slicing also avoids an IndexError when
    # the table holds fewer than three entries.
    top_scores = list(score_table.keys())[-3:][::-1]
    for rank, score_key in enumerate(top_scores):
        name_surf = font.render(score_table[score_key], False, (255, 255, 255))
        score_surf = font.render(str(score_key), False, (255, 255, 255))
        name_rect = name_surf.get_rect(center=(230, 250 + 40 * rank))
        score_rect = score_surf.get_rect(center=(300, 250 + 40 * rank))
        sc.blit(name_surf, name_rect)
        sc.blit(score_surf, score_rect)

    pygame.display.update()

    clock = pygame.time.Clock()
    waiting = True
    while waiting:
        clock.tick(100)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
            elif event.type == pygame.KEYDOWN:
                # K_ESCAPE is a key code, not an event type, so test event.key
                if event.key == pygame.K_ESCAPE:
                    start_screen()
                else:
                    waiting = False
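
Because `SortedDict(score_table)` sorts the integer scores ascending, the three highest scores are simply the last three keys. The same lookup, stripped of pygame, using `peekitem` (which returns a `(key, value)` pair and avoids materialising the key list):

from sortedcontainers import SortedDict

score_table = SortedDict({120: 'ann', 340: 'bo', 90: 'cy', 210: 'dee'})
for rank in range(1, 4):
    score, name = score_table.peekitem(-rank)  # -1 is the highest key
    print('{}. {} - {}'.format(rank, name, score))
# 1. bo - 340
# 2. dee - 210
# 3. ann - 120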
Example #59
0
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        # Pending presence map user_id -> UserPresenceState
        self.presence_map = {}

        # Stream position -> list[user_id]
        self.presence_changed = SortedDict()

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = SortedDict()

        self.keyed_edu = {}  # (destination, key) -> EDU

        # stream position -> (destination, key)
        self.keyed_edu_changed = SortedDict()

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            # use the `name` parameter rather than reaching for the enclosing
            # loop variable; the string is formatted eagerly, so both work,
            # but the parameter is what the helper exists to bind
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name, ), "",
                [], lambda: len(queue))

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "device_messages",
                "pos_time",
                "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)
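
The "EVERYTHING IS SAD" comment is about Python's late-binding closures: a lambda created in a loop captures the loop variable, not its value at creation time, so every gauge would report the last queue. Wrapping the lambda in a helper function gives each gauge its own binding. A minimal demonstration of the pitfall and the fix:

queues = {'edus': [1, 2], 'device_messages': [1]}

# Pitfall: every lambda closes over the loop variable `q`, not its value.
broken = [lambda: len(q) for q in queues.values()]
print([f() for f in broken])  # [1, 1] -- both report the last queue seen

# Fix: a helper function creates a fresh scope per call, binding `queue`.
def make_gauge(queue):
    return lambda: len(queue)

fixed = [make_gauge(q) for q in queues.values()]
print([f() for f in fixed])  # [2, 1]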
Example #60
0
# Imports required by this snippet; sortedcontainers is given, while the
# sudachipy module paths below are assumed from that library's layout.
import csv
import re
from logging import DEBUG, StreamHandler, getLogger

from dartsclone import DoubleArray
from sortedcontainers import SortedDict
from sudachipy.dictionarylib.jtypedbytebuffer import JTypedByteBuffer
from sudachipy.dictionarylib.wordinfo import WordInfo


class DictionaryBuilder(object):

    __BYTE_MAX_VALUE = 127
    __MAX_LENGTH = 255
    __MIN_REQUIRED_COLS_NUM = 18
    __BUFFER_SIZE = 1024 * 1024
    __PATTERN_UNICODE_LITERAL = re.compile(r"\\u([0-9a-fA-F]{4}|{[0-9a-fA-F]+})")
    __ARRAY_MAX_LENGTH = __BYTE_MAX_VALUE  # max value of byte in Java
    __STRING_MAX_LENGTH = 32767  # max value of short in Java
    is_user_dictionary = False

    class WordEntry:
        headword = None
        parameters = None
        wordinfo = None
        aunit_split_string = None
        bunit_split_string = None
        cunit_split_string = None

    class PosTable(object):

        def __init__(self):
            self.table = []

        def get_id(self, str_):
            id_ = self.table.index(str_) if str_ in self.table else -1
            if id_ < 0:
                id_ = len(self.table)
                self.table.append(str_)
            return id_

        def get_list(self):
            return self.table

    @staticmethod
    def __default_logger():
        handler = StreamHandler()
        handler.terminator = ""
        handler.setLevel(DEBUG)
        logger = getLogger(__name__)
        logger.setLevel(DEBUG)
        logger.addHandler(handler)
        logger.propagate = False
        return logger

    def __init__(self, *, logger=None):
        self.byte_buffer = JTypedByteBuffer()
        self.trie_keys = SortedDict()
        self.entries = []
        self.is_dictionary = False
        self.pos_table = self.PosTable()
        self.logger = logger or self.__default_logger()

    def build(self, lexicon_paths, matrix_input_stream, out_stream):
        self.logger.info('reading the source file...')
        for path in lexicon_paths:
            with open(path, 'r', encoding='utf-8') as rf:
                self.build_lexicon(rf)
        self.logger.info('{} words\n'.format(len(self.entries)))

        self.write_grammar(matrix_input_stream, out_stream)
        self.write_lexicon(out_stream)

    def build_lexicon(self, lexicon_input_stream):
        line_no = -1
        try:
            for i, row in enumerate(csv.reader(lexicon_input_stream)):
                line_no = i
                entry = self.parse_line(row)
                if entry.headword:
                    self.add_to_trie(entry.headword, len(self.entries))
                self.entries.append(entry)
        except Exception as e:
            if line_no >= 0:
                self.logger.error(
                    '{} at line {} in {}\n'.format(e.args[0], line_no, lexicon_input_stream.name))
            raise e

    def parse_line(self, cols):
        if len(cols) < self.__MIN_REQUIRED_COLS_NUM:
            raise ValueError('invalid format')
        cols = [self.decode(col) for col in cols]
        if not self.__is_length_valid(cols):
            raise ValueError('string is too long')
        if not cols[0]:
            raise ValueError('headword is empty')

        entry = self.WordEntry()
        # head word for trie
        if cols[1] != '-1':
            entry.headword = cols[0]
        # left-id, right-id, connect_cost
        entry.parameters = [int(cols[i]) for i in [1, 2, 3]]
        # part of speech
        pos_id = self.get_posid(cols[5:11])
        if pos_id < 0:
            raise ValueError('invalid part of speech')

        entry.aunit_split_string = cols[15]
        entry.bunit_split_string = cols[16]
        entry.cunit_split_string = cols[17]
        self.check_splitinfo_format(entry.aunit_split_string)
        self.check_splitinfo_format(entry.bunit_split_string)
        self.check_splitinfo_format(entry.cunit_split_string)

        if cols[14] == 'A' and \
                not (entry.aunit_split_string == '*' and entry.bunit_split_string == '*'):
            raise ValueError('invalid splitting')

        synonym_group_ids = []
        if len(cols) > 18:
            synonym_group_ids = self.parse_synonym_group_ids(cols[18])

        head_length = len(cols[0].encode('utf-8'))
        dict_from_wordid = -1 if cols[13] == '*' else int(cols[13])
        entry.wordinfo = WordInfo(
            cols[4], head_length, pos_id, cols[12], dict_from_wordid, '', cols[11], None, None, None, synonym_group_ids)
        return entry

    def __is_length_valid(self, cols):
        head_length = len(cols[0].encode('utf-8'))
        return head_length <= self.__STRING_MAX_LENGTH \
            and len(cols[4]) <= self.__STRING_MAX_LENGTH \
            and len(cols[11]) <= self.__STRING_MAX_LENGTH \
            and len(cols[12]) <= self.__STRING_MAX_LENGTH

    def add_to_trie(self, headword, word_id):
        key = headword.encode('utf-8')
        if key not in self.trie_keys:
            self.trie_keys[key] = []
        self.trie_keys[key].append(word_id)

    def get_posid(self, strs):
        return self.pos_table.get_id(','.join(strs))

    def write_grammar(self, matrix_input_stream, output_stream):
        self.logger.info('writing the POS table...')
        self.convert_postable(self.pos_table.get_list())
        self.byte_buffer.seek(0)
        output_stream.write(self.byte_buffer.read())
        self.__logging_size(self.byte_buffer.tell())
        self.byte_buffer.clear()

        self.logger.info('writing the connection matrix...')
        if not matrix_input_stream:
            self.byte_buffer.write_int(0, 'short')
            self.byte_buffer.write_int(0, 'short')
            self.byte_buffer.seek(0)
            output_stream.write(self.byte_buffer.read())
            self.__logging_size(self.byte_buffer.tell())
            self.byte_buffer.clear()
            return
        matrix = self.convert_matrix(matrix_input_stream)
        self.byte_buffer.seek(0)
        output_stream.write(self.byte_buffer.read())
        self.byte_buffer.clear()
        output_stream.write(matrix.read())
        self.__logging_size(matrix.tell() + 4)

    def convert_postable(self, pos_list):
        self.byte_buffer.write_int(len(pos_list), 'short')
        for pos in pos_list:
            for text in pos.split(','):
                self.write_string(text)

    def convert_matrix(self, matrix_input):
        header = matrix_input.readline().strip()
        if re.fullmatch(r"\s*", header):
            raise ValueError('invalid format at line 0')
        lr = header.split()
        lsize, rsize = [int(x) for x in lr]
        self.byte_buffer.write_int(lsize, 'short')
        self.byte_buffer.write_int(rsize, 'short')

        matrix = JTypedByteBuffer()

        for i, line in enumerate(matrix_input.readlines()):
            line = line.strip()
            if re.fullmatch(r"\s*", line) or re.match("#", line):
                continue
            cols = line.split()
            if len(cols) < 3:
                self.logger.warning('invalid format at line {}'.format(i))
                continue
            l, r, cost = [int(col) for col in cols]
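            # each cost is a 16-bit short at byte offset 2 * (l + lsize * r),
            # i.e. the matrix is stored with right-id as the row (stride lsize)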
            pos = matrix.tell()
            matrix.seek(2 * (l + lsize * r))
            matrix.write_int(cost, 'short')
            matrix.seek(pos)
        return matrix

    def write_lexicon(self, io_out):
        trie = DoubleArray()
        wordid_table = JTypedByteBuffer()
        keys = []
        vals = []
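        # wordid_table layout per headword: [count: byte][word_id: int] * count;
        # `vals` records each block's offset so the trie maps key -> offset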
        for key, word_ids in self.trie_keys.items():
            keys.append(key)
            vals.append(wordid_table.tell())
            wordid_table.write_int(len(word_ids), 'byte')
            for wid in word_ids:
                wordid_table.write_int(wid, 'int')

        self.logger.info('building the trie...')

        trie.build(keys, lengths=[len(k) for k in keys], values=vals)

        self.logger.info('done\n')
        self.logger.info('writing the trie...')
        self.byte_buffer.clear()
        self.byte_buffer.write_int(trie.size(), 'int')
        self.byte_buffer.seek(0)
        io_out.write(self.byte_buffer.read())
        self.byte_buffer.clear()

        io_out.write(trie.array())
        self.__logging_size(trie.size() * 4 + 4)
        trie.clear()
        del trie

        self.logger.info('writing the word-ID table...')
        self.byte_buffer.write_int(wordid_table.tell(), 'int')
        self.byte_buffer.seek(0)
        io_out.write(self.byte_buffer.read())
        self.byte_buffer.clear()

        wordid_table.seek(0)
        io_out.write(wordid_table.read())
        self.__logging_size(wordid_table.tell() + 4)
        del wordid_table

        self.logger.info('writing the word parameters...')
        self.byte_buffer.write_int(len(self.entries), 'int')
        for entry in self.entries:
            self.byte_buffer.write_int(entry.parameters[0], 'short')
            self.byte_buffer.write_int(entry.parameters[1], 'short')
            self.byte_buffer.write_int(entry.parameters[2], 'short')
            self.byte_buffer.seek(0)
            io_out.write(self.byte_buffer.read())
            self.byte_buffer.clear()
        self.__logging_size(len(self.entries) * 6 + 4)
        self.write_wordinfo(io_out)

    def write_wordinfo(self, io_out):
        mark = io_out.tell()
        # Reserve 4 bytes per entry for the offset table, which is written
        # back at `mark` once all word_infos have been emitted.
        io_out.seek(mark + 4 * len(self.entries))
        offsets = JTypedByteBuffer()
        self.logger.info('writing the word_infos...')
        base = io_out.tell()
        for entry in self.entries:
            wi = entry.wordinfo
            offsets.write_int(io_out.tell(), 'int')
            self.write_string(wi.surface)
            self.write_stringlength(wi.length())
            self.byte_buffer.write_int(wi.pos_id, 'short')
            if wi.normalized_form == wi.surface:
                self.write_string('')
            else:
                self.write_string(wi.normalized_form)
            self.byte_buffer.write_int(wi.dictionary_form_word_id, 'int')
            if wi.reading_form == wi.surface:
                self.write_string('')
            else:
                self.write_string(wi.reading_form)

            self.write_intarray(self.parse_splitinfo(entry.aunit_split_string))
            self.write_intarray(self.parse_splitinfo(entry.bunit_split_string))
            self.write_intarray(self.parse_splitinfo(entry.cunit_split_string))
            self.write_intarray(wi.synonym_group_ids)
            self.byte_buffer.seek(0)
            io_out.write(self.byte_buffer.read())
            self.byte_buffer.clear()
        self.__logging_size(io_out.tell() - base)
        self.logger.info('writing word_info offsets...')
        io_out.seek(mark)
        offsets.seek(0)
        io_out.write(offsets.read())
        self.__logging_size(offsets.tell())

    def decode(self, str_):
        def replace(match):
            uni_text = match.group()
            uni_text = uni_text.replace('{', '').replace('}', '')
            if len(uni_text) > 6:
                uni_text = ('\\U000{}'.format(uni_text[2:]))
            return uni_text.encode('ascii').decode('unicode-escape')
        return re.sub(self.__PATTERN_UNICODE_LITERAL, replace, str_)

    def check_splitinfo_format(self, str_):
        if str_.count('/') + 1 > self.__ARRAY_MAX_LENGTH:
            raise ValueError('too many units')

    def parse_splitinfo(self, info):
        if info == '*':
            return []
        words = info.split('/')
        if len(words) > self.__ARRAY_MAX_LENGTH:
            raise ValueError('too many units')
        ids = []
        for word in words:
            if self.__is_id(word):
                ids.append(self.parse_id(word))
            else:
                ids.append(self.word_to_id(word))
                if ids[-1] < 0:
                    raise ValueError('not found such a word: {}'.format(word))
        return ids

    @staticmethod
    def __is_id(text):
        return re.match(r'U?\d+$', text)

    def parse_id(self, text):
        if text.startswith('U'):
            id_ = int(text[1:])
            if self.is_user_dictionary:
                id_ |= (1 << 28)
        else:
            id_ = int(text)
        self.check_wordid(id_)
        return id_

    def word_to_id(self, text):
        cols = text.split(',')
        if len(cols) < 8:
            raise ValueError('too few columns')
        headword = self.decode(cols[0])
        pos_id = self.get_posid([cols[i] for i in range(1, 7)])
        if pos_id < 0:
            raise ValueError('invalid part of speech')
        reading = self.decode(cols[7])
        return self.get_wordid(headword, pos_id, reading)

    def get_wordid(self, headword, pos_id, reading_form):
        for i in range(len(self.entries)):
            info = self.entries[i].wordinfo
            if info.surface == headword \
                    and info.pos_id == pos_id \
                    and info.reading_form == reading_form:
                return i
        return -1

    def check_wordid(self, wid):
        if wid < 0 or wid >= len(self.entries):
            raise ValueError('invalid word ID')

    def parse_synonym_group_ids(self, text):
        if text == '*':
            return []
        synonym_group_ids = text.split('/')
        if len(synonym_group_ids) > self.__ARRAY_MAX_LENGTH:
            raise ValueError("too many units")
        return [int(synonym_group_id) for synonym_group_id in synonym_group_ids]

    def write_string(self, text):
        len_ = 0
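        # count UTF-16 code units: code points outside the BMP take a
        # surrogate pair (two units), matching Java's String.length()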
        for c in text:
            if 0x10000 <= ord(c) <= 0x10FFFF:
                len_ += 2
            else:
                len_ += 1
        self.write_stringlength(len_)
        self.byte_buffer.write_str(text)

    def write_stringlength(self, len_):
        if len_ <= self.__BYTE_MAX_VALUE:
            self.byte_buffer.write_int(len_, 'byte')
        else:
            self.byte_buffer.write_int((len_ >> 8) | 0x80, 'byte')
            self.byte_buffer.write_int((len_ & 0xFF), 'byte')

    def write_intarray(self, array):
        self.byte_buffer.write_int(len(array), 'byte')
        for item in array:
            self.byte_buffer.write_int(item, 'int')

    def __logging_size(self, size):
        self.logger.info('{} bytes\n'.format(size))
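
`write_stringlength` packs lengths up to 127 into a single byte; longer lengths (up to __STRING_MAX_LENGTH, 32767) set the high bit of the first byte and carry the low eight bits in a second byte. A minimal sketch of the matching reader; `read_byte` is a hypothetical callable, not part of the builder:

def read_stringlength(read_byte):
    # Inverse of write_stringlength above; `read_byte` is a hypothetical
    # callable yielding the next unsigned byte from the stream.
    first = read_byte()
    if first <= 127:  # high bit clear: single-byte length
        return first
    return ((first & 0x7F) << 8) | read_byte()

# Round-trip against the encoding in write_stringlength:
encoded = iter([(300 >> 8) | 0x80, 300 & 0xFF])
assert read_stringlength(lambda: next(encoded)) == 300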