Ejemplo n.º 1
0
 def __init__(self, my_book):
     """Build the vocabulary model for one book.

     Caches references to the book's metadata manager, edit model and
     speller, creates the (initially empty) sorted vocabulary containers,
     and registers the metadata read/save handlers for good-words,
     bad-words, scannos and the vocabulary list.
     """
     # Save reference to the book
     self.my_book = my_book
     # Save reference to the metamanager
     self.metamgr = my_book.get_meta_manager()
     # Save reference to the edited document
     self.document = my_book.get_edit_model()
     # Save reference to a speller, which will be the default
     # at this point.
     self.speller = my_book.get_speller()
     # The vocabulary list as a blist sorted dict.
     self.vocab = blist.sorteddict()
     # Key and Values views on the vocab list for indexing by table row.
     self.vocab_kview = self.vocab.keys()
     self.vocab_vview = self.vocab.values()
     # The good- and bad-words sets and the scannos set.
     self.good_words = set()
     self.bad_words = set()
     self.scannos = set()
     # A dict of words that use an alt-dict tag. The key is a word and the
     # value is the alt-dict tag string.
     self.alt_tags = blist.sorteddict()
     # Register metadata readers and writers.
     self.metamgr.register(C.MD_GW, self.good_read, self.good_save)
     self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save)
     self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save)
     self.metamgr.register(C.MD_VL, self.word_read, self.word_save)
Ejemplo n.º 2
0
 def __init__(self):
     """ Initializes trader by order books for the asset from different markets
     """
     MultiAssetStrategy.__init__(self)

     # order queues ordered by their best asks and bids
     # something like std::map<Ticks, OrderQueue>[2]
     # NOTE(review): sorteddict is used unqualified here — presumably
     # imported at module level from blist; confirm against the full file.
     self._bests = [sorteddict(), sorteddict()]
     # previously observed best levels, keyed per market
     self._oldBests = {}
Ejemplo n.º 3
0
    def __init__(self):
        """ Initializes trader by order books for the asset from different markets
        """
        MultiAssetStrategy.__init__(self)
        # Local import keeps blist an optional dependency of this strategy.
        from blist import sorteddict

        # order queues ordered by their best asks and bids
        # something like std::map<Ticks, OrderQueue>[2]
        self._bests = [sorteddict(), sorteddict()]
        # previously observed best levels, keyed per market
        self._oldBests = {}
  def __init__(self):
	self.index = sorteddict() 
	self.tag = []
	self.pgid = None
	self.title = None
	self.title_words = sorteddict()
	self.text = ''
	self.page_map = sortedlist()
	self.Infobox = sorteddict()
	self.Links = sorteddict()
	self.stopwords = open('stopwords.txt','r').readlines()[0].strip().split(',')
	xml.sax.ContentHandler.__init__(self)
Ejemplo n.º 5
0
    def __init__(self, db_filename, fields_to_index=(), columns=(), cluster=()):
        """Create the in-memory sorted store backed by *db_filename*.

        :param db_filename: path used for persistence setup.
        :param fields_to_index: field names to maintain secondary indexes for.
        :param columns: field names to maintain column views for.
        :param cluster: cluster configuration passed to _setup_cluster.

        Fix: default arguments were mutable lists (``=[]``), a classic
        shared-state pitfall; empty tuples are equivalent here because the
        arguments are only iterated, never mutated.
        """
        self.data = sorteddict()
        # Keys/values views stay live as self.data changes.
        self.keys = self.data.keys()
        self.values = self.data.values()

        self.indexes = {field: sorteddict() for field in fields_to_index}
        self.columns = {field: sorteddict() for field in columns}

        self.lock = TimeoutLock(0.5)

        self._setup_cluster(cluster)
        self._setup_persistence(db_filename)
Ejemplo n.º 6
0
def solution(counts):
    """Search for the cheapest way to reduce the given counts.

    Builds a descending-sorted histogram of *counts*, optionally splits the
    largest element (>= 9) into thirds, and returns the minimum over both
    branches.

    NOTE(review): this is Python 2 code (`iterkeys`). It also calls `solve`,
    which is not defined in this view — possibly an older name for this very
    function (recursion); confirm against the full module before renaming.
    """
    # Histogram sorted by descending key (blist.sorteddict takes a key fn).
    agg = blist.sorteddict(lambda x: -x)
    for elem in counts:
        agg[elem] = agg.get(elem, 0) + 1

    best = 10000
    # Only consider the split branch when the largest value is >= 9.
    if next(agg.iterkeys()) >= 9:
        cop = blist.sorteddict(agg)
        num, count = cop.popitem()
        # Replace each copy of `num` with three copies of num // 3.
        cop[num // 3] = cop.get(num // 3, 0) + 3 * count
        best = count * 2 + solve(cop)
    return min(solve(agg), best)
Ejemplo n.º 7
0
def run_tests(names, num_items, num_its, type_=int):
    """Benchmark the named sorted-mapping implementations.

    Looks up a zero-argument container factory for each name in *names*,
    times it with _run_test, and returns a mapping of name -> result.
    """
    factories = {
        'btrees': lambda: BTrees.OOBTree.OOBTree(),
        'blist': lambda: blist.sorteddict(),
        'bintrees': lambda: bintrees.FastRBTree(),
        'dict': lambda: dict(),
        'banyan_red_black_tree':
            lambda: banyan.SortedDict(key_type=type_,
                                      alg=banyan.RED_BLACK_TREE),
        'banyan_red_black_tree_rank_updator':
            lambda: banyan.SortedDict(key_type=type_,
                                      alg=banyan.RED_BLACK_TREE,
                                      updator=banyan.RankUpdator),
        'banyan_red_black_tree_min_max_updator':
            lambda: banyan.SortedDict(key_type=type_,
                                      alg=banyan.RED_BLACK_TREE,
                                      updator=banyan.MinMaxUpdator),
        'banyan_splay_tree':
            lambda: banyan.SortedDict(key_type=type_, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list':
            lambda: banyan.SortedDict(key_type=type_, alg=banyan.SORTED_LIST),
        'banyan_red_black_tree_gen':
            lambda: banyan.SortedDict(alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree_gen':
            lambda: banyan.SortedDict(alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen':
            lambda: banyan.SortedDict(alg=banyan.SORTED_LIST),
    }
    return {name: _run_test(factories[name], type_, num_items, num_its)
            for name in names}
Ejemplo n.º 8
0
    def loadPatched(self):
        """Parse the patched MGF file into self.patched_Data.

        Reads self.patched_mgf line by line; each BEGIN IONS/END IONS block
        becomes one mgfData record keyed by its TITLE. Peak lines (m/z +
        intensity) are collected into a sorted dict keyed by m/z.
        """
        with open(self.patched_mgf, "r") as file_to_read:
            for line in file_to_read:

                if "BEGIN IONS" in line:
                    # Start a fresh record and peak map for this spectrum.
                    mD = mgfData()
                    mz_int_additional_data = blist.sorteddict()
                elif "END IONS" in line:
                    # Attach the collected peaks and store by title.
                    mD.setMZ(mz_int_additional_data)
                    self.patched_Data[mD.title] = mD
                else:
                    # Header lines are KEY=VALUE; peak lines have no '='.
                    items = line.strip().split('=')
                    if items[0] == "TITLE":
                        mD.setTitle(items[1])
                    elif items[0] == "PEPMASS":
                        mD.setPepmass(items[1])
                    elif items[0] == "CHARGE":
                        mD.setCharge(items[1])
                    else:  # peaks
                        mz_int_additional = items[0].split()
                        if len(mz_int_additional) == 0:  # empty line
                            continue
                        else:
                            # m/z -> [intensity]
                            mz_int_additional_data[float(
                                mz_int_additional[0])] = [
                                    mz_int_additional[1]
                                ]
Ejemplo n.º 9
0
 def __init__(self, size_of_cache=10000):
     """Create the receipts room-change cache.

     :param size_of_cache: soft cap on cache entries (enforced elsewhere).
     """
     self._size_of_cache = size_of_cache
     # room -> most recent stream key seen for that room
     self._room_to_key = {}
     # stream key -> room, kept sorted so the oldest key is first
     self._cache = sorteddict()
     self._earliest_key = None
     self.name = "ReceiptsRoomChangeCache"
     # Register this cache in the global registry for metrics/inspection.
     caches_by_name[self.name] = self._cache
Ejemplo n.º 10
0
 def __init__(self, size_of_cache=10000):
     """Create the receipts room-change cache.

     :param size_of_cache: soft cap on cache entries (enforced elsewhere).
     """
     self._size_of_cache = size_of_cache
     # room -> most recent stream key seen for that room
     self._room_to_key = {}
     # stream key -> room, kept sorted so the oldest key is first
     self._cache = sorteddict()
     self._earliest_key = None
     self.name = "ReceiptsRoomChangeCache"
     # Register this cache in the global registry for metrics/inspection.
     caches_by_name[self.name] = self._cache
Ejemplo n.º 11
0
        def setInSlot(self, slot, key, value):
            """Write *value* into the labeled region *key* of the dense array
            and keep the sparse nonzero dict in sync.

            Nonzero entries of the update are inserted into self._sparseNZ;
            entries equal to the eraser label are zeroed out and removed.
            Marks the written region of "Output" dirty.
            """
            shape = self.inputs["shape"].value
            eraseLabel = self.inputs["eraser"].value
            neutralElement = 0

            self.lock.acquire()
            # fix slicing of single dimensions:
            start, stop = sliceToRoi(key, shape, extendSingleton = False)
            start = start.floor()
            stop = stop.floor()

            # Slice covering the update region in its own coordinates.
            tempKey = roiToSlice(start-start, stop-start, hardBind = True)

            # Ensure every dimension spans at least one element.
            stop += numpy.where(stop-start == 0,1,0)

            key = roiToSlice(start,stop)

            updateShape = tuple(stop-start)

            update = self._denseArray[key].copy()

            update[tempKey] = value

            # Flat index of the region's origin within the full array.
            startRavel = numpy.ravel_multi_index(numpy.array(start, numpy.int32),shape)

            #insert values into dict
            updateNZ = numpy.nonzero(numpy.where(update != neutralElement,1,0))
            updateNZRavelSmall = numpy.ravel_multi_index(updateNZ, updateShape)

            if isinstance(value, numpy.ndarray):
                valuesNZ = value.ravel()[updateNZRavelSmall]
            else:
                valuesNZ = value

            # Translate region-local flat indices to full-array flat indices.
            updateNZRavel = numpy.ravel_multi_index(updateNZ, shape)
            updateNZRavel += startRavel

            self._denseArray.ravel()[updateNZRavel] = valuesNZ

            # NOTE(review): the next two statements read back and re-write the
            # same values; they look redundant but are preserved as-is.
            valuesNZ = self._denseArray.ravel()[updateNZRavel]

            self._denseArray.ravel()[updateNZRavel] =  valuesNZ


            # Merge the new nonzero entries into the sorted sparse dict.
            td = blist.sorteddict(zip(updateNZRavel.tolist(),valuesNZ.tolist()))

            self._sparseNZ.update(td)

            #remove values to be deleted
            updateNZ = numpy.nonzero(numpy.where(update == eraseLabel,1,0))
            if len(updateNZ)>0:
                updateNZRavel = numpy.ravel_multi_index(updateNZ, shape)
                updateNZRavel += startRavel
                self._denseArray.ravel()[updateNZRavel] = neutralElement
                for index in updateNZRavel:
                    self._sparseNZ.pop(index)

            self.lock.release()

            self.outputs["Output"].setDirty(key)
Ejemplo n.º 12
0
    def __init__(self,
                 descriptor=None,
                 filters=None,
                 sort=None,
                 encoding=None,
                 work_dir=None,
                 *args,
                 **kwargs):
        """Create an article-index codec.

        :param descriptor: passed through to the parent codec.
        :param filters: optional filters applied to decoded entries.
        :param sort: one of XOVER_GROUPINGS; falls back to BY_POSTER_TIME.
        :param encoding: character set for retrieved usenet content;
            defaults to NNTP_DEFAULT_ENCODING.
        :param work_dir: working directory for decoded content.
        """
        super(CodecArticleIndex, self).__init__(descriptor=descriptor,
                                                work_dir=work_dir,
                                                *args,
                                                **kwargs)

        # Our Meta Content
        self.decoded = NNTPMetaContent(work_dir=self.work_dir)

        # Switch our content subvalue to be a sorteddict()
        self.decoded.content = sorteddict()

        # Filters
        self.filters = filters

        # Sort Order; invalid/missing values fall back to the default.
        self.sort = sort
        if self.sort is None or self.sort not in XOVER_GROUPINGS:
            self.sort = XoverGrouping.BY_POSTER_TIME

        # The character set encoding usenet content is retrieved in
        if encoding is None:
            self.encoding = NNTP_DEFAULT_ENCODING
        else:
            self.encoding = encoding
Ejemplo n.º 13
0
    def __init__(self,
                 db_filename,
                 fields_to_index=(),
                 columns=(),
                 cluster=()):
        """Create the in-memory sorted store backed by *db_filename*.

        :param db_filename: path used for persistence setup.
        :param fields_to_index: field names to maintain secondary indexes for.
        :param columns: field names to maintain column views for.
        :param cluster: cluster configuration passed to _setup_cluster.

        Fix: default arguments were mutable lists (``=[]``), a classic
        shared-state pitfall; empty tuples are equivalent here because the
        arguments are only iterated, never mutated.
        """
        self.data = sorteddict()
        # Keys/values views stay live as self.data changes.
        self.keys = self.data.keys()
        self.values = self.data.values()

        self.indexes = {field: sorteddict() for field in fields_to_index}
        self.columns = {field: sorteddict() for field in columns}

        self.lock = TimeoutLock(0.5)

        self._setup_cluster(cluster)
        self._setup_persistence(db_filename)
Ejemplo n.º 14
0
def run_tests(names, num_items, num_its, type_ = int):
    """Time each requested sorted-mapping container.

    Maps every entry of *names* to its container factory, runs _run_test
    on it, and returns the per-name results as a dict.
    """
    make = {
        'btrees': lambda: BTrees.OOBTree.OOBTree(),
        'blist': lambda: blist.sorteddict(),
        'bintrees': lambda: bintrees.FastRBTree(),
        'dict': lambda: dict(),
        'banyan_red_black_tree':
            lambda: banyan.SortedDict(key_type = type_,
                                      alg = banyan.RED_BLACK_TREE),
        'banyan_red_black_tree_rank_updator':
            lambda: banyan.SortedDict(key_type = type_,
                                      alg = banyan.RED_BLACK_TREE,
                                      updator = banyan.RankUpdator),
        'banyan_red_black_tree_min_max_updator':
            lambda: banyan.SortedDict(key_type = type_,
                                      alg = banyan.RED_BLACK_TREE,
                                      updator = banyan.MinMaxUpdator),
        'banyan_splay_tree':
            lambda: banyan.SortedDict(key_type = type_,
                                      alg = banyan.SPLAY_TREE),
        'banyan_sorted_list':
            lambda: banyan.SortedDict(key_type = type_,
                                      alg = banyan.SORTED_LIST),
        'banyan_red_black_tree_gen':
            lambda: banyan.SortedDict(alg = banyan.RED_BLACK_TREE),
        'banyan_splay_tree_gen':
            lambda: banyan.SortedDict(alg = banyan.SPLAY_TREE),
        'banyan_sorted_list_gen':
            lambda: banyan.SortedDict(alg = banyan.SORTED_LIST),
    }
    results = {}
    for name in names:
        results[name] = _run_test(make[name], type_, num_items, num_its)
    return results
Ejemplo n.º 15
0
    def __init__(self, eps=1e-6):
        """Set up the virtual-time scheduler state.

        :param eps: jobs with less than *eps* remaining work count as done,
            absorbing floating point imprecision.
        """

        # [remaining, jobid] queue for the *virtual* scheduler
        self.queue = blist()

        # Jobs that should have finished in the virtual time,
        # but didn't in the real (happens only in case of estimation
        # errors)
        self.late = set()

        # last time we run the schedule function
        self.last_t = 0

        # Jobs that are running in the real time
        self.running = set()

        # Jobs that have less than eps work to do are considered done
        # (deals with floating point imprecision)
        self.eps = eps

        # queue for late jobs, sorted by attained service
        self.late_queue = sorteddict()

        # {jobid: att} where att is jobid's attained service
        self.attained = {}

        # last result of calling the schedule function
        self.scheduled = {}
Ejemplo n.º 16
0
    def __init__(self, eps=1e-6):
        """Set up the virtual-time scheduler state.

        :param eps: jobs with less than *eps* remaining work count as done,
            absorbing floating point imprecision.
        """

        # [remaining, jobid] queue for the *virtual* scheduler
        self.queue = blist()

        # Jobs that should have finished in the virtual time,
        # but didn't in the real (happens only in case of estimation
        # errors)
        self.late = set()

        # last time we run the schedule function
        self.last_t = 0

        # Jobs that are running in the real time
        self.running = set()

        # Jobs that have less than eps work to do are considered done
        # (deals with floating point imprecision)
        self.eps = eps

        # queue for late jobs, sorted by attained service
        self.late_queue = sorteddict()

        # {jobid: att} where att is jobid's attained service
        self.attained = {}

        # last result of calling the schedule function
        self.scheduled = {}
Ejemplo n.º 17
0
 def __init__(self, time_window):
     """Create a sliding-window container of timestamped values.

     :param time_window: window length in milliseconds.
     """
     self.time_window = time_window
     # timestamp -> values observed at that timestamp, sorted by time
     self.ts_to_values = sorteddict()
     # all values currently inside the window, kept sorted
     self.values = sortedlist()
Ejemplo n.º 18
0
 def load_apls(self):
     """Parse every APL file in self.apl_files into self.apl_data.

     Each "peaklist start"/"peaklist end" block becomes one mgfData
     record, keyed by "<title>__tobi__<charge>". Peak lines are
     tab-separated m/z and intensity pairs collected into a sorted dict.
     """
     for f in self.apl_files:
         file = self.apl_folder + f
         print(file)
         with open(file, "r") as file_to_read:
             for line in file_to_read:
                 line = line.strip()
                 if "peaklist start" in line:
                     # Start a fresh record and peak map for this block.
                     mD = mgfData()
                     mz_int_additional_data = blist.sorteddict()
                 elif "peaklist end" in line:
                     mD.setMZ(mz_int_additional_data)
                     self.apl_data[mD.title + "__tobi__" + mD.charge] = mD
                 else:
                     # Header lines are KEY=VALUE; peak lines have no '='.
                     items = line.strip().split('=')
                     if items[0] == 'mz':
                         mD.setPepmass(items[1])
                     elif items[0] == "header":
                         mD.setTitle(items[1])
                     elif items[0] == "charge":
                         mD.setCharge(items[1])
                     elif items[0] == "fragmentation":
                         continue
                     else:  # peaks
                         mz_int_additional = items[0].split("\t")
                         if len(mz_int_additional) == 1:  # empty line
                             continue
                         else:
                             # m/z -> [intensity]
                             mz_int_additional_data[float(
                                 mz_int_additional[0])] = [
                                     mz_int_additional[1]
                                 ]
         print(len(self.apl_data))
Ejemplo n.º 19
0
 def __init__(self):
     """Initialize the sorted container with empty node/data structures.

     Fix: dropped the explicit ``return None`` — ``__init__`` always
     returns None implicitly, and an explicit return statement there is
     misleading noise.
     """
     super(SortedList, self).__init__()
     # node id -> node, kept sorted by id
     self.nodes = blist.sorteddict()
     # payloads, kept sorted
     self.datas = blist.sortedlist()
     self._max_id = 0
     self._except_max_id = 0
Ejemplo n.º 20
0
    def compare2original(self):
        """Re-read the original MGF and copy its peak data over the patched
        records.

        The first line of the file is stashed in self.firstLine. For every
        spectrum block, the peaks parsed from the original are written into
        the matching (same TITLE) entry of self.patched_Data.
        """
        firstLine = True
        with open(self.original_mgf, "r") as file_to_read:
            for line in file_to_read:
                if firstLine:
                    # Preserve the file header for later rewriting.
                    firstLine = False
                    self.firstLine = line
                    continue

                if "BEGIN IONS" in line:
                    mD = mgfData()
                    mz_int_additional_data = blist.sorteddict()
                elif "END IONS" in line:
                    mD.setMZ(mz_int_additional_data)

                    # Overwrite the patched record's peaks with the
                    # original values at the same m/z keys.
                    for spectrum_keys in self.patched_Data[mD.title].mz.keys():
                        self.patched_Data[
                            mD.title].mz[spectrum_keys] = mD.mz[spectrum_keys]
                else:
                    # Header lines are KEY=VALUE; peak lines have no '='.
                    items = line.strip().split('=')
                    if items[0] == "TITLE":
                        mD.setTitle(items[1])
                    elif items[0] == "PEPMASS":
                        mD.setPepmass(items[1])
                    elif items[0] == "CHARGE":
                        mD.setCharge(items[1])
                    else:  # peaks
                        mz_int_additional = items[0].split()
                        if len(mz_int_additional) == 0:  # empty line
                            continue
                        else:
                            # m/z -> [intensity, extra column]
                            mz_int_additional_data[float(
                                mz_int_additional[0])] = [
                                    mz_int_additional[1], mz_int_additional[2]
                                ]
 def __init__(self):
     """
     Initialize your data structure here.
     """
     # all ints map to the interval left
     self.left = {}
     # current intervals left to current intervals, pop and add
     self.intervals = sorteddict()
Ejemplo n.º 22
0
def term_frequency_query(terms):
    """Return a sorted dict mapping each term in *terms* to its count."""
    frequencies = sorteddict({})
    for term in terms:
        # .get() handles first-seen terms without a KeyError dance.
        frequencies[term] = frequencies.get(term, 0) + 1
    return frequencies
Ejemplo n.º 23
0
    def set(self, key, col, val):
        """Sets the value at the given key/column.

        In the average case, requires O(log(c)**2) operations, where c is the
        number of columns associated with the key.

        NOTE(review): uses ``basestring``, so this is Python 2 code. The
        setdefault creates the per-key column dict on first use, then the
        change is persisted immediately.
        """
        assert all(isinstance(datum, basestring) for datum in (key, col, val))
        self.kcv.setdefault(key, sorteddict())[col] = val
        self._persist()
Ejemplo n.º 24
0
def params_from_postdata(data):
    """Parse an OAuth POST body into (sorted params, signature).

    :param data: url-encoded body, 'k=v' pairs joined by '&'.
    :returns: (sorted dict of percent-encoded params, the decoded
        oauth_signature value, or None if the body carried none).

    Fix: ``oauth_signature`` was only assigned inside the loop, so a body
    without an oauth_signature parameter raised UnboundLocalError at the
    return; it is now initialized to None.
    """
    oauth_signature = None
    p = sorteddict()
    for i in data.split('&'):
        k, v = i.strip().split('=')
        if k == 'oauth_signature':
            oauth_signature = urllib.unquote(v)
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote_plus(v))
    return p, oauth_signature
Ejemplo n.º 25
0
def params_from_authorization(auth):
    """Parse an OAuth Authorization header into (sorted params, signature).

    :param auth: header value; comma-separated 'k="v"' pairs after the
        scheme word (which is skipped via [1:]).
    :returns: (sorted dict of percent-encoded params, the decoded
        oauth_signature value, or None if the header carried none).

    Fix: ``oauth_signature`` was only assigned inside the loop, so a header
    without an oauth_signature parameter raised UnboundLocalError at the
    return; it is now initialized to None.
    """
    oauth_signature = None
    p = sorteddict()
    for i in auth.split(',')[1:]:
        k,v = i.strip().split('=')
        if k == 'oauth_signature':
            # v is quoted; strip the surrounding quote characters.
            oauth_signature = urllib.unquote(v[1:-1])
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote(v[1:-1]))
    return p, oauth_signature
Ejemplo n.º 26
0
def params_from_postdata(data):
    """Parse an OAuth POST body into (sorted params, signature).

    :param data: url-encoded body, 'k=v' pairs joined by '&'.
    :returns: (sorted dict of percent-encoded params, the decoded
        oauth_signature value, or None if the body carried none).

    Fix: ``oauth_signature`` was only assigned inside the loop, so a body
    without an oauth_signature parameter raised UnboundLocalError at the
    return; it is now initialized to None.
    """
    oauth_signature = None
    p = sorteddict()
    for i in data.split('&'):
        k,v = i.strip().split('=')
        if k == 'oauth_signature':
            oauth_signature = urllib.unquote(v)
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote_plus(v))
    return p, oauth_signature
Ejemplo n.º 27
0
def params_from_authorization(auth):
    """Parse an OAuth Authorization header into (sorted params, signature).

    :param auth: header value; comma-separated 'k="v"' pairs after the
        scheme word (which is skipped via [1:]).
    :returns: (sorted dict of percent-encoded params, the decoded
        oauth_signature value, or None if the header carried none).

    Fix: ``oauth_signature`` was only assigned inside the loop, so a header
    without an oauth_signature parameter raised UnboundLocalError at the
    return; it is now initialized to None.
    """
    oauth_signature = None
    p = sorteddict()
    for i in auth.split(',')[1:]:
        k, v = i.strip().split('=')
        if k == 'oauth_signature':
            # v is quoted; strip the surrounding quote characters.
            oauth_signature = urllib.unquote(v[1:-1])
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote(v[1:-1]))
    return p, oauth_signature
Ejemplo n.º 28
0
    def __init__(self, description, rr_replay, source_pane):
        """Wrap one rr replay session.

        :param description: human-readable label for this replay.
        :param rr_replay: path/identifier of the rr trace to replay.
        :param source_pane: tmux pane used to display source.
        """
        self.rr_replay = rr_replay
        self.description = description
        # Shell-quoted so paths with spaces/special chars survive.
        self.spawn_cmd = "{} replay {}".format(shlex.quote(cli_args.rr),
                                               shlex.quote(rr_replay))
        self.source_pane = source_pane

        self.breakpoints = {}
        self.watches_set = 0
        # instruction count -> checkpoint, kept sorted for range lookups
        self.instr_to_checkpoint = sorteddict()
Ejemplo n.º 29
0
    def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None):
        """Create a stream-change cache.

        :param name: cache name used for metrics registration.
        :param current_stream_pos: earliest stream position we know about.
        :param max_size: base size, scaled by CACHE_SIZE_FACTOR.
        :param prefilled_cache: optional {entity: stream_pos} to seed with.

        Fix: ``prefilled_cache`` defaulted to a mutable ``{}``; it now
        defaults to None and is replaced with a fresh dict per call.
        """
        if prefilled_cache is None:
            prefilled_cache = {}
        self._max_size = int(max_size * CACHE_SIZE_FACTOR)
        self._entity_to_key = {}
        # stream key -> entity, sorted so the oldest position is first
        self._cache = sorteddict()
        self._earliest_known_stream_pos = current_stream_pos
        self.name = name
        self.metrics = register_cache(self.name, self._cache)

        for entity, stream_pos in prefilled_cache.items():
            self.entity_has_changed(entity, stream_pos)
Ejemplo n.º 30
0
    def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None):
        """Create a stream-change cache.

        :param name: cache name used for the global cache registry.
        :param current_stream_pos: earliest stream position we know about.
        :param max_size: maximum number of cached entries.
        :param prefilled_cache: optional {entity: stream_pos} to seed with.

        Fix: ``prefilled_cache`` defaulted to a mutable ``{}``; it now
        defaults to None and is replaced with a fresh dict per call.
        """
        if prefilled_cache is None:
            prefilled_cache = {}
        self._max_size = max_size
        self._entity_to_key = {}
        # stream key -> entity, sorted so the oldest position is first
        self._cache = sorteddict()
        self._earliest_known_stream_pos = current_stream_pos
        self.name = name
        caches_by_name[self.name] = self._cache

        for entity, stream_pos in prefilled_cache.items():
            self.entity_has_changed(entity, stream_pos)
Ejemplo n.º 31
0
    def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None):
        """Create a stream-change cache.

        :param name: cache name used for metrics registration.
        :param current_stream_pos: earliest stream position we know about.
        :param max_size: base size, scaled by CACHE_SIZE_FACTOR.
        :param prefilled_cache: optional {entity: stream_pos} to seed with.

        Fix: ``prefilled_cache`` defaulted to a mutable ``{}``; it now
        defaults to None and is replaced with a fresh dict per call.
        """
        if prefilled_cache is None:
            prefilled_cache = {}
        self._max_size = int(max_size * CACHE_SIZE_FACTOR)
        self._entity_to_key = {}
        # stream key -> entity, sorted so the oldest position is first
        self._cache = sorteddict()
        self._earliest_known_stream_pos = current_stream_pos
        self.name = name
        self.metrics = register_cache(self.name, self._cache)

        for entity, stream_pos in prefilled_cache.items():
            self.entity_has_changed(entity, stream_pos)
Ejemplo n.º 32
0
    def __init__(self, description, rr_replay, source_pane):
        """Wrap one rr replay session.

        :param description: human-readable label for this replay.
        :param rr_replay: path/identifier of the rr trace to replay.
        :param source_pane: tmux pane used to display source.
        """
        self.rr_replay = rr_replay
        self.description = description
        # Shell-quoted so paths with spaces/special chars survive.
        self.spawn_cmd = "{} replay {}".format(
            shlex.quote(cli_args.rr), shlex.quote(rr_replay))
        self.source_pane = source_pane

        self.breakpoints = {}
        self.watches_set = 0
        # instruction count -> checkpoint, kept sorted for range lookups
        self.instr_to_checkpoint = sorteddict()
Ejemplo n.º 33
0
    def refresh(self,progress):
        """Rebuild the vocabulary census from the current document text.

        Re-tokenizes every line, tracking lang= markup so words inside an
        alternate-language span are counted against that alternate
        dictionary. Words whose count drops to zero are removed afterwards.

        :param progress: a progress dialog; its maximum is set to the
            document's block count and it is advanced every 20 lines.
        """
        global re_lang_attr, re_token

        count = 0
        end_count = self.document.blockCount()
        progress.setMaximum(end_count)
        progress.setValue(0)

        # get a reference to the dictionary to use
        self.speller = self.my_book.get_speller()
        # clear the alt-dict list.
        self.alt_tags = blist.sorteddict()
        # Zero out all counts and property sets that we have so far. We will
        # develop new properties when each word is first seen. Properties
        # such as HY will not have changed, but both AD and XX might have
        # changed while the word text remains the same.
        for j in range(len(self.vocab)) :
            self.vocab_vview[j][0] = 0
            self.vocab_vview[j][1] = set()

        # iterate over all lines extracting tokens and processing them.
        alt_dict = None
        alt_tag = None
        for line in self.document.all_lines():
            count += 1
            if 0 == (count % 20):
                # Keep the UI responsive during long documents.
                progress.setValue(count)
                QCoreApplication.processEvents()
            j = 0
            m = re_token.search(line,0)
            while m : # while match is not None
                if m.group(6) : # start-tag; has it lang= ?
                    d = re_lang_attr.search(m.group(8))
                    if d :
                        # Entering an alternate-dictionary span.
                        alt_dict = d.group(1)
                        alt_tag = m.group(7)
                elif m.group(9) :
                    if m.group(10) == alt_tag :
                        # end tag of a lang= start tag
                        alt_dict = None
                        alt_tag = None
                else :
                    self._add_token(m.group(0),alt_dict)
                j = m.end()
                m = re_token.search(line,j)
        # look for zero counts and delete those items. In order not to
        # confuse the value and keys views, make a list of the actual word
        # tokens to be deleted, then use del.
        togo = []
        for j in range(len(self.vocab)) :
            if self.vocab_vview[j][0] == 0 :
                togo.append(self.vocab_kview[j])
        for key in togo:
            del self.vocab[key]
        progress.setValue(end_count)
Ejemplo n.º 34
0
 def order(self):
     """Return the ordering mapping stored on the context, creating it
     (and forcing an initial ordering) on first access.
     """
     order_attr = self._order_attr
     # aq_base strips acquisition wrappers so hasattr/getattr see only
     # attributes stored directly on the context object.
     context = aq_base(self.context)
     # We store ordering on the context.  This could get quite
     # large, but it will be used often enough that including it on
     # the object probably makes sense.
     if not hasattr(context, self._order_attr):
         setattr(context, order_attr, sorteddict())
         # Force initial ordering
         self.orderObjects()
     return getattr(context, order_attr)
Ejemplo n.º 35
0
    def reset(self):
        """
        Reset our decoded content

        Clears the parent codec's state, then replaces the decoded meta
        content with a fresh sorted dict.
        """
        super(CodecArticleIndex, self).reset()

        # Our Meta Content
        self.decoded = NNTPMetaContent(work_dir=self.work_dir)

        # Switch our decoded subvalue to be a sorteddict()
        self.decoded.content = sorteddict()
Ejemplo n.º 36
0
    def reset(self):
        """
        Reset our decoded content

        Clears the parent codec's state, then replaces the decoded meta
        content with a fresh sorted dict.
        """
        super(CodecArticleIndex, self).reset()

        # Our Meta Content
        self.decoded = NNTPMetaContent(work_dir=self.work_dir)

        # Switch our decoded subvalue to be a sorteddict()
        self.decoded.content = sorteddict()
Ejemplo n.º 37
0
 def __init__(self,my_book):
     """Create the character-census model for one book and register its
     metadata handlers.
     """
     super().__init__()
     # Save access to the book, from which we learn the metadata
     # manager and the edit data model.
     self.my_book = my_book
     # The character list as a dict 'x':count.
     self.census = sorteddict()
     # Key and Values views on the census dict for indexed access.
     self.k_view = None # supplied in char_read() or refresh()
     self.v_view = None
     # Register to handle metadata.
     self.my_book.get_meta_manager().register('CHARCENSUS', self.char_read, self.char_save)
Ejemplo n.º 38
0
 def token_map(self):
     """
     returns a map of token -> {key[1], ...key[n]}
     :return:
     """
     #TODO: keep track of this during normal storage operation
     # using a set in case there are token collisions
     grouped = defaultdict(set)
     for stored_key in self._data.keys():
         token = self.partitioner.get_key_token(stored_key)
         grouped[token].add(stored_key)
     return sorteddict(grouped)
def get_rescore_kaldi_n_best(kaldi_n_best_sentence, kaldi_n_best_acoustics,
                             session, m, fasttext_model, max_word_length,
                             char_vocab, rnn_state):
    """Score each Kaldi n-best hypothesis and return the sentences
    ordered by the sorted-dict key order.
    """
    scored = {
        sentence: get_sentence_wer(sentence, acoustics, session, m,
                                   fasttext_model, max_word_length,
                                   char_vocab, rnn_state)
        for sentence, acoustics in zip(kaldi_n_best_sentence,
                                       kaldi_n_best_acoustics)
    }
    return sorteddict(scored).keys()
Ejemplo n.º 40
0
    def setupOutputs(self):
        """Configure output slot metadata and handle label deletion.

        On a shape change, (re)allocates the dense uint8 label array and an
        empty sparse nonzero dict. When deleteLabel holds a valid label,
        removes that label everywhere and shifts higher labels down by one
        to keep label values contiguous.
        """
        if (numpy.array(self._oldShape) != self.inputs["shape"].value).any():
            shape = self.inputs["shape"].value
            self._oldShape = shape
            self.outputs["Output"].meta.dtype = numpy.uint8
            self.outputs["Output"].meta.shape = shape

            # FIXME: Don't give arbitrary axistags.  Specify them correctly if you need them.
            #self.outputs["Output"].meta.axistags = vigra.defaultAxistags(len(shape))

            self.inputs["Input"].meta.shape = shape

            self.outputs["nonzeroValues"].meta.dtype = object
            self.outputs["nonzeroValues"].meta.shape = (1, )

            self.outputs["nonzeroCoordinates"].meta.dtype = object
            self.outputs["nonzeroCoordinates"].meta.shape = (1, )

            # Dense backing store plus a sorted sparse map of nonzeros.
            self._denseArray = numpy.zeros(shape, numpy.uint8)
            self._sparseNZ = blist.sorteddict()

        if self.inputs["deleteLabel"].ready(
        ) and self.inputs["deleteLabel"].value != -1:
            labelNr = self.inputs["deleteLabel"].value

            neutralElement = 0
            self.inputs["deleteLabel"].setValue(-1)  #reset state of inputslot
            self.lock.acquire()

            # Find the entries to remove
            updateNZ = numpy.nonzero(
                numpy.where(self._denseArray == labelNr, 1, 0))
            if len(updateNZ) > 0:
                # Convert to 1-D indexes for the raveled version
                updateNZRavel = numpy.ravel_multi_index(
                    updateNZ, self._denseArray.shape)
                # Zero out the entries we don't want any more
                self._denseArray.ravel()[updateNZRavel] = neutralElement
                # Remove the zeros from the sparse list
                for index in updateNZRavel:
                    self._sparseNZ.pop(index)
            # Labels are continuous values: Shift all higher label values down by 1.
            self._denseArray[:] = numpy.where(self._denseArray > labelNr,
                                              self._denseArray - 1,
                                              self._denseArray)
            self._maxLabel = self._denseArray.max()
            self.lock.release()
            self.outputs["nonzeroValues"].setDirty(slice(None))
            self.outputs["nonzeroCoordinates"].setDirty(slice(None))
            self.outputs["Output"].setDirty(slice(None))
            self.outputs["maxLabel"].setValue(self._maxLabel)
Ejemplo n.º 41
0
 def writeBinaryFileIndex(self,index_file):
     """Write the in-memory inverted index as a binary postings file plus a
     text vocabulary file, then clear the in-memory index.

     :param index_file: base name; postings go to indexFolder/<name> and
         vocabulary to termFolder/<name>.txt.
     """
     with open(self.indexFolder+index_file, 'wb') as file_index:
         with open(self.termFolder+index_file+'.txt','w') as file_term:
             # Sort the index by term
             inverse_index = sorteddict(self.index)
             self.index.clear()
             pos_term = 0
             # Walk the index term by term
             for elem in inverse_index.items():
                 term = elem[0]
                 nb_doc = 0
                 # Sort this term's postings list by docID
                 docs_list = sorteddict(elem[1])
                 # Walk the postings list doc by doc
                 for doc in docs_list.items():
                     nb_doc += 1
                     doc_id = doc[0]
                     nb_occur = doc[1]
                     # Write docID and occurrence count as two binary ints
                     file_index.write(struct.pack('ii',doc_id,nb_occur))
                 # Write the matching vocabulary line: term, number of docs
                 # containing it, and the start offset (in postings) of its
                 # list within the binary file.
                 file_term.write(term+"|"+str(nb_doc)+"|"+str(pos_term)+"\n")
                 pos_term += nb_doc # start position of the next term
 def order(self):
     """Return the ordering mapping stored on the context, (re)creating it
     when missing or of an outdated type, and forcing an initial ordering.
     """
     order_attr = self._order_attr
     # aq_base strips acquisition wrappers so getattr sees only attributes
     # stored directly on the context object.
     context = aq_base(self.context)
     # We store ordering on the context.  This could get quite
     # large, but it will be used often enough that including it on
     # the object probably makes sense.
     order_obj = getattr(context, order_attr, None)
     # Recreate when absent, or when the stored object lacks viewkeys
     # (i.e. is not the expected dict-like type).
     if order_obj is None or getattr(order_obj, 'viewkeys', None) is None:
         setattr(context, order_attr, sorteddict())
         # Force initial ordering
         self.orderObjects()
         # Override CSRF protection if we can
         request = getattr(self.context, 'REQUEST', None)
         if request is not None:
             alsoProvides(request, IDisableCSRFProtection)
     return getattr(context, order_attr)
Ejemplo n.º 43
0
    def setupOutputs(self):
        """Configure output slot metadata and handle label deletion.

        On a shape change, (re)allocates the dense uint8 label array and an
        empty sparse nonzero dict. When deleteLabel holds a valid label,
        removes that label everywhere and shifts higher labels down by one
        to keep label values contiguous.
        """
        if (self._oldShape != self.inputs["shape"].value).any():
            shape = self.inputs["shape"].value
            self._oldShape = shape
            self.outputs["Output"].meta.dtype = numpy.uint8
            self.outputs["Output"].meta.shape = shape

            # FIXME: Don't give arbitrary axistags.  Specify them correctly if you need them.
            #self.outputs["Output"].meta.axistags = vigra.defaultAxistags(len(shape))

            self.inputs["Input"].meta.shape = shape


            self.outputs["nonzeroValues"].meta.dtype = object
            self.outputs["nonzeroValues"].meta.shape = (1,)

            self.outputs["nonzeroCoordinates"].meta.dtype = object
            self.outputs["nonzeroCoordinates"].meta.shape = (1,)

            # Dense backing store plus a sorted sparse map of nonzeros.
            self._denseArray = numpy.zeros(shape, numpy.uint8)
            self._sparseNZ =  blist.sorteddict()

        if self.inputs["deleteLabel"].ready() and self.inputs["deleteLabel"].value != -1:
            labelNr = self.inputs["deleteLabel"].value

            neutralElement = 0
            self.inputs["deleteLabel"].setValue(-1) #reset state of inputslot
            self.lock.acquire()

            # Find the entries to remove
            updateNZ = numpy.nonzero(numpy.where(self._denseArray == labelNr,1,0))
            if len(updateNZ)>0:
                # Convert to 1-D indexes for the raveled version
                updateNZRavel = numpy.ravel_multi_index(updateNZ, self._denseArray.shape)
                # Zero out the entries we don't want any more
                self._denseArray.ravel()[updateNZRavel] = neutralElement
                # Remove the zeros from the sparse list
                for index in updateNZRavel:
                    self._sparseNZ.pop(index)
            # Labels are continuous values: Shift all higher label values down by 1.
            self._denseArray[:] = numpy.where(self._denseArray > labelNr, self._denseArray - 1, self._denseArray)
            self._maxLabel = self._denseArray.max()
            self.lock.release()
            self.outputs["nonzeroValues"].setDirty(slice(None))
            self.outputs["nonzeroCoordinates"].setDirty(slice(None))
            self.outputs["Output"].setDirty(slice(None))
            self.outputs["maxLabel"].setValue(self._maxLabel)
Ejemplo n.º 44
0
	def endElement(self, name):
		"""SAX end-of-element hook: commit the buffered text for the closed
		tag and flush the title/page map every 7000 pages.

		NOTE(review): the write concatenates a ``str`` with the result of
		``.encode('utf-8')`` — valid only under Python 2, where encode
		returns a byte string; under Python 3 this would raise TypeError.
		"""
		global titlePageMapper
		global pageNumber

		if name=="title":
			wikipediaHandler.title=self.bufferObject
			wikipediaHandler.titleWords=tokenizeTitle(wikipediaHandler.title, str(pageNumber))

		if name=="text":
			wikipediaHandler.text=self.bufferObject
			#wikipediaHandler.textWords=tokenizeText(wikipediaHandler.text,str(pageNumber))
		if name=="page":
			# Periodically dump the page-id -> title mapping and start a
			# fresh one to bound memory use.
			if pageNumber%7000==0:
				for key in titlePageMapper:
					title_page_map_fp.write(str(key) + " " + titlePageMapper[key].strip().encode('utf-8') + "\n")
				titlePageMapper = blist.sorteddict({})

		# Reset the character buffer for the next element.
		self.bufferObject=""
Ejemplo n.º 45
0
def findsol(mol, target, steps, maxlen, replacements):
    """Depth-first search for a rewrite sequence turning `mol` into `target`.

    Each replacement is a pair (src, pattern): every occurrence of `pattern`
    in the current molecule may be rewritten back to `src`.  The search keeps
    a sorted index of the fewest steps at which each molecule was reached and
    prunes branches that arrive no cheaper.  Prints and exits via sys.exit(0)
    as soon as the first solution is found; otherwise returns -1.

    Note: `maxlen` is accepted for interface compatibility but unused here.
    """
    branches = [(mol, steps)]
    index = blist.sorteddict()  # molecule -> fewest steps seen so far
    iterations = 0
    solution = -1
    while branches:
        iterations += 1
        if iterations % 100000 == 0:
            # Lightweight progress indicator on one terminal line.
            print(len(index), len(branches), end='\r')
        current, depth = branches.pop()
        steps = depth + 1
        # Prune: another branch already reached this molecule at least as fast.
        if current in index and index[current] <= depth:
            continue
        index[current] = depth
        for src, pattern in replacements:
            # Walk every occurrence of `pattern`, skipping past each match.
            start = 0
            while True:
                pos = current.find(pattern, start)
                if pos == -1:
                    break
                newmol = current[:pos] + src + current[pos + len(pattern):]
                if newmol == target:
                    if solution == -1 or solution > steps:
                        solution = steps
                        print('Solution:', solution)
                        sys.exit(0)
                elif src != 'e':
                    # 'e' is the terminal electron symbol; never expand past it.
                    branches.append((newmol, steps))
                start = pos + len(pattern)
    return solution
Ejemplo n.º 46
0
def indexText(words, pageNumber):
	"""Add title words for one page to the global in-memory title index.

	Each word contributes an equal share (1/len(words), rounded to 4
	decimals) of the page's weight.  When the tracked index size crosses
	file_size_limit, the index is written out and reset.
	"""
	word_length = len(words)
	if word_length:
		# Equal weight per word so a page's title sums to ~1.0.
		term = round((1/float(word_length)), 4)
		global title_size, indexTitle
		for word in words:
			if word not in indexTitle:
				# First posting for this word.
				# NOTE(review): this branch does not add to title_size,
				# unlike the final branch — confirm that is intentional.
				indexTitle[word]={}
				indexTitle[word][pageNumber]=term
			elif pageNumber in indexTitle[word]:
				# Word repeats in the same title: accumulate its weight.
				indexTitle[word][pageNumber]+=term
			else:
				indexTitle[word][pageNumber]=term
				# Approximate memory growth by the size of the new entry.
				title_size = title_size + sys.getsizeof(pageNumber) + sys.getsizeof(indexTitle[word][pageNumber])

		# Spill the index to disk once it grows past the limit.
		if title_size>=file_size_limit:
			writeOutput(indexTitle, 'title')
			indexTitle = blist.sorteddict({})
			title_size=0
Ejemplo n.º 47
0
    def __init__(self, eps=1e-6):
        """Initialize an empty least-attained-service scheduler state.

        Args:
            eps: quantization step for attained service; real attained
                service is represented as (real attained service // eps),
                which trades a little precision for robust float handling.
        """
        # Quantization step for attained-service bookkeeping.
        self.eps = eps

        # Timestamp of the most recent schedule change.
        self.last_t = 0

        # Per-job attained service: {jobid: attained}.
        self.attained = {}

        # Jobs grouped by attained service, kept sorted: {attained: {jobid}}.
        self.queue = sorteddict()

        # Output of the latest schedule() call, grouped as
        # {attained: [service, {jobid}]}.  Entry point for building
        # XXX + LAS hybrid schedulers: it is sufficient to touch here.
        self.scheduled = {}
Ejemplo n.º 48
0
def indexText(words, pageNumber):
    """Add title words for one page to the global in-memory title index.

    Each word contributes an equal share (1/len(words), rounded to 4
    decimals) of the page's weight.  When the tracked index size crosses
    file_size_limit, the index is written out and reset.
    """
    global title_size, indexTitle
    if not words:
        return
    # Equal weight per word so a page's title sums to ~1.0.
    term = round(1.0 / len(words), 4)
    for word in words:
        postings = indexTitle.get(word)
        if postings is None:
            # First posting for this word.
            indexTitle[word] = {pageNumber: term}
        elif pageNumber in postings:
            # Word repeats in the same title: accumulate its weight.
            postings[pageNumber] += term
        else:
            postings[pageNumber] = term
            # Approximate memory growth by the size of the new entry.
            title_size += sys.getsizeof(pageNumber) + sys.getsizeof(postings[pageNumber])

    # Spill the index to disk once it grows past the limit.
    if title_size >= file_size_limit:
        writeOutput(indexTitle, 'title')
        indexTitle = blist.sorteddict({})
        title_size = 0
Ejemplo n.º 49
0
    def __init__(self, eps=1e-6):
        """Initialize an empty least-attained-service scheduler state.

        Args:
            eps: quantization step for attained-service bookkeeping.
        """

        # job attained service is represented as (real attained service // eps)
        # (not perfectly precise but avoids problems with floats)
        self.eps = eps

        # sorted dictionary for {attained: {jobid}}
        self.queue = sorteddict()

        # {jobid: attained} dictionary
        self.attained = {}

        # result of the last time the schedule() method was called
        # grouped by {attained: [service, {jobid}]}
        self.scheduled = {}
        # This is the entry point for doing XXX + LAS schedulers:
        # it's sufficient to touch here

        # last time when the schedule was changed
        self.last_t = 0
Ejemplo n.º 50
0
def run_tests(names, num_items, num_its):
    """Benchmark the requested sorted-mapping implementations.

    Args:
        names: iterable of benchmark labels (keys of the table below).
        num_items: number of items inserted per run.
        num_its: number of iterations per benchmark.

    Returns:
        Dict mapping each requested label to its _run_test result.
    """
    # Tmp Ami - make key-type separate labels below
    fns = {
        'btrees': lambda es: BTrees.OOBTree.OOBTree([(e, 1) for e in es]),
        'blist': lambda es: blist.sorteddict([(e, 1) for e in es]),
        'bintrees': lambda es: bintrees.FastRBTree([(e, 1) for e in es]),
        'set': lambda es: {(e, 1) for e in es},
        'banyan_red_black_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SORTED_LIST),
        'banyan_red_black_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.RED_BLACK_TREE),
        # BUG FIX: was banyan.SortedDic (missing 't'), which raised
        # AttributeError whenever this benchmark was selected.
        'banyan_splay_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SORTED_LIST),
    }
    return {name: _run_test(fns[name], int, num_items, num_its)
            for name in names}
Ejemplo n.º 51
0
    def output_to_file(self, out_filename, tags):
        """Aggregate cpu.trace.* series from InfluxDB and write them to a file.

        Builds a query filtered by the given tag/value pairs (plus the
        optional self.start / self.end time window), sums the values of each
        named trace series, and writes "name value" lines to out_filename,
        skipping names matched by self.filter_exclude.

        Note: Python 2 code (print statements, dict.iteritems).
        """
        print "=== Making file %s" % out_filename
        # Build the WHERE clause from tag equality conditions.
        # NOTE(review): values are interpolated unescaped into the query
        # string — fine for trusted tags, but not injection-safe.
        clauses = ["%s ='%s'" % (tag, value) for (tag, value) in tags.iteritems()]
        query = 'select value from /^cpu.trace.*/ where %s' % " and ".join(clauses)
        if self.start:
            query += " and time>=\'{}\'".format(self.start)
        if self.end:
            query += " and time<=\'{}\'".format(self.end)
        print "running query: %s" % query
        metrics = self.client.query(query)
        try:
            series = metrics.raw['series']
        except KeyError:
            # No 'series' key means the query matched nothing.
            print "got an empty recordset"
            return

        print "putting metrics into a sorted dictionary..."
        traces = sorteddict()
        for metric in series:
            # Skip the purely numeric cpu.trace.<n> series; only named
            # traces are aggregated.
            if re.match(r'cpu\.trace\.\d+', metric['name']):
                continue
            name = self._format_metric_name(metric['name'], 'cpu.trace.')
            # Each point is (timestamp, value); sum the values.
            value = sum([v[1] for v in metric['values']])
            if name in traces:
                traces[name] = traces[name] + value
            else:
                traces[name] = value

        print "output this dictionary to the file..."
        with open(out_filename, "w") as f:
            for t in traces:
                # Drop any trace whose name contains an excluded substring.
                found = False
                for filter_string in self.filter_exclude:
                    if filter_string in t:
                        found = True
                        break
                if not found:
                    v = traces[t]
                    if t != v: # this is Andrew's  cpu.trace.23 = 23  measures;  I don't know what are they for
                        f.write('%s %d\n' % (t, v))
        print "output finished."
Ejemplo n.º 52
0
    def endElement(self, name):
        """SAX end-of-element hook: consume the buffered character data.

        Copies the accumulated text into the module-level wikipediaHandler
        for <title> and <text> elements, and at the end of every 7000th
        <page> flushes the title->page map to disk and resets it.
        """
        global titlePageMapper
        global pageNumber

        if name == "title":
            # The character buffer holds the page title; tokenize it now.
            title = self.bufferObject
            wikipediaHandler.title = title
            wikipediaHandler.titleWords = tokenizeTitle(title, str(pageNumber))
        elif name == "text":
            wikipediaHandler.text = self.bufferObject
            #wikipediaHandler.textWords=tokenizeText(wikipediaHandler.text,str(pageNumber))
        elif name == "page" and pageNumber % 7000 == 0:
            # Flush the title->page map periodically to bound memory use.
            for key in titlePageMapper:
                line = (str(key) + " " +
                        titlePageMapper[key].strip().encode('utf-8') + "\n")
                title_page_map_fp.write(line)
            # Start a fresh sorted map for the next batch of pages.
            titlePageMapper = blist.sorteddict({})

        # Reset the character buffer for the next element.
        self.bufferObject = ""
Ejemplo n.º 53
0
    def __init__(self, *args, **kargs):
        """Construct the typeHintedDict.

        Args:
            *args, **kargs: Passed straight through to the dict() constructor.

        Calls the dict() constructor, then runs through the keys of the
        created dictionary and either uses the string type embedded in
        the keyname to generate the type hint (and remove the
        embedded string type from the keyname) or determines the likely
        type hint from the value of the dict element.
        """
        # Sorted map of key -> type-hint string, kept alongside the data.
        self._typehints = sorteddict()
        super(typeHintedDict, self).__init__(*args, **kargs)
        # Re-insert every key through our own __setitem__, which strips any
        # embedded type hint from the key name and records it in _typehints.
        # Iterate a snapshot (list(...)) because the dict is mutated below.
        for key in list(self.keys()):  # Check through all the keys and see if they contain
            # type hints. If they do, move them to the
            # _typehint dict
            value = super(typeHintedDict, self).__getitem__(key)
            # Remove the raw entry before re-adding it under the cleaned key.
            super(typeHintedDict, self).__delitem__(key)
            self[key] = value  # __Setitem__ has the logic to handle embedded type hints correctly
Ejemplo n.º 54
0
    def output_to_file(self, out_filename, tags):
        print "=== Making file %s" % out_filename
        clauses = [
            "%s ='%s'" % (tag, value) for (tag, value) in tags.iteritems()
        ] + self.extra_clauses
        query = 'select value from /^cpu.trace.*/ where %s' % " and ".join(
            clauses)
        print "running query: %s" % query
        metrics = self.client.query(query)
        try:
            series = metrics.raw['series']
        except KeyError:
            print "got an empty recordset"
            return

        print "putting metrics into a sorted dictionary..."
        traces = sorteddict()
        for metric in series:
            if re.match(r'cpu\.trace\.\d+', metric['name']):
                continue
            name = self._format_metric_name(metric['name'], 'cpu.trace.')
            value = sum([v[1] for v in metric['values']])
            if name in traces:
                traces[name] = traces[name] + value
            else:
                traces[name] = value

        print "output this dictionary to the file..."
        with open(out_filename, "w") as f:
            for t in traces:
                found = False
                for filter_string in self.filter_exclude:
                    if filter_string in t:
                        found = True
                        break
                if not found:
                    v = traces[t]
                    if t != v:  # this is Andrew's  cpu.trace.23 = 23  measures;  I don't know what are they for
                        f.write('%s %d\n' % (t, v))
        print "output finished."
Ejemplo n.º 55
0
        def notifyConnectAll(self):
            """React to input-slot changes: (re)allocate the label storage and
            handle a pending label deletion.

            Two independent phases:
            1. If the "shape" input changed, reconfigure all output slots and
               reset the dense label array and its sparse companion.
            2. If "deleteLabel" holds a valid label number, remove that label
               from both representations and shift higher labels down by one.
            """
          # Phase 1: shape changed -> rebuild output metadata and storage.
          if (self._oldShape != self.inputs["shape"].value).all():
                shape = self.inputs["shape"].value
                self._oldShape = shape
                self.outputs["Output"]._dtype = numpy.uint8
                self.outputs["Output"]._shape = shape
                self.outputs["Output"]._axistags = vigra.defaultAxistags(len(shape))

                self.inputs["Input"].meta.shape = shape

                # nonzeroValues/nonzeroCoordinates are single-element object
                # slots carrying arrays of the sparse entries.
                self.outputs["nonzeroValues"]._dtype = object
                self.outputs["nonzeroValues"]._shape = (1,)
                self.outputs["nonzeroValues"]._axistags = vigra.defaultAxistags(1)

                self.outputs["nonzeroCoordinates"]._dtype = object
                self.outputs["nonzeroCoordinates"]._shape = (1,)
                self.outputs["nonzeroCoordinates"]._axistags = vigra.defaultAxistags(1)

                # Dense label image plus a sorted sparse {ravel-index: value} map.
                self._denseArray = numpy.zeros(shape, numpy.uint8)
                self._sparseNZ =  blist.sorteddict()

          # Phase 2: a label deletion was requested via the deleteLabel slot.
          if self.inputs["deleteLabel"].connected() and self.inputs["deleteLabel"].value != -1:
                labelNr = self.inputs["deleteLabel"].value
                neutralElement = 0
                self.inputs["deleteLabel"].setValue(-1) #reset state of inputslot
                # Guard the shared dense/sparse structures during mutation.
                self.lock.acquire()

                #remove values to be deleted
                updateNZ = numpy.nonzero(numpy.where(self._denseArray == labelNr,1,0))
                if len(updateNZ)>0:
                    # Convert N-d coordinates to flat indices into the raveled array.
                    updateNZRavel = numpy.ravel_multi_index(updateNZ, self._denseArray.shape)
                    self._denseArray.ravel()[updateNZRavel] = neutralElement
                    # Drop the corresponding sparse entries as well.
                    for index in updateNZRavel:
                        self._sparseNZ.pop(index)
                # Labels are a contiguous range: close the gap left by labelNr.
                self._denseArray[:] = numpy.where(self._denseArray > labelNr, self._denseArray - 1, self._denseArray)
                self.lock.release()
                self.outputs["nonzeroValues"][0] = numpy.array(self._sparseNZ.values())
                self.outputs["nonzeroCoordinates"][0] = numpy.array(self._sparseNZ.keys())
                self.outputs["Output"][:] = self._denseArray #set output dirty
Ejemplo n.º 56
0
    def __init__(self,
                 memory_limit=1000000,
                 filter_tags=False,
                 remove_stopwords=False,
                 case_sensitive=False,
                 with_stemming=False):
        """Initialize an inverted-index builder.

        Args:
            memory_limit: max size of the in-memory index in bytes; once
                reached, the merge-based (partial files) method is used.
            filter_tags: strip markup tags during tokenization.
            remove_stopwords: drop stopwords during tokenization.
            case_sensitive: keep original casing of terms.
            with_stemming: apply stemming to terms.
        """
        # Normalization options applied while tokenizing documents.
        self.filter_tags = filter_tags
        self.remove_stopwords = remove_stopwords
        self.case_sensitive = case_sensitive
        self.with_stemming = with_stemming

        # Whole index in in-memory mode, capped at memory_limit bytes.
        self.memory_limit = memory_limit
        self.inv_index = {}
        self.indexed = False

        # Ids of the documents indexed so far.
        self._doc_id_list = []
        # Filenames of the partial indexes written in merge-based mode.
        self._partial_files_names = []
        # Line offset of each term inside the InvertedFile.
        self.dict_terms_offset = {}
        self.offset = 1

        # term -> posting list, kept sorted; used while merging partials.
        self.dict_term_pl = sorteddict()
Ejemplo n.º 57
0
    def __init__(self, hs):
        """Initialize the federation send queue.

        Args:
            hs: the homeserver, used for the hostname, clock and notifier.

        Sets up the per-stream-position queues, registers a size gauge for
        each of them, and schedules periodic queue clearing.
        """
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {
        }  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = sorteddict()  # Stream position -> user_id

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = sorteddict(
        )  # stream position -> (destination, key)

        self.edus = sorteddict()  # stream position -> Edu

        self.failures = sorteddict(
        )  # stream position -> (destination, Failure)

        self.device_messages = sorteddict()  # stream position -> destination

        self.pos = 1
        self.pos_time = sorteddict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            # BUG FIX: label the gauge with the `name` parameter instead of
            # reading the enclosing loop variable `queue_name`, which only
            # worked by accident of being evaluated eagerly at call time.
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name, ), "",
                [], lambda: len(queue))

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "failures",
                "device_messages",
                "pos_time",
        ]:
            register(queue_name, getattr(self, queue_name))

        # Periodically drop entries that every destination has acked.
        self.clock.looping_call(self._clear_queue, 30 * 1000)
Ejemplo n.º 58
0
    def __init__(self, eps=1e-6):
        """Initialize an empty SRPT-with-errors scheduler state.

        Args:
            eps: jobs with less than eps work left are considered done,
                guarding against floating point imprecision.
        """
        # Floating point tolerance for "job is finished".
        self.eps = eps

        # Timestamp of the last run of the update function.
        self.last_t = 0

        # Jobs whose estimated finish time has passed but which are still
        # running (estimation errors).
        self.late = set()

        # [remaining, jobid] heap driving the SRPT scheduler.
        self.queue = sortedlist()

        # Attained service per job: {jobid: att}.
        self.attained = {}

        # Late jobs, ordered by attained service.
        self.late_queue = sorteddict()

        # Result of the most recent call to the schedule function.
        self.scheduled = {}
Ejemplo n.º 59
0
def run_tests(names, num_items, num_its):
    """Benchmark the requested sorted-mapping implementations.

    Args:
        names: iterable of benchmark labels (keys of the table below).
        num_items: number of items inserted per run.
        num_its: number of iterations per benchmark.

    Returns:
        Dict mapping each requested label to its _run_test result.
    """
    # Tmp Ami - make key-type separate labels below
    fns = {
        'btrees': lambda es: BTrees.OOBTree.OOBTree([(e, 1) for e in es]),
        'blist': lambda es: blist.sorteddict([(e, 1) for e in es]),
        'bintrees': lambda es: bintrees.FastRBTree([(e, 1) for e in es]),
        'set': lambda es: {(e, 1) for e in es},
        'banyan_red_black_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SORTED_LIST),
        'banyan_red_black_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.RED_BLACK_TREE),
        # BUG FIX: was banyan.SortedDic (missing 't'), which raised
        # AttributeError whenever this benchmark was selected.
        'banyan_splay_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SORTED_LIST),
    }
    return {name: _run_test(fns[name], int, num_items, num_its)
            for name in names}
Ejemplo n.º 60
0
    def __init__(self, eps=1e-6):
        """Initialize an empty SRPT-with-errors scheduler state.

        Args:
            eps: jobs with less than eps work left are considered done,
                guarding against floating point imprecision.
        """

        # job that should have finished, but didn't
        # (because of estimation errors)
        self.late = set()

        # [remaining, jobid] heap for the SRPT scheduler
        self.queue = sortedlist()

        # last time we run the update function
        self.last_t = 0

        # Jobs that have less than eps work to do are considered done
        # (deals with floating point imprecision)
        self.eps = eps

        # {jobid: att} where att is jobid's attained service
        self.attained = {}

        # queue for late jobs, sorted by attained service
        self.late_queue = sorteddict()

        # last result of calling the schedule function
        self.scheduled = {}