def __init__(self, my_book):
    # Save reference to the book
    self.my_book = my_book
    # Save reference to the metamanager
    self.metamgr = my_book.get_meta_manager()
    # Save reference to the edited document
    self.document = my_book.get_edit_model()
    # Save reference to a speller, which will be the default
    # at this point.
    self.speller = my_book.get_speller()
    # The vocabulary list as a blist sorted dict.
    self.vocab = blist.sorteddict()
    # Key and Values views on the vocab list for indexing by table row.
    self.vocab_kview = self.vocab.keys()
    self.vocab_vview = self.vocab.values()
    # The good- and bad-words sets and the scannos set.
    self.good_words = set()
    self.bad_words = set()
    self.scannos = set()
    # A dict of words that use an alt-dict tag. The key is a word and the
    # value is the alt-dict tag string.
    self.alt_tags = blist.sorteddict()
    # Register metadata readers and writers.
    self.metamgr.register(C.MD_GW, self.good_read, self.good_save)
    self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save)
    self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save)
    self.metamgr.register(C.MD_VL, self.word_read, self.word_save)
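# Illustrative sketch (not part of the original module): blist.sorteddict
# keys() and values() views support integer indexing, which is what lets
# the vocabulary model above translate a table row number straight into a
# word and its properties.
import blist

vocab = blist.sorteddict()
vocab['zebra'] = [1, set()]
vocab['apple'] = [3, set()]
vocab['mango'] = [2, set()]
kview = vocab.keys()
vview = vocab.values()
for row in range(len(vocab)):
    print(row, kview[row], vview[row][0])
# 0 apple 3
# 1 mango 2
# 2 zebra 1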
def __init__(self):
    """ Initializes trader by order books for the asset
        from different markets
    """
    MultiAssetStrategy.__init__(self)
    # order queues ordered by their best asks and bids
    # something like std::map<Ticks, OrderQueue>[2]
    self._bests = [sorteddict(), sorteddict()]
    self._oldBests = {}
def __init__(self):
    """ Initializes trader by order books for the asset
        from different markets
    """
    MultiAssetStrategy.__init__(self)
    from blist import sorteddict
    # order queues ordered by their best asks and bids
    # something like std::map<Ticks, OrderQueue>[2]
    self._bests = [sorteddict(), sorteddict()]
    self._oldBests = {}
def __init__(self):
    self.index = sorteddict()
    self.tag = []
    self.pgid = None
    self.title = None
    self.title_words = sorteddict()
    self.text = ''
    self.page_map = sortedlist()
    self.Infobox = sorteddict()
    self.Links = sorteddict()
    self.stopwords = open('stopwords.txt', 'r').readlines()[0].strip().split(',')
    xml.sax.ContentHandler.__init__(self)
def __init__(self, db_filename, fields_to_index=[], columns=[], cluster=[]):
    self.data = sorteddict()
    self.keys = self.data.keys()
    self.values = self.data.values()
    self.indexes = {field: sorteddict() for field in fields_to_index}
    self.columns = {field: sorteddict() for field in columns}
    self.lock = TimeoutLock(0.5)
    self._setup_cluster(cluster)
    self._setup_persistence(db_filename)
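# Hypothetical helper (insert() is not from the original class) showing how
# such secondary indexes are typically kept in step with the primary
# sorteddict: each indexed field maps its values to the set of matching keys.
from blist import sorteddict

data = sorteddict()
indexes = {'age': sorteddict()}

def insert(key, record):
    # store the record, then index it under each configured field
    data[key] = record
    for field, index in indexes.items():
        index.setdefault(record[field], set()).add(key)

insert('alice', {'age': 33})
insert('bob', {'age': 27})
print(list(indexes['age'].items()))   # [(27, {'bob'}), (33, {'alice'})]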
def solution(counts):
    # sorted descending: the key function negates each count
    agg = blist.sorteddict(lambda x: -x)
    for elem in counts:
        agg[elem] = agg.get(elem, 0) + 1
    best = 10000
    if next(agg.iterkeys()) >= 9:
        cop = blist.sorteddict(agg)
        num, count = cop.popitem()
        cop[num // 3] = cop.get(num // 3, 0) + 3 * count
        best = count * 2 + solve(cop)
    return min(solve(agg), best)
def run_tests(names, num_items, num_its, type_=int):
    fns = dict([
        ('btrees', lambda: BTrees.OOBTree.OOBTree()),
        ('blist', lambda: blist.sorteddict()),
        ('bintrees', lambda: bintrees.FastRBTree()),
        ('dict', lambda: dict()),
        ('banyan_red_black_tree',
         lambda: banyan.SortedDict(key_type=type_, alg=banyan.RED_BLACK_TREE)),
        ('banyan_red_black_tree_rank_updator',
         lambda: banyan.SortedDict(key_type=type_, alg=banyan.RED_BLACK_TREE,
                                   updator=banyan.RankUpdator)),
        ('banyan_red_black_tree_min_max_updator',
         lambda: banyan.SortedDict(key_type=type_, alg=banyan.RED_BLACK_TREE,
                                   updator=banyan.MinMaxUpdator)),
        ('banyan_splay_tree',
         lambda: banyan.SortedDict(key_type=type_, alg=banyan.SPLAY_TREE)),
        ('banyan_sorted_list',
         lambda: banyan.SortedDict(key_type=type_, alg=banyan.SORTED_LIST)),
        ('banyan_red_black_tree_gen', lambda: banyan.SortedDict(alg=banyan.RED_BLACK_TREE)),
        ('banyan_splay_tree_gen', lambda: banyan.SortedDict(alg=banyan.SPLAY_TREE)),
        ('banyan_sorted_list_gen', lambda: banyan.SortedDict(alg=banyan.SORTED_LIST)),
    ])
    t = dict([])
    for name in names:
        t[name] = _run_test(fns[name], type_, num_items, num_its)
    return t
def loadPatched(self):
    with open(self.patched_mgf, "r") as file_to_read:
        for line in file_to_read:
            if "BEGIN IONS" in line:
                mD = mgfData()
                mz_int_additional_data = blist.sorteddict()
            elif "END IONS" in line:
                mD.setMZ(mz_int_additional_data)
                self.patched_Data[mD.title] = mD
            else:
                items = line.strip().split('=')
                if items[0] == "TITLE":
                    mD.setTitle(items[1])
                elif items[0] == "PEPMASS":
                    mD.setPepmass(items[1])
                elif items[0] == "CHARGE":
                    mD.setCharge(items[1])
                else:
                    # peaks
                    mz_int_additional = items[0].split()
                    if len(mz_int_additional) == 0:
                        # empty line
                        continue
                    else:
                        mz_int_additional_data[float(mz_int_additional[0])] = [
                            mz_int_additional[1]
                        ]
def __init__(self, size_of_cache=10000):
    self._size_of_cache = size_of_cache
    self._room_to_key = {}
    self._cache = sorteddict()
    self._earliest_key = None
    self.name = "ReceiptsRoomChangeCache"
    caches_by_name[self.name] = self._cache
def setInSlot(self, slot, key, value):
    shape = self.inputs["shape"].value
    eraseLabel = self.inputs["eraser"].value
    neutralElement = 0

    self.lock.acquire()
    # fix slicing of single dimensions:
    start, stop = sliceToRoi(key, shape, extendSingleton=False)
    start = start.floor()
    stop = stop.floor()

    tempKey = roiToSlice(start - start, stop - start, hardBind=True)
    stop += numpy.where(stop - start == 0, 1, 0)
    key = roiToSlice(start, stop)

    updateShape = tuple(stop - start)

    update = self._denseArray[key].copy()
    update[tempKey] = value

    startRavel = numpy.ravel_multi_index(numpy.array(start, numpy.int32), shape)

    # insert values into dict
    updateNZ = numpy.nonzero(numpy.where(update != neutralElement, 1, 0))
    updateNZRavelSmall = numpy.ravel_multi_index(updateNZ, updateShape)

    if isinstance(value, numpy.ndarray):
        valuesNZ = value.ravel()[updateNZRavelSmall]
    else:
        valuesNZ = value

    updateNZRavel = numpy.ravel_multi_index(updateNZ, shape)
    updateNZRavel += startRavel

    self._denseArray.ravel()[updateNZRavel] = valuesNZ
    # read the stored values back so they carry the dense array's dtype
    valuesNZ = self._denseArray.ravel()[updateNZRavel]
    self._denseArray.ravel()[updateNZRavel] = valuesNZ

    td = blist.sorteddict(zip(updateNZRavel.tolist(), valuesNZ.tolist()))
    self._sparseNZ.update(td)

    # remove values to be deleted
    updateNZ = numpy.nonzero(numpy.where(update == eraseLabel, 1, 0))
    if len(updateNZ) > 0:
        updateNZRavel = numpy.ravel_multi_index(updateNZ, shape)
        updateNZRavel += startRavel
        self._denseArray.ravel()[updateNZRavel] = neutralElement
        for index in updateNZRavel:
            self._sparseNZ.pop(index)

    self.lock.release()
    self.outputs["Output"].setDirty(key)
def __init__(self, descriptor=None, filters=None, sort=None,
             encoding=None, work_dir=None, *args, **kwargs):
    super(CodecArticleIndex, self).__init__(
        descriptor=descriptor, work_dir=work_dir, *args, **kwargs)

    # Our Meta Content
    self.decoded = NNTPMetaContent(work_dir=self.work_dir)

    # Switch our content subvalue to be a sorteddict()
    self.decoded.content = sorteddict()

    # Filters
    self.filters = filters

    # Sort Order
    self.sort = sort
    if self.sort is None or self.sort not in XOVER_GROUPINGS:
        self.sort = XoverGrouping.BY_POSTER_TIME

    # The character set encoding usenet content is retrieved in
    if encoding is None:
        self.encoding = NNTP_DEFAULT_ENCODING
    else:
        self.encoding = encoding
def __init__(self, eps=1e-6):
    # [remaining, jobid] queue for the *virtual* scheduler
    self.queue = blist()
    # Jobs that should have finished in the virtual time, but didn't
    # in the real time (happens only in case of estimation errors)
    self.late = set()
    # last time we ran the schedule function
    self.last_t = 0
    # Jobs that are running in the real time
    self.running = set()
    # Jobs that have less than eps work to do are considered done
    # (deals with floating point imprecision)
    self.eps = eps
    # queue for late jobs, sorted by attained service
    self.late_queue = sorteddict()
    # {jobid: att} where att is jobid's attained service
    self.attained = {}
    # last result of calling the schedule function
    self.scheduled = {}
def __init__(self, time_window):
    """
    :param time_window: time in milliseconds
    """
    self.time_window = time_window
    self.ts_to_values = sorteddict()
    self.values = sortedlist()
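# Hedged sketch of how such a window can be maintained (add() is a
# hypothetical method, not from the original class): timestamps are the
# sorteddict keys, so expired entries are always at the front of the view.
from blist import sorteddict, sortedlist

class TimeWindow(object):
    def __init__(self, time_window):
        self.time_window = time_window    # window length in milliseconds
        self.ts_to_values = sorteddict()  # timestamp -> value
        self.values = sortedlist()        # window values, kept sorted

    def add(self, ts, value):
        self.ts_to_values[ts] = value
        self.values.add(value)
        keys = self.ts_to_values.keys()
        # evict everything that fell out of the window
        while keys and keys[0] <= ts - self.time_window:
            self.values.remove(self.ts_to_values.pop(keys[0]))

w = TimeWindow(1000)
w.add(0, 5.0)
w.add(1500, 7.0)                      # evicts the value stored at ts=0
print(list(w.ts_to_values.items()))   # [(1500, 7.0)]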
def load_apls(self):
    for f in self.apl_files:
        file = self.apl_folder + f
        print(file)
        with open(file, "r") as file_to_read:
            for line in file_to_read:
                line = line.strip()
                if "peaklist start" in line:
                    mD = mgfData()
                    mz_int_additional_data = blist.sorteddict()
                elif "peaklist end" in line:
                    mD.setMZ(mz_int_additional_data)
                    self.apl_data[mD.title + "__tobi__" + mD.charge] = mD
                else:
                    items = line.strip().split('=')
                    if items[0] == 'mz':
                        mD.setPepmass(items[1])
                    elif items[0] == "header":
                        mD.setTitle(items[1])
                    elif items[0] == "charge":
                        mD.setCharge(items[1])
                    elif items[0] == "fragmentation":
                        continue
                    else:
                        # peaks
                        mz_int_additional = items[0].split("\t")
                        if len(mz_int_additional) == 1:
                            # empty line
                            continue
                        else:
                            mz_int_additional_data[float(mz_int_additional[0])] = [
                                mz_int_additional[1]
                            ]
    print(len(self.apl_data))
def __init__(self):
    super(SortedList, self).__init__()
    self.nodes = blist.sorteddict()
    self.datas = blist.sortedlist()
    self._max_id = 0
    self._except_max_id = 0
def compare2original(self):
    firstLine = True
    with open(self.original_mgf, "r") as file_to_read:
        for line in file_to_read:
            if firstLine:
                firstLine = False
                self.firstLine = line
                continue
            if "BEGIN IONS" in line:
                mD = mgfData()
                mz_int_additional_data = blist.sorteddict()
            elif "END IONS" in line:
                mD.setMZ(mz_int_additional_data)
                for spectrum_keys in self.patched_Data[mD.title].mz.keys():
                    self.patched_Data[mD.title].mz[spectrum_keys] = mD.mz[spectrum_keys]
            else:
                items = line.strip().split('=')
                if items[0] == "TITLE":
                    mD.setTitle(items[1])
                elif items[0] == "PEPMASS":
                    mD.setPepmass(items[1])
                elif items[0] == "CHARGE":
                    mD.setCharge(items[1])
                else:
                    # peaks
                    mz_int_additional = items[0].split()
                    if len(mz_int_additional) == 0:
                        # empty line
                        continue
                    else:
                        mz_int_additional_data[float(mz_int_additional[0])] = [
                            mz_int_additional[1],
                            mz_int_additional[2]
                        ]
def __init__(self):
    """
    Initialize your data structure here.
    """
    # maps every covered int to the left endpoint of its interval
    self.left = {}
    # maps left endpoints to the current intervals; merged by pop-and-re-add
    self.intervals = sorteddict()
def term_frequency_query(terms):
    term_freq_dict = sorteddict({})
    for term in terms:
        try:
            term_freq_dict[term] += 1
        except KeyError:
            term_freq_dict[term] = 1
    return term_freq_dict
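# Usage note (illustrative): because the counts live in a sorteddict,
# iterating the result yields terms in sorted order with no extra sort.
freqs = term_frequency_query(['banana', 'apple', 'banana'])
print(list(freqs.items()))   # [('apple', 1), ('banana', 2)]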
def set(self, key, col, val):
    """Sets the value at the given key/column.

    In the average case, requires O(log(c)**2) operations, where c is
    the number of columns associated with the key."""
    assert all(isinstance(datum, basestring) for datum in (key, col, val))
    self.kcv.setdefault(key, sorteddict())[col] = val
    self._persist()
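# Minimal sketch of the same key/column layout outside the class (the kcv
# name mirrors the attribute above; the data is made up): setdefault
# creates the per-key sorteddict on first write, and the columns come
# back in sorted order for free.
from blist import sorteddict

kcv = sorteddict()
kcv.setdefault('row1', sorteddict())['b'] = '2'
kcv.setdefault('row1', sorteddict())['a'] = '1'
print(list(kcv['row1'].items()))   # [('a', '1'), ('b', '2')]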
def params_from_postdata(data):
    p = sorteddict()
    for i in data.split('&'):
        k, v = i.strip().split('=')
        if k == 'oauth_signature':
            oauth_signature = urllib.unquote(v)
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote_plus(v))
    return p, oauth_signature
def params_from_authorization(auth):
    p = sorteddict()
    for i in auth.split(',')[1:]:
        k, v = i.strip().split('=')
        if k == 'oauth_signature':
            oauth_signature = urllib.unquote(v[1:-1])
            continue
        p[percent_encode(k)] = percent_encode(urllib.unquote(v[1:-1]))
    return p, oauth_signature
def __init__(self, description, rr_replay, source_pane):
    self.rr_replay = rr_replay
    self.description = description
    self.spawn_cmd = "{} replay {}".format(
        shlex.quote(cli_args.rr), shlex.quote(rr_replay))
    self.source_pane = source_pane

    self.breakpoints = {}
    self.watches_set = 0
    self.instr_to_checkpoint = sorteddict()
def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache={}):
    self._max_size = int(max_size * CACHE_SIZE_FACTOR)
    self._entity_to_key = {}
    self._cache = sorteddict()
    self._earliest_known_stream_pos = current_stream_pos
    self.name = name
    self.metrics = register_cache(self.name, self._cache)

    for entity, stream_pos in prefilled_cache.items():
        self.entity_has_changed(entity, stream_pos)
def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache={}):
    self._max_size = max_size
    self._entity_to_key = {}
    self._cache = sorteddict()
    self._earliest_known_stream_pos = current_stream_pos
    self.name = name
    caches_by_name[self.name] = self._cache

    for entity, stream_pos in prefilled_cache.items():
        self.entity_has_changed(entity, stream_pos)
def refresh(self, progress):
    global re_lang_attr, re_token
    count = 0
    end_count = self.document.blockCount()
    progress.setMaximum(end_count)
    progress.setValue(0)
    # get a reference to the dictionary to use
    self.speller = self.my_book.get_speller()
    # clear the alt-dict list.
    self.alt_tags = blist.sorteddict()
    # Zero out all counts and property sets that we have so far. We will
    # develop new properties when each word is first seen. Properties
    # such as HY will not have changed, but both AD and XX might have
    # changed while the word text remains the same.
    for j in range(len(self.vocab)):
        self.vocab_vview[j][0] = 0
        self.vocab_vview[j][1] = set()
    # iterate over all lines extracting tokens and processing them.
    alt_dict = None
    alt_tag = None
    for line in self.document.all_lines():
        count += 1
        if 0 == (count % 20):
            progress.setValue(count)
            QCoreApplication.processEvents()
        j = 0
        m = re_token.search(line, 0)
        while m:  # while match is not None
            if m.group(6):  # start-tag; has it lang= ?
                d = re_lang_attr.search(m.group(8))
                if d:
                    alt_dict = d.group(1)
                    alt_tag = m.group(7)
            elif m.group(9):
                if m.group(10) == alt_tag:
                    # end tag of a lang= start tag
                    alt_dict = None
                    alt_tag = None
            else:
                self._add_token(m.group(0), alt_dict)
            j = m.end()
            m = re_token.search(line, j)
    # look for zero counts and delete those items. In order not to
    # confuse the value and keys views, make a list of the actual word
    # tokens to be deleted, then use del.
    togo = []
    for j in range(len(self.vocab)):
        if self.vocab_vview[j][0] == 0:
            togo.append(self.vocab_kview[j])
    for key in togo:
        del self.vocab[key]
    progress.setValue(end_count)
def order(self):
    order_attr = self._order_attr
    context = aq_base(self.context)
    # We store ordering on the context. This could get quite
    # large, but it will be used often enough that including it on
    # the object probably makes sense.
    if not hasattr(context, self._order_attr):
        setattr(context, order_attr, sorteddict())
        # Force initial ordering
        self.orderObjects()
    return getattr(context, order_attr)
def reset(self):
    """
    Reset our decoded content
    """
    super(CodecArticleIndex, self).reset()

    # Our Meta Content
    self.decoded = NNTPMetaContent(work_dir=self.work_dir)

    # Switch our decoded subvalue to be a sorteddict()
    self.decoded.content = sorteddict()
def __init__(self, my_book):
    super().__init__()
    # Save access to the book, from which we learn the metadata
    # manager and the edit data model.
    self.my_book = my_book
    # The character list as a dict 'x':count.
    self.census = sorteddict()
    # Key and Values views on the census dict for indexed access.
    self.k_view = None  # supplied in char_read() or refresh()
    self.v_view = None
    # Register to handle metadata.
    self.my_book.get_meta_manager().register(
        'CHARCENSUS', self.char_read, self.char_save)
def token_map(self):
    """
    returns a map of token -> {key[1], ...key[n]}
    :return:
    """
    # TODO: keep track of this during normal storage operation
    # using a set in case there are token collisions
    token_map = defaultdict(set)
    for key in self._data.keys():
        token_map[self.partitioner.get_key_token(key)].add(key)
    token_map = sorteddict(token_map)
    return token_map
def get_rescore_kaldi_n_best(kaldi_n_best_sentence, kaldi_n_best_acoustics,
                             session, m, fasttext_model, max_word_length,
                             char_vocab, rnn_state):
    rescore_kaldi_n_best = {}
    for sentence, acoustics in zip(kaldi_n_best_sentence, kaldi_n_best_acoustics):
        sentence_wer = get_sentence_wer(sentence, acoustics, session, m,
                                        fasttext_model, max_word_length,
                                        char_vocab, rnn_state)
        rescore_kaldi_n_best[sentence] = sentence_wer
    return sorteddict(rescore_kaldi_n_best).keys()
def setupOutputs(self):
    if (numpy.array(self._oldShape) != self.inputs["shape"].value).any():
        shape = self.inputs["shape"].value
        self._oldShape = shape
        self.outputs["Output"].meta.dtype = numpy.uint8
        self.outputs["Output"].meta.shape = shape

        # FIXME: Don't give arbitrary axistags. Specify them correctly if you need them.
        #self.outputs["Output"].meta.axistags = vigra.defaultAxistags(len(shape))

        self.inputs["Input"].meta.shape = shape

        self.outputs["nonzeroValues"].meta.dtype = object
        self.outputs["nonzeroValues"].meta.shape = (1,)

        self.outputs["nonzeroCoordinates"].meta.dtype = object
        self.outputs["nonzeroCoordinates"].meta.shape = (1,)

        self._denseArray = numpy.zeros(shape, numpy.uint8)
        self._sparseNZ = blist.sorteddict()

    if self.inputs["deleteLabel"].ready() and self.inputs["deleteLabel"].value != -1:
        labelNr = self.inputs["deleteLabel"].value
        neutralElement = 0
        self.inputs["deleteLabel"].setValue(-1)  # reset state of inputslot
        self.lock.acquire()

        # Find the entries to remove
        updateNZ = numpy.nonzero(numpy.where(self._denseArray == labelNr, 1, 0))
        if len(updateNZ) > 0:
            # Convert to 1-D indexes for the raveled version
            updateNZRavel = numpy.ravel_multi_index(updateNZ, self._denseArray.shape)
            # Zero out the entries we don't want any more
            self._denseArray.ravel()[updateNZRavel] = neutralElement
            # Remove the zeros from the sparse list
            for index in updateNZRavel:
                self._sparseNZ.pop(index)
        # Labels are continuous values: Shift all higher label values down by 1.
        self._denseArray[:] = numpy.where(self._denseArray > labelNr,
                                          self._denseArray - 1,
                                          self._denseArray)
        self._maxLabel = self._denseArray.max()
        self.lock.release()
        self.outputs["nonzeroValues"].setDirty(slice(None))
        self.outputs["nonzeroCoordinates"].setDirty(slice(None))
        self.outputs["Output"].setDirty(slice(None))
        self.outputs["maxLabel"].setValue(self._maxLabel)
def writeBinaryFileIndex(self, index_file):
    with open(self.indexFolder + index_file, 'wb') as file_index:
        with open(self.termFolder + index_file + '.txt', 'w') as file_term:
            # Sort the index by term
            inverse_index = sorteddict(self.index)
            self.index.clear()
            pos_term = 0
            # Read the index term by term
            for elem in inverse_index.items():
                term = elem[0]
                nb_doc = 0
                # Sort the postings list by docID
                docs_list = sorteddict(elem[1])
                # Read the postings list docID by docID
                for doc in docs_list.items():
                    nb_doc += 1
                    doc_id = doc[0]
                    nb_occur = doc[1]
                    # Write the docID and the occurrence count in binary
                    file_index.write(struct.pack('ii', doc_id, nb_occur))
                # Write the matching line in the vocabulary file: the term,
                # the number of docs containing it, and the start position
                # of its postings list in the binary file
                file_term.write(term + "|" + str(nb_doc) + "|" + str(pos_term) + "\n")
                pos_term += nb_doc  # start position of the next term
def order(self):
    order_attr = self._order_attr
    context = aq_base(self.context)
    # We store ordering on the context. This could get quite
    # large, but it will be used often enough that including it on
    # the object probably makes sense.
    order_obj = getattr(context, order_attr, None)
    if order_obj is None or getattr(order_obj, 'viewkeys', None) is None:
        setattr(context, order_attr, sorteddict())
        # Force initial ordering
        self.orderObjects()
        # Override CSRF protection if we can
        request = getattr(self.context, 'REQUEST', None)
        if request is not None:
            alsoProvides(request, IDisableCSRFProtection)
    return getattr(context, order_attr)
def setupOutputs(self):
    if (self._oldShape != self.inputs["shape"].value).any():
        shape = self.inputs["shape"].value
        self._oldShape = shape
        self.outputs["Output"].meta.dtype = numpy.uint8
        self.outputs["Output"].meta.shape = shape

        # FIXME: Don't give arbitrary axistags. Specify them correctly if you need them.
        #self.outputs["Output"].meta.axistags = vigra.defaultAxistags(len(shape))

        self.inputs["Input"].meta.shape = shape

        self.outputs["nonzeroValues"].meta.dtype = object
        self.outputs["nonzeroValues"].meta.shape = (1,)

        self.outputs["nonzeroCoordinates"].meta.dtype = object
        self.outputs["nonzeroCoordinates"].meta.shape = (1,)

        self._denseArray = numpy.zeros(shape, numpy.uint8)
        self._sparseNZ = blist.sorteddict()

    if self.inputs["deleteLabel"].ready() and self.inputs["deleteLabel"].value != -1:
        labelNr = self.inputs["deleteLabel"].value
        neutralElement = 0
        self.inputs["deleteLabel"].setValue(-1)  # reset state of inputslot
        self.lock.acquire()

        # Find the entries to remove
        updateNZ = numpy.nonzero(numpy.where(self._denseArray == labelNr, 1, 0))
        if len(updateNZ) > 0:
            # Convert to 1-D indexes for the raveled version
            updateNZRavel = numpy.ravel_multi_index(updateNZ, self._denseArray.shape)
            # Zero out the entries we don't want any more
            self._denseArray.ravel()[updateNZRavel] = neutralElement
            # Remove the zeros from the sparse list
            for index in updateNZRavel:
                self._sparseNZ.pop(index)
        # Labels are continuous values: Shift all higher label values down by 1.
        self._denseArray[:] = numpy.where(self._denseArray > labelNr,
                                          self._denseArray - 1,
                                          self._denseArray)
        self._maxLabel = self._denseArray.max()
        self.lock.release()
        self.outputs["nonzeroValues"].setDirty(slice(None))
        self.outputs["nonzeroCoordinates"].setDirty(slice(None))
        self.outputs["Output"].setDirty(slice(None))
        self.outputs["maxLabel"].setValue(self._maxLabel)
def endElement(self, name):
    global titlePageMapper
    global pageNumber
    if name == "title":
        wikipediaHandler.title = self.bufferObject
        wikipediaHandler.titleWords = tokenizeTitle(
            wikipediaHandler.title, str(pageNumber))
    if name == "text":
        wikipediaHandler.text = self.bufferObject
        #wikipediaHandler.textWords = tokenizeText(wikipediaHandler.text, str(pageNumber))
    if name == "page":
        if pageNumber % 7000 == 0:
            for key in titlePageMapper:
                title_page_map_fp.write(
                    str(key) + " " +
                    titlePageMapper[key].strip().encode('utf-8') + "\n")
            titlePageMapper = blist.sorteddict({})
    self.bufferObject = ""
def findsol(mol, target, steps, maxlen, replacements):
    branches = [(mol, steps)]
    index = blist.sorteddict()
    i = 0
    solution = -1
    while branches:
        i += 1
        if i % 100000 == 0:
            print(len(index), len(branches), end='\r')
        branch = branches.pop()
        steps = branch[1] + 1
        # If no other branch reached here first
        if branch[0] not in index or (index[branch[0]] > branch[1]):
            index[branch[0]] = branch[1]
            for rep in replacements:
                done = False
                off = 0
                while not done:
                    done = True
                    pos = branch[0].find(rep[1], off)
                    # If found
                    if pos != -1:
                        done = False
                        newmol = branch[0][:pos] + rep[0] + \
                            branch[0][pos + len(rep[1]):]
                        if newmol == target:
                            if solution == -1 or solution > steps:
                                solution = steps
                                print('Solution:', solution)
                                sys.exit(0)
                        else:
                            if rep[0] != 'e':
                                branches.append((newmol, steps))
                        off = pos + len(rep[1])
    return solution
def indexText(words, pageNumber):
    word_length = len(words)
    if word_length:
        term = round((1 / float(word_length)), 4)
        global title_size, indexTitle
        for word in words:
            if word not in indexTitle:
                indexTitle[word] = {}
                indexTitle[word][pageNumber] = term
            elif pageNumber in indexTitle[word]:
                indexTitle[word][pageNumber] += term
            else:
                indexTitle[word][pageNumber] = term
            title_size = title_size + sys.getsizeof(pageNumber) + \
                sys.getsizeof(indexTitle[word][pageNumber])
            if title_size >= file_size_limit:
                writeOutput(indexTitle, 'title')
                indexTitle = blist.sorteddict({})
                title_size = 0
def __init__(self, eps=1e-6):
    # job attained service is represented as (real attained service // eps)
    # (not perfectly precise, but avoids problems with floats)
    self.eps = eps

    # sorted dictionary of {attained: {jobid}}
    self.queue = sorteddict()

    # {jobid: attained} dictionary
    self.attained = {}

    # result of the last time the schedule() method was called,
    # grouped as {attained: [service, {jobid}]}
    self.scheduled = {}
    # This is the entry point for doing XXX + LAS schedulers:
    # it's sufficient to touch here

    # last time the schedule was changed
    self.last_t = 0
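# Illustrative sketch (not the original schedule() method): with
# {attained: {jobid}} stored in a sorteddict, the least-attained jobs sit
# at the front of the keys() view, which is exactly the set LAS serves next.
from blist import sorteddict

queue = sorteddict()
queue.setdefault(0, set()).update({'j1', 'j2'})
queue.setdefault(5, set()).add('j3')
least_attained = queue.keys()[0]
print(least_attained, sorted(queue[least_attained]))   # 0 ['j1', 'j2']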
def run_tests(names, num_items, num_its):
    # Tmp Ami - make key-type separate labels below
    fns = dict([
        ('btrees', lambda es: BTrees.OOBTree.OOBTree([(e, 1) for e in es])),
        ('blist', lambda es: blist.sorteddict([(e, 1) for e in es])),
        ('bintrees', lambda es: bintrees.FastRBTree([(e, 1) for e in es])),
        ('set', lambda es: set([(e, 1) for e in es])),
        ('banyan_red_black_tree',
         lambda es: banyan.SortedDict([(e, 1) for e in es],
                                      alg=banyan.RED_BLACK_TREE)),
        ('banyan_splay_tree',
         lambda es: banyan.SortedDict([(e, 1) for e in es],
                                      alg=banyan.SPLAY_TREE)),
        ('banyan_sorted_list',
         lambda es: banyan.SortedDict([(e, 1) for e in es],
                                      alg=banyan.SORTED_LIST)),
        ('banyan_red_black_tree_gen',
         lambda es: banyan.SortedDict([(e, 1) for e in es], key_type=int,
                                      alg=banyan.RED_BLACK_TREE)),
        ('banyan_splay_tree_gen',
         lambda es: banyan.SortedDict([(e, 1) for e in es], key_type=int,
                                      alg=banyan.SPLAY_TREE)),
        ('banyan_sorted_list_gen',
         lambda es: banyan.SortedDict([(e, 1) for e in es], key_type=int,
                                      alg=banyan.SORTED_LIST)),
    ])
    t = dict([])
    for name in names:
        t[name] = _run_test(fns[name], int, num_items, num_its)
    return t
def output_to_file(self, out_filename, tags):
    print "=== Making file %s" % out_filename
    clauses = ["%s ='%s'" % (tag, value) for (tag, value) in tags.iteritems()]
    query = 'select value from /^cpu.trace.*/ where %s' % " and ".join(clauses)
    if self.start:
        query += " and time>=\'{}\'".format(self.start)
    if self.end:
        query += " and time<=\'{}\'".format(self.end)
    print "running query: %s" % query
    metrics = self.client.query(query)
    try:
        series = metrics.raw['series']
    except KeyError:
        print "got an empty recordset"
        return
    print "putting metrics into a sorted dictionary..."
    traces = sorteddict()
    for metric in series:
        if re.match(r'cpu\.trace\.\d+', metric['name']):
            continue
        name = self._format_metric_name(metric['name'], 'cpu.trace.')
        value = sum([v[1] for v in metric['values']])
        if name in traces:
            traces[name] = traces[name] + value
        else:
            traces[name] = value
    print "output this dictionary to the file..."
    with open(out_filename, "w") as f:
        for t in traces:
            found = False
            for filter_string in self.filter_exclude:
                if filter_string in t:
                    found = True
                    break
            if not found:
                v = traces[t]
                if t != v:
                    # this is Andrew's cpu.trace.23 = 23 measures; I don't know what they are for
                    f.write('%s %d\n' % (t, v))
    print "output finished."
def __init__(self, *args, **kargs):
    """Construct the typeHintedDict.

    Args:
        *args, **kargs: Pass any parameters through to the dict() constructor.

    Calls the dict() constructor, then runs through the keys of the created
    dictionary and either uses the string type embedded in the keyname to
    generate the type hint (and removes the embedded string type from the
    keyname) or determines the likely type hint from the value of the dict
    element.
    """
    self._typehints = sorteddict()
    super(typeHintedDict, self).__init__(*args, **kargs)
    for key in list(self.keys()):
        # Check through all the keys and see if they contain
        # type hints. If they do, move them to the
        # _typehints dict
        value = super(typeHintedDict, self).__getitem__(key)
        super(typeHintedDict, self).__delitem__(key)
        self[key] = value  # __setitem__ has the logic to handle embedded type hints correctly
def output_to_file(self, out_filename, tags):
    print "=== Making file %s" % out_filename
    clauses = [
        "%s ='%s'" % (tag, value) for (tag, value) in tags.iteritems()
    ] + self.extra_clauses
    query = 'select value from /^cpu.trace.*/ where %s' % " and ".join(clauses)
    print "running query: %s" % query
    metrics = self.client.query(query)
    try:
        series = metrics.raw['series']
    except KeyError:
        print "got an empty recordset"
        return
    print "putting metrics into a sorted dictionary..."
    traces = sorteddict()
    for metric in series:
        if re.match(r'cpu\.trace\.\d+', metric['name']):
            continue
        name = self._format_metric_name(metric['name'], 'cpu.trace.')
        value = sum([v[1] for v in metric['values']])
        if name in traces:
            traces[name] = traces[name] + value
        else:
            traces[name] = value
    print "output this dictionary to the file..."
    with open(out_filename, "w") as f:
        for t in traces:
            found = False
            for filter_string in self.filter_exclude:
                if filter_string in t:
                    found = True
                    break
            if not found:
                v = traces[t]
                if t != v:
                    # this is Andrew's cpu.trace.23 = 23 measures; I don't know what they are for
                    f.write('%s %d\n' % (t, v))
    print "output finished."
def notifyConnectAll(self):
    if (self._oldShape != self.inputs["shape"].value).all():
        shape = self.inputs["shape"].value
        self._oldShape = shape

        self.outputs["Output"]._dtype = numpy.uint8
        self.outputs["Output"]._shape = shape
        self.outputs["Output"]._axistags = vigra.defaultAxistags(len(shape))

        self.inputs["Input"].meta.shape = shape

        self.outputs["nonzeroValues"]._dtype = object
        self.outputs["nonzeroValues"]._shape = (1,)
        self.outputs["nonzeroValues"]._axistags = vigra.defaultAxistags(1)

        self.outputs["nonzeroCoordinates"]._dtype = object
        self.outputs["nonzeroCoordinates"]._shape = (1,)
        self.outputs["nonzeroCoordinates"]._axistags = vigra.defaultAxistags(1)

        self._denseArray = numpy.zeros(shape, numpy.uint8)
        self._sparseNZ = blist.sorteddict()

    if self.inputs["deleteLabel"].connected() and self.inputs["deleteLabel"].value != -1:
        labelNr = self.inputs["deleteLabel"].value
        neutralElement = 0
        self.inputs["deleteLabel"].setValue(-1)  # reset state of inputslot
        self.lock.acquire()
        # remove values to be deleted
        updateNZ = numpy.nonzero(numpy.where(self._denseArray == labelNr, 1, 0))
        if len(updateNZ) > 0:
            updateNZRavel = numpy.ravel_multi_index(updateNZ, self._denseArray.shape)
            self._denseArray.ravel()[updateNZRavel] = neutralElement
            for index in updateNZRavel:
                self._sparseNZ.pop(index)
        self._denseArray[:] = numpy.where(self._denseArray > labelNr,
                                          self._denseArray - 1,
                                          self._denseArray)
        self.lock.release()

    self.outputs["nonzeroValues"][0] = numpy.array(self._sparseNZ.values())
    self.outputs["nonzeroCoordinates"][0] = numpy.array(self._sparseNZ.keys())
    self.outputs["Output"][:] = self._denseArray  # set output dirty
def __init__(self, memory_limit=1000000, filter_tags=False, remove_stopwords=False,
             case_sensitive=False, with_stemming=False):
    # Max size of self.inv_index in bytes; once reached we switch to the
    # merge-based method
    self.memory_limit = memory_limit
    self.filter_tags = filter_tags
    self.inv_index = {}  # Contains the whole index in in-memory mode
    self.remove_stopwords = remove_stopwords
    self.case_sensitive = case_sensitive
    self.with_stemming = with_stemming
    self.indexed = False
    self._doc_id_list = []  # Contains the ids of the documents indexed
    # Contains the filenames of the partial index in merge-based mode
    self._partial_files_names = []
    # Keeps the line where each term is in the InvertedFile
    self.dict_terms_offset = dict()
    self.offset = 1
    self.dict_term_pl = sorteddict()  # Used in merging
def __init__(self, hs):
    self.server_name = hs.hostname
    self.clock = hs.get_clock()
    self.notifier = hs.get_notifier()
    self.is_mine_id = hs.is_mine_id

    self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
    self.presence_changed = sorteddict()  # Stream position -> user_id

    self.keyed_edu = {}  # (destination, key) -> EDU
    self.keyed_edu_changed = sorteddict()  # stream position -> (destination, key)

    self.edus = sorteddict()  # stream position -> Edu

    self.failures = sorteddict()  # stream position -> (destination, Failure)

    self.device_messages = sorteddict()  # stream position -> destination

    self.pos = 1
    self.pos_time = sorteddict()

    # EVERYTHING IS SAD. In particular, python only makes new scopes when
    # we make a new function, so we need to make a new function so the inner
    # lambda binds to the queue rather than to the name of the queue which
    # changes. ARGH.
    def register(name, queue):
        LaterGauge("synapse_federation_send_queue_%s_size" % (name,),
                   "", [], lambda: len(queue))

    for queue_name in [
        "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
        "edus", "failures", "device_messages", "pos_time",
    ]:
        register(queue_name, getattr(self, queue_name))

    self.clock.looping_call(self._clear_queue, 30 * 1000)
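# Hedged sketch (not Synapse's actual code) of why the stream-position maps
# above are sorteddicts: everything acknowledged up to a position can be
# dropped in key order.
from blist import sorteddict

edus = sorteddict({1: 'edu-a', 2: 'edu-b', 5: 'edu-c'})
acked_pos = 2
for key in [k for k in edus.keys() if k <= acked_pos]:
    del edus[key]
print(list(edus.keys()))   # [5]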
def __init__(self, eps=1e-6):
    # jobs that should have finished, but didn't
    # (because of estimation errors)
    self.late = set()
    # [remaining, jobid] heap for the SRPT scheduler
    self.queue = sortedlist()
    # last time we ran the update function
    self.last_t = 0
    # Jobs that have less than eps work to do are considered done
    # (deals with floating point imprecision)
    self.eps = eps
    # {jobid: att} where att is jobid's attained service
    self.attained = {}
    # queue for late jobs, sorted by attained service
    self.late_queue = sorteddict()
    # last result of calling the schedule function
    self.scheduled = {}