def to_group_by_snp_range(self):
    '''Return a dictionary mapping each segment's SNP range to a list of
    sample lists. Assumes that group_to_disjoint() has been called on this
    object. Segments and sample lists are lexicographically ordered.'''
    d = SnpRangeDictionary()
    for segment in self._segments:
        d.setdefault(segment.snp, sortedlist()).add(sortedlist(segment.samples))
    return d

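# A minimal, self-contained sketch of the setdefault-grouping idiom above,
# with hypothetical SNP keys and sample lists; a plain dict stands in for
# SnpRangeDictionary:
from blist import sortedlist

groups = {}
for snp, samples in [("rs1", [3, 1]), ("rs2", [2]), ("rs1", [0, 5])]:
    groups.setdefault(snp, sortedlist()).add(sortedlist(samples))
# groups["rs1"] holds two inner sortedlists, kept in lexicographic order
print([list(s) for s in groups["rs1"]])  # [[0, 5], [1, 3]]
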
def split_equation_set_v3(eqn_set):
    """Split an equation set up into smaller solvable equation sets."""
    # used as the tiebreaker of the priority key
    nEq = len(eqn_set.eqns)
    solve_sets = set()

    # keep track of what has been visited
    unique_eqn_combos = set()
    unsolved_eqns = set(eqn_set.eqns)

    # Initialize the priority queue with one singleton set per equation
    pq = sortedlist([EqnSet().add(eqn) for eqn in eqn_set.eqns],
                    key=lambda p: p.key(nEq))

    while pq:
        eqn_set = pq.pop()

        if eqn_set.is_solvable():
            # mark this equation set as solved
            solve_sets.add(eqn_set)
            eqn_set.set_solved()
            unsolved_eqns.difference_update(eqn_set.eqns)

            # discard this equation set from all sets in the pq
            for p in pq:
                p.discard(eqn_set)

            # delete any empty eqn sets and re-sort the pq
            pq = sortedlist(filter(lambda p: not p.is_empty(), pq),
                            key=lambda p: p.key(nEq))
            unique_eqn_combos = set(frozenset(eqs.eqns | eqs.vars) for eqs in pq)
        else:
            # add the frontier to the pq
            for eqs in eqn_set.frontier():
                eqn_combo = frozenset(eqs.eqns | eqs.vars)
                if eqn_combo not in unique_eqn_combos:
                    unique_eqn_combos.add(eqn_combo)
                    pq.add(eqs)

    # collect whatever never became solvable into one underconstrained set
    underconstrained_set = EqnSet()
    for eqn in unsolved_eqns:
        underconstrained_set.add(eqn)
    underconstrained_set.set_solved()

    return solve_sets, underconstrained_set

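# A minimal, self-contained sketch of the sortedlist-as-priority-queue idiom
# used above: items stay ordered by their key, pop() removes the item with
# the largest key, and re-keying is done by rebuilding the list. The task
# tuples here are hypothetical.
from blist import sortedlist

pq = sortedlist([("low", 1), ("high", 9), ("mid", 5)], key=lambda t: t[1])
while pq:
    name, priority = pq.pop()   # pop() returns the highest-priority item
    print(name, priority)       # high 9, then mid 5, then low 1
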
def __init__(self, initial_elevation, pond):
    self.mean_elevation = initial_elevation
    # Contains Pixel objects in a cascaded data structure:
    # lower_fringe[is_site] selects whether or not the pixel is a site;
    # lower_fringe[is_site][0..8] buckets pixels by their number of
    # neighbours, each bucket a sortedlist ordered by elevation.
    # Nested comprehensions (not list multiplication) so every bucket is an
    # independent sortedlist rather than 18 aliases of the same one.
    self._lower_fringe = [[sortedlist(key=lambda x: -x.elevation)
                           for _ in range(9)] for _ in range(2)]
    self._upper_fringe = [[sortedlist(key=lambda x: x.elevation)
                           for _ in range(9)] for _ in range(2)]
    # contains (x, y) tuples
    self._fringe_pixels = {}
    # contains (x, y) tuples
    self._bad_fringe = set()
    self._pond = pond

def __init__(self, config=CACHE_CONFIG_FILE):
    """
    Initializes the cache using a JSON file. Note: if a different cache
    config file is preferred, that file must be the first argument when
    instantiating this object.
    """
    with open(config) as f:
        properties = json.load(f)
    self.maxDuration = properties['cacheDuration']
    self.minDuration = properties['cacheMinDuration']
    self.maxBytes = properties['cacheSizeBytes']
    self.maxElems = properties['cacheSizeElements']
    self.numElems = 0
    self.numBytes = 0
    self.lock = threading.Lock()
    # CacheObjects sorted by last access time
    self.accessList = blist.sortedlist(key=lambda cachedObject: cachedObject.lastAccessTime)
    # CacheObjects sorted by expiration time
    self.expireList = blist.sortedlist(key=lambda cachedObject: cachedObject.expirationTime)
    # elements will be of the form {url: CacheObject}
    self.map = {}

def __init__(self):
    self.mean = 0.0
    self._old_mean = 0.0
    self._sum = 0  # Python ints are arbitrary precision; no long literal needed
    self._n = 0    # number of items
    # items greater than the mean
    self._toplist = sortedlist()
    # items less than the mean, kept in descending order
    self._bottomlist = sortedlist(key=operator.neg)
    # Since all items in the "eq list" have the same value (self.mean), we
    # don't need to maintain an eq list, only a count.
    self._eqlistlen = 0
    self._top_deviance = 0
    self._bottom_deviance = 0

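# A minimal sketch of the key=operator.neg trick above: negating the key
# makes an ascending sortedlist behave as a descending one, so index 0 is
# always the largest element.
import operator
from blist import sortedlist

desc = sortedlist([3, 1, 2], key=operator.neg)
desc.add(5)
print(list(desc))  # [5, 3, 2, 1]
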
def __init__(self):
    super(SortedList, self).__init__()
    self.nodes = blist.sorteddict()
    self.datas = blist.sortedlist()
    self._max_id = 0
    self._except_max_id = 0

def __init__(self, data, minPts=20, eps=None):
    '''Constructor'''
    self.minPts = minPts
    # Default 'eps' is large enough to engulf all points; this assumes the
    # Euclidean metric (squared range across all dimensions). Testing
    # 'eps is None' rather than its type lets a float eps through.
    if eps is None:
        dim_ranges = np.amax(data, 0) - np.amin(data, 0)
        self.eps = sum(dim_ranges ** 2)
    else:
        self.eps = eps
    self.num_points = data.shape[0]
    self.num_features = data.shape[1]
    self.dist_mat = pairwise_distances(data)
    self.visited = np.zeros(self.num_points)
    self.noise = np.zeros(self.num_points)  # what's the point of this?
    # cluster == 0 --> cluster unassigned
    self.cluster_assignment = np.zeros(self.num_points)
    self.current_cluster = 1
    self.reachability_dist = -np.ones(self.num_points)
    self.ordered_points = []  # ordered by visitation order
    # seeds are ordered by reachability distance (i.e. value)
    self.seeds = sortedlist([], key=lambda x: x[1])
    # compute core_dist ahead of time for simplicity; list() forces
    # evaluation, since map() is lazy in Python 3
    self.core_dist = list(map(self._get_core_distance, range(self.num_points)))

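# A hypothetical sketch of how a key-sorted seeds list like the one above is
# updated: sortedlist keeps items ordered by the key at insertion time, so
# changing a priority means removing the old entry and re-adding the new one.
from blist import sortedlist

seeds = sortedlist([("p1", 0.9), ("p2", 0.4)], key=lambda x: x[1])
seeds.remove(("p1", 0.9))   # drop the stale entry...
seeds.add(("p1", 0.2))      # ...and re-insert with the new reachability
print(seeds[0])             # ('p1', 0.2) -- now the best seed
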
def allocate(self, masters, unallocated_netarea):
    """Assumes all masters are alive, i.e. call prune() before."""
    if not masters:
        logging.critical("Masters must be added, system is overloaded")
        return None
    while unallocated_netarea:
        medium_load = float(sum(m.load() for m in masters)) / len(masters)
        # <= so the first allocation works when every load == 0
        under_loaded = sortedlist(m for m in masters if m.load() <= medium_load)
        if not under_loaded:
            # keep this a sortedlist too, since the loop below calls add()
            under_loaded = sortedlist(m for m in masters if not m.is_overload())
        if not under_loaded:
            logging.critical("Masters must be added, system is overloaded")
            return None
        while unallocated_netarea and under_loaded:
            m, net = under_loaded.pop(0), unallocated_netarea.pop()
            self.delta_netareas[0][m].append(net)
            m.allocate(net)
            if m.load() < medium_load:
                under_loaded.add(m)
    if sum(int(not master.is_overload()) for master in masters) < self.limitFreeMasters:
        logging.warning("Masters should be added, system will be overloaded")
    self.propagate(masters)

def recommend(self, ratings, people):
    genres_ratings = [sum(self.coef[i] * r[i] for r in ratings)
                      for i in range(len(self.genres))]
    # prefill with 100 sentinel entries so result[0] is always the current minimum
    result = sortedlist([('null', -1)] * 100, key=lambda x: x[1])
    lfilms, dfilms = self.get_people_films(people)
    if len(people) == 1:
        banned = [f.id for f in lfilms] + [f.id for f in dfilms]
    else:
        banned = [f.id for f in dfilms]
    session, films = self.films
    for f in films:
        if f.id not in banned:
            frate = sum(float(j) * genres_ratings[i] for i, j in enumerate(f.genres))
            nb_genres = sum(math.ceil(float(i)) for i in f.genres)
            if nb_genres:
                frate = frate / nb_genres if nb_genres < 3 else frate / 3
            if frate > result[0][1]:
                f.mark = 0
                result.pop(0)   # evict the current minimum
                result.add((f, frate))
    session.close()
    user_classes = self.get_user_classes(people)
    # skip any unfilled sentinel slots, which hold the string 'null'
    films = {f[0].title: f[0] for f in result if f[0] != 'null'}
    if len(people) > 1:
        for f in lfilms:
            f.mark = 0
            films[f.title] = f
    session, rtgs = self.get_ratings(films.keys())
    for rtg in rtgs:
        films[rtg.title].mark += user_classes[rtg.class_id] * rtg.rate
    result = [(f.title, f.poster, f.id, f.mark) for f in films.values()]
    result = sorted(result, key=lambda x: x[3])[-10:]
    session.close()
    return list(result)

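# A minimal sketch of the fixed-size "top N" idiom used above: prefill with
# sentinels, then evict the minimum whenever a better candidate arrives. The
# names and scores here are hypothetical.
from blist import sortedlist

top = sortedlist([(None, float("-inf"))] * 3, key=lambda x: x[1])
for name, score in [("a", 2.0), ("b", 5.0), ("c", 1.0), ("d", 4.0)]:
    if score > top[0][1]:
        top.pop(0)            # evict the current minimum
        top.add((name, score))
print(list(top))  # [('a', 2.0), ('d', 4.0), ('b', 5.0)] -- the three best
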
def ap_frequent_itemsets(transactions, minSupport=0.5):
    '''Apriori algorithm for frequent itemsets. Returns a dict keyed by k,
    where each value is the list of frequent k-itemsets.

    Params:
    transactions - a 0/1 matrix
    minSupport - minimum support value accepted
    '''
    # First create all 1-itemsets
    k = 1
    frequent_itemsets = {k: sortedlist()}
    for x in range(transactions.shape[1]):
        # sortedlist does not support append(); add() keeps it sorted
        frequent_itemsets[k].add(frozenset((x,)))  # add 1-itemset {x}
    calculate_frequencies(frequent_itemsets[k], transactions)
    prune_infrequent(frequent_itemsets[k], minSupport)
    # Then loop through the rest
    for k in range(2, 100):
        candidates = generate_candidates(frequent_itemsets[k - 1], k - 1)
        if candidates is None:
            break
        calculate_frequencies(candidates, transactions)
        prune_infrequent(candidates, minSupport)
        frequent_itemsets[k] = candidates
    return frequent_itemsets

def __init__(self, files, exclude_others=False, macros=()):
    """Extracts features only from those files passed in files, an iterable
    or a string. macros allows one to pass a list of macros to define."""
    extra_args = ['-D' + i for i in macros]
    if isinstance(files, str):
        files = [files]
    self.files = set(files)
    self.index = clang.cindex.Index.create()
    self.translation_units = [
        self.index.parse(i,
                         options=clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD,
                         args=extra_args)
        for i in self.files]
    self.cursors = [i.cursor for i in self.translation_units]
    raw_functions = [func for i in self.cursors for func in extract_functions(i)]
    raw_macro_tuples = [extract_macros.extract_macros(i) for i in self.cursors]
    raw_macros = [j for i in raw_macro_tuples for j in i[0]]
    failed_macros = [j for i in raw_macro_tuples for j in i[1]]
    # remove any files we aren't interested in; list comprehensions (not
    # lazy filter objects) because these are iterated more than once
    raw_macros = [x for x in raw_macros if x.file in self.files or not exclude_others]
    raw_functions = [x for x in raw_functions if x.file in self.files or not exclude_others]
    # create the index of things by which file they're in
    raw_file_contents = collections.defaultdict(
        lambda: blist.sortedlist(key=lambda x: x.line))
    for i in itertools.chain(raw_functions, raw_macros):
        raw_file_contents[i.file].add(i)
    self.file_contents = raw_file_contents
    self.macros_dict = {i.name: i for i in raw_macros}
    self.functions_dict = {i.name: i for i in raw_functions}
    self.macros_list = raw_macros
    self.functions_list = raw_functions
    self.failed_macros = failed_macros

def get_graph_from_matrix(assoc_mat, density, ignore_weights=True):
    """Get graph from association matrix."""
    mat = assoc_mat.vmat
    l = mat.shape[0]
    g = Graph(directed=True)
    g.add_vertices(l)
    g.vs['name'] = assoc_mat.names
    max_edges = int(l * (l - 1) * density)
    a, b = mat.nonzero()
    values = sortedlist()
    es = []
    weights = []
    for z in range(len(a)):
        values.add(mat[a[z], b[z]])
    if mat.nnz < max_edges:
        threshold = min(values)
    else:
        threshold = values[len(values) - (max_edges + 1)]
    for z in range(len(a)):
        w = mat[a[z], b[z]]
        if w >= threshold:
            es.append((a[z], b[z]))
            weights.append(w)
    g.add_edges(es)
    if not ignore_weights:
        g.es['weight'] = weights
        # this amounts to row-normalizing the adjacency matrix
        GraphUtils.normalize_out_weights(g)
    return g

def getIterator(self, modelContext, maxCandidate):
    D = self.getDeltaMatrix(modelContext)
    # 'is None' rather than 'not ...': truth-testing a scipy sparse matrix
    # raises a ValueError
    if self.adjustMatrix is None:
        self.adjustMatrix = sp.coo_matrix(np.zeros(D.shape)).tolil()
    D = D + self.adjustMatrix
    self.adjustMatrix = self.adjustMatrix * 0.9
    PD = D[0:modelContext.getMethodCount(), :]
    PD = PD - np.absolute(PD)
    PD = PD / 2
    PD = PD.astype('int32')
    (rows, cols) = PD.nonzero()
    # candidates are (fromClassIdx, methodIdx, toClassIdx, delta) tuples,
    # sorted by delta; tuple-unpacking lambdas are not valid Python 3
    candidateList = blist.sortedlist([], key=lambda c: c[3])
    for i in range(len(rows)):
        val = D[rows[i], cols[i]]
        fromClassIdx = modelContext.getOwnerClass(rows[i])
        candidateList.add((fromClassIdx, rows[i], cols[i], val))
        self.adjustMatrix[rows[i], cols[i]] += 1.31
        if maxCandidate > 0 and len(candidateList) > maxCandidate:
            candidateList.pop(-1)  # keep only the maxCandidate best (smallest delta)
    return ARDeltaCandidateIterator(candidateList)

def test_by_search_sortedblist_algo(self):
    u1 = uuid_generator.create('user').bytes
    u2 = uuid_generator.create('user').bytes
    u3 = uuid_generator.create('user').bytes
    p1 = uuid_generator.create('post').bytes
    p2 = uuid_generator.create('post').bytes
    p3 = uuid_generator.create('post').bytes
    p4 = uuid_generator.create('post').bytes
    p5 = uuid_generator.create('post').bytes
    p6 = uuid_generator.create('post').bytes
    p7 = uuid_generator.create('post').bytes
    p8 = uuid_generator.create('post').bytes
    p9 = uuid_generator.create('post').bytes
    self.index.append(u1, p1, 0, bytes=True)
    self.index.append(u2, p2, 0, bytes=True)
    self.index.append(u3, p3, 0, bytes=True)
    self.index.append(u1, p4, 1, bytes=True)
    self.index.append(u2, p5, 2, bytes=True)
    self.index.append(u3, p6, 3, bytes=True)
    self.index.append(u3, p7, 6, bytes=True)
    self.index.append(u2, p8, 5, bytes=True)
    self.index.append(u2, p9, 8, bytes=True)
    user_last_post = blist.sortedlist([9, 7, 4])
    result = list()
    while user_last_post:
        b = self.index.get_block(user_last_post.pop())
        result.append(b.id)
        if b.prev_p:
            user_last_post.add(b.prev_p)
    self.assertListEqual([9, 8, 7, 6, 5, 4, 3, 2, 1], result)

def process_donations(cont_ip_fp, percentile, donana_op_fp):
    # Hashmap of donors. Key is (donor name, zip code); value is the earliest
    # year the donor contributed to any campaign.
    donors_dict = {}
    # Hashmap of donation recipients. Key is (recipient id, zip code, year);
    # value is [running donation count, running donation total, sortedlist
    # of individual donation amounts].
    politicians_dict = {}
    for line in cont_ip_fp:
        fields = line.split('|')
        cmte_id = fields[0]
        name = fields[7]
        zip_code = fields[10][:5]  # need the first five characters
        transaction_dt = fields[13]
        try:
            transaction_amt = float(fields[14])
        except ValueError:
            continue
        other_id = fields[15]
        if not validations.record_is_valid(other_id, transaction_dt, zip_code,
                                           name, cmte_id, transaction_amt):
            continue
        year = get_year_from_date(transaction_dt)
        if (name, zip_code) in donors_dict:
            if donors_dict[(name, zip_code)] >= year:
                # This donation is out of order chronologically, so we update
                # the year in the donors hashmap and move on.
                donors_dict[(name, zip_code)] = year
                continue
        else:
            # This donor is encountered for the first time, so we just add
            # them to the donors hashmap and move on.
            donors_dict[(name, zip_code)] = year
            continue
        # Now the donor is guaranteed to be a repeat donor, so we add the
        # donation details to the politicians hashmap and also write to the
        # output file.
        amt_rounded = get_rounded_amt(transaction_amt)
        if (cmte_id, zip_code, year) in politicians_dict:
            if amt_rounded != 0:
                politicians_dict[(cmte_id, zip_code, year)][0] += 1
                politicians_dict[(cmte_id, zip_code, year)][1] += amt_rounded
                politicians_dict[(cmte_id, zip_code, year)][2].add(amt_rounded)
        else:
            politicians_dict[(cmte_id, zip_code, year)] = [
                1, amt_rounded, sortedlist([amt_rounded])]
        if not write_to_op_file(donana_op_fp, percentile, cmte_id, zip_code,
                                year, politicians_dict):
            return False
    return True

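# write_to_op_file is not shown above; presumably it reads a percentile from
# the sortedlist kept in politicians_dict[...][2]. A hedged sketch of a
# nearest-rank percentile lookup, the main reason to keep a sortedlist here:
import math

def nearest_rank_percentile(sorted_amounts, percentile):
    # sorted_amounts: a sortedlist of donation amounts; percentile: e.g. 30
    rank = int(math.ceil(percentile / 100.0 * len(sorted_amounts)))
    return sorted_amounts[rank - 1]
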
def __init__(self, intervals=()):
    """
    @param intervals: a list of tuples, each with a minimum number and a
    maximum number representing a range of values covered by the set.
    """
    # default is an empty tuple rather than the mutable-default [] anti-pattern
    self.intervals = blist.sortedlist()
    self.dirty = False
    for min_v, max_v in intervals:
        self.add(min_v, max_v)

def __init__(self, path, output_path, min_rel_similarity, ppm=10):
    self.ppm = ppm
    self.ms = MasterSpectrum()
    self.path = path
    self.out = []
    self.output_path = output_path
    self.references = sortedlist(key=lambda i: i.id_1)
    self.min_rel_similarity = min_rel_similarity

def __init__(self, our_id, minimum_id=None, maximum_id=None):
    assert our_id is not None
    self._mut_lock = threading.Lock()
    self._our_id = our_id
    # conditional expressions instead of the 'and/or' idiom, which mishandles
    # a falsy bound such as 0
    self._min = minimum_id if minimum_id is not None else 0
    self._max = maximum_id if maximum_id is not None else 2 ** 160
    self._children = None
    self._items = blist.sortedlist(key=lambda item: item.node.node_id)

def sort(self, key):
    """Sort the data set according to the given key."""
    if self.sortedkey == key:
        return
    self.sortedindex = blist.sortedlist(range(self._size),
                                        key=lambda x: self[key][x])
    self.sortedkey = key

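# A minimal sketch of the sorted-index idiom above: instead of sorting the
# data itself, keep row indices sorted by a column's values. The column data
# here is hypothetical.
from blist import sortedlist

column = [30, 10, 20]
order = sortedlist(range(len(column)), key=lambda i: column[i])
print(list(order))  # [1, 2, 0] -- row indices in ascending column order
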
def __init__(self, score, problem):
    self.score = score
    self.ok = False
    self.star_count = 0
    self.ok_time = timedelta()
    self.ok_submit = None
    self.result_list = sortedlist()
    self.problem = problem
    self.params = self.score.aggregator.problem_params[problem.id]

def arrangements(lines, n=2):
    """Yield every possible arrangement of lines. Considers merging two lines
    when the last n characters of a line equal the first n characters of
    another. Lines are sorted."""
    try:
        lines = sortedlist(lines)
        for pairs in pairings(*overlapping(lines, n)):
            copy = sortedlist(lines)
            for left, right in pairs:
                copy.remove(left)
                copy.remove(right)
                copy.add(left + right[n:])  # merge, dropping the overlap
            yield from arrangements(copy)
    except NoOverlap:
        # nothing left to merge: the current set is itself an arrangement
        yield lines

def init(self):
    self.checked = 0
    self.passed = 0
    self.weighted = 0
    self.normalized = 0
    self._status = ''
    self.reportlines = sortedlist(key=lambda row: row[0])
    self.test_suite_result.status = 'QUE'
    self.test_suite_result.report = ''
    self.test_suite_result.save(force_update=True)

def _beforeAfterTest():
    l = [1, 4, 4, 6, 10]
    d = {
        4: [1, 6],
        1: [None, 4],
        0: [None, 1],
        8: [6, 10],
        20: [10, None],
        (4, 2): [None, 1, 6, 10],
        (6, 3): [1, 4, 4, 10, None, None],
    }
    for i in [0, 1]:
        if i == 1:
            l = blist.sortedlist(l)
        for obj, rv in d.items():
            args = [l] + list(obj) if isinstance(obj, tuple) else [l, obj]
            test(beforeAfter, args, rv)

def __or__(self, *other):
    sorted_ends = self.__merged_ends(*other)
    new_ends = []
    for value, end, state in RangeSet.__iterate_state(sorted_ends):
        if state > 1 and end == _START:
            continue
        elif state > 0 and end == _END:
            continue
        new_ends.append((value, end))
    return RangeSet(blist.sortedlist(new_ends), _RAW_ENDS)

def __invert__(self):
    if not self.ends:
        new_ends = ((NEGATIVE_INFINITY, _START), (INFINITY, _END))
        return RangeSet(new_ends, _RAW_ENDS)
    new_ends = blist.sortedlist(self.ends)
    head, tail = [], []
    if new_ends[0][0] == NEGATIVE_INFINITY:
        new_ends.pop(0)
    else:
        head = [(NEGATIVE_INFINITY, _START)]
    if new_ends[-1][0] == INFINITY:
        new_ends.pop(-1)
    else:
        tail = [(INFINITY, _END)]
    new_ends = blist.sortedlist((value[0], _NEGATE[value[1]])
                                for value in new_ends)
    new_ends.update(head)
    new_ends.update(tail)
    return RangeSet(new_ends, _RAW_ENDS)

def __init__(self, start, end):
    if isinstance(end, _RawEnd):
        ends = start
    else:
        if (isinstance(start, _Indeterminate) and isinstance(end, _Indeterminate)
                and start == end):
            raise LogicError("A range cannot consist of a single end of the line.")
        if start > end:
            start, end = end, start
        ends = blist.sortedlist([(start, _START), (end, _END)])
    object.__setattr__(self, "ends", ends)

def __and__(self, *other, **kwargs):
    min_overlap = kwargs.pop('minimum', 2)
    if kwargs:
        raise ValueError("kwargs is not empty: {0}".format(kwargs))
    sorted_ends = self.__merged_ends(*other)
    new_ends = []
    for value, end, state in RangeSet.__iterate_state(sorted_ends):
        if state == min_overlap and end == _START:
            new_ends.append((value, end))
        elif state == (min_overlap - 1) and end == _END:
            new_ends.append((value, end))
    return RangeSet(blist.sortedlist(new_ends), _RAW_ENDS)

def __init__(self):
    self.index = sorteddict()
    self.tag = []
    self.pgid = None
    self.title = None
    self.title_words = sorteddict()
    self.text = ''
    self.page_map = sortedlist()
    self.Infobox = sorteddict()
    self.Links = sorteddict()
    with open('stopwords.txt', 'r') as f:
        self.stopwords = f.readline().strip().split(',')
    xml.sax.ContentHandler.__init__(self)

def _get_sorted_filings(cls, ticker, filing_type):
    '''Step 1: search for the ticker and filing type, and generate the URLs
    of the document pages that have interactive data/XBRL.
    Step 2: get the document pages, and on each page find the URL of the
    XBRL document.
    Returns a blist sorted by filing date.
    '''
    filings = blist.sortedlist(key=filing_sort_key_func)
    document_page_urls = cls._get_document_page_urls(ticker, filing_type)
    for url in document_page_urls:
        filing = cls._get_filing_from_document_page(url)
        filings.add(filing)
    return filings

def _initializeWorker(**kwargs):
    print('_initializeWorker called')
    organism = "drosophila_melanogaster"
    with open(GENOME_FILE % organism, 'r') as f:
        text = f.read()
    _workerState["sTree"] = PySSTree(text, "load", TREE_FILE % organism)
    # pickle files must be opened in binary mode
    with open(BOUNDARY_FILE % organism, "rb") as f:
        _workerState["geneBoundaries"] = sortedlist(pickle.load(f),
                                                    key=lambda x: x['pos'])

def prog_26(fname):
    with open(fname) as f:
        n = int(f.readline().strip())  # int() is safer than eval()
        arr = [int(x) for x in f.readline().strip().split()]
    sarr = sortedlist(arr)
    with open('result.dat', 'w') as f:
        for a in sarr:
            print(a, end=' ')
            f.write(str(a) + '\t')

def __init__(self, score, problem):
    self.score = score
    self.ok = False
    self.ok_final = False
    self.pending = False
    self.trials = 0
    self.late_trials = 0
    self.ok_time = timedelta()
    self.ok_submit = None
    self.result_list = sortedlist()
    self.problem = problem
    self.params = self.score.aggregator.problem_params[problem.id]
    self.agr_params = self.score.aggregator.params
    self.ignored = self.agr_params.ignored.split(',')

def __xor__(self, *other):
    sorted_ends = self.__merged_ends(*other)
    new_ends = []
    for value, end, state in RangeSet.__iterate_state(sorted_ends):
        if state == 2 and end == _START:
            new_ends.append((value, _NEGATE[end]))
        elif state == 1 and end == _END:
            new_ends.append((value, _NEGATE[end]))
        elif state == 1 and end == _START:
            new_ends.append((value, end))
        elif state == 0 and end == _END:
            new_ends.append((value, end))
    return RangeSet(blist.sortedlist(new_ends), _RAW_ENDS)

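# A hedged usage sketch of the RangeSet operators defined above, assuming
# ranges are constructed as RangeSet(start, end) per the __init__ shown
# earlier; the numeric bounds are hypothetical:
a = RangeSet(1, 5)
b = RangeSet(3, 8)
union = a | b        # one range covering 1..8
overlap = a & b      # the shared region 3..5
disjoint = a ^ b     # 1..3 and 5..8, the non-shared regions
outside = ~a         # everything below 1 and above 5
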
def overlapping(lines, n=2):
    """Find lines that could overlap. Returns two lists: all lines that share
    a suffix and all lines that use the same string as their prefix. Raises
    NoOverlap if none found."""
    lines = sortedlist(lines)
    for left in lines:
        overlap = left[-n:]
        # lines is sorted, so every line starting with `overlap` sits in one
        # contiguous run beginning at bisect_left(overlap)
        rights = list(takewhile(lambda right: right.startswith(overlap),
                                lines[lines.bisect_left(overlap):]))
        if left in rights:
            rights.remove(left)
        if rights:
            lefts = [l for l in lines if l.endswith(overlap)]
            return (lefts, rights)
    raise NoOverlap()

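# A self-contained sketch of the bisect-based prefix search used above, with
# hypothetical words:
from itertools import takewhile
from blist import sortedlist

words = sortedlist(["apple", "apply", "banana", "appeal"])
prefix = "appl"
start = words.bisect_left(prefix)
matches = list(takewhile(lambda w: w.startswith(prefix), words[start:]))
print(matches)  # ['apple', 'apply']
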
def unique_median_print(filename, median_unique_words_tweeted):
    # Running median: insert each unique-word count into a sortedlist and
    # record the median of everything seen so far.
    tmp_median_unique_words_tweeted = sortedlist([])
    new_median_unique_words_tweeted = []
    for x in median_unique_words_tweeted:
        tmp_median_unique_words_tweeted.add(x)
        new_median_unique_words_tweeted.append(
            median_of_list(tmp_median_unique_words_tweeted))
    # Export the content of the updated list to an output file
    with open(filename, 'w') as outfile:
        for item in new_median_unique_words_tweeted:
            outfile.write("{0}\n".format(str(item)))

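# median_of_list is not shown above; a minimal sketch of what it presumably
# does, exploiting the fact that a sortedlist supports positional indexing:
def median_of_list(sorted_items):
    n = len(sorted_items)
    mid = n // 2
    if n % 2:
        return sorted_items[mid]
    return (sorted_items[mid - 1] + sorted_items[mid]) / 2.0
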