def test_prod_without_zeros_custom_acc_dtype(self):
    """
    Check that ProdWithoutZeros() honours a user-supplied acc_dtype.

    Every (input dtype, accumulator dtype) pair drawn from
    ``theano.scalar.all_types`` is tried.  Pairs that would force a
    downcast must raise TypeError when the op is applied; every other
    pair must build, and (mixed complex cases excepted) compile and run.
    """
    # The axis should not matter, so we simply cycle through a few of them.
    axis_choices = [None, 0, 1, [], [0], [1], [0, 1]]
    n_done = 0
    for input_dtype in (str(t) for t in theano.scalar.all_types):
        x = tensor.matrix(dtype=input_dtype)
        for acc_dtype in (str(t) for t in theano.scalar.all_types):
            axis = axis_choices[n_done % len(axis_choices)]
            upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
            # Accepted when no downcast is needed; int/uint inputs are
            # always allowed with float/complex accumulators.
            accepted = (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                         acc_dtype in tensor.continuous_dtypes))
            if not accepted:
                self.assertRaises(
                    TypeError,
                    ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
                    x)
            else:
                prod_woz_var = ProdWithoutZeros(
                    axis=axis, acc_dtype=acc_dtype)(x)
                assert prod_woz_var.owner.op.acc_dtype == acc_dtype
                if (acc_dtype.startswith('complex') and
                        input_dtype != acc_dtype):
                    # NOTE: this also skips the counter increment below.
                    continue
                f = theano.function([x], prod_woz_var)
                data = numpy.random.rand(2, 3) * 3
                data = data.astype(input_dtype)
                f(data)
            n_done += 1
def upre_bdd(self, dst_states_bdd, env_strat=None, get_strat=False,
             use_trans=False):
    """
    UPRE = EXu.AXc.EL' : T(L,Xu,Xc,L') ^ dst(L') [^St(L,Xu)]

    :param dst_states_bdd: BDD of the destination states.
    :param env_strat: optional BDD conjoined with the one-step predecessor
        before any quantification.
    :param get_strat: when true, return the BDD obtained after the
        universal abstraction only (before the existential abstraction).
    :param use_trans: forwarded to ``substitute_latches_next``.
    :return: the fully abstracted BDD, or the intermediate one if
        ``get_strat`` is set.
    """
    # Take a transition step backwards.
    # TECH NOTE: restrict_fun=~dst... works ONLY because the caller uses the
    # result and takes the union with dst_states afterwards.
    pre_bdd = self.substitute_latches_next(
        dst_states_bdd,
        restrict_fun=~dst_states_bdd,
        use_trans=use_trans)
    # Restrict to the given strategy, if any.
    if env_strat is not None:
        pre_bdd &= env_strat
    # "There is an uncontrollable action such that for all controllable
    # actions ...": abstract the controllable inputs universally first,
    # then the uncontrollable inputs existentially.
    controllable_cube = BDD.make_cube(
        imap(funcomp(BDD, symbol_lit), self.iterate_controllable_inputs()))
    forall_bdd = pre_bdd.univ_abstract(controllable_cube)
    uncontrollable_cube = BDD.make_cube(
        imap(funcomp(BDD, symbol_lit), self.iterate_uncontrollable_inputs()))
    exists_bdd = forall_bdd.exist_abstract(uncontrollable_cube)
    return forall_bdd if get_strat else exists_bdd
def encode_to_server(self, iv, timestamp, return_code, host_name,
                     svc_description, plugin_output):
    """Build the client-to-server packet and XOR it with ``iv``.

    The fields are packed with ``self.toserver_fmt``; the CRC32 of the
    packet (computed with the CRC field set to 0) is stored in the second
    field, and the packed result is XORed byte-by-byte with ``iv``,
    repeating ``iv`` as often as needed.

    :param iv: byte string used as the repeating XOR key.
    :return: the XORed packet as a byte string (``str`` on Python 2,
        ``bytes`` on Python 3).
    """
    # Note that struct pads the string fields with 0's instead of random
    # digits. Oh well.
    fields = [
        self.proto_version,
        0,  # crc32 placeholder, filled in below
        timestamp,
        return_code,
        host_name,
        svc_description,
        plugin_output,
    ]
    # NOTE(review): on Python 2 binascii.crc32 can return a negative value;
    # confirm that the CRC field of toserver_fmt accepts it.
    fields[1] = binascii.crc32(struct.pack(self.toserver_fmt, *fields))
    packet = struct.pack(self.toserver_fmt, *fields)

    # XOR with the (cycled) IV.  bytearray yields integers on both Python 2
    # and Python 3, so no per-character ord()/chr() round trip is needed.
    keystream = itertools.cycle(bytearray(iv))
    return bytes(bytearray(
        p ^ k for p, k in zip(bytearray(packet), keystream)))
def multiplex_neighbours(self,vertex_object,layer=None): 'Returns an iterator of vertices in layer, that are multiplex neighbours of vertex_object.' #define helper functions, necessary as using a lambda function would disabkle pickling of objects later ... def ret_multiplex_citation_key(x): return self._multiplex_citation[x].keys() def ret_multiplex_collab_key(x): return self._multiplex_collab[x].keys() if layer==None: print "###################################" print "Specify start_layer of mapping first!" print "USE layer='collab' OR layer='citation'" print "####################################" return if layer=='collab': multiplex_neighbours_TMP=itertools.imap(ret_multiplex_citation_key,self._multiplex_collab[vertex_object].keys()) multiplex_neighbours=itertools.chain.from_iterable(multiplex_neighbours_TMP) return multiplex_neighbours if layer=='citation': multiplex_neighbours_TMP=itertools.imap(ret_multiplex_collab_key,self._multiplex_citation[vertex_object].keys()) multiplex_neighbours=itertools.chain.from_iterable(multiplex_neighbours_TMP) return multiplex_neighbours
def yield_sequences_in_list(paths):
    """
    Group ``paths`` into discrete file sequences and yield each of them.

    No check is made that the files exist on disk; the caller is assumed
    to know that already.

    :param paths: a list of paths
    :rtype: generator
    """
    frames_by_key = {}
    for path in paths:
        match = DISK_RE.match(utils.asString(path))
        if not match:
            continue
        dirname, basename, frame, ext = match.groups()
        if not (basename or ext):
            continue
        frames = frames_by_key.setdefault((dirname, basename, ext), set())
        if frame:
            frames.add(frame)

    for (dirname, basename, ext), frames in frames_by_key.iteritems():
        # Build the FileSequence behind the scenes, rather than dupe work:
        # fill in the private fields, then let __init__ parse str(seq).
        seq = FileSequence.__new__(FileSequence)
        seq._dir = dirname or ''
        seq._base = basename or ''
        seq._ext = ext or ''
        if not frames:
            seq._frameSet = None
            seq._pad = ''
        else:
            seq._frameSet = FrameSet(set(int(f) for f in frames))
            seq._pad = FileSequence.getPaddingChars(
                min(len(f) for f in frames))
        seq.__init__(str(seq))
        yield seq
def test_imap(self):
    """Exercise itertools.imap with a None function, str and operator.add."""
    import itertools
    import operator

    def expect(iterator, values):
        # The iterator must produce exactly `values`, then be exhausted.
        for value in values:
            assert iterator.next() == value
        raises(StopIteration, iterator.next)

    obj_list = [object(), object(), object()]
    # A None function makes imap yield tuples of its arguments.
    expect(itertools.imap(None, obj_list), [(obj, ) for obj in obj_list])
    # Iteration stops with the shortest input.
    expect(itertools.imap(None, [1, 2, 3], [4], [5, 6]), [(1, 4, 5)])
    expect(itertools.imap(None, [], [], [1], []), [])
    expect(itertools.imap(str, [0, 1, 0, 1]), ['0', '1', '0', '1'])
    expect(itertools.imap(operator.add, [1, 2, 3], [4, 5, 6]), [5, 7, 9])
def prefix_filter_flowgrams(flowgrams, squeeze=False):
    """Filters flowgrams by common prefixes.

    flowgrams: iterable source of flowgrams
    squeeze: if True, collapse all poly-X to X

    Returns a tuple (number of entries in the prefix mapping,
    that number plus the total length of all mapped values,
    the prefix mapping itself).
    """
    def name_and_seq(flowgram):
        name = flowgram.Name
        seq = str(flowgram.toSeq(truncate=True))
        if squeeze:
            seq = squeeze_seq(seq)
        return (name, seq)

    # Feed the sequences lazily; no intermediate list is built.
    mapping = build_prefix_map(name_and_seq(f) for f in flowgrams)

    l = len(mapping)
    orig_l = l + sum(len(members) for members in mapping.values())
    return (l, orig_l, mapping)
def secure_compare(a,b):
    """Return 'a == b', but try not to leak timing information about the
    arguments.

    In the event that the length of the two strings are not equal, we leak
    the length of the right argument, b.

    Both arguments must be distinct (not the same object), non-empty bytes
    objects; otherwise TypeError is raised.
    """
    retval = True
    # Bitwise '&' (not 'and') is used, so every operand is evaluated.
    if not (isinstance(a, bytes) & isinstance(b, bytes) \
            & (a is not b) \
            & (len(a) != 0) & (len(b) != 0)):
        raise TypeError('Arguments must be distinct bytes objects with nonzero length')
    # copy b to a if the lengths of a and b are unequal
    retval &= len(a) == len(b)
    # some gymnastics we have to do because of small integer caching
    new_a = [None] * len(b)
    # a_mask is -1 (all bits set) when the lengths match and 0 otherwise;
    # b_mask is its complement, so exactly one source is selected below.
    a_mask = -long(retval)
    b_mask = ~a_mask
    for i in xrange(len(new_a)):
        # It's conceivable that the pattern of memory accesses here may leak
        # information about the length of a. However, I believe that this is
        # unlikely
        new_a[i] = ord(a[i & a_mask]) & a_mask \
                 | ord(b[i & b_mask]) & b_mask
    a = ''.join(imap(chr, new_a))
    del new_a, b_mask, a_mask, i
    # OR together the XOR of every byte pair; the result is 0 only when all
    # bytes match.  retval is already False if the lengths differed.
    retval &= (reduce(or_, imap(xor, imap(ord, a), imap(ord, b)), 0L) == 0L)
    return retval
def get_posts_tags(subscribers, object_list, feed, tag_name):
    '''Adds a qtags property in every post object in a page.

    Use "qtags" instead of "tags" in templates to avoid unnecessary DB hits.
    Every post also gets a ``subscriber`` attribute, looked up by the id of
    its feed among ``subscribers``.

    Returns ``(user_obj, tag_obj)``: the subscriber of the last post whose
    feed equals ``feed`` (or None), and the last fetched tag whose name
    equals ``tag_name`` (or None).
    '''
    tagd = dict()
    user_obj = None
    tag_obj = None

    post_ids = [str(post.id) for post in object_list]
    if post_ids:
        quote = connection.ops.quote_name
        tags = models.Tag.objects.extra(
            select=dict(post_id='{0}.{1}'.format(
                quote('feedjack_post_tags'), quote('post_id'))),
            tables=['feedjack_post_tags'],
            where=[
                '{0}.{1}={2}.{3}'.format(
                    quote('feedjack_tag'), quote('id'),
                    quote('feedjack_post_tags'), quote('tag_id')),
                '{0}.{1} IN ({2})'.format(
                    quote('feedjack_post_tags'), quote('post_id'),
                    ', '.join(post_ids)),
            ])
    else:
        # An empty page would produce "IN ()", which most database backends
        # reject as a syntax error; there is nothing to fetch anyway.
        tags = ()

    for tag in tags:
        tagd.setdefault(tag.post_id, list()).append(tag)
        if tag_name and tag.name == tag_name:
            tag_obj = tag

    subd = dict()
    for sub in subscribers:
        subd[sub.feed.id] = sub

    for post in object_list:
        if post.id in tagd:
            post.qtags = tagd[post.id]
        else:
            post.qtags = list()
        post.subscriber = subd[post.feed.id]
        if feed == post.feed:
            user_obj = post.subscriber

    return user_obj, tag_obj
def prepare_update(state, neighborhood, rule):
    '''
    Lazily compute the next value of every cell of ``state``.

    state should be a 1D numpy array.

    neighborhood is called with a single cell index and must return
    something ``state`` can be indexed with (e.g. a list of indices).  If it
    has a ``set_length`` method, that is first called with ``len(state)``.

    rule should be a hashmap which maps each initial condition, as a tuple
    of the selected cells, to the correct final condition.

    returns an iterable with one entry per cell.
    '''
    n_cells = len(state)

    # Convenience hook for nx_generator objects.
    if hasattr(neighborhood, 'set_length'):
        neighborhood.set_length(n_cells)

    # For each index: select the neighbourhood, slice the state with it,
    # freeze the slice into a tuple and look the tuple up in the rule table,
    # i.e. rule[tuple(state[neighborhood(index)])].
    return (rule[tuple(state[neighborhood(index)])]
            for index in range(0, n_cells))
def process_training_set(self, training_set):
    """Split a training set into inputs/outputs and normalize both.

    ``training_set`` is an iterable of (input, output) pairs.  The inputs
    and the outputs are each converted to a numpy array, a
    ``self.normalization_class`` instance is built from each array and
    stored as ``self.input_normalization`` / ``self.output_normalization``.

    :return: tuple (normalized inputs, normalized outputs)
    """
    # Transpose the pairs into two columns and convert each to an array.
    input_set, output_set = (
        numpy.asarray(column) for column in zip(*training_set))

    input_norm = self.normalization_class(input_set)
    output_norm = self.normalization_class(output_set)
    self.input_normalization = input_norm
    self.output_normalization = output_norm

    normalized_input_set = self.input_normalization.normalize(input_set)
    normalized_output_set = self.output_normalization.normalize(output_set)
    return normalized_input_set, normalized_output_set
def navigate(self, dest):
    """Find a path from current location to the given destination.

    Returns whatever ``a_star`` returns, or an empty list when either the
    start or the goal has no visible landmark.
    """
    rospy.loginfo("POSE: %f, %f" %(self.pose.x, self.pose.y))

    # Node objects for A*.
    start = Landmark(name="START", ltype="END",
                     x=self.pose.x, y=self.pose.y)
    goal = Landmark(name="GOAL", ltype="END", x=dest.x, y=dest.y)

    def graph_landmarks():
        # First element of every value stored in the landmark graph.
        return (pair[0] for pair in self.landmark_graph.itervalues())

    # The goal is added to the candidates for start so that we can navigate
    # directly to the goal location.
    start_zone = self.find_nearest_visibles(
        start, chain(graph_landmarks(), [goal]))
    # Start is added here for the empty zone check below.
    goal_zone = self.find_nearest_visibles(
        goal, chain(graph_landmarks(), [start]))

    rospy.loginfo("Start zone: %s" % (", ".join(l.name for l in start_zone)))
    rospy.loginfo("Goal zone: %s" % (", ".join(l.name for l in goal_zone)))
    if not (start_zone and goal_zone):
        rospy.logerr("Cannot connect start and goal! A* failed.")
        return []

    # A* callbacks.
    def is_goal(node):
        return node.name == "GOAL"

    def heuristic(node):
        return point_distance(node, goal)

    def neighbors(node):
        if node.name == "START":
            return start_zone
        nbrs = self.landmark_graph[node.name][1]
        if node not in goal_zone:
            return nbrs
        # Intentionally use list concat to make a copy of the list.
        # Modifying nbrs in place would modify the original graph.
        return nbrs + [goal]

    return a_star(start, is_goal, neighbors, point_distance, heuristic)
def init_env(env_ext=None):
    """(Re)initialise the interpreter's module-level state.

    Replaces the symbol table and the global environment, re-creates the
    special-form symbols, the quote table and the macro table, evaluates
    the built-in ``and`` / ``or`` macro definitions and finally binds every
    entry of ``env_ext`` in the global environment.

    :param env_ext: optional mapping of symbol name -> value to add to the
        global environment; ``None`` (the default) adds nothing.
    """
    global global_env, macro_table, symbol_table
    global _quote, _if, _set, _define, _lambda, _begin, _definemacro
    global _quasiquote, _unquote, _unquotesplicing
    global _append, _cons, _let, quotes

    symbol_table = dict()
    global_env = add_globals(Env())

    _quote, _if, _set, _define, _lambda, _begin, _definemacro = [
        Sym(name) for name in
        ['quote', 'if', 'set', 'define', 'lambda', 'begin', 'define-macro']]
    _quasiquote, _unquote, _unquotesplicing = [
        Sym(name) for name in ['quasiquote', 'unquote', 'unquote-splicing']]
    _append, _cons, _let = [Sym(name) for name in ['append', 'cons', 'let']]

    quotes = {"'": _quote, '`': _quasiquote, ',': _unquote,
              ',@': _unquotesplicing}
    macro_table = {_let: let}

    peval(
        "(begin "
        "(define-macro and (lambda args "
        "(if (null? args) #t "
        "(if (= (length args) 1) (car args) "
        "`(if ,(car args) (and ,@(cdr args)) #f))))) "
        "(define-macro or (lambda args "
        "(if (null? args) #f "
        "(if (= (length args) 1) (car args) "
        "`(if ,(car args) ,(car args) (or ,@(cdr args))))))) "
        ")")

    if env_ext:
        for sym, val in env_ext.items():
            global_env[Sym(sym)] = val
def map_colors(self, colors, cmap=None, lut=None, mode='hexs', **norm_kw):
    """Return rgb colors (without alpha) for a sequence of color numbers.

    - colors: a seq of color numbers.
    - cmap: a Colormap or a name like 'jet' (passed to
      cm.get_cmap(cmap, lut))
    - mode: one of ['hexs', 'tuples', 'arrays']; 'arrays' returns the
      sliced array itself, the other two return a list.
    - norm_kw: keyword arguments forwarded to Normalize.

    Raises ValueError for an unknown mode.

    Ref: http://www.scipy.org/Cookbook/Matplotlib/Show_colormaps
    from: http://nullege.com/codes/show/src%40p%40y%40pycogent-HEAD%40cogent%40draw%40multivariate_plot.py/6/matplotlib.colors.Colormap/python
    """
    modes = ['hexs', 'tuples', 'arrays']
    if mode not in modes:
        raise ValueError('mode must be one of %s, but got %s'
                         % (modes, mode))

    if not isinstance(cmap, Colormap):
        cmap = cm.get_cmap(cmap, lut=lut)

    normalized = Normalize(**norm_kw)(colors)
    rgb_arrays = cmap(normalized)[:, :-1]  # drop the alpha column

    if mode == 'arrays':
        return rgb_arrays
    convert = tuple if mode == 'tuples' else rgb2hex
    return [convert(rgb) for rgb in rgb_arrays]
def main(files):
    """Print a pss/uss report for one input, or the difference of two.

    :param files: list of file names.  With no entry the data is read from
        standard input; with one entry that file is reported; with two or
        more, the first two are analysed and their difference is reported.

    Entries whose ``pss`` and ``uss`` are both zero are not printed.
    """
    file_a = open(files[0], 'r') if len(files) > 0 else sys.stdin
    file_b = None
    try:
        if len(files) > 1:
            file_b = open(files[1], 'r')
        mem_a = analyse(file_a)
        mem_b = analyse(file_b) if file_b is not None else None
    finally:
        # Close the inputs even when opening or analysing fails.
        file_a.close()
        if file_b is not None:
            file_b.close()

    def key_width(mapping):
        # Width of the longest key; 0 for an empty mapping, where max()
        # on its own would raise ValueError.
        return max([len(key) for key in mapping] or [0])

    if mem_b is None:
        key_space = key_width(mem_a)
        for key in sorted(mem_a):
            v = mem_a[key]
            if v['pss'] != 0 or v['uss'] != 0:
                print('{key:<{width}} {pss:>7} {uss:>7}'.format(
                    key=key, width=key_space, pss=v['pss'], uss=v['uss']))
    else:
        report = diff(mem_a, mem_b)
        key_space = key_width(report)
        for key in sorted(report):
            v = report[key]
            if v['pss'] != 0 or v['uss'] != 0:
                print('{dir} {key:<{width}} {pss:>+7} {uss:>+7}'.format(
                    dir="-|+"[v['dir'] + 1], key=key, width=key_space,
                    pss=v['pss'], uss=v['uss']))
def build_cliff( altitudes, key, start_cliff, end_cliff ):
    """Look for a cliff line crossing the cell whose first corner is ``key``.

    The cell corners are key, (x+1, y), (x+1, y+1) and (x, y+1).  An edge is
    a cliff edge when the absolute altitude difference between its two
    corners lies in [start_cliff, end_cliff).

    Returns None when a corner is missing from ``altitudes`` or fewer than
    two cliff edges exist; otherwise a list with two entries, one per
    selected edge, each computed by ``divide_by_scalar`` from the summed
    coordinates of the edge's two corners and 2.0.
    """
    x, y = key[0], key[1]
    corners = [key, (x+1, y), (x+1, y+1), (x, y+1)]

    alts = []
    for corner in corners:
        # We need a full cell; give up if any corner is unknown.
        if corner not in altitudes:
            return None
        alts.append(altitudes[corner])

    deltas = [(abs(alts[(i+1) % 4] - alts[i]), i) for i in range(len(alts))]
    good_deltas = [d for d in deltas if start_cliff <= d[0] < end_cliff]

    if len(good_deltas) > 2:
        print("special case good deltas")
    if len(good_deltas) < 2:
        # No cliffs found.  A single edge means we are at the end of a
        # cliff; that case should be found from another cliff.
        return None

    # Keep the two steepest edges.
    good_deltas.sort(reverse=True)
    idx1 = good_deltas[0][1]
    idx2 = good_deltas[1][1]
    if alts[idx1] < alts[(idx1+1) % 4]:
        idx1, idx2 = idx2, idx1

    return [
        divide_by_scalar(imap(add, corners[idx1], corners[(idx1+1) % 4]), 2.0),
        divide_by_scalar(imap(add, corners[idx2], corners[(idx2+1) % 4]), 2.0),
    ]
def urlparse(fh, part=None, query_params=None, decode=False, **kwargs):
    """Parse (or URL-decode) every line of ``fh``, yielding one result each.

    :param fh: iterable of lines; each line is stripped before use.
    :param part: which component to yield: 'scheme', 'domain', 'netloc',
        'path' or 'query' (the parsed query dict).  Any other value raises
        KeyError when ``decode`` is not True.
    :param query_params: only used with ``part='query'``.  A single name
        yields the first value of that parameter (or None); a
        comma-separated list yields a list of first values (None for the
        missing ones).
    :param decode: when exactly ``True``, ignore ``part`` and yield each
        line passed through ``unquote_plus``.
    :param kwargs: accepted and ignored.
    """
    def select(value):
        return value

    if query_params and part == 'query':
        if query_params.find(',') == -1:
            def select(query):
                return query.get(query_params, (None,))[0]
        else:
            # Multiple query params specified on command line
            names = query_params.split(",")

            def select(query):
                return [query.get(name, (None,))[0] for name in names]

    if decode is True:
        for line in fh:
            yield unquote_plus(line.strip())
        return

    for line in fh:
        url = urlsplit(line.strip())
        components = {
            "scheme": url.scheme,
            "domain": url.netloc,
            "netloc": url.netloc,
            "path": url.path,
            "query": parse_qs(url.query),
        }
        yield select(components[part])
def emit_mac(self):
    """Call this method when all the plaintext has been supplied.

    Pads and encrypts whatever is left in the input cache, computes the
    MAC and returns the final ciphertext block and the MAC, concatenated.
    Afterwards ``last_block`` is None and the input cache is empty, so no
    further encryption is possible.

    Raises KeccakError on a decryption instance or when the MAC has
    already been emitted.
    """
    if not self.encrypt_not_decrypt:
        raise KeccakError('This instance is intended for decryption, not encryption')
    if self.last_block is None:
        raise KeccakError('MAC has already been emitted, no further encryption may be performed')

    assert len(self.input_cache) < self.block_size
    # Remember the unpadded length; it is fed to the MAC computation.
    length_byte = chr(len(self.input_cache))
    self.input_cache = self.k.pad10star1(self.input_cache,
                                         self.block_size*8)
    assert len(self.input_cache) == self.block_size
    assert len(self.last_block) == self.mac_size

    # XOR the padded block with the leading bytes of the previous output.
    keystream = self.last_block[:self.block_size]
    final_ciphertext_block = ''.join(
        [chr(ord(p) ^ ord(k)) for p, k in zip(self.input_cache, keystream)])
    assert len(final_ciphertext_block) == self.block_size

    self.last_block = self.k(
        length_byte + self.input_cache + self.mac_round_byte)
    assert len(self.last_block) == self.mac_size
    retval = final_ciphertext_block + self.last_block

    # Mark the instance as finished.
    self.last_block = None
    self.input_cache = ''
    assert len(retval) == self.block_size + self.mac_size
    return retval
def test_prod_custom_dtype(self):
    """
    Check that prod() honours a user-supplied output dtype.

    Every (input dtype, output dtype) pair drawn from
    ``theano.scalar.all_types`` is tried.  Pairs that would force a
    downcast must raise TypeError; every other pair must build with the
    requested dtype and, for non-complex outputs, be differentiable.
    """
    # The axis should not matter, so we simply cycle through a few of them.
    axis_choices = [None, 0, 1, [0], [1], [0, 1]]
    n_done = 0
    for input_dtype in (str(t) for t in theano.scalar.all_types):
        x = tensor.matrix(dtype=input_dtype)
        for output_dtype in (str(t) for t in theano.scalar.all_types):
            axis = axis_choices[n_done % len(axis_choices)]
            upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
            # Accepted when no downcast is needed; int/uint inputs are
            # always allowed with float/complex outputs.
            accepted = (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                         output_dtype in tensor.continuous_dtypes))
            if not accepted:
                self.assertRaises(TypeError,
                                  x.prod, dtype=output_dtype, axis=axis)
            else:
                prod_var = x.prod(dtype=output_dtype, axis=axis)
                assert prod_var.dtype == output_dtype
                if "complex" in output_dtype:
                    # NOTE: this also skips the counter increment below.
                    continue
                # Check that we can take the gradient.
                grad_var = tensor.grad(prod_var.sum(), x,
                                       disconnected_inputs='ignore')
            n_done += 1
def owner_cluster(con, cur, nitem=None, reverse=True, nshingle=2, store=False, **kwargs):
    """Cluster owner names with a Simhash index built from the owner table.

    Each name contributes its shingles (of size ``nshingle``) followed by
    its whitespace-separated words as features, each group weighted
    linearly from 1.0 down to 0.0.

    :param con: database connection; only used to commit when ``store``.
    :param cur: database cursor used for all statements.
    :param nitem: optional row limit (ignored when falsy).
    :param reverse: read the rows in descending rowid order.
    :param store: when true, (re)create the ``pair`` table, fill it with
        the id/name pairs and commit; nothing is returned in that case.
    :param kwargs: forwarded to ``Simhash``.
    :return: ``c.unions`` when ``store`` is false, otherwise None.
    """
    c = Simhash(**kwargs)

    cmd = 'select ownerid,name from owner'
    if reverse:
        cmd += ' order by rowid desc'
    if nitem:
        cmd += ' limit %i' % nitem

    name_dict = {}
    for i, (ownerid, name) in enumerate(cur.execute(cmd)):
        words = name.split()
        shings = list(shingle(name, nshingle))
        weights = (list(np.linspace(1.0, 0.0, len(shings))) +
                   list(np.linspace(1.0, 0.0, len(words))))
        c.add(shings + words, weights=weights, label=ownerid)
        name_dict[ownerid] = name
        if i % 10000 == 0:
            print(i)  # progress indicator

    ipairs = c.unions
    npairs = [[name_dict.get(owner) for owner in pair] for pair in ipairs]
    print('Found %i pairs' % len(ipairs))

    if not store:
        return ipairs

    cur.execute('drop table if exists pair')
    cur.execute('create table pair (ownerid1 int, ownerid2 int, name1 text, name2 text)')
    cur.executemany(
        'insert into pair values (?,?,?,?)',
        ((o1, o2, n1, n2)
         for (o1, o2), (n1, n2) in izip(ipairs, npairs)))
    con.commit()
def do_join(eval_ctx, value, d=u""):
    """Return a string which is the concatenation of the strings in the
    sequence. The separator between elements is an empty string per
    default, you can define it with the optional parameter:

    .. sourcecode:: jinja

        {{ [1, 2, 3]|join('|') }}
            -> 1|2|3

        {{ [1, 2, 3]|join }}
            -> 123
    """
    # Without automatic escaping, joining is plain unicode joining.
    if not eval_ctx.autoescape:
        return unicode(d).join(imap(unicode, value))

    # The delimiter already has an html representation: soft-convert
    # everything and join.
    if hasattr(d, "__html__"):
        return soft_unicode(d).join(imap(soft_unicode, value))

    # Otherwise check whether any item has an html representation.  If so,
    # the delimiter is escaped; items without one are converted to unicode.
    items = list(value)
    needs_escape = False
    for pos, item in enumerate(items):
        if hasattr(item, "__html__"):
            needs_escape = True
        else:
            items[pos] = unicode(item)
    d = escape(d) if needs_escape else unicode(d)
    return d.join(items)
def levenshtein(a, b, casecost = 1, spacecost = 1, totals = False):
    """Calculates the Levenshtein edit distance between strings a and b.

    'casecost' is the cost of replacement when only the case is changed,
    not the actual character. 'spacecost' is the cost of a space character;
    every other character costs 1. Replacing a character by a different one
    (ignoring case) costs the larger of the two character costs.
    If totals=True, returns total character costs of both strings, in
    addition to the distance value, as a triple (dist, cost_a, cost_b).

    >>> levenshtein("Ala", "OLa")
    2
    >>> levenshtein("Ala", "OLa", 0.5)
    1.5
    >>> round(levenshtein(" a ala", "aala ", 1, 0.1), 5)
    0.3
    >>> levenshtein(" a ala Ola ", "aalaola ", 1, 2)
    7
    """
    #_a, _b = a,b
    reorder = False
    n, m = len(a), len(b)
    if n < m:  # ensure that n >= m ('a' is longer), to speed up calculations (short outer loop); but mem usage is O(max(n,m))
        a,b = b,a
        n,m = m,n
        reorder = True

    charcost = lambda c: spacecost if c == ' ' else 1
    # Integer arrays are used when both costs are integers, doubles otherwise.
    isint = util.isint(casecost) and util.isint(spacecost)
    typecode = 'l' if isint else 'd'
    zero = array(typecode, [0])
    zeron = zero * n

    try:
        alow = a.lower()
        blow = b.lower()
        acost = array(typecode, imap(charcost, a))
        bcost = array(typecode, imap(charcost, b))

        #current = range(n+1)
        current = zero + acost  # initially, current[j] is the total cost of letters in a[:j], for j = 0,1,...,n
        for j in range(2,n+1):
            current[j] += current[j-1]  # 'current' must hold cumulative a[:j] costs rather than single-letter costs
        #print current

        # loop invariant: current[j] is the cost of transforming a[:j] into b[:i]
        for i in range(1,m+1):  # loop over characters of 'b'
            cur_b, cur_bcost = b[i-1], bcost[i-1]
            previous = current
            # First cell: cost of producing b[:i] from the empty prefix of 'a'.
            current = array(typecode, [previous[0] + cur_bcost]) + zeron
            for j in range(1,n+1):  # loop over characters of 'a'
                add = previous[j] + cur_bcost
                delete = current[j-1] + acost[j-1]
                change = previous[j-1]
                if a[j-1] != cur_b:
                    if alow[j-1] == blow[i-1]:
                        change += casecost
                    else:
                        change += max(cur_bcost, acost[j-1])  #1
                current[j] = min(add, delete, change)
    except UnicodeWarning:
        # Report the offending arguments, then let the warning propagate.
        print "unicode error in levenshtein(%s, %s)" % (repr(a), repr(b))
        raise

    if totals:
        # Undo the swap so the totals are reported in the caller's order.
        if reorder:
            return current[n], sum(bcost), sum(acost)
        else:
            return current[n], sum(acost), sum(bcost)
    return current[n]
def service_get(env, jones):
    """Render (or return as JSON) the configuration of ``env`` for a service.

    Redirects to the index when the service does not exist, and to the
    service list when no configuration node exists for ``env``.
    """
    if not jones.exists():
        return redirect(url_for('index'))

    children = jones.get_child_envs(Env.Root)

    def is_leaf(child):
        # A non-empty child is a leaf when no child contains "<child>/".
        return len(child) and not any(
            c.find(child + '/') >= 0 for c in children)

    try:
        version, config = jones.get_config_by_env(env)
    except NoNodeException:
        return redirect(url_for('services', service=jones.service))

    vals = {
        "env": env,
        "version": version,
        "children": [
            {'env': Env(child), 'is_leaf': is_leaf(child)}
            for child in children
        ],
        "config": config,
        "view": jones.get_view_by_env(env),
        "service": jones.service,
        "associations": jones.get_associations(env)
    }
    if request_wants('application/json'):
        return jsonify(vals)
    return render_template('service.j2', **vals)
def parse_now_playing(self, response):
    """Scrapes USA openings this week and top 10 in week.

    Returns an iterator that yields one item per opening movie, one item
    per box-office movie and then one request per movie URL.
    """
    self.log("Parsing USA Top Week")
    hxs = HtmlXPathSelector(response)

    def absolute(url):
        return self._urljoin(response, url)

    #
    # openings this week
    #
    openings = hxs.select('//table[@class="movies"]//a[@class="title"]')
    boxoffice = hxs.select('//table[@class="boxoffice movies"]//a[@class="title"]')

    # The URLs are materialised as lists because they are used twice: once
    # for the items and once for the requests.  With one-shot iterators
    # the items consumed them and no request was ever produced.
    opening_titles = openings.select('text()').extract()
    opening_urls = [absolute(url)
                    for url in openings.select('@href').extract()]

    box_titles = boxoffice.select('text()').extract()
    box_urls = [absolute(url)
                for url in boxoffice.select('@href').extract()]

    # items
    opening_items = (UsaOpeningWeekMovie(title=title, url=url)
                     for (title, url) in zip(opening_titles, opening_urls))
    box_items = (UsaTopWeekMovie(title=title, url=url)
                 for (title, url) in zip(box_titles, box_urls))

    # movie requests
    requests = (self.make_requests_from_url(url)
                for url in chain(opening_urls, box_urls))

    return chain(opening_items, box_items, requests)
def __init__(self, diff, differ='diff', format='udiff'):
    """
    :param diff: a text in diff format or generator
    :param differ: ``'difflib'`` selects the difflib line highlighter; any
        other value selects the udiff highlighter
    :param format: format of diff passed, `udiff` or `gitdiff`
    """
    # A plain string is wrapped into a one-element list, so from here on
    # the stored diff is never a string.
    # NOTE(review): the removed `isinstance(self.__udiff, basestring)`
    # branch (which split the text into lines) could therefore never run;
    # confirm whether string input was meant to be split with splitlines().
    if isinstance(diff, basestring):
        diff = [diff]

    self.__udiff = diff
    self.__format = format
    self.adds = 0
    self.removes = 0

    udiff_copy = self.copy_iterator()
    if self.__format == 'gitdiff':
        source = self._parse_gitdiff(udiff_copy)
    else:
        source = udiff_copy
    self.lines = itertools.imap(self.escaper, source)

    # Select a differ.
    if differ == 'difflib':
        self.differ = self._highlight_line_difflib
    else:
        self.differ = self._highlight_line_udiff
def decrypt(self, m):
    """Decrypt the bytes m and return as much plaintext as is available.

    There may not be plaintext available every time this method is
    called, and there is no guarantee about the length of the plaintext
    compared to the length of the ciphertext.  Ciphertext chunks must be
    fed to this method in the same order that they were produced by the
    encrypt method.

    Raises KeccakError on an encryption instance or once the MAC has been
    verified, and TypeError when ``m`` is not a bytes object.
    """
    if self.encrypt_not_decrypt:
        raise KeccakError('This instance is intended for encryption, not decryption')
    if self.last_block is None:
        raise KeccakError('MAC has already been verified, no further decryption may be performed')
    if not isinstance(m, bytes):
        raise TypeError("argument must be a bytes")

    self.input_cache += m
    pieces = []
    # Always keep more than block_size + mac_size bytes unprocessed in the
    # cache before decrypting another block.
    while len(self.input_cache) > self.block_size+self.mac_size:
        chunk = self.input_cache[:self.block_size]
        self.input_cache = self.input_cache[self.block_size:]
        assert len(self.last_block) == self.mac_size
        keystream = self.last_block[:self.block_size]
        plain = ''.join(
            [chr(ord(c) ^ ord(k)) for c, k in zip(chunk, keystream)])
        self.last_block = self.k(
            chr(len(plain)) + plain + self.cipher_round_byte)
        pieces.append(plain)
    assert len(self.last_block) == self.mac_size
    return ''.join(pieces)
def setup(events, field, period, lookback, flip_signal):
    """Detect a setup following a price flip and describe it as a Signal.

    A BUY setup needs a BEAR flip and every value from index ``lookback``
    on to be lower than the value ``lookback`` positions earlier (compared
    against the first ``period`` values); a SELL setup is the mirror image
    after a BULL flip.

    :param events: iterable of mappings with ``field``, 'low' and 'high'.
    :param flip_signal: mapping whose 'direction' is compared with
        ``flip.BEAR`` / ``flip.BULL``; passed on to the Signal.
    :return: a Signal, or None when no setup is found.
    """
    events = list(events)
    values = [e[field] for e in events]
    recent, earlier = values[lookback:], values[:period]

    direction = None
    flip_dir = flip_signal['direction']
    if flip_dir == flip.BEAR and all(
            new < old for new, old in zip(recent, earlier)):
        direction = BUY
    elif flip_dir == flip.BULL and all(
            new > old for new, old in zip(recent, earlier)):
        direction = SELL
    if not direction:
        return

    bars = events[lookback:]
    lowes = [bar['low'] for bar in bars]
    highs = [bar['high'] for bar in bars]
    high = np.max(highs)
    # NOTE(review): this used to be np.max(lowes), which looks like a
    # copy-paste of the line above; the low of the range is taken to be the
    # lowest low.  Confirm against the consumers of Signal.
    low = np.min(lowes)
    if direction == BUY:
        perfection = np.min(lowes[-4:-2])
    else:
        perfection = np.max(highs[-4:-2])
    return Signal(direction, high, low, bars, perfection, flip_signal)
def _gen_batches(idxs, subjects, batch_size):
    '''divide row indices for deepkt.

    Indices are first grouped by subject id; the first 2 rows of each
    subject are dropped (required by the recursive structure of the model)
    and the remainder is divided into sub batches.  Empty sub batches are
    discarded.

    Args:
        idxs (int[]): row indices
        subjects (int[]): list of subject ids corresponding to each row
            (could also be an EnumColumn). Subject ids must be pre-sorted.
        batch_size: the size of the subject's sub batches

    Returns:
        int[][]: list of batches

    Example:
        >>> _gen_batches(xrange(11), [1] * 6 + [2] * 5, 2)
        [[2, 3], [4, 5], [8, 9]]
    '''
    result = []
    for subject_idxs in gen_batches_by_keys(idxs, [subjects]):
        # Skip the first two rows of this subject.
        remaining = list(islice(subject_idxs, 2, None))
        for sub_batch in gen_batches_by_size(remaining, batch_size):
            if sub_batch:
                result.append(sub_batch)
    return result
def pipe_sort(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that sorts the input source according to the specified
    key. Not loopable. Not lazy.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    kwargs -- other inputs, e.g. to feed terminals for rule values;
        ``pass_if`` is removed and handed to ``utils.get_pass``
    conf : {
        'KEY': [
            {
                'field': {'type': 'text', 'value': 'title'},
                'dir': {'type': 'text', 'value': 'DESC'}
            }
        ]
    }

    Returns
    -------
    _OUTPUT : ``_INPUT`` unchanged when the pass test succeeds, otherwise
        an iterator over the sorted items
    """
    test = kwargs.pop('pass_if', None)
    _pass = utils.get_pass(test=test)
    key_confs = utils.listize(conf['KEY'])
    get_value = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=get_value, **kwargs)

    # One comparer per key; a leading '-' marks descending order.
    comparers = []
    for key_conf in key_confs:
        parsed = parse_conf(DotDict(key_conf))
        prefix = '-' if parsed.dir == 'DESC' else ''
        comparers.append(get_comparer('%s%s' % (prefix, parsed.field)))

    cmp_func = partial(multikeysort, comparers=comparers)
    if _pass:
        return _INPUT
    return iter(sorted(_INPUT, cmp=cmp_func))
def test_prod_without_zeros_custom_dtype(self):
    """
    Check that ProdWithoutZeros() honours a user-supplied output dtype.

    Every (input dtype, output dtype) pair drawn from
    ``theano.scalar.all_types`` is tried.  Pairs that would force a
    downcast must raise TypeError when the op is applied; every other
    pair must produce a variable of the requested dtype.
    """
    # The axis should not matter, so we simply cycle through a few of them.
    axis_choices = [None, 0, 1, [0], [1], [0, 1]]
    n_done = 0
    for input_dtype in (str(t) for t in theano.scalar.all_types):
        x = tensor.matrix(dtype=input_dtype)
        for output_dtype in (str(t) for t in theano.scalar.all_types):
            axis = axis_choices[n_done % len(axis_choices)]
            upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
            # Accepted when no downcast is needed; int/uint inputs are
            # always allowed with float/complex outputs.
            accepted = (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                         output_dtype in tensor.continuous_dtypes))
            if accepted:
                prod_woz_var = ProdWithoutZeros(
                    axis=axis, dtype=output_dtype)(x)
                assert prod_woz_var.dtype == output_dtype
            else:
                self.assertRaises(
                    TypeError,
                    ProdWithoutZeros(axis=axis, dtype=output_dtype),
                    x)
            n_done += 1
def led_contents(i):
    """Return True if any position covered by LED ``i`` has contents.

    The model's ``position_count`` positions are divided evenly among
    ``num_leds`` LEDs; LED ``i`` covers the range starting at
    ``i / num_leds * position_count``.
    """
    contents = model.contents
    pmin = float(i) / num_leds * model.position_count
    pmax = pmin + float(model.position_count) / num_leds
    return any(contents(position)
               for position in model.contents_range(pmin, pmax))
def itervalues(self):
    """Return an iterator over ``self.get(key)`` for each key in ``self._keys``."""
    get = self.get
    return (get(key) for key in self._keys)
def text_shingle(n, value):
    """Return an iterator that joins each result of ``shingle(n, value)``
    into a single unicode string."""
    return (u''.join(piece) for piece in shingle(n, value))
def all(self):
    """Return an iterator of ``Person`` objects, one per entry of
    ``database.iter_people()``."""
    return (Person(entry) for entry in database.iter_people())
def sort_uniq(sequence):
    """Return an iterator over the distinct elements of ``sequence`` in
    sorted order."""
    # groupby on sorted input yields one (key, group) pair per distinct
    # value; only the key is needed.
    return (key for key, _group in itertools.groupby(sorted(sequence)))
def all(self):
    """Return an iterator of ``Event`` objects, one per entry of
    ``database.iter_events()``."""
    return (Event(entry) for entry in database.iter_events())
def find_frequent_itemsets(dataset, min_support, min_bad_rate, include_support=True):
    """
    Find frequent itemsets in the given dataset using FP-growth.

    This function is a generator: nothing is computed until it is
    iterated.  Each item must be hashable (i.e., it must be valid as a
    member of a dictionary or a set).

    Parameters
    ----------
    dataset : iterable of iterables
        The transactions from which to generate candidate itemsets.
    min_support : int
        The minimum number of occurrences of an itemset for it to be
        accepted.
    min_bad_rate :
        Accepted but not used by this function.
    include_support : bool
        If true (the default), yield (itemset, support) pairs instead of
        just the itemsets.
    """
    items = defaultdict(int)  # mapping from items to their supports
    processed_transactions = []

    # Load the passed-in transactions and count the support that individual
    # items have.
    for transaction in dataset:
        processed = []
        for item in transaction:
            items[item] += 1
            processed.append(item)
        processed_transactions.append(processed)

    # Remove infrequent items from the item support dictionary.
    items = dict((item, support) for item, support in items.items()
                 if support >= min_support)

    # Build our FP-tree. Before any transactions can be added to the tree,
    # they must be stripped of infrequent items and their surviving items
    # must be sorted in decreasing order of frequency.
    def clean_transaction(transaction):
        kept = [v for v in transaction if v in items]
        kept.sort(key=lambda v: items[v], reverse=True)
        return kept

    master = FPTree()
    for transaction in processed_transactions:
        master.add(clean_transaction(transaction))

    def find_with_suffix(tree, suffix):
        for item, nodes in tree.items():
            support = sum(n.count for n in nodes)
            if support >= min_support and item not in suffix:
                # New winner!
                found_set = [item] + suffix
                yield (found_set, support) if include_support else found_set

                # Build a conditional tree and recursively search for
                # frequent itemsets within it.
                cond_tree = conditional_tree_from_paths(
                    tree.prefix_paths(item), min_support)
                for s in find_with_suffix(cond_tree, found_set):
                    yield s  # pass along the good news to our caller

    # Search for frequent itemsets, and yield the results we find.
    for itemset in find_with_suffix(master, []):
        yield itemset
def map_async(self, func, args):
    """Call ``func`` on every element of ``args`` and discard the results.

    Despite the name, the calls run synchronously and in order; the method
    returns ``None`` once ``args`` is exhausted. An exception raised by
    ``func`` propagates immediately and stops the iteration.
    """
    for arg in args:
        func(arg)
def __repr__(self, repr=repr):
    """Return the reprs of the entries of ``self.maps`` joined by ' -> '.

    ``repr`` is a default argument, so the formatter is bound when the
    method is defined and callers may substitute another one.
    """
    rendered = (repr(mapping) for mapping in self.maps)
    return ' -> '.join(rendered)
def tienepar1(lista):
    """Return True if ``lista`` contains at least one even number.

    Iteration stops at the first even element; an empty iterable gives False.
    """
    for numero in lista:
        if numero % 2 == 0:
            return True
    return False
def xrefs_from(self):
    """Xrefs from this line.

    Each raw cross-reference reported by ``idautils.XrefsFrom`` for
    ``self.ea`` is wrapped in ``Xref`` lazily, as the result is iterated.

    :return: Iterator of `sark.code.xref.Xref` objects.
    """
    raw_xrefs = idautils.XrefsFrom(self.ea)
    return imap(Xref, raw_xrefs)
def __len__(self, len=len, sum=sum, imap=imap):
    """Return the sum of the lengths of every entry in ``self.maps``.

    The ``len``, ``sum`` and ``imap`` defaults bind those callables when the
    method is defined, so they are local names inside the body.
    """
    sizes = imap(len, self.maps)
    return sum(sizes)
def versions(self):
    """
    Extract the external names and versions from an installed LCGCMT.

    Reads ``<lcgcmt_root>/LCG_Configuration/cmt/requirements`` and collects
    every ``macro`` statement whose name ends with ``_config_version``. The
    suffix is stripped from the name, the name is translated through
    ``self.__special_names__`` when it has an entry there, and the value given
    for the ``target-slc`` tag (if any) replaces the default value. Round
    brackets in the value are turned into curly ones.

    @return: dictionary mapping external names to versions
    """
    suffix = "_config_version"

    def statements(lines):
        """
        Generator of CMT statements from a list of lines.
        """
        statement = ""  # we start with an empty statement
        for line in lines:
            # CMT ignores spaces at the end of line when checking for '\'
            statement += line.rstrip()
            if statement.endswith("\\"):
                # strip the '\' and continue the concatenation
                statement = statement[:-1]
            else:
                # the statement is complete: return only non-trivial ones
                statement = statement.strip()
                if statement:
                    yield statement
                statement = ""  # we start collecting a new statement

    def tokens(statement):
        """
        Split a statement in tokens.
        Trivial implementation assuming the tokens do not contain spaces.
        """
        return statement.split()

    def macro(args):
        """
        Analyze the arguments of a macro command.
        @return: tuple (name, value, exceptionsDict)
        """
        unquote = lambda s: s.strip('"')
        name = args[0]
        value = unquote(args[1])
        # pair up the remaining args as tag -> value (unquoting the values)
        exceptions = dict(zip(args[2::2], [unquote(a) for a in args[3::2]]))
        return name, value, exceptions

    # prepare the dictionary for the results
    found = {}
    requirements = os.path.join(self.lcgcmt_root, "LCG_Configuration",
                                "cmt", "requirements")
    # The file is read lazily by statements(), so the whole scan has to stay
    # inside the `with` block; the handle is closed even if parsing fails.
    with open(requirements) as req:
        for statement in statements(req):
            toks = tokens(statement)
            if toks.pop(0) != "macro":  # get only the macros ...
                continue
            name, value, exceptions = macro(toks)
            if not name.endswith(suffix):  # ... that end with _config_version
                continue
            name = name[:-len(suffix)]
            name = self.__special_names__.get(name, name)
            for tag in ["target-slc"]:
                # we use the alternative for 'target-slc' if present
                value = exceptions.get(tag, value)
            found[name] = value.replace('(', '{').replace(')', '}')
    return found
def inv_dict(d):
    """Return a new dict that maps every value of ``d`` back to its key.

    The pairs are read through ``d.iteritems()``. When several keys share a
    value, the pair produced last wins.
    """
    inverted = {}
    for key, value in d.iteritems():
        inverted[value] = key
    return inverted
def fio_cfg_compile(source, fname, test_params):
    """Build the processing pipeline for a fio config.

    The sections produced by ``parse_all_in_1(source, fname)`` are passed, in
    order, through ``apply_params`` (with ``test_params``), ``process_cycles``
    and ``process_repeats`` (both via ``flatmap``), and finally
    ``finall_process``.

    Returns the ``itertools.imap`` iterator for the last stage.
    """
    sections = parse_all_in_1(source, fname)
    parametrized = (apply_params(section, test_params) for section in sections)
    cycled = flatmap(process_cycles, parametrized)
    repeated = flatmap(process_repeats, cycled)
    return itertools.imap(finall_process, repeated)
def power_of(x):
    """Return an endless iterator over x**0, x**1, x**2, ...

    The exponent comes from ``itertools.count()``, so the iterator never
    terminates by itself; slice it or break out of the loop.
    """
    return imap(partial(pow, x), count())
def get_release_type(self, version):
    """Classify ``version`` as 'stable', 'dev' or 'unknown'.

    ``self.stable_firmwares`` is searched first, so a version present in both
    collections is reported as 'stable'. ``self.dev_firmwares`` is only
    consulted when no stable firmware matches.
    """
    if any(firmware.version == version for firmware in self.stable_firmwares):
        return 'stable'
    if any(firmware.version == version for firmware in self.dev_firmwares):
        return 'dev'
    return 'unknown'
def mosaicify(target, sources, tiles=32, zoom=1):
    """Create a mosaic of photos.

    The steps are:

    1. Open the target image and split it into a lattice of rectangles.
    2. Load every source image as a tile of the zoomed tile size, using a pool
       of worker processes.
    3. Compute the average color of each rectangle of the target.
    4. Look up, for each rectangle, the best matching source image.
    5. Resize the target by the zoom factor and recompute the lattice.

    The worker pool is always shut down before returning. If any of steps 2-4
    raises, the pool is terminated and joined, and the exception propagates.

    :param target: filename of the image to reproduce as a mosaic.
    :param sources: the source images; iterated more than once, so it must be
        re-iterable (e.g. a list).
    :param tiles: tile count passed to ``lattice``; the zoomed mosaic is
        ``tiles`` zoomed tiles wide and ``tiles`` zoomed tiles high.
    :param zoom: multiplier applied to the target size before dividing it
        into tiles.
    :return: a ``Mosaic`` built from the resized target and an iterator of
        ``(rectangle, tile)`` pairs.
    """
    # Load the target image into memory
    mosaic = ImageWrapper(filename=target)

    # Generate the list of rectangles identifying mosaic tiles
    (original_width, original_height) = mosaic.size
    rectangles = list(lattice(original_width, original_height, tiles))

    # Compute the size of the tiles after the zoom factor has been applied
    zoomed_tile_width = zoom * original_width // tiles
    zoomed_tile_height = zoom * original_height // tiles

    # Initialize the pool of workers
    workers = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(workers)
    try:
        # Load tiles into memory and resize them accordingly
        source_tiles = dict(
            itertools.izip(
                sources,
                load_raw_tiles(sources, mosaic.ratio,
                               (zoomed_tile_width, zoomed_tile_height),
                               pool, workers)))

        # Index all the source images by their average color
        source_list = ImageList(source_tiles.values())

        # Compute the average color of each mosaic tile
        mosaic_avg_colors = list(
            extract_average_colors(mosaic, rectangles, pool, workers))

        # Find which source image best fits each mosaic tile
        best_matching_imgs = list(
            search_matching_images(source_list, mosaic_avg_colors,
                                   pool, workers))
    except BaseException:
        # Do not leave worker processes behind when a step fails.
        pool.terminate()
        raise
    else:
        # Normal shutdown: no more work will be submitted.
        pool.close()
    finally:
        pool.join()

    # Apply the zoom factor
    zoomed_width = tiles * zoomed_tile_width
    zoomed_height = tiles * zoomed_tile_height
    mosaic.resize((zoomed_width, zoomed_height))
    rectangles = list(lattice(zoomed_width, zoomed_height, tiles))

    return Mosaic(
        mosaic,
        itertools.izip(rectangles,
                       itertools.imap(source_tiles.get, best_matching_imgs)))
def itervalues(self):
    """Return a lazy iterator over ``self[key]`` for each key in ``self._o``,
    visiting the keys in reverse order.
    """
    lookup = self.__getitem__
    return (lookup(key) for key in reversed(self._o))
def parmap(fun, seq, N=None, Nt=1, chunksize=1, ordered=True,
           daemon=False, progress=False,
           args=(), kwargs=None,
           star=False, kwstar=False,
           exception=None):
    """
    parmap -- Simple parallel mapper that can split amongst processes (N)
    and threads (Nt) (within the processes).

    Does *NOT* require functions to be pickleable (unlike
    vanilla multiprocess.Pool.map)

    Inputs:
    -------
    fun
        Single input function. Use lambdas or functools.partial
        to enable/expand multi-input.

    seq
        Sequence of inputs to map in parallel

    Options:
    --------
    N [None] (integer or None)
        Number of processes to use. If `None`, will use the CPU_COUNT.
        When len(seq) is known, N is reduced to at most
        len(seq)//chunksize, but never below 1.

    Nt [1] (integer)
        Number of threads to use. See notes below on multi-threaded vs
        multi-processes.

    chunksize [1] (int)
        How to break up the incoming sequence. Useful if also using threads.
        Will be (re)set to max(chunksize,Nt).

        Alternatively, if len(seq) exists and chunksize=-1 it will be reset
        to ceil(len(seq)/(N*Nt)). If chunksize=-1 and len(sequence) is not
        known, a warning will be emitted and chunksize will be reset to
        max(chunksize,Nt)

    ordered [True] (bool)
        Whether or not to order the results. If False, will return in
        whatever order they finished.

    daemon [False] (bool)
        Sets the multiprocessing `daemon` flag. If True, can not spawn child
        processes (i.e. cannot nest parmap) but should allow for CTRL+C type
        stopping. Supposedly, there may be issues with CTRL+C with it set to
        False. Use at your own risk

    progress [False] (bool)
        Display a progress bar or counter.
        Warning: Inconsistent in iPython/Jupyter notebooks and may clear
        other printed content. Instead, specify as 'nb' to use a Jupyter
        Widget progress bar.

    args [tuple()]
        Specify additional arguments for the function

    kwargs [dict()]
        Specify additional keyword arguments

    star [False]
        If True, the arguments to the function will be "starred" so, for
        example if `seq = [ (1,2), (3,4) ]`, the function will be called as
            star is False: fun((1,2))
            star is True:  fun(1,2) <==> fun(*(1,2))
        Can also set to None to not send anything

    kwstar [False]
        Assumes all items are (vals,kwvals) where `vals` RESPECTS `star`
        setting and still includes `args` and `kwvals`. See "Additional
        Arguments" section below.

    exception ['raise' if N>1 else 'proc']
        Choose how to handle an exception in a child process

        'raise'     : [Default] raise the exception (outside of the Process).
                      Also terminates all existing processes.
        'return'    : Return the Exception instead of raising it.
        'proc'      : Raise the exception inside the process. NOT RECOMMENDED
                      unless used in debugging (and with N=1)

        Note: An additional attribute called `seq_index` will also be set
              in the exception (whether raised or returned) to aid in
              debugging.

    Additional Arguments
    --------------------
    As noted above, there are many ways to pass additional arguments to
    your function. All of these are not completely needed since parmap
    makes using lambdas so easy, but they are there if preferred.

    Assume the following function:

        def dj(dictA,dictB):
            '''Join dictA and dictB where dictB takes precedence'''
            dictA = dictA.copy()
            dictA.update(dictB) # NOTE: dictB takes precedence
            return dictA

    Then the behavior is as follows where `args` and `kwargs` come from
    the main function call. The `val` (singular), `vals` (sequence/tuple of
    values), and `kwvals` are set via the sequence.

    | star  | kwstar | expected item | function args  | function keywords   |
    |-------|--------|---------------|----------------|---------------------|
    | False | False  | val           | *((val,)+args) | **kwargs            |†
    | True  | False  | vals          | *(vals+args)   | **kwargs            |
    | None  | False  | ---           | *args          | **kwargs            |°
    | None  | True   | ---           | *args          | **dj(kwargs,kwvals) |‡
    | False | True   | val,kwval     | *((val,)+args) | **dj(kwargs,kwvals) |‡
    | True  | True   | vals,kwval    | *(vals+args)   | **dj(kwargs,kwvals) |‡

    † Default
    ° If kwargs and args are empty, basically calls with nothing
    ‡ Note the ordering so kwvals takes precedence

    Note:
    ------
    Performs SEMI-lazy iteration based on chunksize. It will exhaust the
    input iterator but will yield as results are computed (This is similar
    to the `multiprocessing.Pool().imap` behavior)

    Explicitly wrap the parmap call in a list(...) to force immediate
    evaluation

    Threads and/or processes:
    -------------------------
    This tool has the ability to split work amongst python processes
    (via multiprocessing) and python threads (via the multiprocessing.dummy
    module). Python is not very performant in multi-threaded situations
    (due to the GIL) therefore, processes are usually the best for CPU
    bound tasks and threading is good for those that release the GIL (such
    as IO-bound tasks).

    WARNING: Many NumPy functions *do* release the GIL and can be threaded,
             but many NumPy functions are, themselves, multi-threaded.

    Alternatives:
    -------------
    This tool allows more data types, can split with threads, has an
    optional progress bar, and has fewer pickling issues, but these come at
    a small cost. For simple needs, the following may be better:

    >>> import multiprocessing as mp
    >>> pool = mp.Pool(N) # Or mp.Pool() for N=None
    >>> results = list( pool.imap(fun,seq) ) # or just pool.map
    >>> pool.close()

    Additional Note
    ---------------
    For the sake of convenience, a `map=imap=__call__` and
    `close = lambda *a,**k:None` are also added so a parmap function can
    mimic a multiprocessing pool object with duck typing

    Version:
    -------
    __version__

    """
    # Build up a dummy function with args,vals,kwargs, and kwvals
    if kwargs is None:
        kwargs = {}

    def _fun(ss):
        _args = list(args)
        _kw = kwargs.copy()
        try:
            # Check for None before boolean
            if star is None and kwstar:         # 4
                _kw.update(ss)
            elif star is None and not kwstar:   # 3
                pass
            elif not star and not kwstar:       # 1
                _args = [ss] + _args
            elif star and not kwstar:           # 2
                _args = list(ss) + _args
            elif not star and kwstar:           # 5
                _args = [ss[0]] + _args
                _kw.update(ss[1])
            elif star and kwstar:               # 6
                _args = list(ss[0]) + _args
                _kw.update(ss[1])
            else:
                raise TypeError()
        except TypeError:  # Mostly because bad input types
            return _Exception(
                TypeError('Ensure `args` are tuples and `kwargs` are dicts'),
                infun=False)
        except Exception as E:
            return _Exception(E, infun=False)

        if exception == 'proc':
            return fun(*_args, **_kw)  # Outside of a try
        try:
            return fun(*_args, **_kw)
        except Exception as E:
            return _Exception(E)
            # It would be great to include all of sys.exc_info() but
            # tracebacks cannot be pickled.

    try:
        tot = len(seq)
    except TypeError:
        tot = None

    N = CPU_COUNT if N is None else N

    if exception is None:
        exception = 'raise' if N > 1 else 'proc'

    if chunksize == -1:
        if tot is None:
            warnings.warn(
                'chunksize=-1 does not work when len(seq) is not known')
        else:
            # Force true division and an integer result so the ceiling is
            # correct whatever division semantics the module runs under.
            chunksize = int(math.ceil(tot / float(N * Nt)))
    chunksize = max(chunksize, Nt)  # Reset

    # Consider resetting N. Never go below one: with zero workers nothing
    # would consume the input queue, every item would be dropped and the
    # final q_in.join() would block forever.
    if tot is not None:
        N = max(1, min(N, tot // chunksize))

    # Build a counter iterator based on settings and tqdm
    notebook_progress = (isinstance(progress, (str, unicode))
                         and progress.lower() in ['jupyter', 'notebook', 'nb'])
    if tqdm is None:
        if notebook_progress:
            counter = partial(_counter_nb, tot=tot)
        else:
            counter = partial(_counter, tot=tot)
    else:
        if notebook_progress and hasattr(tqdm, 'tqdm_notebook'):
            counter = partial(tqdm.tqdm_notebook, total=tot)
        else:
            # Set the total since tqdm won't be able to get it.
            counter = partial(tqdm.tqdm, total=tot)

    # Handle N=1 without any multiprocessing
    if N == 1:
        pool = None
        if Nt == 1:
            out = imap(_fun, seq)
        else:
            pool = mpd.Pool(Nt)  # thread pools don't have the pickle issues
            out = pool.imap(_fun, seq)

        try:
            if progress:
                out = counter(out)
            for count, item in enumerate(out):
                if isinstance(item, _Exception):
                    item.E.seq_index = count
                    if not item.infun:
                        exception = 'raise'  # reset
                    if exception == 'raise':
                        raise item.E
                    elif exception == 'return':
                        item = item.E
                    elif exception == 'proc':
                        pass
                    else:
                        raise ValueError(
                            "Unrecognized `exception` setting '{}'".format(
                                exception))
                yield item
        finally:
            # Also runs when an exception is raised or the generator is
            # abandoned early, so the thread pool is always closed.
            if pool is not None:
                pool.close()
        return

    q_in = mp.JoinableQueue()  # Will need to `join` later to make sure is empty
    q_out = mp.Queue()

    # Start the workers
    workers = [
        mp.Process(target=_worker, args=(_fun, q_in, q_out, Nt))
        for _ in range(N)
    ]

    for worker in workers:
        worker.daemon = daemon
        worker.start()

    # Create a separate thread to add to the queue in the background
    def add_to_queue():
        for iixs in _iter_chunks(enumerate(seq), chunksize):
            q_in.put(iixs)

        # Once (if ever) it is exhausted, send None to close workers
        for _ in xrange(N):
            q_in.put(None)

    add_to_queue_thread = Thread(target=add_to_queue)
    add_to_queue_thread.start()

    # Define a generator that will pull from the q_out and then run through
    # the rest of our generator/iterator chain for progress and ordering
    def queue_getter():
        finished = 0
        while finished < N:
            out = q_out.get()
            if out is None:
                # A None on q_out is counted as one finished worker.
                finished += 1
                continue
            yield out

    # Chain generators on output
    out = queue_getter()
    if progress:
        out = counter(out)

    if ordered:
        out = _sort_generator_unique_integers(out, key=lambda a: a[0])

    # Return items
    for item in out:
        count = item[0]
        item = item[1]
        if isinstance(item, _Exception):
            item.E.seq_index = count
            if not item.infun:
                exception = 'raise'  # reset

            if exception == 'raise':
                for worker in workers:
                    worker.terminate()
                raise item.E
            elif exception == 'return':
                item = item.E
            elif exception == 'proc':
                pass
            else:
                for worker in workers:
                    worker.terminate()
                raise ValueError(
                    "Unrecognized `exception` setting '{}'".format(exception))
        yield item

    # Clean up threads and processes. Make sure the queue is exhausted
    add_to_queue_thread.join()  # Make sure we've exhausted the input
    q_in.join()  # Make sure there is nothing left in the queue
    for worker in workers:
        worker.join()  # shut it down
def test_mfd2eml(self):
    """Run the three-argument test against ``pm3_mfd2eml.main``.

    The cases are those of ``EML2MFD_TESTCASES`` with every entry reversed,
    and ``hex_c14n`` is passed as the canonicalisation callback.
    """
    # presumably reversing a case swaps its input and expected output so the
    # eml2mfd fixtures can be reused for the opposite conversion -- confirm
    reversed_cases = imap(reversed, self.EML2MFD_TESTCASES)
    self.three_argument_test(pm3_mfd2eml.main, reversed_cases, c14n=hex_c14n)
def total(self):
    ''' Returns the sum of the absolute values of all counts in all the
    features named by ``self._counters()``.
    '''
    running = 0
    for name in self._counters():
        for count in self[name].values():
            running += abs(count)
    return running