Example #1
    def test_prod_without_zeros_custom_acc_dtype(self):
        """
        Test the ability to provide your own acc_dtype for a ProdWithoutZeros().
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for acc_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If acc_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            acc_dtype in tensor.continuous_dtypes)
                        ):
                    prod_woz_var = ProdWithoutZeros(
                            axis=axis, acc_dtype=acc_dtype)(x)
                    assert prod_woz_var.owner.op.acc_dtype == acc_dtype

                    if (acc_dtype.startswith('complex') and
                        input_dtype != acc_dtype):
                        continue
                    f = theano.function([x], prod_woz_var)
                    data = numpy.random.rand(2, 3) * 3
                    data = data.astype(input_dtype)
                    f(data)
                else:
                    self.assertRaises(TypeError,
                            ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
                            x)

                idx += 1
Example #2
 def upre_bdd(self, dst_states_bdd, env_strat=None, get_strat=False,
              use_trans=False):
     """
     UPRE = EXu.AXc.EL' : T(L,Xu,Xc,L') ^ dst(L') [^St(L,Xu)]
     """
     # take a transition step backwards
     # TECH NOTE: the restrict_fun=~dst... works ONLY because I will use the
     # result and take the union with dst_states afterwards...
     p_bdd = self.substitute_latches_next(
         dst_states_bdd,
         restrict_fun=~dst_states_bdd,
         use_trans=use_trans)
     # use the given strategy
     if env_strat is not None:
         p_bdd &= env_strat
     # there is an uncontrollable action such that for all controllable actions
     temp_bdd = p_bdd.univ_abstract(
         BDD.make_cube(imap(funcomp(BDD, symbol_lit),
                            self.iterate_controllable_inputs())))
     p_bdd = temp_bdd.exist_abstract(
         BDD.make_cube(imap(funcomp(BDD, symbol_lit),
                            self.iterate_uncontrollable_inputs())))
     # prepare the output
     if get_strat:
         return temp_bdd
     else:
         return p_bdd
Example #3
    def encode_to_server(self, iv, timestamp, return_code, host_name,
                         svc_description, plugin_output):
        # note that this will pad the strings with 0's instead of random digits.  Oh well.
        toserver = [
                self.proto_version,
                0, # crc32_value
                timestamp,
                return_code,
                host_name,
                svc_description,
                plugin_output,
        ]

        # calculate crc32 and insert into the list
        crc32 = binascii.crc32(struct.pack(self.toserver_fmt, *toserver))
        toserver[1] = crc32

        # convert to bytes
        toserver_pkt = struct.pack(self.toserver_fmt, *toserver)

        # and XOR with the IV
        toserver_pkt = ''.join([chr(p^i)
                        for p,i in itertools.izip(
                                itertools.imap(ord, toserver_pkt),
                                itertools.imap(ord, itertools.cycle(iv)))])

        return toserver_pkt
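
The XOR-with-the-IV step above is the reusable idiom here; a minimal self-contained sketch of the same trick (names are illustrative, not from the original class):

import itertools

def xor_with_iv(data, iv):
    # XOR each byte of data with the endlessly repeated IV (Python 2 byte strings)
    return ''.join(chr(p ^ i)
                   for p, i in itertools.izip(itertools.imap(ord, data),
                                              itertools.imap(ord, itertools.cycle(iv))))

# XOR-ing twice with the same IV round-trips the data
assert xor_with_iv(xor_with_iv('payload', '\x2a\x13'), '\x2a\x13') == 'payload'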
Example #4
 def multiplex_neighbours(self,vertex_object,layer=None):
     'Returns an iterator of vertices in layer, that are multiplex neighbours of vertex_object.'
     
     
     # define helper functions: using a lambda here would disable pickling of the objects later ...
     def ret_multiplex_citation_key(x):
         return self._multiplex_citation[x].keys()
     
     def ret_multiplex_collab_key(x):
         return self._multiplex_collab[x].keys()
     
     
     if layer is None:
         print "###################################"
         print "Specify start_layer of mapping first!"
         print "USE layer='collab' OR layer='citation'"
         print "####################################"
         return
             
     if layer=='collab':
         multiplex_neighbours_TMP=itertools.imap(ret_multiplex_citation_key,self._multiplex_collab[vertex_object].keys())
         multiplex_neighbours=itertools.chain.from_iterable(multiplex_neighbours_TMP)
         return multiplex_neighbours
     
     if layer=='citation':
         multiplex_neighbours_TMP=itertools.imap(ret_multiplex_collab_key,self._multiplex_citation[vertex_object].keys())
         multiplex_neighbours=itertools.chain.from_iterable(multiplex_neighbours_TMP)
         return multiplex_neighbours
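
The imap plus chain.from_iterable pair above is a lazy one-level flatten; a small sketch of the same pattern over a toy adjacency dict (hypothetical data, not the original multiplex structures):

import itertools

neigh = {'a': {'b': 1, 'c': 1}, 'b': {'a': 1}}
# map each key to its neighbour list, then flatten the lists lazily
flat = itertools.chain.from_iterable(
    itertools.imap(lambda k: neigh[k].keys(), ['a', 'b']))
assert sorted(flat) == ['a', 'b', 'c']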
Example #5
    def yield_sequences_in_list(paths):
        """
        Yield the discrete sequences within paths.  This does not try to
        determine if the files actually exist on disk, it assumes you already
        know that.

        :param paths: a list of paths
        :rtype: generator
        """
        seqs = {}
        _check = DISK_RE.match

        for match in ifilter(None, imap(_check, imap(utils.asString, paths))):
            dirname, basename, frame, ext = match.groups()
            if not basename and not ext:
                continue
            key = (dirname, basename, ext)
            seqs.setdefault(key, set())
            if frame:
                seqs[key].add(frame)

        for (dirname, basename, ext), frames in seqs.iteritems():
            # build the FileSequence behind the scenes, rather than dupe work
            seq = FileSequence.__new__(FileSequence)
            seq._dir = dirname or ''
            seq._base = basename or ''
            seq._ext = ext or ''
            if frames:
                seq._frameSet = FrameSet(set(imap(int, frames)))
                seq._pad = FileSequence.getPaddingChars(min(imap(len, frames)))
            else:
                seq._frameSet = None
                seq._pad = ''
            seq.__init__(str(seq))
            yield seq
Example #6
    def test_imap(self):
        import itertools

        obj_list = [object(), object(), object()]
        it = itertools.imap(None, obj_list)
        for x in obj_list:
            assert it.next() == (x, )
        raises(StopIteration, it.next)

        it = itertools.imap(None, [1, 2, 3], [4], [5, 6])
        assert it.next() == (1, 4, 5)
        raises(StopIteration, it.next)

        it = itertools.imap(None, [], [], [1], [])
        raises(StopIteration, it.next)

        it = itertools.imap(str, [0, 1, 0, 1])
        for x in ['0', '1', '0', '1']:
            assert it.next() == x
        raises(StopIteration, it.next)

        import operator
        it = itertools.imap(operator.add, [1, 2, 3], [4, 5, 6])
        for x in [5, 7, 9]:
            assert it.next() == x
        raises(StopIteration, it.next)
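
As the test exercises, imap(None, ...) in Python 2 behaves like izip and stops at the shortest input, while the builtin map(None, ...) pads shorter inputs with None:

import itertools

assert list(itertools.imap(None, [1, 2, 3], [4])) == [(1, 4)]
assert map(None, [1, 2, 3], [4]) == [(1, 4), (2, None), (3, None)]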
Example #7
def prefix_filter_flowgrams(flowgrams, squeeze=False):
    """Filters flowgrams by common prefixes.

    flowgrams: iterable source of flowgrams

    squeeze: if True, collapse all poly-X to X

    Returns a tuple (l, orig_l, mapping): the number of prefix groups,
    the original number of flowgrams, and the prefix mapping itself.
    """

    # collect flowgram sequences
    if squeeze:
        seqs = imap(
            lambda f: (f.Name, squeeze_seq(str(f.toSeq(truncate=True)))),
            flowgrams)
    else:
        seqs = imap(lambda f: (f.Name, str(f.toSeq(truncate=True))), flowgrams)
    # equivalent but more efficient than
    #seqs = [(f.Name, str(f.toSeq(truncate=True))) for f in flowgrams]

    # get prefix mappings
    mapping = build_prefix_map(seqs)
    l = len(mapping)
    orig_l = sum([len(a) for a in mapping.values()]) + l

    return (l, orig_l, mapping)
Example #8
def secure_compare(a,b):
    """Return 'a == b', but try not to leak timing information about the
    arguments. In the event that the length of the two strings are not
    equal, we leak the length of the right argument, b.
    """
    retval = True

    if not (isinstance(a, bytes) & isinstance(b, bytes) \
            & (a is not b) \
            & (len(a) != 0) & (len(b) != 0)):
        raise TypeError('Arguments must be distinct bytes objects with nonzero length')

    # copy b to a if the lengths of a and b are unequal
    retval &= len(a) == len(b)
    # some gymnastics we have to do because of small integer caching
    new_a = [None] * len(b)
    a_mask = -long(retval)
    b_mask = ~a_mask
    for i in xrange(len(new_a)):
        # It's conceivable that the pattern of memory accesses here may leak
        # information about the length of a. However, I believe that this is
        # unlikely
        new_a[i] = ord(a[i & a_mask]) & a_mask \
                   | ord(b[i & b_mask]) & b_mask
    a = ''.join(imap(chr, new_a))
    del new_a, b_mask, a_mask, i

    retval &= (reduce(or_, imap(xor, imap(ord, a),
                                     imap(ord, b)), 0L) == 0L)
    return retval
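
A usage sketch: the arguments must be distinct, non-empty byte strings, so an equal comparand has to be built at runtime rather than written as a second identical (and interned) literal:

a = 'secret'
b = ''.join(['sec', 'ret'])   # equal content, but a distinct object
assert secure_compare(a, b)
assert not secure_compare(a, 'hunter2')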
Example #9
def get_posts_tags(subscribers, object_list, feed, tag_name):
	'''Adds a qtags property in every post object in a page.
		Use "qtags" instead of "tags" in templates to avoid unnecesary DB hits.'''

	tagd = dict()
	user_obj = None
	tag_obj = None
	tags = models.Tag.objects.extra(
		select=dict(post_id='{0}.{1}'.format(
			*it.imap( connection.ops.quote_name,
				('feedjack_post_tags', 'post_id') ) )),
		tables=['feedjack_post_tags'],
		where=[
		'{0}.{1}={2}.{3}'.format(*it.imap( connection.ops.quote_name,
			('feedjack_tag', 'id', 'feedjack_post_tags', 'tag_id') )),
		'{0}.{1} IN ({2})'.format(
			connection.ops.quote_name('feedjack_post_tags'),
			connection.ops.quote_name('post_id'),
			', '.join([str(post.id) for post in object_list]) ) ] )

	for tag in tags:
		if tag.post_id not in tagd: tagd[tag.post_id] = list()
		tagd[tag.post_id].append(tag)
		if tag_name and tag.name == tag_name: tag_obj = tag

	subd = dict()
	for sub in subscribers: subd[sub.feed.id] = sub
	for post in object_list:
		if post.id in tagd: post.qtags = tagd[post.id]
		else: post.qtags = list()
		post.subscriber = subd[post.feed.id]
		if feed == post.feed: user_obj = post.subscriber

	return user_obj, tag_obj
Example #10
def prepare_update(state, neighborhood, rule):
    '''
    state should be a 1D numpy array.
    neighborhood should be a function that maps a cell index to a list of
    neighbor indices; if it has a set_length method, the automaton length
    is passed to it first.
    rule should be a mapping from each neighborhood configuration, as a
    tuple of cell states, to the corresponding new cell state.
    returns an iterable.
    '''

    # needed for iterating through the state array.
    l = len(state)
    # convenience function for nx_generator objects
    if hasattr(neighborhood, 'set_length'):
        neighborhood.set_length(l)

    # generate neighborhood selections for each index of state
    #n = imap(neighborhood, range(0,l)) # e.g. neighborhood(0)
    # apply each slice to state
    #o = imap(state.__getitem__, n) # e.g. state[neighborhood(0)]
    # convert each numpy array into an immutable tuple
    #p = imap(tuple, o) # e.g. tuple(state[neighborhood(0)])
    # determine the new state for each of the state slices
    #q = imap(rule.__getitem__, p) # e.g. rule[tuple(state[neighborhood(0)])]

    #return q

    # comments above left for readability, all of it strung together for speed:
    return imap(rule.__getitem__, imap(tuple, imap(state.__getitem__, imap(neighborhood, range(0,l)))))
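
A minimal way to drive prepare_update, with a hypothetical wrap-around radius-1 neighborhood and the rule-110 lookup table (none of these names come from the original module):

import numpy

state = numpy.array([0, 0, 0, 1, 0])
l = len(state)
neighborhood = lambda i: [(i - 1) % l, i, (i + 1) % l]  # wrap-around window
rule = {(0, 0, 0): 0, (0, 0, 1): 1, (0, 1, 0): 1, (0, 1, 1): 1,  # rule 110
        (1, 0, 0): 0, (1, 0, 1): 1, (1, 1, 0): 1, (1, 1, 1): 0}
assert list(prepare_update(state, neighborhood, rule)) == [0, 0, 1, 1, 0]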
Example #11
    def process_training_set(self, training_set):
        input_set, output_set = imap(numpy.asarray, izip(*training_set))  # convert training set to numpy array ...
        self.input_normalization, self.output_normalization = imap(self.normalization_class, (input_set, output_set))

        normalized_input_set = self.input_normalization.normalize(input_set)
        normalized_output_set = self.output_normalization.normalize(output_set)
        return normalized_input_set, normalized_output_set
Example #12
 def navigate(self, dest):
     """Find a path from current location to the given destination."""
     # Make our node objects for A*.
     rospy.loginfo("POSE: %f, %f" %(self.pose.x, self.pose.y))
     start = Landmark(name="START", ltype="END", x=self.pose.x, y=self.pose.y)
     goal = Landmark(name="GOAL", ltype="END", x=dest.x, y=dest.y)
     # Find visible neighbors of start and goal to add as edges to the graph.
     landmarks = imap(lambda pair: pair[0], self.landmark_graph.itervalues())
     # Here we manually add the goal to the candidate landmarks for start
     # to allow for navigation directly to the goal location.
     start_zone = self.find_nearest_visibles(start, chain(landmarks, [goal]))
     # We have to add start here for the empty zone check below.
     landmarks = imap(lambda pair: pair[0], self.landmark_graph.itervalues())
     goal_zone = self.find_nearest_visibles(goal, chain(landmarks, [start]))
     rospy.loginfo("Start zone: %s" % (", ".join(l.name for l in start_zone)))
     rospy.loginfo("Goal zone: %s" % (", ".join(l.name for l in goal_zone)))
     if not start_zone or not goal_zone:
         rospy.logerr("Cannot connect start and goal! A* failed.")
         return []
     # A* functions.
     is_goal = lambda node: node.name == "GOAL"
     heuristic = lambda node: point_distance(node, goal)
     def neighbors(node):
         if node.name == "START":
             return start_zone
         nbrs = self.landmark_graph[node.name][1]
         if node in goal_zone:
             # Intentionally use list concat to make a copy of the list.
             # If we just modify nbrs, it will modify the original graph.
             return nbrs + [goal]
         return nbrs
     return a_star(start, is_goal, neighbors, point_distance, heuristic)
Example #13
def init_env(env_ext=dict()):
	global global_env, macro_table, symbol_table
	symbol_table = dict()

	global_env = add_globals(Env())
	global _quote, _if, _set, _define, _lambda, _begin, _definemacro
	_quote, _if, _set, _define, _lambda, _begin, _definemacro = it.imap(
		Sym, ['quote', 'if', 'set', 'define', 'lambda', 'begin', 'define-macro'] )
	global _quasiquote, _unquote, _unquotesplicing
	_quasiquote, _unquote, _unquotesplicing = it.imap(
		Sym, ['quasiquote', 'unquote', 'unquote-splicing'] )
	global _append, _cons, _let, quotes
	_append, _cons, _let = it.imap(Sym, ['append', 'cons', 'let'])
	quotes = {"'":_quote, '`':_quasiquote, ',':_unquote, ',@':_unquotesplicing}

	macro_table = {_let:let}

	peval('''(begin
	(define-macro and (lambda args
		(if (null? args) #t
			(if (= (length args) 1) (car args)
				`(if ,(car args) (and ,@(cdr args)) #f)))))
	(define-macro or (lambda args
		(if (null? args) #f
			(if (= (length args) 1) (car args)
				`(if ,(car args) ,(car args) (or ,@(cdr args)))))))
	)''')

	for sym,val in env_ext.iteritems(): global_env[Sym(sym)] = val
Example #14
    def map_colors(self, colors, cmap=None, lut=None, mode='hexs', **norm_kw):
        """return a list of rgb tuples/hexs from color numbers.

            - colors: a seq of color numbers.
            - cmap: a Colormap or a name like 'jet' (passed to cm.get_cmap(cmap, lut))
            - mode: one of  ['hexs', 'tuples', 'arrays']

        Ref: http://www.scipy.org/Cookbook/Matplotlib/Show_colormaps

        from: http://nullege.com/codes/show/src%40p%40y%40pycogent-HEAD%40cogent%40draw%40multivariate_plot.py/6/matplotlib.colors.Colormap/python
        """
        modes = ['hexs', 'tuples', 'arrays']
        if mode not in modes:
            raise ValueError('mode must be one of %s, but got %s'
                             % (modes, mode))
        if not isinstance(cmap, Colormap):
            cmap = cm.get_cmap(cmap, lut=lut)
        rgba_arrays = cmap(Normalize(**norm_kw)(colors))
        rgb_arrays = rgba_arrays[:, :-1] #without alpha
        if mode == 'arrays':
            return rgb_arrays
        elif mode == 'tuples':
            return list(imap(tuple, rgb_arrays))
        else: # mode == 'hexs':
            return list(imap(rgb2hex, rgb_arrays))
Example #15
def main(files):
  if len(files) > 1:
    file_a = open(files[0], 'r')
    file_b = open(files[1], 'r')
  elif len(files) > 0:
    file_a = open(files[0], 'r')
    file_b = None
  else:
    file_a = sys.stdin
    file_b = None

  mem_a = analyse(file_a)
  file_a.close()
  key_space = max(imap(len, mem_a))
  if file_b is None:
    for key in sorted(mem_a):
      v = mem_a[key]
      if v['pss'] != 0 or v['uss'] != 0:
        print '{key:<{width}} {pss:>7} {uss:>7}'.format(
            key=key, width=key_space, pss=v['pss'], uss=v['uss'])
  else:
    mem_b = analyse(file_b)
    file_b.close()
    report = diff(mem_a, mem_b)
    key_space = max(imap(len, report))
    for key in sorted(report):
      v = report[key]
      if v['pss'] != 0 or v['uss'] != 0:
        print '{dir} {key:<{width}} {pss:>+7} {uss:>+7}'.format(
            dir="-|+"[v['dir'] + 1], key=key, width=key_space,
            pss=v['pss'], uss=v['uss'])
Example #16
def build_cliff( altitudes, key, start_cliff, end_cliff ):
    keys = [key, (key[0]+1,key[1]), (key[0]+1,key[1]+1), (key[0],key[1]+1)]
    alts = []
    for k in keys:
        # we need a full cell; if any corner is missing, bail out
        if k not in altitudes:
            return None
        alts.append(altitudes[k])
    deltas=[(abs(alts[(i+1)%4]-alts[i]),i) for i in range(len(alts))]
    good_deltas = filter(lambda x: x[0]>=start_cliff and x[0]<end_cliff, deltas)
    if len(good_deltas)>2:
        print "special case good deltas"
    if len(good_deltas) < 2: # no cliffs found
        # 1 means we are at the end. In that case it should be found from another cliff.
        return None   
    good_deltas.sort(reverse=True)

    idx1=good_deltas[0][1]
    idx2=good_deltas[1][1]
    if alts[idx1]<alts[(idx1+1)%4]:
        idx1,idx2=idx2,idx1

    cliff_line=[divide_by_scalar(imap(add, keys[idx1],keys[(idx1+1)%4]),2.0), 
                divide_by_scalar(imap(add, keys[idx2],keys[(idx2+1)%4]),2.0),]
    return cliff_line
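
The cliff endpoints above are edge midpoints built lazily with imap(add, ...); a sketch with a stand-in for the divide_by_scalar helper (assumed to divide elementwise):

from itertools import imap
from operator import add

def divide_by_scalar(seq, s):  # hypothetical stand-in for the helper used above
    return tuple(x / s for x in seq)

# midpoint of the edge between cell corners (0, 0) and (1, 0)
assert divide_by_scalar(imap(add, (0, 0), (1, 0)), 2.0) == (0.5, 0.0)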
Example #17
def urlparse(fh, part=None, query_params=None, decode=False, **kwargs):
    """URLParse"""
    
    _yield_func = lambda x: x
    if query_params and part == 'query':
        if query_params.find(',') == -1:
            _yield_func = lambda x: x.get(query_params, (None,))[0]
        else:
            # Multiple query params specified on command line
            query_params = query_params.split(",")
            _yield_func = lambda x: \
                    [x.get(p, (None,))[0] for p in query_params]
    
    if decode is True:
        for line in imap(lambda x: x.strip(), fh):
            yield unquote_plus(line)
    else:
        for line in imap(lambda x: x.strip(), fh):
            url = urlsplit(line)
            val = {
                "scheme": url.scheme,
                "domain": url.netloc,
                "netloc": url.netloc,
                "path":   url.path,
                "query":  parse_qs(url.query)
            }[part]
            
            yield _yield_func(val)
Example #18
    def emit_mac(self):
        """Call this method when all the plaintext has been supplied.
        This method will return any remaining ciphertext chunks and the MAC, concatenated.
        """
        if not self.encrypt_not_decrypt:
            raise KeccakError('This instance is intended for decryption, not encryption')
        if self.last_block is None:
            raise KeccakError('MAC has already been emitted, no further encryption may be performed')
        
        retval = ''
        assert len(self.input_cache) < self.block_size
        encoded_input_cache_len = chr(len(self.input_cache))
        self.input_cache = self.k.pad10star1(self.input_cache, self.block_size*8)
        assert len(self.input_cache) == self.block_size
        assert len(self.last_block) == self.mac_size
        final_ciphertext_block = ''.join(imap(chr, imap(operator.xor, imap(ord, self.input_cache),
                                                                      imap(ord, self.last_block[:self.block_size]))))
        assert len(final_ciphertext_block) == self.block_size
        retval += final_ciphertext_block

        self.last_block = self.k(encoded_input_cache_len + self.input_cache + self.mac_round_byte)
        assert len(self.last_block) == self.mac_size
        retval += self.last_block

        self.last_block = None
        self.input_cache = ''
        assert len(retval) == self.block_size + self.mac_size
        return retval
Example #19
    def test_prod_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a prod.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            output_dtype in tensor.continuous_dtypes)
                        ):
                    prod_var = x.prod(dtype=output_dtype, axis=axis)
                    assert prod_var.dtype == output_dtype

                    if "complex" in output_dtype:
                        continue
                    # Check that we can take the gradient
                    grad_var = tensor.grad(prod_var.sum(), x,
                            disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                            x.prod, dtype=output_dtype, axis=axis)

                idx += 1
Example #20
def owner_cluster(con,cur,nitem=None,reverse=True,nshingle=2,store=False,**kwargs):
    c = Simhash(**kwargs)

    cmd = 'select ownerid,name from owner'
    if reverse:
        cmd += ' order by rowid desc'
    if nitem:
        cmd += ' limit %i' % nitem

    name_dict = {}
    for (i,(ownerid,name)) in enumerate(cur.execute(cmd)):
        words = name.split()
        shings = list(shingle(name,nshingle))

        features = shings + words
        weights = list(np.linspace(1.0,0.0,len(shings))) + list(np.linspace(1.0,0.0,len(words)))

        c.add(features,weights=weights,label=ownerid)
        name_dict[ownerid] = name

        if i%10000 == 0:
            print i

    ipairs = c.unions
    npairs = map(lambda p: map(name_dict.get,p),ipairs)
    print 'Found %i pairs' % len(ipairs)

    if store:
        cur.execute('drop table if exists pair')
        cur.execute('create table pair (ownerid1 int, ownerid2 int, name1 text, name2 text)')
        cur.executemany('insert into pair values (?,?,?,?)',imap(lambda ((o1,o2),(n1,n2)): (o1,o2,n1,n2),izip(ipairs,npairs)))
        con.commit()
    else:
        return ipairs
Example #21
def do_join(eval_ctx, value, d=u""):
    """Return a string which is the concatenation of the strings in the
    sequence. The separator between elements is an empty string per
    default, you can define it with the optional parameter:

    .. sourcecode:: jinja

        {{ [1, 2, 3]|join('|') }}
            -> 1|2|3

        {{ [1, 2, 3]|join }}
            -> 123
    """
    # no automatic escaping?  joining is a lot easier then
    if not eval_ctx.autoescape:
        return unicode(d).join(imap(unicode, value))

    # if the delimiter doesn't have an html representation we check
    # if any of the items has.  If yes we do a coercion to Markup
    if not hasattr(d, "__html__"):
        value = list(value)
        do_escape = False
        for idx, item in enumerate(value):
            if hasattr(item, "__html__"):
                do_escape = True
            else:
                value[idx] = unicode(item)
        if do_escape:
            d = escape(d)
        else:
            d = unicode(d)
        return d.join(value)

    # no html involved, so normal joining
    return soft_unicode(d).join(imap(soft_unicode, value))
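
Outside of Jinja, the fast path above reduces to a plain unicode join over an imap:

from itertools import imap

assert u'|'.join(imap(unicode, [1, 2, 3])) == u'1|2|3'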
Example #22
def levenshtein(a, b, casecost = 1, spacecost = 1, totals = False):
    """Calculates the Levenshtein edit distance between strings a and b. 'casecost' is the cost of replacement when only the case is changed, not the actual character.
    If totals=True, returns total character costs of both strings, in addition to the distance value, as a triple (dist, cost_a, cost_b).
    >>> levenshtein("Ala", "OLa")
    2
    >>> levenshtein("Ala", "OLa", 0.5)
    1.5
    >>> round(levenshtein(" a ala", "aala ", 1, 0.1), 5)
    0.3
    >>> levenshtein(" a ala Ola ", "aalaola ", 1, 2)
    7
    """
    #_a, _b = a,b
    reorder = False
    n, m = len(a), len(b)
    if n < m:                                   # ensure that n >= m ('a' is longer), to speed up calculations (short outer loop); but mem usage is O(max(n,m))
        a,b = b,a
        n,m = m,n
        reorder = True
    
    charcost = lambda c: spacecost if c == ' ' else 1
    isint = util.isint(casecost) and util.isint(spacecost)
    typecode = 'l' if isint else 'd'
    zero = array(typecode, [0])
    zeron = zero * n
    
    try:
        alow = a.lower()
        blow = b.lower()
        acost = array(typecode, imap(charcost, a))
        bcost = array(typecode, imap(charcost, b))
        
        #current = range(n+1)                    
        current = zero + acost                  # initially, current[j] is the total cost of letters in a[:j], for j = 0,1,...,n
        for j in range(2,n+1):
            current[j] += current[j-1]          # 'current' must hold cumulative a[:j] costs rather than single-letter costs
        #print current
        
        # loop invariant: current[j] is the cost of transforming a[:j] into b[:i] 
        for i in range(1,m+1):                  # loop over characters of 'b'
            cur_b, cur_bcost = b[i-1], bcost[i-1]
            previous = current
            current = array(typecode, [previous[0] + cur_bcost]) + zeron
            for j in range(1,n+1):              # loop over characters of 'a'
                add = previous[j] + cur_bcost
                delete = current[j-1] + acost[j-1]
                change = previous[j-1]
                if a[j-1] != cur_b:
                    if alow[j-1] == blow[i-1]: change += casecost
                    else: change += max(cur_bcost, acost[j-1]) #1
                current[j] = min(add, delete, change)
    
    except UnicodeWarning:    
        print "unicode error in levenshtein(%s, %s)" % (repr(a), repr(b))
        raise
    
    if totals: 
        if reorder: return current[n], sum(bcost), sum(acost)
        else: return current[n], sum(acost), sum(bcost)
    return current[n]
Example #23
def service_get(env, jones):
    if not jones.exists():
        return redirect(url_for('index'))

    children = jones.get_child_envs(Env.Root)
    is_leaf = lambda child: len(child) and not any(
        c.find(child + '/') >= 0 for c in children)

    try:
        version, config = jones.get_config_by_env(env)
    except NoNodeException:
        return redirect(url_for('services', service=jones.service))

    childs = imap(dict, izip(
        izip(repeat('env'), imap(Env, children)),
        izip(repeat('is_leaf'), imap(is_leaf, children))))

    vals = {
        "env": env,
        "version": version,
        "children": list(childs),
        "config": config,
        "view": jones.get_view_by_env(env),
        "service": jones.service,
        "associations": jones.get_associations(env)
    }
    if request_wants('application/json'):
        return jsonify(vals)
    else:
        return render_template('service.j2', **vals)
Example #24
    def parse_now_playing(self, response):
        """Scrapes USA openings this week and top 10 in week"""
        self.log("Parsing USA Top Week")
        hxs = HtmlXPathSelector(response)

        _urljoin = lambda url: self._urljoin(response, url)

        #
        # openings this week
        #
        openings = hxs.select('//table[@class="movies"]//a[@class="title"]')
        boxoffice = hxs.select('//table[@class="boxoffice movies"]//a[@class="title"]')

        opening_titles = openings.select('text()').extract()
        # materialize the urls: these iterators are consumed twice below
        # (by izip for the items and by chain for the requests)
        opening_urls = list(imap(_urljoin, openings.select('@href').extract()))

        box_titles = boxoffice.select('text()').extract()
        box_urls = list(imap(_urljoin, boxoffice.select('@href').extract()))

        # items 
        opening_items = (UsaOpeningWeekMovie(title=title, url=url)
                            for (title, url)
                            in izip(opening_titles, opening_urls))

        box_items = (UsaTopWeekMovie(title=title, url=url) 
                        for (title, url)
                        in izip(box_titles, box_urls))

        # movie requests
        requests = imap(self.make_requests_from_url,
                        chain(opening_urls, box_urls))

        return chain(opening_items, box_items, requests)
Example #25
    def __init__(self, diff, differ='diff', format='udiff'):
        """
        :param diff:   a text in diff format or generator
        :param format: format of diff passed, `udiff` or `gitdiff`
        """
        if isinstance(diff, basestring):
            diff = [diff]

        self.__udiff = diff
        self.__format = format
        self.adds = 0
        self.removes = 0

        if isinstance(self.__udiff, basestring):
            self.lines = iter(self.__udiff.splitlines(1))

        elif self.__format == 'gitdiff':
            udiff_copy = self.copy_iterator()
            self.lines = itertools.imap(self.escaper,
                                        self._parse_gitdiff(udiff_copy))
        else:
            udiff_copy = self.copy_iterator()
            self.lines = itertools.imap(self.escaper, udiff_copy)

        # Select a differ.
        if differ == 'difflib':
            self.differ = self._highlight_line_difflib
        else:
            self.differ = self._highlight_line_udiff
Example #26
    def decrypt(self, m):
        """Decrypt the bytes m and return as much plaintext as is available.
        There may not be plaintext available every time this method is called.
        There is no guarantee about the length of the plaintext compared to the length of the ciphertext.
        Ciphertext chunks must be fed to the decrypt method in the same order that they were produced
            by the encrypt method
        """
        if self.encrypt_not_decrypt:
            raise KeccakError('This instance is intended for encryption, not decryption')
        if self.last_block is None:
            raise KeccakError('MAC has already been verified, no further decryption may be performed')
        if not isinstance(m, bytes):
            raise TypeError("argument must be a bytes")

        self.input_cache += m
        retval = ''

        while len(self.input_cache) > self.block_size+self.mac_size:
            chunk, self.input_cache = self.input_cache[:self.block_size], \
                                      self.input_cache[self.block_size:]
            assert len(self.last_block) == self.mac_size
            plain = ''.join(imap(chr, imap(operator.xor, imap(ord, chunk),
                                                         imap(ord, self.last_block[:self.block_size]))))

            self.last_block = self.k(chr(len(plain))+plain+self.cipher_round_byte)
            retval += plain
            assert len(self.last_block) == self.mac_size
        return retval
Example #27
def setup(events, field, period, lookback, flip_signal):
    events = list(events)
    values = [e[field] for e in events]

    direction = None
    flip_dir = flip_signal['direction']

    if flip_dir == flip.BEAR and all(itertools.imap(operator.lt,
                                                    values[lookback:],
                                                    values[:period])):
        direction = BUY

    elif flip_dir == flip.BULL and all(itertools.imap(operator.gt,
                                                      values[lookback:],
                                                      values[:period])):
        direction = SELL

    if not direction:
        return

    bars = events[lookback:]
    lowes = [bar['low'] for bar in bars]
    highs = [bar['high'] for bar in bars]

    high = np.max(highs)
    low = np.min(lowes)

    if direction == BUY:
        perfection = np.min(lowes[-4:-2])
    else:
        perfection = np.max(highs[-4:-2])

    return Signal(direction, high, low, bars, perfection, flip_signal)
Example #28
def _gen_batches(idxs, subjects, batch_size):
    '''divide row indices for deepkt.

    divide indices into batches by subject ids and indices for each subject are
        further divided into sub batches by some minimal size. The first 2 rows of each
        subject are removed by necessity due to the recursive structure of the model

    Args:
        idxs (int[]): row indices
        subjects (int[]): list of subject ids corresponding to each row
            (could also be an EnumColumn). Subject ids must be pre-sorted.
        batch_size: the size of the subject's sub batches

    Returns:
        int[][]: list of batches

    Example:
        >>> _gen_batches(xrange(11), [1] * 6 + [2] * 5, 2)
        [[2, 3], [4, 5], [8, 9]]
    '''
    batches = gen_batches_by_keys(idxs, [subjects])
    batches = imap(lambda idxs: islice(idxs, 2, None), batches)
    sub_batches = imap(lambda idxs: gen_batches_by_size(list(idxs), batch_size), batches)
    batches = chain.from_iterable(sub_batches)
    batches = ifilter(lambda b: b, batches)
    batches = list(batches)
    return batches
Example #29
def pipe_sort(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that sorts the input source according to the specified key.
    Not loopable. Not lazy.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    kwargs -- other inputs, e.g. to feed terminals for rule values
    conf : {
        'KEY': [
            {
                'field': {'type': 'text', 'value': 'title'},
                'dir': {'type': 'text', 'value': 'DESC'}
            }
        ]
    }

    Returns
    -------
    _OUTPUT : generator of sorted items
    """
    test = kwargs.pop('pass_if', None)
    _pass = utils.get_pass(test=test)
    key_defs = imap(DotDict, utils.listize(conf['KEY']))
    get_value = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=get_value, **kwargs)
    keys = imap(parse_conf, key_defs)
    order = ('%s%s' % ('-' if k.dir == 'DESC' else '', k.field) for k in keys)
    comparers = map(get_comparer, order)
    cmp_func = partial(multikeysort, comparers=comparers)
    _OUTPUT = _INPUT if _pass else iter(sorted(_INPUT, cmp=cmp_func))
    return _OUTPUT
Example #30
    def test_prod_without_zeros_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a ProdWithoutZeros().
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            output_dtype in tensor.continuous_dtypes)
                        ):
                    prod_woz_var = ProdWithoutZeros(
                            axis=axis, dtype=output_dtype)(x)
                    assert prod_woz_var.dtype == output_dtype
                else:
                    self.assertRaises(TypeError,
                            ProdWithoutZeros(axis=axis, dtype=output_dtype),
                            x)

                idx += 1
Example #31
 def led_contents(i):
     pmin = float(i) / num_leds * model.position_count
     pmax = pmin + float(model.position_count) / num_leds
     return any(
         imap(model.contents, model.contents_range(pmin, pmax)))
Example #32
 def itervalues(self):
     return imap(self.get, self._keys)
Example #33
def text_shingle(n, value):
    return itertools.imap(
        u''.join,
        shingle(n, value),
    )
Example #34
 def all(self):
     return itertools.imap(Person, database.iter_people())
Example #35
 def sort_uniq(sequence):
     return itertools.imap(
         operator.itemgetter(0),
         itertools.groupby(sorted(sequence)))
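
Assuming sort_uniq is in scope, groupby collapses the runs of equal items that sorting creates, so the result is sorted and duplicate-free:

assert list(sort_uniq([3, 1, 2, 1, 3])) == [1, 2, 3]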
Example #36
 def all(self):
     return itertools.imap(Event, database.iter_events())
Example #37
def find_frequent_itemsets(dataset,
                           min_support,
                           min_bad_rate,
                           include_support=True):
    """
    Find frequent itemsets in the given dataset using FP-growth. This
    function returns a generator instead of an eagerly-populated list of items.

    The `dataset` parameter can be any iterable of iterables of items.
    `min_support` should be an integer specifying the minimum number of
    occurrences of an itemset for it to be accepted.

    Each item must be hashable (i.e., it must be valid as a member of a
    dictionary or a set).

    If `include_support` is true, yield (itemset, support) pairs instead of
    just the itemsets.

    Parameters
    ----------
    dataset : list
        The dataset (a list of transactions) from which to generate 
        candidate itemsets.

    min_support : integer
        The minimum support threshold.

    include_support : bool
        Include support in output (default=True).

    """
    items = defaultdict(lambda: 0)  # mapping from items to their supports
    processed_transactions = []

    # Load the passed-in transactions and count the support that individual
    # items have.
    for transaction in dataset:
        processed = []
        for item in transaction:
            items[item] += 1
            processed.append(item)
        processed_transactions.append(processed)

    # Remove infrequent items from the item support dictionary.
    items = dict((item, support) for item, support in items.iteritems()
                 if support >= min_support)

    # Build our FP-tree. Before any transactions can be added to the tree, they
    # must be stripped of infrequent items and their surviving items must be
    # sorted in decreasing order of frequency.
    def clean_transaction(transaction):
        transaction = filter(lambda v: v in items, transaction)
        transaction.sort(key=lambda v: items[v], reverse=True)
        return transaction

    master = FPTree()
    for transaction in imap(clean_transaction, processed_transactions):
        master.add(transaction)

    support_data = {}

    def find_with_suffix(tree, suffix):
        for item, nodes in tree.items():
            support = sum(n.count for n in nodes)
            #support = float(sum(n.count for n in nodes)) / len(dataset)
            if support >= min_support and item not in suffix:
                # New winner!
                found_set = [item] + suffix
                #print found_set

                support_data[frozenset(found_set)] = support
                yield (found_set, support) if include_support else found_set

                # Build a conditional tree and recursively search for frequent
                # itemsets within it.
                cond_tree = conditional_tree_from_paths(
                    tree.prefix_paths(item), min_support)
                for s in find_with_suffix(cond_tree, found_set):
                    yield s  # pass along the good news to our caller

    # Search for frequent itemsets, and yield the results we find.
    for itemset in find_with_suffix(master, []):
        yield itemset
Example #38
 def map_async(self, func, args):
     from itertools import imap
     for _ in imap(func, args):
         pass
Example #39
 def __repr__(self, repr=repr):
     return ' -> '.join(imap(repr, self.maps))
Example #40
def tienepar1(lista):
    return any(imap(lambda x: x % 2 == 0, lista))
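
any(imap(...)) short-circuits at the first even element; a generator expression spells the same thing without itertools:

assert tienepar1([1, 3, 4, 5])      # stops scanning at 4
assert not tienepar1([1, 3, 5])

def tienepar1_genexp(lista):        # equivalent, itertools-free spelling
    return any(x % 2 == 0 for x in lista)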
Example #41
    def xrefs_from(self):
        """Xrefs from this line.

        :return: Xrefs as `sark.code.xref.Xref` objects.
        """
        return imap(Xref, idautils.XrefsFrom(self.ea))
Example #42
 def __len__(self, len=len, sum=sum, imap=imap):
     return sum(imap(len, self.maps))
Example #43
    def versions(self):
        """
        Extract the external names and versions from an installed LCGCMT.

        @return: dictionary mapping external names to versions
        """
        from itertools import imap
        def statements(lines):
            """
            Generator of CMT statements from a list of lines.
            """
            statement = "" # we start with an empty statement
            for l in imap(lambda l: l.rstrip(), lines): # CMT ignores spaces at the end of line when checking for '\'
                # append the current line to the statement so far
                statement += l
                if statement.endswith("\\"):
                    # in this case we need  to strip the '\' and continue the concatenation
                    statement = statement[:-1]
                else:
                    # we can stop concatenating, but we return only non-trivial statements
                    statement = statement.strip()
                    if statement:
                        yield statement
                        statement = "" # we start collecting a new statement

        def tokens(statement):
            """
            Split a statement in tokens.

            Trivial implementation assuming the tokens do not contain spaces.
            """
            return statement.split()

        def macro(args):
            """
            Analyze the arguments of a macro command.

            @return: tuple (name, value, exceptionsDict)
            """
            unquote = lambda s: s.strip('"')
            name = args[0]
            value = unquote(args[1])
            # make a dictionary of the even to odd remaining args (unquoting the values)
            exceptions = dict(zip(args[2::2],
                                  map(unquote, args[3::2])))
            return name, value, exceptions

        # prepare the dictionary for the results
        versions = {}
        # We extract the statements from the requirements file of the LCG_Configuration package
        req = open(os.path.join(self.lcgcmt_root, "LCG_Configuration", "cmt", "requirements"))
        for toks in imap(tokens, statements(req)):
            if toks.pop(0) == "macro": # get only the macros ...
                name, value, exceptions = macro(toks)
                if name.endswith("_config_version"): # that end with _config_version
                    name = name[:-len("_config_version")]
                    name = self.__special_names__.get(name, name)
                    for tag in ["target-slc"]: # we use the alternative for 'target-slc' if present
                        value = exceptions.get(tag, value)
                    versions[name] = value.replace('(', '{').replace(')', '}')
        return versions
Example #44
def inv_dict(d):
    return dict(imap(reversed, d.iteritems()))
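
A quick check; note that the inversion silently drops entries when values collide:

assert inv_dict({'a': 1, 'b': 2}) == {1: 'a', 2: 'b'}
assert len(inv_dict({'a': 1, 'b': 1})) == 1   # colliding values collapse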
Example #45
def fio_cfg_compile(source, fname, test_params):
    it = parse_all_in_1(source, fname)
    it = (apply_params(sec, test_params) for sec in it)
    it = flatmap(process_cycles, it)
    it = flatmap(process_repeats, it)
    return itertools.imap(finall_process, it)
Example #46
def power_of(x):
    """Generator returning powers of the provided number (fastest)
    """
    pow_of_x = partial(pow, x)
    return imap(pow_of_x, count())
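
Since power_of returns an infinite iterator, slice it instead of listing it:

from itertools import islice

assert list(islice(power_of(2), 5)) == [1, 2, 4, 8, 16]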
Example #47
 def get_release_type(self, version):
     if version in imap(lambda f: f.version, self.stable_firmwares):
         return 'stable'
     if version in imap(lambda f: f.version, self.dev_firmwares):
         return 'dev'
     return 'unknown'
Example #48
def mosaicify(target, sources, tiles=32, zoom=1):
    """Create mosaic of photos.

    The function wraps all process of the creation of a mosaic, given
    the target, the list of source images, the number of tiles to use
    per side, the zoom level (a.k.a.  how large the mosaic will be), and
    finally if we want to display the output on screen or dump it on
    a file.

    First, open the target image, divide it into the specified number of
    tiles, and store information about the tiles average color. In
    order to reduce the amount of used memory, we will free the *blobs*
    associated to each processed image, as soon as possible, aka inside
    the ``postfunc`` function.

    Then, index all the source images by color. Given that we are aware
    about the size and the ratio of the tiles of the target, we can use
    the ``prefunc`` to reduce the dimension of the image; consequently
    the amount of computation needed to find the average color will be
    smaller. Moreover, as in the previous paragraph, there is no need to
    keep the processed images around, hence we are going to use the
    ``postfunc`` method to delete them.

    Finally, for each tile extracted from the target image, we need to
    find the most similar contained inside the list of source images,
    and paste it in the right position inside the mosaic image.

    When done, show the result on screen or dump it on the disk.

    """
    # Load the target image into memory
    mosaic = ImageWrapper(filename=target)

    # Generate the list of rectangles identifying mosaic tiles
    (original_width, original_height) = mosaic.size
    rectangles = list(lattice(original_width, original_height, tiles))

    # Compute the size of the tiles after the zoom factor has been applied
    (zoomed_tile_width, zoomed_tile_height) = (zoom * original_width // tiles,
                                               zoom * original_height // tiles)

    # Initialize the pool of workers
    workers = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(workers)

    # Load tiles into memory and resize them accordingly
    source_tiles = dict(
        itertools.izip(
            sources,
            load_raw_tiles(sources, mosaic.ratio,
                           (zoomed_tile_width, zoomed_tile_height), pool,
                           workers)))

    # Index all the source images by their average color
    source_list = ImageList(source_tiles.values())

    # Compute the average color of each mosaic tile
    mosaic_avg_colors = list(
        extract_average_colors(mosaic, rectangles, pool, workers))

    # Find which source image best fits each mosaic tile
    best_matching_imgs = list(
        search_matching_images(source_list, mosaic_avg_colors, pool, workers))

    # Shut down the pool of workers
    pool.close()
    pool.join()

    # Apply the zoom factor
    (zoomed_width, zoomed_height) = (tiles * zoomed_tile_width,
                                     tiles * zoomed_tile_height)
    mosaic.resize((zoomed_width, zoomed_height))
    rectangles = list(lattice(zoomed_width, zoomed_height, tiles))

    return Mosaic(
        mosaic,
        itertools.izip(rectangles,
                       itertools.imap(source_tiles.get, best_matching_imgs)))
Example #49
 def itervalues(self):
     return itertools.imap(self.__getitem__, reversed(self._o))
Example #50
def parmap(fun,seq,N=None,Nt=1,chunksize=1,ordered=True,\
                daemon=False,progress=False,
                args=(),kwargs=None,
                star=False,kwstar=False,
                exception=None):
    """
    parmap -- Simple parallel mapper that can split amongst processes (N)
              and threads (Nt) (within the processes).

              Does *NOT* require functions to be pickleable (unlike
              vanilla multiprocess.Pool.map)

    Inputs:
    -------
    fun
        Single input function. Use lambdas or functools.partial
        to enable/expand multi-input. See example

    seq
        Sequence of inputs to map in parallel

    Options:
    --------
    N [None] (integer or None)
        Number of processes to use. If `None`, will use the CPU_COUNT

    Nt [1] (integer)
        Number of threads to use. See notes below on multi-threaded vs
        multi-processes.

    chunksize [1] (int)
        How to be break up the incoming sequence. Useful if also using threads.
        Will be (re)set to max(chunksize,Nt). 
        
        Alternatively, if len(seq) exists and chunksize=-1 it will be reset
        to ceil(len(seq)/(N*Nt)). If chunksize=-1 and len(sequence) is not
        known, a warning will be emitted and chunksize will be reset to
        max(chunksize,Nt)
        

    ordered [True] (bool)
        Whether or not to order the results. If False, will return in whatever
        order they finished.

    daemon [False] (bool)
        Sets the multiprocessing `daemon` flag. If True, it cannot spawn child
        processes (i.e. cannot nest parmap) but should allow for CTRL+C type
        stopping. Supposedly, there may be issues with CTRL+C with it set to
        False. Use at your own risk

    progress [False] (bool)
        Display a progress bar or counter.
        Warning: Inconsistent in iPython/Jupyter notebooks and may clear
        other printed content. Instead, specify as 'nb' to use a Jupyter 
        Widget progress bar.
    
    args [tuple()]
        Specify additional arguments for the function
    
    kwargs [dict()]
        Specify additional keyword arguments

    star [False]
        If True, the arguments to the function will be "starred" so, for example
        if `seq = [ (1,2), (3,4) ]`, the function will be called as
            star is False: fun((1,2))
            star is True:  fun(1,2) <==> fun(*(1,2))
        Can also set to None to not send anything
        
    kwstar [False]
        Assumes all items are (vals,kwvals) where `vals` RESPECTS `star` 
        setting and still includes `args` and `kwvals`. See "Additional 
        Arguments" section below.
    
    exception ['raise' if N>1 else 'proc']
        Choose how to handle an exception in a child process
        
        'raise'     : [Default] raise the exception (outside of the Process). 
                      Also terminates all existing processes.
        'return'    : Return the Exception instead of raising it.
        'proc'      : Raise the exception inside the process. NOT RECOMMENDED
                      unless used in debugging (and with N=1)
        
        Note: An additional attribute called `seq_index` will also be set
              in the exception (whether raised or returned) to aid in debugging.
        
    Additional Arguments
    --------------------
    As noted above, there are many ways to pass additional arguments to
    your function. All of these are not completely needed since parmap
    makes using lambdas so easy, but they are there if preferred.
    
    Assume the following function:
    
        def dj(dictA,dictB):
            '''Join dictA and dictB where dictB takes precedence'''
            dictA = dictA.copy()
            dictA.update(dictB) # NOTE: dictB takes precedence
            return dictA

    Then the behavior is as follows where `args` and `kwargs` come from
    they main function call. The `val` (singular), `vals` (sequence/tuple of 
    values), and `kwvals` are set via the sequence.
    
    | star  | kwstar | expected item | function args  | function keywords   |
    |-------|--------|---------------|----------------|---------------------|
    | False | False  | val           | *((val,)+args) | **kwargs            |†
    | True  | False  | vals          | *(vals+args)   | **kwargs            |
    | None  | False  | ---           | *args          | **kwargs            |°
    | None  | True   | ---           | *args          | **dj(kwargs,kwvals) |‡
    | False | True   | val,kwval     | *((val,)+args) | **dj(kwargs,kwvals) |‡
    | True  | True   | vals,kwval    | *(vals+args)   | **dj(kwargs,kwvals) |‡
                                                        
                † Default
                ° If kwargs and args are empty, basically calls with nothing
                ‡ Note the ordering so kwvals takes precedence

    Note:
    ------
    Performs SEMI-lazy iteration based on chunksize. It will exhaust the input
    iterator but will yield as results are computed (This is similar to the
    `multiprocessing.Pool().imap` behavior)

    Explicitly wrap the parmap call in a list(...) to force immediate
    evaluation

    Threads and/or processes:
    -------------------------
    This tool has the ability to split work amongst python processes
    (via multiprocessing) and python threads (via the multiprocessing.dummy
    module). Python is not very performant in multi-threaded situations
    (due to the GIL); therefore, processes are usually the best for CPU
    bound tasks and threading is good for those that release the GIL (such
    as IO-bound tasks).
    
    WARNING: Many NumPy functions *do* release the GIL and can be threaded, 
             but many NumPy functions are, themselves, multi-threaded.

    Alternatives:
    -------------

    This tool allows more data types, can split with threads, has an optional
    progress bar, and has fewer pickling issues, but these come at a small cost. 
    For simple needs, the following may be better:

    >>> import multiprocessing as mp
    >>> pool = mp.Pool(N) # Or mp.Pool() for N=None
    >>> results = list( pool.imap(fun,seq) ) # or just pool.map
    >>> pool.close()
    
    Additional Note
    ---------------
    For the sake of convenience, a `map=imap=__call__` and
    `close = lambda *a,**k:None` are also added so a parmap function can mimic
    a multiprocessing pool object with duck typing

    Version:
    -------
    __version__
    
    """

    # Build up a dummy function with args,vals,kwargs, and kwvals
    if kwargs is None:
        kwargs = {}

    def _fun(ss):
        _args = list(args)
        _kw = kwargs.copy()
        try:
            # Check for None before boolean
            if star is None and kwstar:  # 4
                _kw.update(ss)
            elif star is None and not kwstar:  # 3
                pass
            elif not star and not kwstar:  # 1
                _args = [ss] + _args
            elif star and not kwstar:  # 2
                _args = list(ss) + _args
            elif not star and kwstar:  # 5
                _args = [ss[0]] + _args
                _kw.update(ss[1])
            elif star and kwstar:  # 6
                _args = list(ss[0]) + _args
                _kw.update(ss[1])
            else:
                raise TypeError()

        except TypeError:  # Mostly because bad input types
            return _Exception(
                TypeError('Ensure `args` are tuples and `kwargs` are dicts'),
                infun=False)
        except Exception as E:
            return _Exception(E, infun=False)

        if exception == 'proc':
            return fun(*_args, **_kw)  # Outside of a try
        try:
            return fun(*_args, **_kw)
        except Exception as E:
            return _Exception(E)
            # It would be great to include all of sys.exc_info() but tracebacks
            # cannot be pickled.

    try:
        tot = len(seq)
    except TypeError:
        tot = None

    N = CPU_COUNT if N is None else N

    if exception is None:
        exception = 'raise' if N > 1 else 'proc'

    if chunksize == -1:
        if tot is None:
            warnings.warn(
                'chunksize=-1 does not work when len(seq) is not known')
        else:
            chunksize = math.ceil(tot / (N * Nt))
    chunksize = max(chunksize, Nt)  # Reset

    # Consider resetting N
    if tot is not None:
        N = min(N, tot // chunksize)

    # Build a counter iterator based on settings and tqdm
    if tqdm is None:
        if   isinstance(progress,(str,unicode))\
         and progress.lower() in ['jupyter','notebook','nb']:
            counter = partial(_counter_nb, tot=tot)
        else:
            counter = partial(_counter, tot=tot)
    else:
        if   isinstance(progress,(str,unicode))\
         and progress.lower() in ['jupyter','notebook','nb']\
         and hasattr(tqdm,'tqdm_notebook'):
            counter = partial(tqdm.tqdm_notebook, total=tot)
        else:
            counter = partial(
                tqdm.tqdm,
                total=tot)  # Set the total since tqdm won't be able to get it.

    # Handle N=1 without any multiprocessing
    if N == 1:
        if Nt == 1:
            out = imap(_fun, seq)
        else:
            pool = mpd.Pool(Nt)  # thread pools don't have the pickle issues
            out = pool.imap(_fun, seq)

        if progress:
            out = counter(out)
        for count, item in enumerate(out):
            if isinstance(item, _Exception):
                item.E.seq_index = count
                if not item.infun:
                    exception = 'raise'  # reset
                if exception == 'raise':
                    raise item.E
                elif exception == 'return':
                    item = item.E
                elif exception == 'proc':
                    pass
                else:
                    raise ValueError(
                        "Unrecognized `exception` setting '{}'".format(
                            exception))
            yield item

        if Nt > 1:
            pool.close()
        return

    q_in = mp.JoinableQueue()  # will need to `join` later to make sure it is empty
    q_out = mp.Queue()

    # Start the workers
    workers = [
        mp.Process(target=_worker, args=(_fun, q_in, q_out, Nt))
        for _ in range(N)
    ]
    for worker in workers:
        worker.daemon = daemon
        worker.start()

    # Create a separate thread to add to the queue in the background
    def add_to_queue():
        for iixs in _iter_chunks(enumerate(seq), chunksize):
            q_in.put(iixs)

        # Once (if ever) it is exhausted, send None to close workers
        for _ in xrange(N):
            q_in.put(None)

    add_to_queue_thread = Thread(target=add_to_queue)
    add_to_queue_thread.start()

    # Define a generator that will pull from the q_out and then run through
    # the rest of our generator/iterator chain for progress and ordering
    def queue_getter():
        finished = 0
        count = 0
        while finished < N:
            out = q_out.get()
            if out is None:
                finished += 1
                continue
            yield out

    # Chain generators on output
    out = queue_getter()
    if progress:
        out = counter(out)

    if ordered:
        out = _sort_generator_unique_integers(out, key=lambda a: a[0])

    # Return items
    for item in out:
        count = item[0]
        item = item[1]
        if isinstance(item, _Exception):
            item.E.seq_index = count
            if not item.infun:
                exception = 'raise'  # reset

            if exception == 'raise':
                for worker in workers:
                    worker.terminate()
                raise item.E
            elif exception == 'return':
                item = item.E
            elif exception == 'proc':
                pass
            else:
                for worker in workers:
                    worker.terminate()
                raise ValueError(
                    "Unrecognized `exception` setting '{}'".format(exception))
        yield item

    # Clean up threads and processes. Make sure the queue is exhausted
    add_to_queue_thread.join()  # Make sure we've exhausted the input
    q_in.join()  # Make sure there is nothing left in the queue
    for worker in workers:
        worker.join()  # shut it down
Example #51
 def test_mfd2eml(self):
     self.three_argument_test(pm3_mfd2eml.main,
             imap(reversed, self.EML2MFD_TESTCASES), c14n=hex_c14n)
Example #52
 def total(self):
     '''
     Returns sum of all counts in all features that are multisets.
     '''
     feats = imap(lambda name: self[name], self._counters())
     return sum(chain(*map(lambda mset: map(abs, mset.values()), feats)))