def __init__(self):
    """Allocate one flat weight vector and expose per-layer views into it.

    Four sub-weight shapes are declared; each occupies a contiguous slice of
    ``self.weights``. ``self.get_subweight(depth)`` returns the slice for
    layer ``depth``, reshaped to its declared shape and multiplied by a
    fixed scale ``(shape[0] + prod(shape[1:])) ** -0.5``.
    """
    self.subweight_shapes = [
        (16, 1, 5, 5),
        (16, 16, 5, 5),
        (16, 4 * 4 * 16),
        (10, 16),
    ]
    sizes = [prod(shape) for shape in self.subweight_shapes]

    # offsets[d] is where layer d starts; the final entry is the total size.
    self.subweight_offsets = [
        sum(sizes[:depth]) for depth in range(len(sizes) + 1)
    ]
    self.subweight_scales = [
        (shape[0] + prod(shape[1:])) ** (-0.5)
        for shape in self.subweight_shapes
    ]

    # NOTE(review): torch.cat returns a non-leaf tensor, so .grad will not be
    # populated on self.weights itself -- confirm this is what callers expect.
    self.weights = torch.cat(
        [torch.randn(size, requires_grad=True) for size in sizes]
    )

    def get_subweight(depth):
        start = self.subweight_offsets[depth]
        stop = self.subweight_offsets[depth + 1]
        flat = self.weights[start:stop]
        return self.subweight_scales[depth] * flat.view(
            self.subweight_shapes[depth]
        )

    self.get_subweight = get_subweight
def compute(filename="p011.txt", run_length=4):
    """Return the largest product of adjacent numbers in a grid file.

    The file holds one whitespace-separated row of integers per line. Every
    straight run of ``run_length`` cells is considered: horizontal, vertical
    and both diagonals. Only four step directions are scanned (right, down,
    down-right, down-left) because the opposite directions cover the same
    cells.

    Args:
        filename: path of the grid file (defaults to the original constant).
        run_length: number of adjacent cells multiplied together.

    Returns:
        The maximum product found, or 0 if the grid holds no complete run.
    """
    with open(filename) as f:
        # Blank lines (e.g. a trailing newline) would otherwise become
        # empty rows.
        matrix = [list(map(int, line.split())) for line in f if line.strip()]
    return _max_line_product(matrix, run_length)


def _max_line_product(matrix, run_length):
    """Return the largest product over all in-bounds runs of ``run_length`` cells."""
    steps = ((0, 1), (1, 0), (1, 1), (1, -1))
    n_rows = len(matrix)
    span = run_length - 1
    best = 0
    for r, row in enumerate(matrix):
        n_cols = len(row)
        for c in range(n_cols):
            for dr, dc in steps:
                end_r = r + dr * span
                end_c = c + dc * span
                # Bounds are checked explicitly: a negative index would
                # silently wrap around to the other side of the grid.
                if not (0 <= end_r < n_rows and 0 <= end_c < n_cols):
                    continue
                candidate = prod(
                    matrix[r + dr * i][c + dc * i] for i in range(run_length)
                )
                if candidate > best:
                    best = candidate
    return best
def __init__(self, observations_shape, action_shape):
    """Build three linear layers sized from the flattened input shapes.

    The input width is the element count of ``observations_shape`` plus
    that of ``action_shape``; the output width is the element count of
    ``action_shape``.
    """
    super(DQN, self).__init__()
    n_obs = prod(observations_shape)
    n_act = prod(action_shape)
    self.linearised = n_obs + n_act

    width_in = self.linearised
    width_a = width_in // 3
    width_b = width_in // 2
    self.lin1 = nn.Linear(width_in, width_a)
    self.lin2 = nn.Linear(width_a, width_b)
    self.lin3 = nn.Linear(width_b, n_act)
def __init__(self, observation_shape, action_shape):
    """Build three linear layers mapping a flattened observation to actions.

    The input width is the element count of ``observation_shape``; the
    output width is the element count of ``action_shape``.
    """
    super(PolicyNet, self).__init__()
    # Number of scalar entries in one observation.
    n_inputs = prod(observation_shape)
    third = n_inputs // 3
    half = n_inputs // 2
    self.lin1 = nn.Linear(n_inputs, third)
    self.lin2 = nn.Linear(third, half)
    self.lin3 = nn.Linear(half, prod(action_shape))
def Dkl(matrix, nu, lamb):
    """Return the sum of three terms computed from ``matrix``, ``nu`` and ``lamb``.

    Each row ``eps`` of ``matrix`` is turned into a normalized weight vector
    proportional to ``exp(-lamb * ep)``. The result is::

        nu   * prod_rows( sum(exp(ep) * p) )
      + lamb * prod_rows( sum(ep * p) )
      - log(Zb_from_matrix(matrix, 4 ** len(matrix)))
    """
    p_matrix = []
    for eps in matrix:
        p_matrix.append(normalize([exp(-lamb * ep) for ep in eps]))

    def row_exp_mean(eps, p_eps):
        return sum(exp(ep) * p for ep, p in zip(eps, p_eps))

    def row_mean(eps, p_eps):
        return sum(ep * p for ep, p in zip(eps, p_eps))

    term0 = nu * prod(
        row_exp_mean(eps, p_eps) for eps, p_eps in zip(matrix, p_matrix))
    term1 = lamb * prod(
        row_mean(eps, p_eps) for eps, p_eps in zip(matrix, p_matrix))
    L = len(matrix)
    term2 = -log(Zb_from_matrix(matrix, 4**L))
    return term0 + term1 + term2
def __init__(self, pp, logger, frepshape):
    """Set up the manual-gradient "SinghNet" and compute its weight dimension.

    ``self.wdim`` is the element count of ``frepshape``; when
    ``pp['singh_grid']`` is truthy, the element count of a
    ``[7, 7, 2 * pp['n_channels']]`` grid is added.
    """
    self.name = "SinghNet"
    self.grid_inp = pp['singh_grid']
    self.frepshape = frepshape

    wdim = prod(frepshape)
    if self.grid_inp:
        wdim = wdim + prod([7, 7, 2 * pp['n_channels']])
    self.wdim = wdim

    super().__init__(name=self.name, pp=pp, logger=logger)
def counts_to_cols(counts):
    """Return the number of columns associated with the given counts.

    The result is the product of two multinomial-style factors:
    ``fac(4)`` divided by the factorials of how often each count value
    occurs, and ``fac(N)`` divided by the factorials of the counts, with
    ``N = sum(counts)``. Above ``N == 170`` the second factor is computed in
    log space and rounded.
    """
    N = sum(counts)
    multiplicities = Counter(counts).values()
    counts_to_bases = fac(4) / prod(fac(mult) for mult in multiplicities)

    if N > 170:
        # Possible numerical issues here: the factor is reconstructed from
        # log-factorials.
        log_ratio = log_fac(N) - sum(log_fac(count) for count in counts)
        bases_to_pos = round(exp(log_ratio))
    else:
        bases_to_pos = fac(N) / prod(fac(count) for count in counts)

    return counts_to_bases * bases_to_pos
def crt(xs, ps):
    """Combine residues with the Chinese Remainder Theorem.

    Given moduli ``ps`` and residues ``xs`` (``xs[i]`` taken mod ``ps[i]``),
    return ``z`` in ``range(prod(ps))`` with ``z = xs[i] mod ps[i]`` for all
    ``i``.

    Raises:
        ValueError: if ``xs`` and ``ps`` differ in length, or if the gcd of
            the moduli is not 1.
    """
    from utils import prod

    if len(xs) != len(ps):
        raise ValueError(
            'Number of residues different from number of moduli: '
            + str(len(xs)) + ', ' + str(len(ps))
        )
    if len(ps) == 1:
        return xs[0] % ps[0]
    # NOTE(review): a joint gcd of 1 does not by itself guarantee pairwise
    # coprimality (e.g. 2, 4, 3) -- confirm modinv rejects such input.
    if gcd(*ps) != 1:
        raise ValueError('Moduli not coprime: ' + str(ps))

    # Formally, z lives in Z/PZ.
    modulus = prod(ps)
    cofactors = [modulus // p for p in ps]

    total = 0
    for residue, p, cofactor in zip(xs, ps, cofactors):
        # inverse * cofactor = 1 mod p, and cofactor = 0 mod every other
        # modulus, so each term contributes `residue` mod p and 0 elsewhere.
        inverse = modinv(cofactor, p)
        total += inverse * cofactor * residue
    return total % modulus
def pohlighellman(g: int, p: int, t: int, qfactors: list):
    """Solve a discrete logarithm by splitting it across the factors of the order.

    ``q`` is taken to be the product of ``qfactors``. For each factor, ``g``
    and ``t`` are raised to ``q // factor`` modulo ``p``, the resulting
    small problem is solved with ``babygiantstep``, and the partial answers
    are recombined with ``crt``.
    """
    from ntheory import crt
    from utils import prod

    q = prod(qfactors)
    # Impose an order on factors.
    qfactors = list(qfactors)
    _dlog_check_params(g, p, t, q)

    # Underlying discrete logarithm algorithm for each projected problem.
    DLOG = babygiantstep

    # Raising g (resp. t) to q // qfactor projects it to the subgroup of
    # order qfactor; each solve yields x mod qfactor.
    cofactors = [q // qfactor for qfactor in qfactors]
    gprojs = [pow(g, cofactor, p) for cofactor in cofactors]
    tprojs = [pow(t, cofactor, p) for cofactor in cofactors]

    xs = []
    for gproj, tproj, qfactor in zip(gprojs, tprojs, qfactors):
        xs.append(DLOG(gproj, p, tproj, qfactor))

    # <g> is isomorphic to (Z/q, +).
    return crt(xs, qfactors)
def __init__(self, output_dims, hidden_features=128):
    """Create the default decoder: one linear layer and three transposed convs.

    :param output_dims: (c, h, w) of the decoded output
    :param hidden_features: forwarded to the base-class constructor
    """
    # Each of the three stride-2 deconvolutions doubles the spatial size, so
    # the starting map is roughly 1/8 of the (padded) output size.
    start_h = math.ceil((output_dims[1] + 2) / 8)
    start_w = math.ceil((output_dims[2] + 2) / 8)
    initial_dims = (64, start_h, start_w)

    super().__init__(output_dims, initial_dims, hidden_features)

    self.fc = nn.Linear(self.hidden_features, utils.prod(self.initial_dims))

    deconv_kwargs = dict(kernel_size=4, stride=2, padding=1)
    self.deconv1 = nn.ConvTranspose2d(64, 64, **deconv_kwargs)
    self.deconv2 = nn.ConvTranspose2d(64, 32, **deconv_kwargs)
    self.deconv3 = nn.ConvTranspose2d(32, self.output_dims[0], **deconv_kwargs)

    print(f" -created default decoder, initial dims {self.initial_dims}")
def fitness(matrix, motif, G):
    """Multiplicative fitness of occupancy over all sites.

    Each site gets weight ``exp(-score)``; the weights are divided by the
    sum of all site weights plus the background term
    ``Zb_from_matrix(matrix, G)``, and the normalized weights are multiplied.
    """
    fgs = []
    for site in motif:
        fgs.append(exp(-score_seq(matrix, site)))
    Zb = Zb_from_matrix(matrix, G)
    Z = sum(fgs) + Zb
    return prod(fg / Z for fg in fgs)
def fitness(matrix, motif, mu, Ne):
    """Return the product over sites of ``(1 + exp(score - mu)) ** -(Ne - 1)``.

    ``score`` is ``score_seq(matrix, site)`` for each site in ``motif``.
    """
    nu = Ne - 1
    return prod(
        (1 + exp(score_seq(matrix, site) - mu)) ** (-nu) for site in motif
    )
def Check_Secrets_B(yzw, B_history):
    """Assert that revealed (y, z, w) secrets are consistent with ``B_history``.

    Reads the module-level ``n``, ``N``, ``M`` and ``star``. The secrets of
    the first ``n`` entries are checked against ``B_history[n - 1]`` (when
    ``n > 0``), and those of all ``N`` entries against ``B_history[N - 1]``
    (when ``n < N - 1``). The reference point is ``B_history[-1]``.

    Returns:
        The product of the ``z`` components over all ``N`` entries.

    Raises:
        AssertionError: if either consistency check fails.
    """
    D0, g0 = B_history[-1]

    def combined(count):
        # Products of the y and z parts, element-wise sums of the w parts.
        Y = prod(yzw[n2][0] for n2 in range(count))
        Z = prod(yzw[n2][1] for n2 in range(count))
        W = [sum(yzw[n2][2][m] for n2 in range(count)) for m in range(M)]
        return Y, Z, W

    def verify(D, g, Y, Z, W):
        assert g == g0**Y
        assert all(star((Y, Z, W[m]), D0[m]) == D[m] for m in range(M))

    if n > 0:
        D, g = B_history[n - 1]
        Y, Z, W = combined(n)
        verify(D, g, Y, Z, W)

    Y, Z, W = combined(N)
    if n < N - 1:
        D, g = B_history[N - 1]
        verify(D, g, Y, Z, W)
    return Z
def partial_pohlighellman(g, p, t, qfactors, B):
    """Solve the discrete log of ``t`` to base ``g`` modulo the B-smooth part of the order.

    Args:
        g: base.
        p: modulus passed to ``pow``.
        t: target.
        qfactors: mapping ``{prime: exponent}`` describing the factorization
            of the order ``q``.
        B: smoothness bound; only prime powers ``prime ** exponent <= B``
            are used.

    Returns:
        ``V`` with ``V = x mod z``, where ``z`` is the product of the smooth
        prime powers. If ``x < z`` this is the full logarithm; otherwise
        ``x = A*z + V`` for some ``A`` still to be determined.
    """
    from utils import prod

    # Iterating a dict yields only its keys, so prod(qfactors) would drop
    # the exponents; build the prime powers explicitly.
    prime_powers = [q_**e for q_, e in qfactors.items()]
    q = prod(prime_powers)

    # Find the smooth part of q: z.
    smoothfactors = [pp for pp in prime_powers if pp <= B]
    z = prod(smoothfactors)

    # Do Pohlig-Hellman on the projection of g down to Z/zZ.
    cofactor = q // z
    V = pohlighellman(pow(g, cofactor, p), p, pow(t, cofactor, p),
                      smoothfactors)
    # Hope V = x mod n.
    return V
def q(self):
    """Return the cached order ``q``, computing it on first access.

    If ``self._qfactors`` (a ``{prime: exponent}`` mapping) is available,
    ``q`` is the product of the prime powers; otherwise it is obtained from
    ``ntheory.order(self.g, self.p)``. The result is stored in ``self._q``.
    """
    if self._q is None:
        if self._qfactors is not None:
            from utils import prod
            self._q = prod(r**e for r, e in self._qfactors.items())
        else:
            # Imported lazily: only needed when the order must be computed.
            from ntheory import order
            self._q = order(self.g, self.p)
    return self._q
def __init__(self, *args, **kwargs):
    """Initialize learning-rate settings and zeroed state-action tables.

    ``self.d`` is the element count of ``self.dims``. ``self.sa_nums`` gives
    each index of ``self.dims`` a unique integer in ``range(self.d)``.
    """
    super().__init__(*args, **kwargs)
    self.alpha = self.pp['alpha']
    self.alpha_decay = self.pp['alpha_decay']

    # State-action pair numbers
    n_pairs = prod(self.dims)
    self.d = n_pairs
    self.qvals = np.zeros(n_pairs)
    self.sa_nums = np.arange(n_pairs).reshape(self.dims)
    self.azap = np.zeros((n_pairs, n_pairs))
def load(self, fpath):
    """Load alignment data from a CSV file under ``config.input_directory``.

    The first row holds one expression per dimension, the second row holds
    the values of the last dimension, and each later row starts with
    ``ndim - 1`` header cells followed by the data cells.

    Sets ``self.expressions``, ``self.possible_values`` and
    ``self.probabilities`` (the data cells flattened row by row).

    Raises:
        Exception: on an empty cell, duplicate column or row header values,
            or when the number of data cells does not match the product of
            the numbers of possible values.
    """
    from exprparser import parse

    # NOTE(review): binary mode is the Python 2 convention for csv.reader;
    # on Python 3 csv expects a text-mode file -- confirm target version.
    with open(os.path.join(config.input_directory, fpath), "rb") as f:
        reader = csv.reader(f)
        lines = skip_comment_cells(strip_rows(reader))
        # next() works on both Python 2 and 3 iterators (.next() is 2-only).
        header = next(lines)
        self.expressions = [parse(s, autovariables=True) for s in header]
        table = []
        for line in lines:
            if any(value == "" for value in line):
                raise Exception("empty cell found in %s" % fpath)
            # SECURITY: eval() executes arbitrary code found in the file;
            # only safe when the input file is trusted.
            table.append([eval(value) for value in line])

    ndim = len(header)
    unique_last_d, dupe_last_d = unique_duplicate(table.pop(0))
    if dupe_last_d:
        print(
            "Duplicate column header value(s) (for '%s') in '%s': %s"
            % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d))
        )
        raise Exception(
            "bad alignment data in '%s': found %d "
            "duplicate column header value(s)" % (fpath, len(dupe_last_d))
        )

    # strip the ndim-1 first columns
    headers = [[line.pop(0) for line in table] for _ in range(ndim - 1)]
    possible_values = [list(unique(values)) for values in headers]
    if ndim > 1:
        # having duplicate values is normal when there are more than 2
        # dimensions but we need to test whether there are duplicates of
        # combinations.
        dupe_combos = list(duplicates(zip(*headers)))
        if dupe_combos:
            print("Duplicate row header value(s) in '%s':" % fpath)
            print(PrettyTable(dupe_combos))
            raise Exception(
                "bad alignment data in '%s': found %d "
                "duplicate row header value(s)" % (fpath, len(dupe_combos))
            )
    possible_values.append(unique_last_d)
    self.possible_values = possible_values
    self.probabilities = list(chain.from_iterable(table))

    num_possible_values = prod(len(values) for values in possible_values)
    if len(self.probabilities) != num_possible_values:
        raise Exception(
            "incoherent alignment data in '%s': %d data cells "
            "found while it should be %d based on the number "
            "of possible values in headers (%s)"
            % (
                fpath,
                len(self.probabilities),
                num_possible_values,
                " * ".join(str(len(values)) for values in possible_values),
            )
        )
def solution(entries: Iterator[int], number_of_combinations: int):
    """Return the product of the first combination of entries summing to 2020.

    Combinations of ``number_of_combinations`` entries are tried in
    ``itertools.combinations`` order. ``StopIteration`` propagates if no
    combination matches.
    """
    candidates = combinations(entries, number_of_combinations)
    return next(prod(group) for group in candidates if sum(group) == 2020)
def calc_mutation_prob(self, seq1, seq2, t):
    """Return the product of per-site transition probabilities at time ``t``.

    For every entry returned by ``calc_nuc_sites(seq1, seq2)``, its first
    two items select a callable in ``self.model.matrix``; the callable is
    evaluated at ``t`` and all results are multiplied together.
    """
    return prod([
        self.model.matrix[change[0]][change[1]](t)
        for change in calc_nuc_sites(seq1, seq2)
    ])
def khatri_rao(As):
    """Return the column-wise Kronecker product of the matrices in ``As``.

    Column ``i`` of the result is the Kronecker product of column ``i`` of
    every matrix, taken in list order. The result has as many rows as the
    product of the inputs' row counts, and as many columns as ``As[0]``.
    """
    n_cols = np.shape(As[0])[1]
    n_rows = ut.prod([np.shape(A)[0] for A in As])
    result = np.zeros((n_rows, n_cols))
    for col in range(n_cols):
        stacked = np.ones(1)
        for A in As:
            stacked = np.kron(stacked, A[:, col])
        result[:, col] = stacked
    return result
def partial_pohlighellman(g: int, p: int, t: int, qfactors: list, B: int):
    """Solve the discrete log of ``t`` to base ``g`` in the B-smooth subgroup.

    Both ``g`` and ``t`` are projected down to the maximal subgroup of
    ``<g>`` whose order is the product of the entries of ``qfactors`` that
    are at most ``B``, and ``pohlighellman`` is run there.

    Returns:
        ``V`` with ``V = x mod z``, where ``z`` is the smooth part of the
        order. If ``x < z`` this is the full logarithm; otherwise
        ``x = A*z + V`` for some ``A`` still to be determined.
    """
    from utils import prod

    q = prod(qfactors)

    # The smooth part of q is the product of the factors not exceeding B.
    smoothfactors = []
    for qfactor in qfactors:
        if qfactor <= B:
            smoothfactors.append(qfactor)
    z = prod(smoothfactors)

    # Project g and t down to Z/zZ, then run Pohlig-Hellman there.
    g_proj = pow(g, q // z, p)
    t_proj = pow(t, q // z, p)
    # Hope V = x mod n.
    return pohlighellman(g_proj, p, t_proj, smoothfactors)
def solve2():
    """Solve second puzzle.

    Labels the 4-connected regions of cells whose value is not 9 and
    returns the product of the three largest region sizes.
    """
    def to_digits(row):
        return [int(c) for c in row]

    cave_map = np.array(input(convert_fn=to_digits))

    # Structuring element with the four direct neighbours set.
    kernel = np.array([[0, 1, 0],
                       [1, 0, 1],
                       [0, 1, 0]])
    labels, ncomp = label(cave_map != 9, kernel)

    basins = [np.count_nonzero(labels == n) for n in range(1, ncomp + 1)]
    basins.sort(reverse=True)
    return prod(basins[:3])
def __init__(self, input_dims, out_features=128):
    """Create the default encoder: three 3x3 convolutions and a final layer.

    :param input_dims: dims of the input, excluding batch size. The code
        uses index 0 as the channel count and indices 1 and 2 as the
        spatial sizes.
    :param out_features: width of the final linear layer; ``None`` replaces
        that layer with an identity and sets ``self.out_features`` to the
        flattened size of ``self.final_dims``.
    """
    super().__init__(input_dims, out_features)

    def eighth(size):
        return size // 2 // 2 // 2

    self.final_dims = (64, eighth(self.input_dims[1]), eighth(self.input_dims[2]))

    self.conv1 = nn.Conv2d(self.input_dims[0], 32, kernel_size=3)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

    if out_features is not None:
        self.fc = nn.Linear(utils.prod(self.final_dims), self.out_features)
    else:
        # setting out_features to none removes the final layer
        print(" (final linear layer is disabled)")
        self.fc = nn.Identity()
        self.out_features = utils.prod(self.final_dims)

    print(f" -created default encoder, final dims {self.final_dims}")
def build(self):
    """Build the TensorFlow graph for a single-layer Bernoulli policy.

    Creates placeholders for feature representations, reward, average
    reward, action probability and action; computes a sigmoid probability
    from one zero-initialized weight column; samples an action; and wires a
    manually specified gradient into the trainer returned by
    ``build_default_trainer``.

    Sets ``self.prob``, ``self.act``, ``self.lr`` and ``self.do_train``.
    Returns ``(None, None)``.
    """
    self.frep = tf.placeholder(tf.int32, self.frepshape, "feature_reps")
    self.reward = tf.placeholder(tf.float32, [], "rewards")
    self.avg_reward = tf.placeholder(tf.float32, [], "avg_reward")
    self.prob_ph = tf.placeholder(tf.float32, [1], "act_prob")
    self.act_ph = tf.placeholder(tf.int32, [1], "act")

    frep = tf.cast(scale_freps(self.frep), tf.float32)

    # Number of parameters in neural net
    wdim = prod(self.frepshape[1:])
    if self.grid_inp:
        gridshape = [None, self.rows, self.cols, self.n_channels * 2]
        wdim = wdim + prod(gridshape[1:])
        self.grid = tf.placeholder(tf.bool, gridshape, "grid")
        grid = tf.cast(self.grid, tf.float32)
        net_inp = tf.concat([grid, frep], axis=3)
    else:
        net_inp = frep

    net_inp_rv = tf.layers.flatten(net_inp)  # x_t Row vector
    net_inp_cv = tf.transpose(net_inp_rv)  # Col vector

    hidden = tf.Variable(tf.zeros(shape=(wdim, 1)), name="dense")
    dense = tf.matmul(net_inp_rv, hidden)
    self.prob = tf.nn.sigmoid(dense)
    bernoulli = tf.distributions.Bernoulli(probs=self.prob)
    self.act = bernoulli.sample()

    # Manual gradient: -(reward - avg_reward) * (action - prob) * input.
    neg_advantage = -(self.reward - self.avg_reward)
    act_gap = tf.cast(self.act_ph, tf.float32) - self.prob_ph
    grads = neg_advantage * act_gap * net_inp_cv
    grads_and_vars = [(grads, hidden)]

    trainer, self.lr, global_step = build_default_trainer(
        net_lr=self.pp['alpha'],
        net_lr_decay=self.pp['alpha_decay'],
        optimizer=self.pp['optimizer'])
    self.do_train = trainer.apply_gradients(
        grads_and_vars, global_step=global_step)
    return None, None
def get_adjacent_sums(x, y):
    """Return the largest four-cell product starting at grid coordinates (x, y).

    Despite the name, this multiplies: the cell ``c(x, y)`` times the
    largest product of the next three cells in any of the eight directions.
    """
    directions = (
        (1, 0),    # east
        (-1, 0),   # west
        (0, -1),   # north
        (0, 1),    # south
        (-1, -1),  # northwest
        (1, -1),   # northeast
        (-1, 1),   # southwest
        (1, 1),    # southeast
    )
    origin = c(x, y)
    neighbour_products = [
        prod(tuple(c(x + dx * k, y + dy * k) for k in (1, 2, 3)))
        for dx, dy in directions
    ]
    return origin * max(neighbour_products)
def get_topk_percent(tensor, top_k_percent_pixels):
    """Return the top-k entries of each row of the flattened tensor.

    Similar to
    https://github.com/tensorflow/models/blob/master/research/deeplab/utils/train_utils.py

    Args:
        tensor: At least 2D. Everything after the first dimension is
            flattened before selection.
        top_k_percent_pixels (float): fraction of pixels to return
            (between 0 and 1).

    Returns:
        The result of ``Tensor.topk`` (values and indices) with
        ``k = int(top_k_percent_pixels * pixels_per_item)``.
    """
    assert len(tensor.shape) >= 2
    batch = tensor.shape[0]
    pixels_per_item = prod(tensor[0].shape)
    k = int(top_k_percent_pixels * pixels_per_item)
    flat = tensor.view(batch, -1)
    return flat.topk(k)
def compute_inverse_filter_basic_fft(ker, eps, ps):
    """Compute a regularized inverse filter for each kernel via the frequency domain.

    For every kernel ``ker[n]``: convert it to an OTF of size ``(ps, ps)``,
    form ``conj(K) / (sum(conj(K) * K, last axis) + eps)``, and convert the
    result back with ``otf2psf``. The per-kernel results are concatenated
    with ``torch.cat``.

    The original note reads "ft (nks, ps, ps)" -- presumably the output
    layout; confirm against ``otf2psf``.
    """
    def invert(kernel):
        K = psf2otf(kernel, (ps, ps))
        energy = utils.prod(utils.conj(K), K).sum(-1, keepdim=True)
        D = utils.conj(K) / (energy + eps)
        return otf2psf(D, (ps, ps))

    nks = ker.shape[0]
    inv_ker = [invert(ker[n]) for n in range(nks)]
    return torch.cat(inv_ker)
def partial_pohlighellman(g, p, t, qfactors, B):
    """Solve the discrete log of ``t`` to base ``g`` modulo the B-smooth part of the order.

    Args:
        g: base.
        p: modulus passed to ``pow``.
        t: target.
        qfactors: mapping ``{prime: exponent}`` describing the factorization
            of the order ``q`` of ``<g>``.
        B: smoothness bound; only prime powers ``prime ** exponent <= B``
            are used.

    Returns:
        ``V`` with ``V = x mod z``, where ``z`` is the product of the smooth
        prime powers. If ``x < z`` this is the full logarithm; otherwise
        ``x = A*z + V`` for some ``A`` still to be determined.
    """
    from utils import prod

    # Iterating a dict yields only its keys, so prod(qfactors) would drop
    # the exponents; build the prime powers explicitly.
    prime_powers = [q_**e for q_, e in qfactors.items()]
    q = prod(prime_powers)

    # Find the smooth part of q, z.
    smoothfactors = [pp for pp in prime_powers if pp <= B]
    z = prod(smoothfactors)

    # Do Pohlig-Hellman on the projection of g down to Z/zZ.
    cofactor = q // z
    V = pohlighellman(pow(g, cofactor, p), p, pow(t, cofactor, p),
                      smoothfactors)
    # Hope V = x mod n.
    return V
def __init__(self, input_dims, out_features=128):
    """Create the fast encoder: a stride-3 convolution, two 3x3 convolutions, one linear layer.

    :param input_dims: dims of the input, excluding batch size. The code
        uses index 0 as the channel count and indices 1 and 2 as the
        spatial sizes.
    :param out_features: forwarded to the base-class constructor; the
        linear layer outputs ``self.out_features`` values.
    """
    super().__init__(input_dims, out_features)

    in_channels = self.input_dims[0]
    reduced_h = self.input_dims[1] // 6
    reduced_w = self.input_dims[2] // 6
    self.final_dims = (64, reduced_h, reduced_w)

    # based on nature
    self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=3)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

    flat_size = utils.prod(self.final_dims)
    self.fc = nn.Linear(flat_size, self.out_features)

    print(f" -created fast encoder, final dims {self.final_dims}")
def generate_p_ch_cartesian(ms):
    """Tabulate per-component values over the Cartesian product of index ranges.

    For each size ``m`` in ``ms`` a vector ``generate_p(m)`` is created.
    Column ``i`` of the result corresponds to the ``i``-th tuple of
    ``itertools.product(range(m) for m in ms)``; row ``c`` holds the entry
    of the ``c``-th vector selected by that tuple.

    Returns:
        Array of shape ``(len(ms), prod(ms))``.
    """
    ranges = [range(m) for m in ms]
    cp = list(itertools.product(*ranges))
    ps = [generate_p(m) for m in ms]
    r = len(ms)
    m = ut.prod(ms)
    p_ch = np.zeros((r, m))
    # Function-call form prints the same text on Python 2 and is valid on
    # Python 3, unlike the bare print statement.
    print(cp)
    for i, tuples in enumerate(cp):
        for c in range(r):
            p_ch[c, i] = ps[c][tuples[c]]
    return p_ch
def Check_Secrets_B0(uv, alpha_beta):
    """Assert that revealed (u, v) exponents match the broadcast commitments.

    Reads the module-level ``n``, ``N``, ``K``, ``f0`` and ``delta``. The
    exponents of the first ``n`` entries are checked against the products
    of the first ``n`` ``alpha_beta`` pairs (when ``n > 0``); the exponents
    of all ``N`` entries are checked against ``delta`` (when ``n < N - 1``).

    Returns:
        ``(u, v)``: the element-wise sums of the exponents over all ``N``
        entries.

    Raises:
        AssertionError: if either consistency check fails.
    """
    def summed(count):
        # Element-wise sums of the u and v exponent vectors.
        u = [sum(uv[n2][0][k] for n2 in range(count)) for k in range(K)]
        v = [sum(uv[n2][1][k] for n2 in range(count)) for k in range(K)]
        return u, v

    def committed(u, v):
        a = prod(f0[k]**u[k] for k in range(K))
        b = prod(f0[k]**v[k] for k in range(K))
        return a, b

    if n > 0:
        u, v = summed(n)
        a, b = committed(u, v)
        assert prod(alpha_beta[n2][0] for n2 in range(n)) == a
        assert prod(alpha_beta[n2][1] for n2 in range(n)) == b

    u, v = summed(N)
    if n < N - 1:
        a, b = committed(u, v)
        assert delta == (a, b)
    return u, v
def solve_second():
    """Count the valid arrangements by multiplying per-run combination counts.

    The sorted input is turned into a string of consecutive differences
    (with a leading ``3``). Every run of two or more ``1`` differences
    enclosed by ``3`` s can be rearranged independently, so the answer is
    the product of the per-run counts.
    """
    def n_combinations(x):
        # Ways to cross x consecutive 1-steps using hops of 1, 2 or 3:
        # f(x) = f(x-1) + f(x-2) + f(x-3), with f(0) = f(1) = 1, f(2) = 2.
        # The former shortcut 2*f(x-1) - 1 matches only up to x == 5.
        ways = [1, 1, 2]
        while len(ways) <= x:
            ways.append(ways[-1] + ways[-2] + ways[-3])
        return ways[x]

    input_data = prepare_data()
    j_diff = [later - earlier
              for earlier, later in zip(input_data, input_data[1:])]
    j_diff.insert(0, 3)

    # convert array of differences into string
    diff_str = "".join(str(x) for x in j_diff)

    # extract the 1-difference fields; the lookahead lets neighbouring runs
    # share their delimiting 3
    one_counts = re.compile('(?=3(1{2,})3)').findall(diff_str)
    return prod([n_combinations(len(c)) for c in one_counts])
def build_conv(arch, img_width, stack=3):
    """Build the convolutional trunk for the named architecture.

    Args:
        arch: one of ``"ilya"``, ``"ilya_bn"``, ``"impala"``,
            ``"impala_bn"``.
        img_width: width (and height) of the square input image.
        stack: number of stacked frames; input channels are ``stack * 3``.

    Returns:
        ``(conv_layers, conv_output_dim)``: an ``nn.ModuleList`` and the
        flattened output size from ``utils.conv_list_out_dim``.

    Raises:
        ValueError: if ``arch`` is not a known architecture name.
    """
    in_channels = stack * 3
    if arch == "ilya":
        # architecture used in
        # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail
        # by Ilya Kostrikov
        conv_layers = nn.ModuleList([
            nn.Conv2d(in_channels, 32, 8, stride=4),
            nn.Conv2d(32, 32, 4, stride=2),
            nn.Conv2d(32, 32, 3),
        ])
    elif arch == "ilya_bn":
        conv_layers = nn.ModuleList([
            nn.BatchNorm2d(in_channels),
            nn.Conv2d(in_channels, 32, 8, stride=4),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 4, stride=2),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 3),
        ])
    elif arch == "impala":
        conv_layers = nn.ModuleList([
            nn.Conv2d(in_channels, 16, 8, stride=4),
            nn.Conv2d(16, 32, 4, stride=2),
        ])
    elif arch == "impala_bn":
        conv_layers = nn.ModuleList([
            nn.BatchNorm2d(in_channels),
            nn.Conv2d(in_channels, 16, 8, stride=4),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, 4, stride=2),
        ])
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError("unknown conv architecture: {!r}".format(arch))

    conv_output_dim = utils.prod(
        utils.conv_list_out_dim(conv_layers, img_width, img_width))
    return conv_layers, conv_output_dim
def euler8():
    """Return the largest product of five consecutive digits in the 1000-digit number."""
    number = (
        "73167176531330624919225119674426574742355349194934"
        "96983520312774506326239578318016984801869478851843"
        "85861560789112949495459501737958331952853208805511"
        "12540698747158523863050715693290963295227443043557"
        "66896648950445244523161731856403098711121722383113"
        "62229893423380308135336276614282806444486645238749"
        "30358907296290491560440772390713810515859307960866"
        "70172427121883998797908792274921901699720888093776"
        "65727333001053367881220235421809751254540594752243"
        "52584907711670556013604839586446706324415722155397"
        "53697817977846174064955149290862569321978468622482"
        "83972241375657056057490261407972968652414535100474"
        "82166370484403199890008895243450658541227588666881"
        "16427171479924442928230863465674813919123162824586"
        "17866458359124566529476545682848912883142607690042"
        "24219022671055626321111109370544217506941658960408"
        "07198403850962455444362981230987879927244284909188"
        "84580156166097919133875499200524063689912560717606"
        "05886116467109405077541002256983155200055935729725"
        "71636269561882670428252483600823257530420752963450"
    )
    # convert to digits
    digits = list(map(int, number))
    return max(prod(chunk) for chunk in window(digits, 5))
def Wrap_B0():
    """Commit to fresh random exponents and combine everyone's commitments.

    Draws two vectors of ``K`` random indices, forms the commitment pair
    ``(a, b)`` as products of ``f0[k]`` raised to those indices, broadcasts
    it from position ``n`` while receiving the other ``N - 1`` pairs, and
    multiplies all pairs component-wise.

    Returns:
        ``(delta, _u, _v, alpha_beta)``: the combined pair, the two random
        exponent vectors, and the list of all pairs.
    """
    _u = [H.random_index() for k in range(K)]
    _v = [H.random_index() for k in range(K)]

    a = prod(f0[k]**_u[k] for k in range(K))
    b = prod(f0[k]**_v[k] for k in range(K))

    alpha_beta = []
    for n2 in range(N):
        if n == n2:
            alpha_beta.append(broadcast((a, b)))
        else:
            alpha_beta.append(recv(n2))

    delta_a = prod(pair[0] for pair in alpha_beta)
    delta_b = prod(pair[1] for pair in alpha_beta)
    delta = (delta_a, delta_b)
    return delta, _u, _v, alpha_beta
L = len(matrix) sites = [random_site(L) for _ in xrange(trials)] mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site])) for site in sites) log_ZS = L*log(4) + log(mean_ZS) return log_ZS def log_ZM_empirical((matrix, mu, Ne), N, trials=1000): log_ZS = log_ZS_empirical((matrix, mu, Ne), trials=trials) return N * log_ZS def log_ZM_empirical_ref((matrix, mu, Ne), N, trials=1000): L = len(matrix) acc = 0 for i in xrange(trials): eps = [score_seq(matrix, random_site(L)) for _ in range(N)] acc += prod(1.0/(1+exp(ep-mu))**(Ne-1) for ep in eps) est_mean = acc / trials log_Zs = N*L*log(4) + log(est_mean) return log_Zs def log_ZM_empirical_ref2(theta, N, trials=1000): L = len(theta[0]) lfhs = [log_fhat(theta, random_motif(L, N)) for _ in xrange(trials)] return N*L * log(4) + logsum(lfhs) - log(trials) def log_ZM_empirical_ref3(theta, N,trials=1000): L = len(theta[0]) lfhs = [log_fhat(theta, random_motif(L, 1)) for _ in xrange(trials)] log_avg = logsum(lfhs) - log(trials) log_ZS = L*log(4) + log_avg log_ZM = N * log_ZS
def align_get_indices_nd(
    context,
    filter_value,
    score,
    expressions,
    possible_values,
    probabilities,
    take_filter=None,
    leave_filter=None,
    weights=None,
    past_error=None,
    method="default",
):
    """Select, per alignment group, the individuals that should be affected.

    Individuals are partitioned into groups by the values of
    ``expressions``; each group has a target proportion in
    ``probabilities``. Within a group, individuals matching ``take_filter``
    are always selected, those matching ``leave_filter`` never, and the rest
    are chosen either by highest ``score`` (``method="default"``) or by a
    random walk over cumulated scores (``method="walk"``).

    Returns:
        ``(total_indices, split_info)`` where ``split_info`` is ``None`` or
        ``(to_split_indices, to_split_overflow)`` when weighted individuals
        exceed their group's target.

    Raises:
        Exception: if ``score`` is an array and ``method`` is neither
            ``"default"`` nor ``"walk"``.
    """
    assert len(expressions) == len(possible_values)

    has_filter = filter_value is not None
    if has_filter:
        num_to_align = np.sum(filter_value)
    else:
        num_to_align = context_length(context)

    # TODO: allow any temporal variable
    if "period" in [str(e) for e in expressions]:
        period = context["period"]
        expressions, possible_values, probabilities = extract_period(
            period, expressions, possible_values, probabilities
        )

    if expressions:
        assert len(probabilities) == prod(len(pv) for pv in possible_values)
        # retrieve the columns we need to work with
        columns = [expr_eval(expr, context) for expr in expressions]
        groups = partition_nd(
            columns, filter_value if has_filter else True, possible_values
        )
    else:
        if has_filter:
            groups = [filter_value.nonzero()[0]]
        else:
            groups = [np.arange(num_to_align)]
        assert len(probabilities) == 1

    # the sum is not necessarily equal to len(a), because some individuals
    # might not fit in any group (eg if some alignment data is missing)
    num_aligned = sum(len(g) for g in groups)
    if num_aligned < num_to_align:
        if has_filter:
            to_align = set(filter_value.nonzero()[0])
        else:
            to_align = set(xrange(num_to_align))
        aligned = set()
        for member_indices in groups:
            aligned |= set(member_indices)
        unaligned = to_align - aligned
        print("Warning: %d individual(s) do not fit in any alignment category"
              % len(unaligned))
        print(
            PrettyTable(
                [["id"] + expressions]
                + [[col[row] for col in [context["id"]] + columns]
                   for row in unaligned]
            )
        )

    if has_filter:
        bool_filter_value = filter_value.copy()
    else:
        bool_filter_value = True

    maybe_filter = bool_filter_value
    if take_filter is not None:
        # XXX: I wonder if users would prefer if filter_value was taken into
        # account or not. This only impacts what is displayed on the
        # console, but still...
        take = np.sum(take_filter)
        take_indices = (take_filter & bool_filter_value).nonzero()[0]
        maybe_filter &= ~take_filter
    else:
        take = 0
        take_indices = None

    if leave_filter is not None:
        leave = np.sum(leave_filter)
        maybe_filter &= ~leave_filter
    else:
        leave = 0

    if take_filter is not None or leave_filter is not None:
        maybe_indices = maybe_filter.nonzero()[0]
    else:
        maybe_indices = None

    total_underflow = 0
    total_overflow = 0
    total_affected = 0
    total_indices = []
    to_split_indices = []
    to_split_overflow = []

    for group_idx, members_indices, probability in izip(count(), groups,
                                                        probabilities):
        if not len(members_indices):
            continue

        if weights is None:
            expected = len(members_indices) * probability
        else:
            expected = np.sum(weights[members_indices]) * probability
        affected = int(expected)

        if past_error is not None:
            group_overflow = past_error[group_idx]
            if group_overflow != 0:
                affected -= group_overflow
            past_error[group_idx] = 0

        # Randomly round up with probability equal to the fractional part.
        if random.random() < expected - affected:
            affected += 1
        total_affected += affected

        if take_indices is not None:
            group_always = np.intersect1d(members_indices, take_indices,
                                          assume_unique=True)
            if weights is None:
                num_always = len(group_always)
            else:
                num_always = np.sum(weights[group_always])
            total_indices.extend(group_always)
        else:
            num_always = 0

        if affected > num_always:
            if maybe_indices is not None:
                group_maybe_indices = np.intersect1d(members_indices,
                                                     maybe_indices,
                                                     assume_unique=True)
            else:
                group_maybe_indices = members_indices

            if isinstance(score, np.ndarray):
                maybe_members_rank_value = score[group_maybe_indices]
                if method == "default":
                    sorted_local_indices = np.argsort(maybe_members_rank_value)
                    sorted_global_indices = \
                        group_maybe_indices[sorted_local_indices]
                elif method == "walk":
                    sorted_local_indices = \
                        np.random.permutation(group_maybe_indices)
                    sorted_global_indices = \
                        group_maybe_indices[sorted_local_indices]
                else:
                    raise Exception(
                        "If not default, method option should be walk")
            else:
                assert isinstance(score, (bool, int, float))
                # if the score expression is a constant, we don't need to
                # sort indices. In that case, the alignment will take
                # the last individuals created first (highest id).
                sorted_global_indices = group_maybe_indices

            # maybe_to_take is always > 0
            maybe_to_take = affected - num_always

            if method == "default":
                if weights is None:
                    # take the last X individuals (ie those with the highest
                    # score)
                    indices_to_take = sorted_global_indices[-maybe_to_take:]
                else:
                    maybe_weights = weights[sorted_global_indices]
                    # we need to invert the order because members are sorted
                    # on score ascendingly and we need to take those with
                    # highest score.
                    weight_sums = np.cumsum(maybe_weights[::-1])
                    threshold_idx = np.searchsorted(weight_sums,
                                                    maybe_to_take)
                    if threshold_idx < len(weight_sums):
                        num_to_take = threshold_idx + 1
                        # if there is enough weight to reach "maybe_to_take"
                        overflow = weight_sums[threshold_idx] - maybe_to_take
                        if overflow > 0:
                            # the next individual has too much weight, so we
                            # need to split it.
                            id_to_split = sorted_global_indices[threshold_idx]
                            past_error[group_idx] = overflow
                            to_split_indices.append(id_to_split)
                            to_split_overflow.append(overflow)
                        else:
                            # we got exactly the number we wanted
                            assert overflow == 0
                    else:
                        # we can't reach our target number of individuals
                        # (probably because of a "leave" filter), so we
                        # take all the ones we have
                        # XXX: should we add *this* underflow to the
                        # past_error too? It would probably accumulate!
                        num_to_take = len(weight_sums)
                    indices_to_take = sorted_global_indices[-num_to_take:]

            if method == "walk":
                # U draws a random number and then makes maybe_to_take steps
                # of length 1
                U = random.random() + np.arange(
                    min(maybe_to_take, len(sorted_global_indices)))
                # in the weighted case maybe_to_take can be larger than
                # len(sorted_global_indices), so we limit U to that length
                # and cut later to reach the right weighted value; the last
                # index should then occur many times at the end of
                # indices_to_take.
                # on the random sample, scores are cumulated and then we
                # extract the index of each value just before each value of U
                indices_to_take = np.searchsorted(
                    np.cumsum(score[sorted_local_indices]), U)
                indices_to_take = sorted_local_indices[indices_to_take]
                # the same sidewalk method is applied below for weights;
                # it can be removed if weights are no longer used
                if weights is not None:
                    # TODO: test that case
                    maybe_weights = weights[indices_to_take]
                    weight_sums = np.cumsum(maybe_weights[::-1])
                    threshold_idx = np.searchsorted(weight_sums,
                                                    maybe_to_take)
                    indices_to_take = indices_to_take[:(threshold_idx + 1)]

            underflow = maybe_to_take - len(indices_to_take)
            if underflow > 0:
                total_underflow += underflow
            total_indices.extend(indices_to_take)
        elif affected < num_always:
            total_overflow += num_always - affected

    # this assertion is only valid in the non weighted case
    # assert len(total_indices) == \
    #        total_affected + total_overflow - total_underflow
    print(" %d/%d" % (len(total_indices), num_aligned), end=" ")

    if (take_filter is not None) or (leave_filter is not None):
        print("[take %d, leave %d]" % (take, leave), end=" ")
    if total_underflow:
        print("UNDERFLOW: %d" % total_underflow, end=" ")
    if total_overflow:
        print("OVERFLOW: %d" % total_overflow, end=" ")

    split_info = None
    if to_split_indices:
        split_info = (to_split_indices, to_split_overflow)
    return total_indices, split_info
def evaluate(self, context):
    """Build a cross-tabulation of ``self.expr`` over ``self.expressions``.

    Individuals (optionally restricted by ``self.filter``) are partitioned
    by the unique values of each expression. ``self.expr`` is evaluated on
    every group, on each row total, on each column total and on the grand
    total. When ``self.percent`` is set, all values are expressed as a
    percentage of the grand total.

    Returns:
        A ``PrettyTable`` whose last expression runs across the columns and
        whose other expressions are folded into the row headers.
    """
    expressions = self.expressions
    columns = [expr_eval(e, context) for e in expressions]
    if self.filter is not None:
        filter_value = expr_eval(self.filter, context)
        # TODO: make a function out of this, I think we have this pattern
        # in several places
        filtered_columns = [
            col[filter_value]
            if isinstance(col, np.ndarray) and col.shape
            else [col]
            for col in columns
        ]
    else:
        filtered_columns = columns

    possible_values = [np.unique(col) for col in filtered_columns]
    groups = partition_nd(filtered_columns, True, possible_values)

    # TODO: use group_indices_nd directly to avoid using np.unique
    # this is twice as fast (unique is very slow) but breaks because
    # the rest of the code assumes all combinations are present
    # if self.filter is not None:
    #     filter_value = expr_eval(self.filter, context)
    # else:
    #     filter_value = True
    #
    # d = group_indices_nd(columns, filter_value)
    # pvalues = sorted(d.keys())
    # ndim = len(columns)
    # possible_values = [[pv[i] for pv in pvalues]
    #                    for i in range(ndim)]
    # groups = [d[k] for k in pvalues]

    # groups is a (flat) list of list.
    # the first variable is the outer-most "loop",
    # the last one the inner most.

    # add total for each row
    folded_exprs = len(expressions) - 1
    len_pvalues = [len(vals) for vals in possible_values]
    width = len_pvalues[-1]
    height = prod(len_pvalues[:-1])

    def xy_to_idx(x, y):
        # divide by the prod of possible values of expressions to its
        # right, mod by its own number of possible values.
        # Floor division keeps the offsets integral under true division
        # (Python 3 or `from __future__ import division`); a float here
        # could not be used to index `groups`.
        offsets = [
            (y // prod(len_pvalues[v + 1:folded_exprs])) % len_pvalues[v]
            for v in range(folded_exprs)
        ]
        return sum(v * prod(len_pvalues[i + 1:])
                   for i, v in enumerate(offsets)) + x

    groups_wh_totals = []
    for y in range(height):
        line_indices = []
        for x in range(width):
            member_indices = groups[xy_to_idx(x, y)]
            groups_wh_totals.append(member_indices)
            line_indices.extend(member_indices)
        groups_wh_totals.append(line_indices)

    # width just increased because of totals
    width += 1

    # add total for each column (including the "total per row" one)
    for x in range(width):
        column_indices = []
        for y in range(height):
            column_indices.extend(groups_wh_totals[y * width + x])
        groups_wh_totals.append(column_indices)

    # evaluate the expression on each group
    expr = self.expr
    used_variables = expr.collect_variables(context)
    used_variables.add("id")

    data = []
    for member_indices in groups_wh_totals:
        local_context = context_subset(context, member_indices,
                                       used_variables)
        data.append(expr_eval(expr, local_context))

    if self.percent:
        # convert to np.float64 to get +-inf if total_value is int(0)
        # instead of Python's built-in behavior of raising an exception.
        # This can happen at least when using the default expr (grpcount())
        # and the filter yields empty groups
        total_value = np.float64(data[-1])
        data = [100.0 * value / total_value for value in data]

    # if self.by or self.percent:
    #     if self.percent:
    #         total_value = data[-1]
    #         divisors = [total_value for _ in data]
    #     else:
    #         num_by = len(self.by)
    #         inc = prod(len_pvalues[-num_by:])
    #         num_groups = len(groups)
    #         num_categories = prod(len_pvalues[:-num_by])
    #
    #         categories_groups_idx = [range(cat_idx, num_groups, inc)
    #                                  for cat_idx in range(num_categories)]
    #
    #         divisors = ...
    #
    #     data = [100.0 * value / divisor
    #             for value, divisor in izip(data, divisors)]

    # Output layouts:
    #
    # gender | False | True | total
    #        |    20 |   16 |    35
    #
    # gender | False | True |
    #   dead |       |      | total
    #  False |    20 |   15 |    35
    #   True |     0 |    1 |     1
    #  total |    20 |   16 |    36
    #
    #          |   dead | False | True |
    # agegroup | gender |       |      | total
    #        5 |  False |    20 |   15 |    xx
    #        5 |   True |     0 |    1 |    xx
    #       10 |  False |    25 |   10 |    xx
    #       10 |   True |     1 |    1 |    xx
    #          |  total |    xx |   xx |    xx

    # add headers
    labels = [str(e) for e in expressions]
    if folded_exprs:
        result = [
            [""] * (folded_exprs - 1)
            + [labels[-1]]
            + list(possible_values[-1])
            + [""],
            # 2nd line
            labels[:-1] + [""] * len(possible_values[-1]) + ["total"],
        ]
        categ_values = list(product(*possible_values[:-1]))
        last_line = [""] * (folded_exprs - 1) + ["total"]
        categ_values.append(last_line)
        height += 1
    else:
        # if there is only one expression, the headers are different
        result = [[labels[-1]] + list(possible_values[-1]) + ["total"]]
        categ_values = [[""]]

    for y in range(height):
        result.append(list(categ_values[y])
                      + data[y * width:(y + 1) * width])

    return PrettyTable(result)
mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site])) for site in sites) log_ZS = L * log(4) + log(mean_ZS) return log_ZS def log_ZM_empirical((matrix, mu, Ne), N, trials=1000): log_ZS = log_ZS_empirical((matrix, mu, Ne), trials=trials) return N * log_ZS def log_ZM_empirical_ref((matrix, mu, Ne), N, trials=1000): L = len(matrix) acc = 0 for i in xrange(trials): eps = [score_seq(matrix, random_site(L)) for _ in range(N)] acc += prod(1.0 / (1 + exp(ep - mu))**(Ne - 1) for ep in eps) est_mean = acc / trials log_Zs = N * L * log(4) + log(est_mean) return log_Zs def log_ZM_empirical_ref2(theta, N, trials=1000): L = len(theta[0]) lfhs = [log_fhat(theta, random_motif(L, N)) for _ in xrange(trials)] return N * L * log(4) + logsum(lfhs) - log(trials) def log_ZM_empirical_ref3(theta, N, trials=1000): L = len(theta[0]) lfhs = [log_fhat(theta, random_motif(L, 1)) for _ in xrange(trials)] log_avg = logsum(lfhs) - log(trials)
def w(eps):
    """Return prod(C(L, ep) for ep in eps) * 3 ** sum(eps).

    ``L`` and ``fac`` are taken from the enclosing scope; ``C(L, ep)`` is
    written out as ``fac(L) / (fac(ep) * fac(L - ep))``.

    NOTE(review): presumably this counts the ways to place ``ep``
    mismatches in each of ``len(eps)`` sites of length ``L`` with 3
    alternative letters per mismatch -- confirm against the caller.
    """
    numerator = fac(L) ** len(eps)
    denominator = prod(fac(ep) * fac(L - ep) for ep in eps)
    # Evaluated left to right: (numerator / denominator) * 3 ** sum(eps).
    return numerator / denominator * 3 ** sum(eps)
def load_ndarray(fpath, celltype=None):
    """Load an N-dimensional labeled array from a CSV file.

    Layout read by this function (after ``strip_rows`` and
    ``skip_comment_cells`` have pre-processed the rows):

    * first row: one cell per dimension (``ndim = len(header)``);
    * second row: the labels of the last dimension (column headers);
    * every other row: ``ndim - 1`` leading cells holding the labels of the
      other dimensions, followed by the data cells.

    :param fpath: path of the CSV file to read.
    :param celltype: type of the data cells; detected from the data with
        ``detect_column_type`` when None.
    :return: a ``LabeledArray`` of shape
        ``tuple(len(labels) for labels in possible_values)``.
    :raises Exception: if a cell is empty, if column headers or row header
        combinations are duplicated, or if the number of data cells does not
        match the product of the dimension lengths.
    """
    print(" - reading", fpath)
    # NOTE(review): "rb" is the Python 2 convention for csv; on Python 3
    # csv.reader needs a text-mode file -- confirm the targeted version.
    with open(fpath, "rb") as f:
        reader = csv.reader(f)
        line_stream = skip_comment_cells(strip_rows(reader))
        # builtin next() works on Python 2.6+ and 3, unlike the
        # Python 2-only iterator method .next()
        header = next(line_stream)
        str_table = []
        for line in line_stream:
            if any(value == '' for value in line):
                raise Exception("empty cell found in %s" % fpath)
            str_table.append(line)
    ndim = len(header)

    # handle last dimension header (horizontal values)
    last_d_header = str_table.pop(0)
    # auto-detect type of values for the last d and convert them
    last_d_pvalues = convert_1darray(last_d_header)

    unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues)
    if dupe_last_d:
        print("Duplicate column header value(s) (for '%s') in '%s': %s"
              % (header[-1], fpath,
                 ", ".join(str(v) for v in dupe_last_d)))
        raise Exception("bad data in '%s': found %d "
                        "duplicate column header value(s)"
                        % (fpath, len(dupe_last_d)))

    # handle other dimensions header

    # strip the ndim-1 first columns (this pops the cells out of the rows of
    # str_table in place, leaving only data cells behind)
    headers = [[line.pop(0) for line in str_table]
               for _ in range(ndim - 1)]
    headers = [convert_1darray(pvalues_str) for pvalues_str in headers]
    if ndim > 1:
        # having duplicate values is normal when there are more than 2
        # dimensions but we need to test whether there are duplicates of
        # combinations.
        dupe_combos = list(duplicates(zip(*headers)))
        if dupe_combos:
            print("Duplicate row header value(s) in '%s':" % fpath)
            print(PrettyTable(dupe_combos))
            raise Exception("bad alignment data in '%s': found %d "
                            "duplicate row header value(s)"
                            % (fpath, len(dupe_combos)))

    possible_values = [np.array(list(unique(pvalues)))
                       for pvalues in headers]
    possible_values.append(np.array(unique_last_d))

    shape = tuple(len(values) for values in possible_values)
    num_possible_values = prod(shape)

    # transform the 2d table into a 1d list
    str_table = list(chain.from_iterable(str_table))
    if len(str_table) != num_possible_values:
        raise Exception("incoherent data in '%s': %d data cells "
                        "found while it should be %d based on the number "
                        "of possible values in headers (%s)"
                        % (fpath,
                           len(str_table),
                           num_possible_values,
                           ' * '.join(str(len(values))
                                      for values in possible_values)))

    #TODO: compare time with numpy built-in conversion:
    # if dtype is None, numpy tries to detect the best type itself
    # which it does a good job of if the values are already numeric values
    # if dtype is provided, numpy does a good job to convert from string
    # values.
    if celltype is None:
        celltype = detect_column_type(str_table)
    data = convert_1darray(str_table, celltype)
    array = np.array(data, dtype=celltype)
    return LabeledArray(array.reshape(shape), header, possible_values)
def linear_prob_site(site, psfm):
    """Multiply, column by column, the psfm entry selected by each base.

    For every (column, base) pair of ``zip(psfm, site)`` the entry at the
    position of ``base`` in "ACGT" is picked; the product of the picked
    entries is returned.  A base outside "ACGT" raises ValueError.
    """
    alphabet = "ACGT"
    factors = []
    for column, base in zip(psfm, site):
        factors.append(column[alphabet.index(base)])
    return prod(factors)
def xy_to_idx(x, y):
    """Convert table coordinates (x, y) to an index.

    ``len_pvalues`` and ``folded_exprs`` come from the enclosing scope.
    The row number ``y`` is decomposed into one offset per folded
    expression, the offsets are weighted by the product of the lengths to
    their right, and ``x`` is added.
    """
    # divide by the prod of possible values of expressions to its
    # right, mod by its own number of possible values.
    # Floor division keeps the offsets (and thus the index) integral
    # whether or not true division is in effect.
    offsets = [(y // prod(len_pvalues[v + 1:folded_exprs])) % len_pvalues[v]
               for v in range(folded_exprs)]
    return sum(v * prod(len_pvalues[i + 1:])
               for i, v in enumerate(offsets)) + x
def generic(shape, dtype=float, *_, **__):
    """Return the product of ``shape`` times the item size of ``dtype``.

    A ``shape`` that is not a tuple is treated as a single dimension.
    Any extra positional or keyword arguments are accepted and ignored.
    """
    dims = shape if isinstance(shape, tuple) else (shape,)
    item_size = np.dtype(dtype).itemsize
    return prod(dims) * item_size
def dQ(s):
    """Product over positions of the normalised weight of each base of ``s``.

    ``pss`` comes from the enclosing scope; each of its rows is divided by
    its own sum before the entry for the matching base is used.
    """
    factors = []
    for ps, sj in zip(pss, s):
        weight = ps["ACGT".index(sj)]
        # not quite right but...
        factors.append(weight / float(sum(ps)))
    return prod(factors)
def dLdqij(qss, i, j):
    """Evaluate the expression below for entry (i, j) of ``qss``.

    ``alpha`` and ``matrix`` come from the enclosing scope.  With
    ``S(k) = sum(q * exp(ep))`` over row ``k`` of ``qss`` and ``matrix``,
    the result is::

        alpha * exp(matrix[i][j]) / S(i) * prod(S(k) for all k)
            + log(qss[i][j]) + 1
    """
    def weighted_exp_sum(qs, eps):
        # sum of q * exp(ep) over one row
        return sum(q * exp(ep) for q, ep in zip(qs, eps))

    term0 = alpha * exp(matrix[i][j]) / weighted_exp_sum(qss[i], matrix[i])
    row_sums = [weighted_exp_sum(qsi, epsi)
                for qsi, epsi in zip(qss, matrix)]
    expectation = prod(row_sums)
    return term0 * expectation + log(qss[i][j]) + 1
def dQ(s):
    """Product over positions of the ``qss`` entry selected by each base.

    ``qss`` comes from the enclosing scope; for each (row, base) pair the
    entry at the position of the base in "ACGT" is used as is.
    """
    factors = []
    for qs, sj in zip(qss, s):
        # not quite right but...
        factors.append(qs["ACGT".index(sj)])
    return prod(factors)
def multiplicities(eps):
    """Return fac(n) divided by the product of the factorials of the
    number of occurrences of each distinct value in ``eps``.

    ``n`` and ``fac`` come from the enclosing scope.

    NOTE(review): this is the multinomial count of distinct orderings
    only if ``n == len(eps)`` -- confirm against the caller.
    """
    numerator = fac(n)
    occurrences = Counter(eps).values()
    denominator = prod(fac(count) for count in occurrences)
    return numerator / denominator
def compute_inverse_filter_fft_penalized(ker, eps, ps, betas):
    """
    Build, for every ``beta``, the inverse filters of the kernels and of two
    finite-difference gradient stencils.

    ker:   tensor of shape (nks, hks, wks); kernels smaller than 3 in either
           spatial dimension are zero-padded to at least 3.
    eps:   added to the denominator.
    ps:    size handed to ``psf2otf`` / ``otf2psf`` as ``(ps, ps)``.
    betas: iterable of weights multiplying the summed squared moduli.

    Returns a dict with
      'beta': ``betas`` as given,
      'ker':  ``ker`` concatenated with the two gradient stencils,
              shape (nks + 2, hks, wks),
      'fts':  tensor of shape (len(betas) * nks, 3, ps, ps); for each kernel
              index 0 holds the kernel inverse filter, 1 and 2 the inverse
              filters of the two gradient stencils.
    """
    nks, hks, wks = ker.shape
    if wks < 3 or hks < 3:
        hei = max(3, hks)
        wid = max(3, wks)
        ker2 = torch.zeros(nks, hei, wid, device=ker.device)
        ker2[:, hei // 2 - hks // 2:hei // 2 + hks // 2 + 1,
             wid // 2 - wks // 2:wid // 2 + wks // 2 + 1] = ker
        ker = ker2
    _, hks, wks = ker.shape

    centy = hks // 2  # centre row
    centx = wks // 2  # centre column

    grad_y = torch.zeros(1, 3, 3, device=ker.device)
    grad_y[0, 1, 0] = -1
    grad_y[0, 1, 1] = 1
    grad_x = grad_y.transpose(1, 2)

    # Rows (dim 1, size hks) are sliced around centy and columns (dim 2,
    # size wks) around centx, so that non-square kernels are handled too.
    grad = torch.zeros(2, hks, wks, device=ker.device)
    grad[0, centy - 1:centy + 2, centx - 1:centx + 2] = grad_y
    grad[1, centy - 1:centy + 2, centx - 1:centx + 2] = grad_x

    # compute denom: kernels first, then the two gradient stencils
    otfks = [psf2otf(ker[n], (ps, ps)) for n in range(nks)]
    otfks.append(psf2otf(grad_y[0], (ps, ps)))
    otfks.append(psf2otf(grad_x[0], (ps, ps)))
    mod2_otfks = [utils.prod(utils.conj(K), K) for K in otfks]
    sum_mod2 = torch.stack(mod2_otfks).sum(0)

    inv_filters = {'beta': betas, 'ker': torch.cat([ker, grad]), 'fts': []}
    for beta in betas:
        denom = sum_mod2.mul(beta) + eps
        # the gradient inverse filters do not depend on the kernel index
        inv_grad_y = otf2psf(utils.conj(otfks[-2]) / denom, (ps, ps))
        inv_grad_x = otf2psf(utils.conj(otfks[-1]) / denom, (ps, ps))
        ft = torch.zeros(nks, 3, ps, ps, device=ker.device)
        for n in range(nks):
            ft[n, 0] = otf2psf(utils.conj(otfks[n]) / denom, (ps, ps))
            ft[n, 1] = inv_grad_y
            ft[n, 2] = inv_grad_x
        inv_filters['fts'].append(ft)
    inv_filters['fts'] = torch.cat(inv_filters['fts'])
    return inv_filters
yzw.reverse() uv = [broadcast((_u, _v)) if n2==n else recv(n2) for n2 in range(N-1,-1,-1)] uv.reverse() lmbd = [broadcast(_lmbd) if n2==n else recv(n2) for n2 in range(N-1,-1,-1)] lmbd.reverse() l = Check_Secrets_A(lmbd, D_0, A_history) u,v = Check_Secrets_B0(uv, B0_history) Z = Check_Secrets_B(yzw, B_history) return l, u, v, Z def Unwrap_A((a,b,e), gp, l): assert a**l[0] * b**l[1] * gp == e return (a,b) def Unwrap_B( ((a,ab,b,e), f), Zi, u, v): Ui = prod(f[k]**u[k] for k in range(K)).invert() V = prod(f[k]**v[k] for k in range(K)) ra, rb = a**Ui, b**Zi assert (ra*rb)**V == ab return (ra,rb,e) def Unwrap_Check_Deck( (DB, g), Z, u, v, l): Zi = Z.invert() # Zi is Z^-1 DA = [Unwrap_B(d, Zi, u,v) for d in DB] # check 'e elements' are distinct assert len(set(d[2].v for d in DA)) == len(DA) gp = g**l[2] return [Unwrap_A(d, gp, l) for d in DA] while True: act = cmd.recv()
def compute(number=None, window=13):
    """Return the largest product of ``window`` adjacent digits.

    :param number: digits to scan (anything whose ``str()`` consists of
        decimal digits); defaults to the module-level ``NUMBER``.
    :param window: how many adjacent digits are multiplied (default 13).
    :raises ValueError: if ``window`` is smaller than 1 or there are fewer
        than ``window`` digits.
    """
    if number is None:
        number = NUMBER
    digits = [int(ch) for ch in str(number)]
    if window < 1:
        raise ValueError("window must be at least 1")
    if len(digits) < window:
        raise ValueError("fewer than %d digits to scan" % window)
    # "+ 1" so that the window ending on the very last digit is included
    last_start = len(digits) - window + 1
    return max(prod(digits[i:i + window]) for i in range(last_start))
def evaluate(self, context):
    """Evaluate ``self.expr`` on every group defined by ``self.expressions``.

    The grouping expressions are evaluated on ``context`` (restricted by
    ``self.filter`` when it is set), rows are partitioned by combination of
    their values, and ``self.expr`` is evaluated once per group, once per
    row of the resulting table and once per column (plus a grand total).
    With ``self.percent`` set, every value is expressed as a percentage of
    the grand total.

    Returns a LabeledArray; it is empty when there are no groups.
    """
    target = self.expr
    target_vars = collect_variables(target, context)

    key_exprs = self.expressions
    labels = [str(e) for e in key_exprs]
    columns = [expr_eval(e, context) for e in key_exprs]

    if self.filter is None:
        filtered_columns = columns
        filtered_context = context
    else:
        filter_value = expr_eval(self.filter, context)
        #TODO: make a function out of this, I think we have this pattern
        # in several places
        filtered_columns = []
        for col in columns:
            if isinstance(col, np.ndarray) and col.shape:
                filtered_columns.append(col[filter_value])
            else:
                filtered_columns.append([col])
        filtered_context = context_subset(context, filter_value,
                                          target_vars)

    def eval_on(indices):
        # evaluate the target expression on a subset of the filtered context
        subset = context_subset(filtered_context, indices, target_vars)
        return expr_eval(target, subset)

    possible_values = self.pvalues
    if possible_values is None:
        possible_values = [np.unique(col) for col in filtered_columns]

    # Columns are filtered beforehand rather than handing the filter to
    # partition_nd because it is a bit faster. The indices stay valid since
    # they are only ever applied to filtered_context.
    groups = partition_nd(filtered_columns, True, possible_values)
    if not groups:
        return LabeledArray([], labels, possible_values)

    # one value per group
    data = [eval_on(indices) for indices in groups]

    #TODO: use group_indices_nd directly to avoid using np.unique
    # this is twice as fast (unique is very slow) but breaks because
    # the rest of the code assumes all combinations are present
    # (sketch: d = group_indices_nd(columns, filter_value);
    #  pvalues = sorted(d.keys()); groups = [d[k] for k in pvalues])

    # groups is a flat list: the first grouping expression varies slowest,
    # the last one fastest.
    len_pvalues = [len(vals) for vals in possible_values]
    width = len_pvalues[-1]
    height = prod(len_pvalues[:-1])

    # indices of all rows of the context falling in each table row/column
    rows_indices = []
    for y in range(height):
        start = y * width
        rows_indices.append(
            np.concatenate([groups[start + x] for x in range(width)]))
    cols_indices = []
    for x in range(width):
        cols_indices.append(
            np.concatenate([groups[y * width + x] for y in range(height)]))
    # grand total: union of all columns
    cols_indices.append(np.concatenate(cols_indices))

    # evaluate the expression on each "combined" group (ie compute totals)
    row_totals = [eval_on(inds) for inds in rows_indices]
    col_totals = [eval_on(inds) for inds in cols_indices]

    if self.percent:
        # np.float64 gives +-inf when the total is int(0) instead of
        # Python's built-in behaviour of raising an exception. This can
        # happen at least with the default expr (count()) when the filter
        # yields empty groups.
        total_value = np.float64(col_totals[-1])

        def as_percent(values):
            return [100.0 * value / total_value for value in values]

        data = as_percent(data)
        row_totals = as_percent(row_totals)
        col_totals = as_percent(col_totals)

    # Build the 1d array by hand: np.array(data) would produce a 2d array
    # when data is a list of equally long ndarrays (for example
    # groupby(a, expr=id)) while an array of ndarrays is needed here.
    flat = np.empty(len(data), dtype=type(data[0]))
    flat[:] = data
    # and reshape it
    shaped = flat.reshape(len_pvalues)
    return LabeledArray(shaped, labels, possible_values,
                        row_totals, col_totals)
def compute(self, context, *expressions, **kwargs):
    """Group the rows of ``context`` and evaluate an expression per group.

    Positional arguments are the grouping expressions (at least one;
    constants and lists/tuples are rejected with TypeError).  Recognised
    keyword arguments:

    * ``expr``: expression evaluated on each group (``Count()`` if None),
    * ``filter``: selection applied to the columns and the context,
    * ``percent``: express values as percentages of the grand total
      (forces ``totals``),
    * ``pvalues``: possible values per dimension (``np.unique`` of each
      filtered column when None),
    * ``totals``: whether row and column totals are computed.

    Returns a LabeledArray; it is empty when there are no groups.
    """
    if not expressions:
        raise TypeError("groupby() takes at least 1 argument")

    # TODO: allow lists/tuples of arguments to group by the combinations
    # of keys
    for key in expressions:
        if isinstance(key, (bool, int, float)):
            raise TypeError("groupby() does not work with constant "
                            "arguments")
        if isinstance(key, (tuple, list)):
            raise TypeError("groupby() takes expressions as arguments, "
                            "not a list of expressions")

    # On python 3, we could clean up this code (keyword only arguments).
    expr = kwargs.pop('expr', None)
    if expr is None:
        expr = Count()

    # by = kwargs.pop('by', None)
    filter_value = kwargs.pop('filter', None)
    percent = kwargs.pop('percent', False)
    possible_values = kwargs.pop('pvalues', None)
    totals = kwargs.pop('totals', True)

    expr_vars = [v.name for v in collect_variables(expr)]
    labels = [str(e) for e in expressions]
    columns = [expr_eval(e, context) for e in expressions]
    columns = [expand(c, context_length(context)) for c in columns]

    if filter_value is None:
        filtered_columns = columns
        filtered_context = context
    else:
        filtered_columns = [col[filter_value] for col in columns]
        # FIXME: use the actual filter_expr instead of not_hashable
        filtered_context = context.subset(filter_value, expr_vars,
                                          not_hashable)

    def eval_over(index_lists):
        # Build every sub-context first, then evaluate expr on each one.
        # not_hashable avoids storing the subsets in the cache.
        ctxs = [filtered_context.subset(indices, expr_vars, not_hashable)
                for indices in index_lists]
        return [expr_eval(expr, ctx) for ctx in ctxs]

    if possible_values is None:
        possible_values = [np.unique(col) for col in filtered_columns]

    # Columns are filtered beforehand rather than handing the filter to
    # partition_nd because it is a bit faster. The indices stay valid since
    # they are only ever applied to filtered_context.
    groups = partition_nd(filtered_columns, True, possible_values)
    if not groups:
        return LabeledArray([], labels, possible_values)

    # one value per group
    data = eval_over(groups)

    # TODO: use group_indices_nd directly to avoid using np.unique
    # this is twice as fast (unique is very slow) but breaks because
    # the rest of the code assumes all combinations are present
    # (sketch: d = group_indices_nd(columns, filter_value);
    #  pvalues = sorted(d.keys()); groups = [d[k] for k in pvalues])

    # groups is a flat list: the first grouping expression varies slowest,
    # the last one fastest.
    len_pvalues = [len(vals) for vals in possible_values]

    # percentages are relative to the grand total, so totals are required
    if percent:
        totals = True

    if totals:
        width = len_pvalues[-1]
        height = prod(len_pvalues[:-1])

        rows_indices = []
        for y in range(height):
            start = y * width
            rows_indices.append(
                np.concatenate([groups[start + x] for x in range(width)]))
        cols_indices = []
        for x in range(width):
            cols_indices.append(
                np.concatenate([groups[y * width + x]
                                for y in range(height)]))
        # grand total: union of all columns
        cols_indices.append(np.concatenate(cols_indices))

        # evaluate the expression on each "combined" group (ie compute
        # totals)
        row_totals = eval_over(rows_indices)
        col_totals = eval_over(cols_indices)
    else:
        row_totals = None
        col_totals = None

    if percent:
        # np.float64 gives +-inf when the total is int(0) instead of
        # Python's built-in behaviour of raising an exception. This can
        # happen at least with the default expr (count()) when the filter
        # yields empty groups.
        total_value = np.float64(col_totals[-1])

        def as_percent(values):
            return [100.0 * value / total_value for value in values]

        data = as_percent(data)
        row_totals = as_percent(row_totals)
        col_totals = as_percent(col_totals)

    # Build the 1d array by hand: np.array(data) would produce a 2d array
    # when data is a list of equally long ndarrays (for example
    # groupby(a, expr=id)) while an array of ndarrays is needed here.
    flat = np.empty(len(data), dtype=type(data[0]))
    flat[:] = data
    # and reshape it
    shaped = flat.reshape(len_pvalues)
    return LabeledArray(shaped, labels, possible_values,
                        row_totals, col_totals)