def interpret_main_experiment(results_dict, named_fs=None):
    """Plot one heatmap per statistic over the mutation-rate grid.

    Parameters
    ----------
    results_dict : dict
        Maps ``(rec_mut, site_mut)`` to a ``(pop, hist)`` pair; ``pop`` is an
        iterable of ``(organism, fitness)`` pairs.
    named_fs : list of (callable, str), optional
        Statistics to plot and their panel titles.  Each callable takes one
        organism; its mean over the population fills one heatmap cell.
        Defaults to fitness, motif IC and motif MI.
    """
    if named_fs is None:
        named_fs = [
            (fitness, "fitness"),
            (lambda org: motif_ic(extract_sites(org)), "Motif IC"),
            (lambda org: total_motif_mi(extract_sites(org)), "Motif MI"),
        ]
    rec_muts, site_muts = [sorted(set(xs))
                           for xs in transpose(list(results_dict.keys()))]
    fs, names = transpose(named_fs)
    # plt.subplot needs integer grid dimensions; ceil() can return a float.
    subplot_dimension = int(ceil(sqrt(len(fs))))
    for idx, f in enumerate(fs):
        mat = np.zeros((len(rec_muts), len(site_muts)))
        for i, rec_mut in enumerate(rec_muts):
            for j, site_mut in enumerate(site_muts):
                pop, hist = results_dict[(rec_mut, site_mut)]
                mat[i, j] = mean([f(x) for x, fit in pop])
                print("%s %s %s" % (i, j, mat[i, j]))
        # Subplot positions are 1-based, so shift the 0-based loop index.
        plt.subplot(subplot_dimension, subplot_dimension, idx + 1)
        plt.imshow(mat, interpolation='none')
        plt.xticks(range(len(site_muts)), [str(m) for m in site_muts])
        plt.yticks(range(len(rec_muts)), [str(m) for m in rec_muts])
        plt.xlabel("site mutation rate")
        plt.ylabel("rec mutation rate")
        plt.colorbar()
        plt.title(names[idx])
    plt.show()
def make_thin(im):
    """Run iterative thinning on ``im``, write the result back and show it.

    Pixels are first mapped to 0.0 (value > 10) or 1.0, the structuring
    elements are applied until ``apply_all_structures`` returns a falsy
    value, and the pixels are then mapped back with ``255.0 * (1 - x)``.
    """
    pixels = utils.load_image(im)
    utils.apply_to_each_pixel(pixels, lambda v: 0.0 if v > 10 else 1.0)
    print("loading phase done")

    # Two base templates plus their transposed / reversed variants.
    edge = [[1, 1, 1], [0, 1, 0], [0.1, 0.1, 0.1]]
    edge_t = utils.transpose(edge)
    edge_r = reverse(edge)
    edge_rt = utils.transpose(edge_r)
    corner = [[0, 1, 0], [0.1, 1, 1], [0.1, 0.1, 0]]
    corner_t = utils.transpose(corner)
    corner_tr = reverse(corner_t)
    # NOTE(review): this variant is built but never added to `thinners`
    # below -- confirm whether the omission is intentional.
    corner_r = reverse(corner)
    thinners = [edge, edge_t, edge_r, edge_rt, corner, corner_tr, corner_t]

    while True:
        changed = apply_all_structures(pixels, thinners)
        print("single thining phase done")
        if not changed:
            break
    print("thining done")

    utils.apply_to_each_pixel(pixels, lambda v: 255.0 * (1 - v))
    utils.load_pixels(im, pixels)
    im.show()
def interpret_main_experiment(results_dict, named_fs=None):
    """Draw a grid of heatmaps, one per named statistic.

    ``results_dict`` maps ``(rec_mut, site_mut)`` to ``(pop, hist)``, where
    ``pop`` is an iterable of ``(organism, fitness)`` pairs.  ``named_fs`` is
    a list of ``(function, title)`` pairs; each function is averaged over the
    organisms of a population.  When omitted, fitness, motif IC and motif MI
    are plotted.
    """
    if named_fs is None:
        named_fs = [
            (fitness, "fitness"),
            (lambda org: motif_ic(extract_sites(org)), "Motif IC"),
            (lambda org: total_motif_mi(extract_sites(org)), "Motif MI"),
        ]
    key_columns = transpose(list(results_dict.keys()))
    rec_muts, site_muts = [sorted(set(column)) for column in key_columns]
    fs, names = transpose(named_fs)
    # Grid dimensions must be integers for plt.subplot.
    subplot_dimension = int(ceil(sqrt(len(fs))))
    for idx, (f, title) in enumerate(zip(fs, names)):
        mat = np.zeros((len(rec_muts), len(site_muts)))
        for i, rec_mut in enumerate(rec_muts):
            for j, site_mut in enumerate(site_muts):
                pop, hist = results_dict[(rec_mut, site_mut)]
                mat[i, j] = mean([f(x) for x, fit in pop])
                print("%s %s %s" % (i, j, mat[i, j]))
        # matplotlib numbers subplots from 1, not 0.
        plt.subplot(subplot_dimension, subplot_dimension, idx + 1)
        plt.imshow(mat, interpolation='none')
        plt.xticks(range(len(site_muts)), [str(m) for m in site_muts])
        plt.yticks(range(len(rec_muts)), [str(m) for m in rec_muts])
        plt.xlabel("site mutation rate")
        plt.ylabel("rec mutation rate")
        plt.colorbar()
        plt.title(title)
    plt.show()
def make_thin(im):
    """Run iterative thinning on ``im``, write the result back and show it.

    Pixels are mapped to 0.0 (value > 10) or 1.0, the structuring elements
    are applied until ``apply_all_structures`` returns a falsy value, and the
    pixels are mapped back with ``255.0 * (1 - x)`` before being loaded into
    ``im``.
    """
    loaded = utils.load_image(im)
    utils.apply_to_each_pixel(loaded, lambda x: 0.0 if x > 10 else 1.0)
    # Function-call form prints the same text on Python 2 and Python 3.
    print("loading phase done")
    t1 = [[1, 1, 1], [0, 1, 0], [0.1, 0.1, 0.1]]
    t2 = utils.transpose(t1)
    t3 = reverse(t1)
    t4 = utils.transpose(t3)
    t5 = [[0, 1, 0], [0.1, 1, 1], [0.1, 0.1, 0]]
    t7 = utils.transpose(t5)
    t6 = reverse(t7)
    # NOTE(review): t8 is built but not part of `thinners` -- confirm
    # whether leaving it out is intentional.
    t8 = reverse(t5)
    thinners = [t1, t2, t3, t4, t5, t6, t7]
    usage = True
    while usage:
        usage = apply_all_structures(loaded, thinners)
        print("single thining phase done")
    print("thining done")
    utils.apply_to_each_pixel(loaded, lambda x: 255.0 * (1 - x))
    utils.load_pixels(im, loaded)
    im.show()
def explore_coupling_const(iterations=1000000):
    """Explore spin probabilities as a function of coupling strength.

    For each coupling constant ``J`` a Metropolis-Hastings chain over ``N``
    0/1 spins is run and the per-site means of the chain are plotted
    against ``J``.

    Returns
    -------
    list of (J, ps)
        Each coupling constant with the list of per-site chain means.
    """
    N = 10
    x0 = [0] * N
    hs = [log(1000000)] * N

    def hamil(xs, J):
        return dot(xs, hs) + J * (xs[0] + sum([xi * xj
                                               for (xi, xj) in pairs(xs)]))

    Js = interpolate(-16, -8 + 1, 20)

    def proposal(xs):
        return [int(random.random() < 0.5) for i in range(N)]

    results = []
    for J in Js:
        chain = mh(f=lambda xs: -hamil(xs, J),
                   proposal=proposal,
                   x0=x0,
                   use_log=True,
                   iterations=iterations)
        # Build a real list: a lazy map object could only be read once.
        ps = [mean(col) for col in transpose(chain)]
        results.append((J, ps))
    Js, pss = transpose(results)
    pss = transpose(pss)
    colors = "bgrcmyk"
    for i, ps in enumerate(pss):
        color = colors[i % len(colors)]
        plt.plot(Js, ps, marker="o", linestyle="", color=color)
        errs = [p + 1.96 * sqrt(p * (1 - p) / iterations) ** (i + 1)
                + p ** (i + 1) for p in pss[0]]
        print("%s %s" % (i, errs))
        plt.plot(Js, [p ** (i + 1) for p in pss[0]])
    # Reference line at the smallest frequency one chain can resolve.
    plt.plot(Js, [1.0 / iterations for _ in Js])
    return results
def plot_points(ps):
    """Scatter the 3-D points ``ps`` over the triangle joining the unit vectors."""
    triangle = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]]
    axes = plt.figure().add_subplot(111, projection='3d')
    edge_x, edge_y, edge_z = transpose(triangle)
    axes.plot(edge_x, edge_y, edge_z)
    point_x, point_y, point_z = transpose(ps)
    axes.scatter(point_x, point_y, point_z)
def downsample_signal(data, fs, target_fs):
    '''
    Downsample the input data by an integer decimation ratio.

    Parameters
    ----------
    data : np.ndarray
        Data to downsample.
    fs : int / float
        Sampling frequency of the data.
    target_fs : int / float
        Target sampling frequency of the data.

    Returns
    -------
    y : np.ndarray
        Downsampled data.
    actual_fs : int / float
        Actual sample frequency, ``fs / decimation_ratio``; it differs from
        ``target_fs`` when ``fs / target_fs`` is not an integer.

    Raises
    ------
    ValueError
        If ``fs < target_fs``.
    '''
    # Validate before doing any work on the data.
    if fs < target_fs:
        raise ValueError("ERROR: fs < target_fs")
    decimation_ratio = np.round(fs / target_fs).astype('int')
    data = transpose(data, 'row')
    try:
        y = decimate(data, decimation_ratio, 3, zero_phase=True)
    except Exception:
        # Best-effort fallback when the zero-phase call fails for any
        # reason; unlike a bare `except`, KeyboardInterrupt and SystemExit
        # still propagate.
        y = decimate(data, decimation_ratio, 3)
    actual_fs = fs / decimation_ratio
    return transpose(y, 'column'), actual_fs
def interpret_main_experiment(results_dict):
    """Plot one heatmap per statistic over the (mutation rate, sigma) grid.

    ``results_dict`` maps ``(mutation_rate, sigma)`` to ``(pop, hist, code)``.
    The statistics come from ``named_fs``, a list of ``(function, title)``
    pairs that is not defined in this function and must be in scope; each
    function is called as ``f(pop, hist, code)``.  Cells whose mean-fitness
    history (second half) is not stationary are left as NaN.
    """
    mutation_rates, sigmas = [sorted(set(xs))
                              for xs in transpose(list(results_dict.keys()))]
    fs, names = transpose(named_fs)
    # plt.subplot needs integer grid dimensions; ceil() can return a float.
    subplot_dimension = int(ceil(sqrt(len(fs))))
    for idx, f in enumerate(fs):
        mat = np.zeros((len(mutation_rates), len(sigmas)))
        for i, mutation_rate in enumerate(mutation_rates):
            for j, sigma in enumerate(sigmas):
                pop, hist, code = results_dict[(mutation_rate, sigma)]
                mean_fits = [row[1] for row in hist]
                # Floor division: a slice index must be an integer.
                stationary = is_stationary(mean_fits[len(mean_fits) // 2:])
                mat[i, j] = f(pop, hist, code) if stationary else np.nan
                print("%s %s %s" % (i, j, mat[i, j]))
        # Subplot positions are 1-based, so shift the 0-based loop index.
        plt.subplot(subplot_dimension, subplot_dimension, idx + 1)
        plt.imshow(mat, interpolation='none')
        plt.xticks(range(len(sigmas)), [str(s) for s in sigmas])
        plt.yticks(range(len(mutation_rates)),
                   [str(m) for m in mutation_rates])
        plt.xlabel("sigma")
        plt.ylabel("mutation rate")
        plt.colorbar()
        plt.title(names[idx])
    plt.show()
def encrypt_128(block, expanded_keys):
    """Encrypt a single 16-byte block using AES 128 and return it.

    ``expanded_keys[0]`` is added first, entries 1-9 are used by the nine
    main rounds, and the last entry is used by the final round, which has no
    column mixing.
    """
    def _sub_and_shift(state):
        # Columns are stored as rows in this implementation, so the state is
        # transposed around shift_rows.
        sub_bytes(state)
        state = transpose(state)
        shift_rows(state)
        return transpose(state)

    add_round_key(block, expanded_keys[0])

    for round_no in range(1, 10):
        block = _sub_and_shift(block)
        block = mix_columns(block)
        add_round_key(block, expanded_keys[round_no])

    # Final round.
    block = _sub_and_shift(block)
    add_round_key(block, expanded_keys[-1])
    return block
def plot_grad_descent(n):
    """Plot ``n`` gradient-descent trajectories over the unit-vector triangle."""
    triangle = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]]
    axes = plt.figure().add_subplot(111, projection='3d')
    edge_x, edge_y, edge_z = transpose(triangle)
    axes.plot(edge_x, edge_y, edge_z)
    for _ in tqdm(range(n)):
        trajectory = grad_descent(3, iterations=1000, eta=0.01)
        axes.plot(*transpose(trajectory))
def pmtm(data, NW=4, Fs=None, NFFT=None):
    '''
    Compute the power spectrum via multitapering.

    Parameters
    ----------
    data : np.ndarray
        Input data.  A 1-D vector is treated as a single channel; otherwise
        the array is reoriented with ``transpose(data, 'column')``.
    NW : int / float
        Passed to ``dpss``; ``Kmax = 2 * NW - 1`` tapers are requested.
    Fs : int / float, optional
        The sampling frequency.  Defaults to ``2 * pi``.
    NFFT : int, optional
        Number of frequency points to evaluate the PSD at.  Defaults to
        ``max(256, 2**nextpow2(N))`` where ``N`` is the data length.

    Returns
    -------
    Sk_complex : np.ndarray or None
        Tapered FFTs of the LAST channel processed (``None`` when the data
        has no channels).
    Sk : np.ndarray
        Power spectrum computed via MTM, one column per channel.
    w :
        Value returned by ``pmtm_params(Fs, NFFT)``.
    NFFT : int
        The number of frequency points actually used.
    '''
    if data.ndim == 1:
        data = np.expand_dims(data, axis=1)
    else:
        data = transpose(data, 'column')

    # Data length and number of channels.
    N = data.shape[0]
    channels = data.shape[1]

    if Fs is None:
        Fs = 2 * np.pi

    if NFFT is None:
        NFFT = max(256, 2**nextpow2(N))

    w = pmtm_params(Fs, NFFT)

    # Compute tapers.
    tapers, concentration = dpss(N, NW, Kmax=2 * NW - 1, return_ratios=True)
    tapers = transpose(tapers, 'column')

    # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
    Sk = np.full((NFFT, channels), np.nan)
    Sk_complex = None  # stays None when there is nothing to transform

    for channel in range(channels):
        # FFT of every tapered copy of this channel.
        Sk_complex = np.fft.fft(
            np.multiply(tapers.transpose(), data[:, channel]), NFFT)
        # Average the per-taper power spectra.
        Sk[:, channel] = np.mean(abs(Sk_complex)**2, axis=0)

    return Sk_complex, Sk, w, NFFT
def plot_flattened_transport(n):
    """Plot ``n`` projected transport paths from random points to a fixed one.

    Each path is ``circular_transport(p, q)`` with every point passed through
    ``project_to_simplex``; ``q`` is the projection of ``(1, 1, 1)``.
    """
    triangle = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]]
    axes = plt.figure().add_subplot(111, projection='3d')
    edge_x, edge_y, edge_z = transpose(triangle)
    axes.plot(edge_x, edge_y, edge_z)
    target = project_to_simplex(np.array([1.0, 1.0, 1.0]))
    for _ in range(n):
        start = simplex_sample(3)
        path = [project_to_simplex(pt)
                for pt in circular_transport(start, target)]
        axes.plot(*transpose(path))
def deringify_motif(motif):
    """Debias a motif built by mutating from a ringer.

    Every column gets its own relabelling of the bases, mapping "ACGT" onto
    ``permute("ACGT")``; the relabelled columns are joined back into sites.
    """
    def relabel(col):
        mapping = dict(zip("ACGT", permute("ACGT")))
        return [mapping[base] for base in col]

    relabelled_cols = [relabel(col) for col in transpose(motif)]
    return ["".join(site) for site in transpose(relabelled_cols)]
def analyze_column_frequencies():
    """Do columnwise frequencies reveal stable patterns that could be
    explained by amino acid preferences?

    Scatter-plots every pair of base frequencies (A, C, G, T) across all
    motif columns of ``tfdf_obj``, one subplot per pair.
    """
    def dna_freqs(xs):
        return [xs.count(b) / float(len(xs)) for b in "ACGT"]

    all_freqs = concat([[dna_freqs(col)
                         for col in transpose(getattr(tfdf_obj, tf))]
                        for tf in tfdf_obj.tfs])
    # Loop-invariant: one row of frequencies per base.
    cols = transpose(all_freqs)
    for k, (i, j) in enumerate(choose2(list(range(4)))):
        # Subplot positions are 1-based.
        plt.subplot(4, 4, k + 1)
        plt.scatter(cols[i], cols[j])
def test_Board_makeMove(self):
    """make_move puts alternating marks (1, 2) in the lowest free cell."""
    board = Board()

    def row(index):
        # Rows of the transposed state are what the assertions inspect.
        return utils.transpose(board.state)[index]

    self.assertEqual(row(5), [0, 0, 0, 0, 0, 0, 0])
    board.make_move(1)
    self.assertEqual(row(5), [0, 1, 0, 0, 0, 0, 0])
    board.make_move(1)
    self.assertEqual(row(4), [0, 2, 0, 0, 0, 0, 0])
    board.make_move(6)
    self.assertEqual(row(5), [0, 1, 0, 0, 0, 0, 1])
    board.make_move(5)
    self.assertEqual(row(5), [0, 1, 0, 0, 0, 2, 1])
def forward(self, U, sigma_u, proxy=False):
    """Return ``(Y, sigma)`` for input ``U``.

    The state matrix is the detached ``self.A`` plus the outer product of the
    rescaled ``self.u`` and ``self.v``; ``B``, ``C`` and ``D`` are used
    detached.  ``sigma`` is whatever ``self.perturb`` returns.
    """
    base = self.A.detach()
    state_mat = base + torch.ger(self.rescale(self.u), self.rescale(self.v))
    input_mat = self.B.detach()
    output_mat = self.C.detach()
    feed_mat = self.D.detach()

    inputs = transpose(U)
    states = self.f.apply(state_mat, input_mat, self.X0, inputs)
    Y = transpose(output_mat @ states + feed_mat @ inputs)
    sigma = self.perturb(state_mat, input_mat, output_mat, feed_mat,
                         sigma_u, proxy=proxy)
    return Y, sigma
def forward(self, U, sigma_u, proxy=False):
    """Return ``(Y, sigma)`` for input ``U``.

    ``self.A_p`` is first overwritten in place with its strictly upper
    triangular part and then added to the detached ``self.A``; ``B``, ``C``
    and ``D`` are used detached.  ``sigma`` is whatever ``self.perturb``
    returns.
    """
    strict_upper = self.A_p.triu(1)
    self.A_p.data.copy_(strict_upper)

    state_mat = self.A.detach() + self.A_p
    input_mat = self.B.detach()
    output_mat = self.C.detach()
    feed_mat = self.D.detach()

    inputs = transpose(U)
    states = self.f.apply(state_mat, input_mat, self.X0, inputs)
    Y = transpose(output_mat @ states + feed_mat @ inputs)
    sigma = self.perturb(state_mat, input_mat, output_mat, feed_mat,
                         sigma_u, proxy=proxy)
    return Y, sigma
def matrix_from_motif(seqs, pc=1):
    """Build a centred negative-log-frequency matrix from aligned sites.

    Each entry is ``-log`` of the pseudocounted base frequency in a column
    (pseudocount ``pc`` per base); the mean of the column means is then
    subtracted from every entry.
    """
    columns = transpose(seqs)
    N = float(len(seqs))

    raw_mat = []
    for col in columns:
        raw_mat.append([-log((col.count(b) + pc) / (N + 4 * pc))
                        for b in "ACGT"])

    # One global offset: the average of the per-column averages.
    avg = mean([mean(row) for row in raw_mat])
    return [[entry - avg for entry in row] for row in raw_mat]
def interpret_main_experiment(results_dict, f=None):
    """Plot seven heatmaps over the (site, rec) mutation-rate grid.

    ``results_dict`` maps ``(site_mut, rec_mut)`` to ``(pop, hist)``.  While
    ``f`` is None, panel ``idx`` shows column ``idx`` of the last history
    row.  Panels 6 and 7 always use ``recognizer_non_linearity`` and
    ``motif_non_linearity`` averaged over the population; a caller-supplied
    ``f`` is used the same way for panels 1-5.
    """
    site_muts, rec_muts = [sorted(set(xs))
                           for xs in transpose(list(results_dict.keys()))]
    titles = ("turn f mean_fits mean_dna_ic mean_rec mean_recced "
              "rec_nonlinearity motif_nonlinearity").split()
    for idx in range(1, 7 + 1):
        if idx == 6:
            f = recognizer_non_linearity
        elif idx == 7:
            f = motif_non_linearity
        mat = np.zeros((len(site_muts), len(rec_muts)))
        for i, site_mut in enumerate(site_muts):
            for j, rec_mut in enumerate(rec_muts):
                pop, hist = results_dict[(site_mut, rec_mut)]
                if f is None:
                    last = hist[-1]
                    mat[i, j] = last[idx]
                    print("%s %s %s %s %s"
                          % (i, j, site_mut, rec_mut, mat[i, j]))
                else:
                    mat[i, j] = mean([f(x) for x, fit in pop])
                    print("%s %s %s" % (i, j, mat[i, j]))
        plt.subplot(3, 3, idx)
        plt.imshow(mat, interpolation='none')
        plt.xticks(range(len(rec_muts)), [str(m) for m in rec_muts])
        plt.yticks(range(len(site_muts)), [str(m) for m in site_muts])
        plt.xlabel("rec mutation rate")
        plt.ylabel("site mutation rate")
        plt.colorbar()
        plt.title(titles[idx])
    plt.show()
def update(self):
    """Updates all derived attributes.

    Refreshes the notes via ``updateNotes`` and rebuilds ``self.chords``,
    ``self.all_chords`` (chords keyed by each mode's root note) and
    ``self.all_chords_transposed``.
    """
    console.log('updating scale ' + self.root.note + ' ' + self.scale)
    self.updateNotes()
    scaleNameStr = '{0} {1}'.format(self.root.note, self.scale)
    self.chords = Tonal.Scale.chords(scaleNameStr)
    self.all_chords = {}
    modes = Tonal.Scale.modeNames(scaleNameStr)
    # TODO: For blues, we only get 2 alternate modes.
    # TODO: It's possible to use Tonal.Detect.chords, but result is pretty fuzzy
    for mode in modes:
        # Each mode is indexed as (root note, mode name).
        rootNote = mode[0]
        chordsForNote = Tonal.Scale.chords('{0} {1}'.format(
            rootNote, mode[1]))
        if self.config.simple_chords:
            # Keep only chords present in the configured chord set.
            chordsForNote = chordsForNote.filter(
                lambda c: constants.CHORDS_SET.has(c))
        self.all_chords[rootNote] = chordsForNote
    # Construct transposed array for available chords for each note in scale.
    arrays = []
    for note in self.all_notes:
        if note.note in self.all_chords:
            arrays.append(self.all_chords[note.note])
        else:
            # Empty list of chords for notes that are not in scale.
            arrays.append([])
    self.all_chords_transposed = utils.transpose(arrays)
def partial_sobels(im):
    """Return ``(xSobel, ySobel)``, the two Sobel-filtered copies of ``im``.

    The y response uses ``sobelOperator`` as a 3x3 kernel; the x response
    uses its transpose.
    """
    def apply_kernel(operator):
        kernel = ImageFilter.Kernel((3, 3), utils.flatten(operator), 1)
        return im.filter(kernel)

    ySobel = apply_kernel(sobelOperator)
    xSobel = apply_kernel(utils.transpose(sobelOperator))
    return (xSobel, ySobel)
def partial_sobels(im):
    """Return ``(xSobel, ySobel)`` as tuples built from the filtered images.

    The y response uses ``sobelOperator`` as a 3x3 kernel; the x response
    uses its transpose.
    """
    y_kernel = ImageFilter.Kernel((3, 3), flatten(sobelOperator), 1)
    ySobel = tuple(im.filter(y_kernel))

    x_kernel = ImageFilter.Kernel((3, 3), flatten(transpose(sobelOperator)), 1)
    xSobel = tuple(im.filter(x_kernel))

    return (xSobel, ySobel)
def read_freq_from_log(log_str):
    """Extract per-mode frequency data from every harmonic section of a log.

    Returns one entry per ``LOG_HARMONIC_MATCH`` hit; each entry is the
    transpose of ``[freqs, red_masses, freq_consts, ir_inten]``, so it holds
    one row per frequency.  Asserts that the four lists have equal length.
    """
    def collect(pattern, group, text):
        # Parse the whitespace-separated floats of every match, flattened.
        rows = [list(map(float, hit.groupdict()[group].split()))
                for hit in pattern.finditer(text)]
        return utils.flatten(rows)

    freq_sets = []
    for section in gaussian_match.LOG_HARMONIC_MATCH.finditer(log_str):
        info = section.groupdict()['info']
        freqs = collect(gaussian_match.LOG_FREQ_MATCH, 'freqs', info)
        red_masses = collect(gaussian_match.LOG_FREQ_RED_MASS_MATCH,
                             'masses', info)
        freq_consts = collect(gaussian_match.LOG_FREQ_CONST_MATCH,
                              'consts', info)
        ir_inten = collect(gaussian_match.LOG_IR_INTEN_MATCH, 'inten', info)
        assert (len(freqs) == len(red_masses))
        assert (len(freqs) == len(freq_consts))
        assert (len(freqs) == len(ir_inten))
        freq_sets.append(
            utils.transpose([freqs, red_masses, freq_consts, ir_inten]))
    return freq_sets
def plot_main_experiment_trajectories(results):
    """Plot column 1 of every run's history and show the combined figure.

    ``results`` maps a key to ``(pop, hist)``.  For each run the key is
    printed and the trajectory is also passed to ``ou_param_recovery``.
    """
    for k, (pop, hist) in results.items():
        # print(k) emits the same text on Python 2 and Python 3.
        print(k)
        traj = transpose(hist)[1]
        ou_param_recovery(traj)
        plt.plot(traj)
    plt.show()
def distribution(mels, keys):
    """Return pitch-class distributions for major- and minor-key melodies.

    Melodies are first passed through ``utils.transpose(mels, keys)``.  Only
    pitch classes contained in ``important`` are counted; a melody counts as
    major when ``key[1] == 1`` and as minor otherwise.

    Returns
    -------
    (maj_distr, min_distr) : tuple of list of float
        Twelve relative frequencies each.  A mode with no counted notes
        yields all zeros instead of raising ZeroDivisionError.
    """
    maj_pc_counts = [0] * 12
    min_pc_counts = [0] * 12
    transposed_mels = utils.transpose(mels, keys)
    for mel, key in zip(transposed_mels, keys):
        counts = maj_pc_counts if key[1] == 1 else min_pc_counts
        for pitch in mel:
            pc = int(pitch) % 12
            if pc not in important:
                continue
            counts[pc] += 1

    def normalise(counts):
        total = sum(counts)
        if total == 0:
            return [0.0] * len(counts)
        return [(c * 1.0) / total for c in counts]

    return normalise(maj_pc_counts), normalise(min_pc_counts)
def uniform_motif_accept_reject(n,
                                L,
                                desired_ic,
                                epsilon=0.1,
                                beta=None,
                                ps=None,
                                count_sampler=None,
                                verbose=False):
    """Sample a motif whose IC lies within ``epsilon`` of ``desired_ic``.

    Candidate motifs of ``n`` sites and ``L`` columns are drawn column by
    column from ``count_sampler`` and redrawn until they fall in the IC
    window.  A candidate is then accepted with probability
    ``1 / exp(beta * motif_ic(motif) - beta * (desired_ic - epsilon))``.

    ``beta``, ``ps`` and ``count_sampler`` are computed when not supplied.
    Returns the accepted motif as a list of site strings.
    """
    print("uniform motif accept reject: %s %s %s %s"
          % (n, L, desired_ic, beta))
    correction_per_col = 3 / (2 * log(2) * n)
    desired_ic_for_beta = desired_ic + L * correction_per_col
    if desired_ic_for_beta == 2 * L:
        # if we reach the upper limit, things break down
        cols = [sample_col_from_count((0, 0, 0, n)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]
    if beta is None:
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
        if verbose:
            print("beta: %s" % (beta,))
    if ps is None:
        ps = count_ps_from_beta(n, beta)
    if count_sampler is None:
        count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def rQ_raw():
        counts = [count_sampler() for i in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        # A real list: the motif is read more than once downstream, which a
        # lazy map object would not survive.
        return ["".join(site) for site in transpose(cols)]

    def rQ():
        return sample_until(lambda M: inrange(M, desired_ic, epsilon),
                            rQ_raw,
                            1,
                            progress_bar=False)[0]

    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    log_M = -beta * Imin
    if verbose:
        print("Imin, Imax, log_M: %s %s %s" % (Imin, Imax, log_M))

    def dQ(motif):
        return exp(beta * motif_ic(motif) + log_M)

    def AR(motif):
        return 1.0 / dQ(motif)

    trials = 0
    while True:
        trials += 1
        motif = rQ()
        r = random.random()
        if r < AR(motif):
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, AR(motif)))
def plot_main_experiment_trajectories(results):
    """Overlay column 1 of each run's history on one figure and show it.

    ``results`` maps a key to ``(pop, hist)``; each key is printed and each
    trajectory is also handed to ``ou_param_recovery``.
    """
    for k, (pop, hist) in results.items():
        # Function-call form: identical output on Python 2 and Python 3.
        print(k)
        traj = transpose(hist)[1]
        ou_param_recovery(traj)
        plt.plot(traj)
    plt.show()
def test_estimate_stationary_statistic_ref_framework():
    """Plot predicted and observed motif IC for LexA over ten Ne values in [1, 5].

    Returns the ``(pred, obs)`` sequences.
    """
    sites = Escherichia_coli.LexA
    matrix = make_pssm(sites)
    n = len(sites)
    Nes = np.linspace(1, 5, 10)

    estimates = []
    for Ne in Nes:
        estimates.append(
            test_estimate_stationary_statistic_ref(matrix, n, Ne, T=motif_ic))
    pred, obs = transpose(estimates)

    plt.plot(Nes, pred)
    plt.plot(Nes, obs)
    return pred, obs
def interpret_trajectories(results_dict):
    """Plot each run's mean fitness, normalised by its first value, on a log axis.

    ``results_dict`` maps a key to ``(pop, hist, code)``; only the first
    100000 history rows are used and each row must unpack into
    ``(sel_fit, mean_fit)``.
    """
    for k, (pop, hist, code) in results_dict.items():
        # print(k) emits the same text on Python 2 and Python 3.
        print(k)
        sel_fits, mean_fits = transpose(hist[:100000])
        plt.plot(np.array(mean_fits) / mean_fits[0], label=k)
    plt.legend()
    plt.semilogy()
    plt.show()
def fourier_check3():
    """Print the L1 distance between a random distribution and its
    reconstruction from ``fourier`` coefficients and Laplacian eigenvectors.
    """
    L = 2
    K = int(4**L)
    ps = np.array(simplex_sample(K))
    # Compute the Laplacian once and reuse it for the eigendecomposition.
    Lap = norm_laplacian(L)
    lambdas, V = np.linalg.eig(Lap)
    ps_hat = fourier(ps)
    reconstruction = sum(ph * np.array(v)
                         for ph, v in zip(ps_hat, transpose(V)))
    print(L1(ps, reconstruction))
def test_plot():
    """Draw the path returned by ``main_example`` as a 3-D line and show it."""
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

    coordinate_rows = transpose(main_example())
    xs, ys, zs = map(concat, coordinate_rows)
    axes = plt.figure().add_subplot(111, projection='3d')
    axes.plot(xs, ys, zs)
    plt.show()
def __init__(self, reactions, init_state, species_names):
    """Set up a simulation at time 0 in state ``init_state``.

    ``reactions`` is split by ``transpose`` into stoichiometry vectors and
    propensities.  The history starts with the initial ``(time, state)``.
    """
    stoich_vectors, propensities = transpose(reactions)
    self.stoich_vectors = stoich_vectors
    self.propensities = propensities
    self.species_names = species_names

    self.state = init_state
    self.time = 0
    self.history = [(self.time, self.state)]

    self.reactions_performed = 0
    self.finished_run = False
    self.verbose = False
def detect_chunks(motif):
    """Count the contiguous runs of high-IC columns in ``motif``.

    Column ICs (``2 - entropy``) are classified by the function returned
    from ``cluster2``; a column is "on" when that function exceeds 0.5.  The
    result is half the number of on/off transitions, with the two ends
    counted as transitions when they are on.
    """
    # A list, not a lazy map: the values are read twice below.
    col_ics = [2 - entropy(col, correct=True, alphabet_size=4)
               for col in transpose(motif)]
    f = cluster2(col_ics)
    profiles = [f(col_ic) > 0.5 for col_ic in col_ics]
    transitions = ([profiles[0]]
                   + [x != y for (x, y) in pairs(profiles)]
                   + [profiles[-1]])
    return sum(transitions) / 2.0
def weighted_regress_dep(xs, ys, sample_points=100):
    """Fit a line to points resampled with weights that favour small ``|y|``.

    Each point gets weight ``exp(-|y| / mean(|y|))``; ``sample_points``
    points are drawn with ``inverse_cdf_sample`` and a degree-1 ``polyfit``
    of the sample is returned.
    """
    avg_yval = mean([abs(y) for y in ys])
    ws = [exp(-abs(y) / avg_yval) for y in ys]
    # Materialise once: a zip iterator would be exhausted after one draw.
    points = list(zip(xs, ys))
    regress_points = []
    for _ in range(sample_points):
        rx, ry = inverse_cdf_sample(points, ws, normalized=False)
        regress_points.append((rx, ry))
    rxs, rys = transpose(regress_points)
    return polyfit(rxs, rys, 1)
def payoff(self):
    """Return a dict mapping each of the two players to its mean payoff."""
    first, second = self.players
    # One payoff pair per recorded (move1, move2) in the history.
    per_round = (self.payoffmat[move1][move2]
                 for (move1, move2) in self.history)
    # Transposing yields one payoff sequence per player.
    first_pay, second_pay = transpose(per_round)
    return {first: mean(first_pay), second: mean(second_pay)}
def make_pssm(binding_sites):
    """
    Return the PSSM as a list of dictionaries of the form:
    [{A:a_val,...,T:t_val}]

    Each value is the log2 ratio of the base's frequency in the column (with
    one pseudocount per base) to a uniform background of 0.25.
    """
    cols = transpose(binding_sites)
    n = float(len(binding_sites))

    def log_odds(col, base):
        freq = (col.count(base) + 1) / (n + 4)
        return log2(freq / 0.25)

    return [{base: log_odds(col, base) for base in "ACGT"} for col in cols]
def toTableaux(oneUtilities, twoUtilities):
    """Build the two tableaux ``(X, Y)`` from the players' utility tables.

    ``X`` has one row per column of ``twoUtilities`` (negated, then padded
    with zeros); ``Y`` has one row per row of ``oneUtilities`` (zeros, then
    the negated row).  Both tables must have the same shape.
    """
    assert len(oneUtilities) == len(twoUtilities), 'The tables must be of same shape'
    assert len(oneUtilities[0]) == len(twoUtilities[0]), 'The tables must be of same shape'

    X = []
    for column in utils.transpose(twoUtilities):
        X.append(utils.scalarProd(column, -1.0) + [0.0] * len(twoUtilities[0]))

    Y = []
    for row in oneUtilities:
        Y.append([0.0] * len(oneUtilities) + utils.scalarProd(row, -1.0))

    return X, Y
def as_qcircuit(self, C=None, R=None):
    r"""
    Typesets this circuit using the `Qcircuit`_ package for
    :math:`\text{\LaTeX}`.

    :param float C: Width (in ems) of each column.
    :param float R: Height (in ems) of each row.
    :rtype: :obj:`str`
    :returns: A string containing :math:`\text{\LaTeX}` source code for use
        with `Qcircuit`_.
    :raises NotImplementedError: For a multi-qubit location that is not a
        CNOT.

    .. _Qcircuit: http://www.cquic.org/Qcircuit/
    """
    def blank_column():
        # If nothing else, place a \qw on every wire.
        return [r'\qw'] * self.nq

    trans_cells = []
    for timestep in self.group_by_time():
        col = blank_column()
        hidden_qubits = set()
        for loc in timestep:
            span = range(min(loc.qubits), max(loc.qubits) + 1)
            if any(qubit in hidden_qubits for qubit in span):
                # A qubit is hidden, so append and reset.
                trans_cells.append(col)
                col = blank_column()
                hidden_qubits = set()

            if loc.wt == 1:
                gate_name = loc.kind if loc.kind != "I" else r"\id"
                col[loc.qubits[0]] = r"\gate{{{0}}}".format(gate_name)
            elif loc.kind == 'CNOT':
                offset = loc.qubits[1] - loc.qubits[0]
                col[loc.qubits[0]] = r'\ctrl{{{0}}}'.format(offset)
                col[loc.qubits[1]] = r'\targ'
            else:
                raise NotImplementedError(
                    "Location kind {0.kind} not supported by this method.".
                    format(loc))
            hidden_qubits.update(span)
        trans_cells.append(col)

    cells = u.transpose([[''] * self.nq] + trans_cells + [[r'\qw'] * self.nq])
    col_spec = "@C{}em".format(C) if C is not None else ""
    row_spec = "@R{}em".format(R) if R is not None else ""
    return r""" \Qcircuit {C} {R} {{ {0} }} """.format(
        u.latex_array_contents(cells), C=col_spec, R=row_spec)
def prok_model_comparison():
    """Scatter pairwise-model BIC against linear-model BIC for each tfdf motif.

    Both axes are logarithmic.
    """
    sys.path.append("/home/pat/motifs")
    from parse_tfbs_data import tfdf

    prok_comps = []
    for motif in tqdm([getattr(tfdf, tf) for tf in tfdf.tfs]):
        prok_comps.append(model_comparison(motif))
    pw_bics, li_bics = transpose(prok_comps)

    scatter(li_bics, pw_bics)
    plt.xlabel("Linear BIC")
    plt.ylabel("Pairwise BIC")
    plt.loglog()
def make_pssm(binding_sites):
    """
    Return the PSSM as a list of dictionaries of the form:
    [{A:a_val,...,T:t_val}]

    Entries are log2 ratios of pseudocounted column frequencies (one
    pseudocount per base) to a 0.25 background.
    """
    n = float(len(binding_sites))
    pssm = []
    for col in transpose(binding_sites):
        scores = {}
        for b in "ACGT":
            scores[b] = log2(((col.count(b) + 1) / (n + 4)) / 0.25)
        pssm.append(scores)
    return pssm
def analyze_prodoric_collection_for_mi():
    """Print, for every E. coli TF, its motif dimensions and correlated columns.

    When ``motif_corr`` reports correlated column pairs, each is printed as
    ``((i, j), p, mi)``.
    """
    for tf in Escherichia_coli.tfs:
        motif = getattr(Escherichia_coli, tf)
        cols = transpose(motif)
        n, L = motif_dimensions(motif)
        corrs = motif_corr(motif)
        if corrs:
            details = [((i, j), p, mi(cols[i], cols[j]))
                       for (i, j), p in corrs]
            print("%s %s %s %s" % (tf, n, L, details))
        else:
            print("%s %s %s" % (tf, n, L))
def motif_corr(motif, n=1000):
    """Find correlated columns in motif, correcting for multiple hypothesis
    testing.

    Returns ``None`` when ``fdr`` gives no cutoff; otherwise a list of
    ``((i, j), p)`` for the column pairs whose permutation p-value (``n``
    permutations, uncorrected MI) is at or below the cutoff.
    """
    def uncorrected_mi(xs, ys):
        return mi(xs, ys, correct=False)

    ps = []
    for col1, col2 in choose2(transpose(motif)):
        ps.append(mi_permute(col1, col2, p_value=True, n=n,
                             mi_method=uncorrected_mi))

    q = fdr(ps)
    if q is None:
        return None

    L = len(motif[0])
    index_pairs = choose2(range(L))
    return [((i, j), p) for (i, j), p in zip(index_pairs, ps) if p <= q]
def viz_sample(sample, filename=None):
    """Visualize a sample trajectory.

    The top panel shows the transposed sample as an image (position on the
    y axis); the bottom panel shows ``hamiltonian`` of each element of
    ``sample``.  ``filename`` is passed to ``maybesave``.
    """
    plt.subplot(211)
    plt.imshow(transpose(sample), interpolation='nearest', aspect='auto')
    plt.ylabel("Position")
    plt.subplot(212)
    # A list, not a lazy map object, so matplotlib can plot it on Python 3.
    energies = [hamiltonian(state) for state in sample]
    plt.plot(energies)
    plt.ylabel("Energy")
    plt.xlabel("Iteration")
    maybesave(filename)
def multiple_ising(hs, J, iterations=50000, replicas=3, method=ising,
                   burn_in=0):
    """Run several replicas of ``method`` and plot their mean with error bars.

    Each replica is ``method(hs, J, iterations)`` with the first ``burn_in``
    entries dropped.  Values are mapped with ``(s + 1) / 2`` and, for each
    entry of the transposed replica list, the mean is plotted with a
    ``1.96 * sd`` bar.
    """
    occ_list = []
    for i in range(replicas):
        # Two spaces reproduce the output of the former print statement.
        print("replica  %s" % (i,))
        occ_list.append(method(hs, J, iterations)[burn_in:])
    cols = [[(s + 1) / 2 for s in col] for col in transpose(occ_list)]
    # Lists rather than lazy map objects so matplotlib can consume them.
    means = [mean(col) for col in cols]
    sds = [sd(col) for col in cols]
    cis = [1.96 * s for s in sds]
    plt.errorbar(range(len(cols)), means, yerr=cis)
def make_plot(mus_ks, control_mus_ks, approximate_mus_ks,
              control_approximate_mus_ks, copy_number, outfile):
    """Plot chemical potential against copy number and save it to ``outfile``.

    Each ``*_mus_ks`` argument is a sequence of ``(mu, k)`` pairs, drawn with
    ``k`` on the (log) x axis.  A dashed vertical line marks ``copy_number``
    when it is truthy.
    """
    curves = [
        (mus_ks, r"$\mu$"),
        (control_mus_ks, r"Control $\mu$"),
        (approximate_mus_ks, r"$\hat\mu$"),
        (control_approximate_mus_ks, r"Control $\hat\mu$"),
    ]
    for pairs_mu_k, curve_label in curves:
        swapped = [(k, mu) for (mu, k) in pairs_mu_k]
        plt.plot(*transpose(swapped), label=curve_label)

    if copy_number:
        plt.plot([copy_number, copy_number], [0, 50], label="Copy number",
                 linestyle="--")

    plt.xlabel("Copy number")
    plt.ylabel("Chemical potential + Const. (kBT)")
    plt.semilogx()
    plt.xlim(1, 10**6)
    plt.title("Copy number vs. Chemical Potential")
    plt.legend(loc='upper left')
    plt.savefig(outfile, dpi=400)
    plt.close()
def get_payoffs(self):
    """Return the mean payoff received by each of the two players, as a dict."""
    first_player, second_player = self.players

    # Look up the payoff pair of every (move1, move2) recorded in history.
    round_payoffs = (self.payoffmat[move1][move2]
                     for (move1, move2) in self.history)

    # Transpose: one payoff sequence per player.
    first_seq, second_seq = transpose(round_payoffs)

    return {first_player: mean(first_seq), second_player: mean(second_seq)}
def analyze_all_pvals_at_once(org_obj=Escherichia_coli):
    """conclusion: fdr-adjusted p-values identify 25 significantly
    correlated column-pairs in 3753 pairwise tests (0.5%).

    Computes a permutation p-value (1000 permutations, uncorrected MI) for
    every column pair of every motif in ``org_obj``, prints how many fall at
    or below the ``fdr`` and ``bhy`` cutoffs, and returns the p-values.
    """
    def uncorrected_mi(xs, ys):
        return mi(xs, ys, correct=False)

    ps = [mi_permute(col1, col2, p_value=True, n=1000,
                     mi_method=uncorrected_mi)
          for tf in tqdm(org_obj.tfs)
          for (col1, col2) in choose2(transpose(getattr(org_obj, tf)))]
    q_bh = fdr(ps)
    q_bhy = bhy(ps)

    def count_at_or_below(cutoff):
        # No cutoff means nothing is significant.
        if cutoff is None:
            return 0
        return sum(1 for p in ps if p <= cutoff)

    print("bh procedure: %s/%s" % (count_at_or_below(q_bh), len(ps)))
    print("bhy procedure: %s/%s" % (count_at_or_below(q_bhy), len(ps)))
    return ps
def get_pairwise_freqs(motif, pc=1/16.0):
    """Return pseudocounted dinucleotide frequencies for every column pair.

    The result has one dict per column pair (in ``choose2`` order), keyed by
    ``(b1, b2)`` for each entry of ``dinucs``; each value is
    ``(count + pc) / (N + 16 * pc)`` with ``N`` the number of sites.
    """
    cols = transpose(motif)
    L = len(cols)
    N = len(motif)
    num_pairs = int(choose(L, 2))

    fs = []
    for _ in range(num_pairs):
        fs.append({(b1, b2): 0 for (b1, b2) in dinucs})

    for table, (col1, col2) in zip(fs, choose2(cols)):
        # Raw co-occurrence counts for this pair of columns.
        for b1, b2 in zip(col1, col2):
            table[b1, b2] += 1
        # Add the pseudocount and normalise.
        for b1, b2 in dinucs:
            table[b1, b2] = (table[b1, b2] + pc) / float(N + 16 * pc)
    return fs
def plot_hist(hist, show=True, labels=True):
    """Plot history columns 1-5 against column 0.

    Rows of ``hist`` are read as
    (turn, f, mean_fits, mean_dna_ic, mean_rec, mean_recced).  Curve labels
    and the legend are only used when ``labels`` is true; the figure is
    shown when ``show`` is true.
    """
    series = transpose(hist)
    curves = [
        (1, "sampled fitness", 'b'),
        (2, "mean fitness", 'g'),
        (3, "mean motif ic", 'r'),
        (4, "rec prom", 'y'),
        (5, "sites recced", 'm'),
    ]
    for column, name, color in curves:
        # Multiplying by `labels` blanks the label when labels are disabled.
        plt.plot(series[0], series[column], label=name * labels, color=color)
    if labels:
        plt.legend()
    if show:
        plt.show()
def benchmark_cat():
    """Compute correlation with expression for the CDC method of Zhang BMC
    Bioinformatics 2012.

    For each organism in ``validation_orgs`` the ``.cat`` index file is
    parsed, CDC values are matched to expression data by gene location, and
    the mean and standard error of the per-condition Spearman correlations
    are printed.  An organism that fails for any reason is reported and
    skipped.
    """
    for org in validation_orgs:
        print(org)
        try:
            # Result unused; the call is kept so that a failure here still
            # marks the organism as failed, as before.
            get_genome_filename(org, 'gbk')
            genome = get_genome(org)
            cdss = get_cdss(genome)
            ncid = org2nc_id(org)
            cat_filename = os.path.join("index_results", ncid + "_CAT",
                                        ncid + ".cat")
            with open(cat_filename) as f:
                # Skip the header line.
                lines = [line.split("\t") for line in f.readlines()[1:]]
            labels, cdcs = transpose([(fields[0], fields[10])
                                      for fields in lines])
            matches = [re.search(r":(c?)(\d+)-(\d+)", label).groups()
                       for label in labels]
            locations = [((int(start), int(stop)) if c == ''
                          else (int(stop) - 1, int(start)))
                         for (c, start, stop) in matches]
            cat_dict = {location: float(cdc)
                        for location, cdc in zip(locations, cdcs)}
            org_exp_dict = master_exp_dict[org]
            # a dictionary of form {(start,stop): locus tag}
            location2lt = {(feature.location.start + 1, feature.location.end):
                           feature.qualifiers['locus_tag'][0]
                           for feature in genome.features
                           if ('locus_tag' in feature.qualifiers)}
            correlates = [(cdc, org_exp_dict[location2lt[location]])
                          for location, cdc in cat_dict.items()
                          if location in location2lt
                          and location2lt[location] in org_exp_dict]
            cdcs, exps = transpose(correlates)
            rhos = [spearmanr(cdcs, [xs[i] for xs in exps])[0]
                    for i in range(len(exps[0]))]
            print("num correlates: %s" % (len(correlates),))
            print("Correlation: %s %s %s" % (org, mean(rhos), se(rhos)))
        except Exception:
            # Best effort per organism; unlike a bare `except`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            print("Failed on: %s" % (org,))
def plot_mono_vs_di_likelihood(ll_dict=None):
    """Scatter mono- against di-nucleotide log-likelihood for each TF.

    ``ll_dict`` maps a TF name to ``(mono, di)`` and defaults to
    ``likelihood_dict()``.  The raw (un-normalised) values are plotted, each
    point is annotated with the TF name, site count, width and motif IC, and
    a dashed diagonal is drawn over the observed value range.
    """
    if ll_dict is None:
        ll_dict = likelihood_dict()
    plt.scatter(*transpose(list(ll_dict.values())))
    for (tf, (mono, di)) in ll_dict.items():
        sites = getattr(Escherichia_coli, tf)
        text = "%s\n#:%s\nw:%s\nIC:%1.2f" % (tf, len(sites), len(sites[0]),
                                              motif_ic(sites))
        plt.annotate(text, (mono, di))
    # Flatten once and reuse for both ends of the diagonal.
    all_vals = concat(list(ll_dict.values()))
    min_val = min(all_vals)
    max_val = max(all_vals)
    plt.xlabel("Mono LL")
    plt.ylabel("Di LL")
    plt.plot([min_val, max_val], [min_val, max_val], linestyle="--")
def analyze_motif(motif, trials=1000):
    """Return the index pairs of significantly correlated motif columns.

    For every column pair, the observed ``dna_mi`` is scored with
    ``percentile`` against ``trials`` permutations of the second column.
    Pairs whose score is below the ``fdr`` cutoff are returned; when ``fdr``
    yields no cutoff the result is empty.  A summary line is printed.
    """
    cols = transpose(motif)
    L = len(cols)
    ps = []
    for col1, col2 in choose2(cols):
        actual_mi = dna_mi(col1, col2)
        perm_mis = [dna_mi(col1, permute(col2)) for _ in range(trials)]
        ps.append(percentile(actual_mi, perm_mis))
    q = fdr(ps)
    if q is None:
        # No cutoff: nothing is significant.
        correlated_pairs = []
    else:
        correlated_pairs = [(i, j)
                            for (i, j), p in zip(choose2(range(L)), ps)
                            if p < q]
    num_correlated = len(correlated_pairs)
    print("correlated column pairs: %s %s"
          % (num_correlated, "%1.2f" % ((num_correlated) / choose(L, 2))))
    return correlated_pairs
def main_example():
    """Trace the helix axis of an example sequence.

    Starting at the origin with a rise vector along z, each dinucleotide
    step looks up roll, tilt and twist from the ``aawedge`` model, rotates
    the previous step vector by the corresponding matrices and adds it to
    the previous position.  Returns the list of positions (column vectors).
    """
    # reduce is not a builtin on Python 3; functools has it on both.
    from functools import reduce

    # Adjacent literals are joined, so no line breaks end up in the sequence.
    sequence = ("CGAAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAACGCG"
                "AAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAACGCGAAAAAA"
                "CGCGAAAAAACGCGAAAAAACG").replace("\n", "")
    lookup = functionify_model(aawedge)
    rise = 3.38  # Angstroms, from Gohlke
    b0 = transpose([[0, 0, 0]])
    v0 = transpose([[0, 0, rise]])
    bs = [b0]
    vs = [v0]
    for pair in pairs(sequence):
        dinuc = "".join(pair)
        params = lookup(dinuc)
        ro, ti, tw = params["roll"], params["tilt"], params["twist"]
        last_b = bs[-1]
        last_v = vs[-1]
        new_v = reduce(matrix_mult, [roll_matrix(ro),
                                     tilt_matrix(ti),
                                     twist_matrix(tw),
                                     last_v])
        new_b = matrix_add(last_b, new_v)
        bs.append(new_b)
        vs.append(new_v)
    return bs
def as_qcircuit(self, C=None, R=None):
    r"""
    Typesets this circuit using the `Qcircuit`_ package for
    :math:`\text{\LaTeX}`.

    :param float C: Width (in ems) of each column.
    :param float R: Height (in ems) of each row.
    :rtype: :obj:`str`
    :returns: A string containing :math:`\text{\LaTeX}` source code for use
        with `Qcircuit`_.
    :raises NotImplementedError: For a multi-qubit location that is not a
        CNOT.

    .. _Qcircuit: http://www.cquic.org/Qcircuit/
    """
    # Columns are collected first and transposed into rows at the end.
    trans_cells = []
    for timestep in self.group_by_time():
        col = [r'\qw'] * self.nq # If nothing else, place a \qw.
        hidden_qubits = set()
        for loc in timestep:
            if any(qubit in hidden_qubits for qubit in range(min(loc.qubits), max(loc.qubits)+1)):
                # A qubit is hidden, so append and reset.
                trans_cells.append(col)
                col = [r'\qw'] * self.nq # If nothing else, place a \qw.
                hidden_qubits = set()
            if loc.wt == 1:
                col[loc.qubits[0]] = r"\gate{{{0}}}".format(loc.kind if loc.kind != "I" else r"\id")
            elif loc.kind == 'CNOT':
                # \ctrl takes the offset from the control wire to the target.
                col[loc.qubits[0]] = r'\ctrl{{{0}}}'.format(loc.qubits[1] - loc.qubits[0])
                col[loc.qubits[1]] = r'\targ'
            else:
                raise NotImplementedError("Location kind {0.kind} not supported by this method.".format(loc))
            # Every wire between the lowest and highest qubit of this
            # location is now occupied in the current column.
            hidden_qubits.update(range(min(loc.qubits), max(loc.qubits)+1))
        trans_cells.append(col)
    # Pad with an empty leading column and a trailing column of wires.
    cells = u.transpose([[''] * self.nq] + trans_cells + [[r'\qw'] * self.nq])
    return r""" \Qcircuit {C} {R} {{ {0} }} """.format(u.latex_array_contents(cells), C="@C{}em".format(C) if C is not None else "", R="@R{}em".format(R) if R is not None else "" )
def kmeans(xs, k=2):
    """Cluster ``xs`` into ``k`` groups with Lloyd-style iterations.

    Centroids start as ``simplex_sample(4)`` draws.  Points are assigned to
    the nearest centroid (``l2``), centroids are recomputed as coordinate
    means, and the loop stops once the within-cluster sum of squares no
    longer changes.  That value is printed on every iteration.

    Returns
    -------
    (clusters, centroids, within_ss)
        The final clusters, their centroids and the final within-cluster
        sum of squares.
    """
    centroids = [simplex_sample(4) for i in range(k)]
    old_within_ss = 10**300
    while True:
        # assign to clusters
        clusters = [[] for i in range(k)]
        for x in xs:
            idx = argmin([l2(x, centroid) for centroid in centroids])
            clusters[idx].append(x)
        # recompute centroids (as lists: they are iterated repeatedly)
        centroids = [[mean(coord) for coord in transpose(cluster)]
                     for cluster in clusters]
        cur_within_ss = sum(sum(l2(x, centroid)**2 for x in cluster)
                            for centroid, cluster in zip(centroids, clusters))
        print(cur_within_ss)
        if cur_within_ss == old_within_ss:
            break
        old_within_ss = cur_within_ss
    # Previously returned the undefined name `within_ss` (NameError).
    return clusters, centroids, cur_within_ss
def analyze_composition_of_correlated_columns(obj,ps):
    """Tally base pairs in correlated versus uncorrelated column pairs.

    Walks every column pair of every motif in ``obj.tfs`` in step with
    ``ps`` (one entry per pair, in the same order).  Pairs with
    ``ps[p_idx] <= 0`` count as correlated and are split into adjacent
    (``i + 1 == j``) and non-adjacent; all others count as uncorrelated.
    Prints a table of counts, frequencies, odds ratios and chi-square
    significance, and returns the three count dictionaries.
    """
    p_idx = 0  # position in ps of the current column pair
    cor_adj_counts = defaultdict(int)
    cor_nonadj_counts = defaultdict(int)
    uncor_counts = defaultdict(int)
    # NOTE(review): fdr_cutoff is never read; the test below compares
    # against the literal 0 instead -- confirm which was intended.
    fdr_cutoff = 0
    for tf in obj.tfs:
        motif = getattr(obj,tf)
        cols = transpose(motif)
        for (i,col1),(j,col2) in choose2(list(enumerate(cols))):
            if ps[p_idx] <= 0:
                print tf,i,j
                for pair in zip(cols[i],cols[j]):
                    if i + 1 == j:
                        cor_adj_counts[pair] += 1
                    else:
                        cor_nonadj_counts[pair] += 1
                #print mi_table(col1,col2)
            else:
                for pair in zip(cols[i],cols[j]):
                    uncor_counts[pair] += 1
            p_idx += 1
    cor_adj_N = float(sum(cor_adj_counts.values()))
    cor_nonadj_N = float(sum(cor_nonadj_counts.values()))
    uncor_N = float(sum(uncor_counts.values()))
    # all_N = float(sum(all_counts.values()))
    # print "---"
    # for b1,b2 in sorted(counts.keys()):
    #     # print b1,b2,"freq:",fmt(counts[(b1,b2)]/N),"background:",fmt(all_counts[(b1,b2)]/all_N),"OR:",fmt(counts[(b1,b2)]/N/(all_counts[(b1,b2)]/all_N)),p
    print "bases, adj, nonadj, noncor | adj freq, nonadj freq | noncor freq| adj OR, nonadj OR"
    # XXX split into adj_uncor, nonadj_uncor
    # Only base pairs seen in correlated adjacent columns get a row.
    for b1,b2 in sorted(cor_adj_counts.keys()):
        # NOTE(review): the fmt() results are divided below, so fmt must
        # return a number for this to work -- confirm.  The divisions also
        # assume the *_N totals and uncor_freq are non-zero.
        cor_adj_freq = fmt(cor_adj_counts[(b1,b2)]/cor_adj_N)
        cor_nonadj_freq = fmt(cor_nonadj_counts[(b1,b2)]/cor_nonadj_N)
        uncor_freq = fmt(uncor_counts[(b1,b2)]/uncor_N)
        cor_adj_OR = fmt(cor_adj_freq/uncor_freq)
        cor_nonadj_OR = fmt(cor_nonadj_freq/uncor_freq)
        # 2x2 tables: [total, this pair's count] for uncorrelated vs correlated.
        _,adj_p,_,_ = stats.chi2_contingency(np.array([[uncor_N,uncor_counts[(b1,b2)]], [cor_adj_N,cor_adj_counts[(b1,b2)]]]))
        _,non_adj_p,_,_ = stats.chi2_contingency(np.array([[uncor_N,uncor_counts[(b1,b2)]], [cor_nonadj_N,cor_nonadj_counts[(b1,b2)]]]))
        print b1,b2,cor_adj_counts[b1,b2],cor_nonadj_counts[b1,b2],uncor_counts[b1,b2],"|",cor_adj_freq,cor_nonadj_freq,"|",uncor_freq,"|",cor_adj_OR, significance(adj_p),cor_nonadj_OR,significance(non_adj_p)
    return cor_adj_counts, cor_nonadj_counts, uncor_counts