def finalize(self, sim, *args, **kwargs):
    self.results['cum_tested'].values = pl.cumsum(self.results['n_tested'].values)
    self.results['cum_diagnosed'].values = pl.cumsum(self.results['n_diagnosed'].values)
    sim.results.update(self.results)
    return
def drawAngleDistribution(filename):
    try:
        f = open(filename, 'r')
    except IOError:
        return
    data = json.load(f)
    bounds = data["bin_boundaries"]
    bounds90 = [b - 90 for b in bounds]
    bin_widths = [y - x for x, y in zip(bounds[:-1], bounds[1:])]
    weights = data["bin_weights"]
    total = sum(weights)
    densities = [weight/width for weight, width in zip(weights, bin_widths)]
    fractions = [density/total for density in densities]
    cumul_densities = pl.cumsum(densities)
    cumul_fractions = pl.cumsum(fractions)
    new_filename = fp.replace_extension(fp.down_folder(filename, "plots"), ".png")
    new_filename = fp.down_folder(new_filename, "angles")
    drawAngleDistributionSub(fp.append_to_filename(new_filename, "_cf"),
                             bounds90, cumul_fractions,
                             "Cumulative angle distribution",
                             "Angle to plane", "Cumulative probability density")
    drawAngleDistributionSub(fp.append_to_filename(new_filename, "_cd"),
                             bounds90, cumul_densities,
                             "Cumulative angle histogram",
                             "Angle to plane", "Cumulative length per degree")
    drawAngleDistributionSub(fp.append_to_filename(new_filename, "_f"),
                             bounds90, fractions,
                             "Angle distribution",
                             "Angle to plane", "Probability density")
    drawAngleDistributionSub(fp.append_to_filename(new_filename, "_d"),
                             bounds90, densities,
                             "Angle histogram",
                             "Angle to plane", "Length per degree")
def finalize(self, verbose=None):
    self.results['cum_exposed'].values = pl.cumsum(self.results['new_infections'].values) + self['n_infected']  # Include initially infected people
    self.results['cum_tested'].values = pl.cumsum(self.results['new_tests'].values)
    self.results['cum_diagnosed'].values = pl.cumsum(self.results['new_diagnoses'].values)
    self.results['cum_deaths'].values = pl.cumsum(self.results['new_deaths'].values)
    self.results['cum_recoveries'].values = pl.cumsum(self.results['new_recoveries'].values)

    # Add in the results from the interventions
    for intervention in self['interventions']:
        intervention.finalize(self)  # Execute any post-processing

    # Scale the results
    for reskey in self.reskeys:
        if self.results[reskey].scale:
            self.results[reskey].values *= self['scale']

    # Perform calculations on results
    self.compute_doubling()
    self.compute_r_eff()
    self.likelihood()

    # Convert to an odict to allow e.g. sim.people[25] later, and results to an objdict to allow e.g. sim.results.diagnoses
    self.people = sc.odict(self.people)
    self.results = sc.objdict(self.results)

    self.results_ready = True

    return
def finalize(self, sim, *args, **kwargs):
    self.results['cum_tested'] = cv.Result('Cumulative number tested',
                                           values=pl.cumsum(self.results['n_tested'].values))
    self.results['cum_diagnosed'] = cv.Result('Cumulative number diagnosed',
                                              values=pl.cumsum(self.results['n_diagnosed'].values))
    sim.results.update(self.results)
    return
def simulate_age_group_data(N=50, delta_true=150, pi_true=true_rate_function):
    """ generate simulated data """
    # start with a simple model with N rows of data
    model = data_simulation.simple_model(N)

    # record the true age-specific rates
    model.ages = pl.arange(0, 101, 1)
    model.pi_age_true = pi_true(model.ages)

    # choose age groups randomly
    age_width = mc.runiform(1, 100, size=N)
    age_mid = mc.runiform(age_width/2, 100 - age_width/2, size=N)
    age_width[:10] = 10
    age_mid[:10] = pl.arange(5, 105, 10)
    #age_width[10:20] = 10
    #age_mid[10:20] = pl.arange(5, 105, 10)

    age_start = pl.array(age_mid - age_width/2, dtype=int)
    age_end = pl.array(age_mid + age_width/2, dtype=int)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end

    # choose effective sample size uniformly at random
    n = mc.runiform(100, 10000, size=N)
    model.input_data['effective_sample_size'] = n

    # integrate true age-specific rate across age groups to find true group rate
    model.input_data['true'] = pl.nan
    model.input_data['age_weights'] = ''

    for i in range(N):
        beta = mc.rnormal(0., .025**-2)

        # TODO: clean this up, it is computing more than is necessary
        age_weights = pl.exp(beta*model.ages)
        sum_pi_wt = pl.cumsum(model.pi_age_true*age_weights)
        sum_wt = pl.cumsum(age_weights)
        p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        model.input_data.ix[i, 'true'] = p[i]
        model.input_data.ix[i, 'age_weights'] = ';'.join(['%.4f' % w for w in age_weights[age_start[i]:(age_end[i] + 1)]])

    # sample observed rate values from negative binomial distribution
    model.input_data['value'] = mc.rnegative_binomial(n * model.input_data['true'], delta_true) / n

    print model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'], axis=1)
    return model
def random_walk(steps):
    thetas = n.random.uniform(0, 2*p.pi, steps)
    real = [n.cos(t) for t in thetas]
    imag = [n.sin(t) for t in thetas]
    xs = p.cumsum(real)
    ys = p.cumsum(imag)
    ell = []
    for i in xrange(steps):
        ell.append([xs[i], ys[i]])
    return n.array(ell)
def synthbeats(duration, meanhr=60, stdhr=1, samplingfreq=250, sinfreq=None):
    # Minimally based on the parameters from:
    # http://physionet.cps.unizar.es/physiotools/ecgsyn/Matlab/ecgsyn.m
    # If sinfreq is given, it will be used to generate a sin instead of using rand
    # Inputs: duration in seconds
    # Returns: signal, peaks

    t = np.arange(duration * samplingfreq) / float(samplingfreq)
    signal = np.zeros(len(t))

    print(len(t))
    print(len(signal))

    if sinfreq is None:
        npeaks = 1.2 * (duration * meanhr / 60)
        # add 20% more beats to absorb some cumulative error
        hr = pl.randn(int(npeaks)) * stdhr + meanhr
        peaks = pl.cumsum(60. / hr) * samplingfreq
        peaks = peaks.astype('int')
        peaks = peaks[peaks < t[-1] * samplingfreq]
    else:
        hr = meanhr + sin(2 * pi * t * sinfreq) * float(stdhr)
        index = int(60. / hr[0] * samplingfreq)
        peaks = []
        while index < len(t):
            peaks += [index]
            index += int(60. / hr[index] * samplingfreq)

    signal[peaks] = 1.0

    return t, signal, peaks
def place_axes(fig, x, y, fieldwidths, fieldheights, showy, margins, wh, twiny=False):
    w, h = wh
    fieldwidths = pl.array(fieldwidths, 'd')
    fieldheights = pl.array(fieldheights, 'd')
    cwidths = pl.concatenate([[0], pl.cumsum(fieldwidths)])
    ax = []
    for i in xrange(len(fieldwidths)):
        xpos = float(x + cwidths[i] + margins[i]) / w
        ypos = (y + 1.) / h
        width = float(fieldwidths[i] - 2*margins[i]) / w
        heigh = (fieldheights[i] - 2) / h
        if heigh > 0:
            yax = showy[i]
            if len(ax) > 0 and twiny:
                ax.append(graphics.prepare_axes(
                    fig.add_axes([xpos, ypos, width, heigh], sharey=ax[0]),
                    haveon=('bottom', 'left') if yax else ('bottom',)))
            else:
                ax.append(graphics.prepare_axes(
                    fig.add_axes([xpos, ypos, width, heigh]),
                    haveon=('bottom', 'left') if yax else ('bottom',)))
    return ax
def graphTopKSubredditWords(subreddit, K):
    if K < 1:
        raise ValueError("Invalid K value")

    # get the key words for the subreddit
    subredditWords = scrape_subreddit(subreddit, 15)

    # sum the dictionary to get the total number of keywords
    Len = sum(subredditWords.values())

    # get the top K words from the ordered dictionary
    # 0 ... K-1 for K words
    wordsToPlot = []
    for key in range(0, K):
        wordsToPlot.append(subredditWords.popitem(False))
    #print(wordsToPlot)

    percentArray = []
    # set how long the x axis will be
    xArray = range(1, K + 1)
    for item in wordsToPlot:
        # find the percentage the top words are of the entire word bank
        x = percentage(item[1], Len)
        percentArray.append(x)

    # cumulative sum of the top K words
    test = pl.cumsum(percentArray)
    pl.plot(xArray, test)

    # after running multiple times starting at 100,
    # 10 was found as the best way to display the data, since they never
    # went above 10 percent
    pl.axis([0, K, 0, 10])
    pl.xticks(xArray)
    pl.ylabel("Percentage of keywords for Subreddit")
    pl.xlabel("The top K frequent keywords")
    pl.title(subreddit)
    pl.show()
    return
def drawGroupAngleDistribution(filename, filenames, control=False):
    data = []
    for fn in filenames:
        try:
            f = open(fn, 'r')
        except IOError:
            return
        data.append(json.load(f))
        data[-1]["filename"] = fn
    if control:
        cbins, cweights = angleDistributionControlData(180)
        data.append({"bin_boundaries": cbins, "bin_weights": cweights, "filename": "Control"})
    default_bounds = data[0]["bin_boundaries"]
    for d in data:
        bounds = d["bin_boundaries"]
        if bounds != default_bounds:
            print "Warning: bin boundaries differ in group angle distribution", data[0]["filename"], d["filename"]
        d["bounds90"] = [b - 90 for b in bounds]
        bin_widths = [y - x for x, y in zip(bounds[:-1], bounds[1:])]
        weights = d["bin_weights"]
        total = sum(weights)
        d["fractions"] = [weight/(width*total) for weight, width in zip(weights, bin_widths)]
        d["cumul_fractions"] = pl.cumsum(d["fractions"])
    new_filename = fp.replace_extension(fp.down_folder(filename, "plots"), ".png")
    new_filename = fp.down_folder(new_filename, "angles")
    drawGroupAngleDistributionSub(fp.append_to_filename(new_filename, "_f"),
                                  [d["bounds90"] for d in data],
                                  [d["fractions"] for d in data],
                                  "Angle distributions",
                                  "Angle to plane",
                                  "Probability density")
    drawGroupAngleDistributionSub(fp.append_to_filename(new_filename, "_cf"),
                                  [d["bounds90"] for d in data],
                                  [d["cumul_fractions"] for d in data],
                                  "Cumulative angle distributions",
                                  "Angle to plane",
                                  "Cumulative probability density")
def getEffortSpent(project, session):
    ''' Determine the effort spent on the project.
        Returned is a tuple (times, spent). Both values are lists of equal length.
        times are in weeks since project start.
    '''
    efforts = (session.query(PlannedEffort)
               .filter(PlannedEffort.Project == project.Id)
               .order_by(PlannedEffort.Week)
               .all())
    times = []
    current_week = None
    sums = []
    current_sum = 0.0
    for e in efforts:
        if e.Week != None:
            if e.Week != current_week:
                times.append(e.Week)
                current_week = e.Week
                sums.append(current_sum)
                current_sum = e.Hours
            else:
                current_sum += e.Hours
    sums.append(current_sum)
    times.append(e.Week)
    # Convert the date ordinal number to a week number since project start.
    times = [(t.dt.toordinal() - project.FirstWeek.dt.toordinal())/7.0 for t in times]
    # Convert the sums (which are in hours) into mandays
    sums = [s/8.0 for s in sums]
    csum = pylab.cumsum(sums)
    return times, csum
def calc_cma(arr):
    cum_sum_arr = plb.cumsum(arr)
    cum_avg_list = []
    for i, x in enumerate(cum_sum_arr, 1):
        if i > 1:
            cum_avg_list.append(x / i)
    cum_avg_arr = plb.asarray(cum_avg_list)
    print("The CMA of this SMA is: ", cum_avg_arr)
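A minimal usage sketch for calc_cma above; the plb alias and the example series are assumptions, not part of the original snippet.

# Hypothetical usage of calc_cma (assumes `import pylab as plb`, as in the snippet above).
import pylab as plb

sma_values = [10.0, 12.0, 11.0, 13.0, 14.0]   # made-up smoothed series
calc_cma(sma_values)   # prints the cumulative moving average of the series (from the second point on)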
def calcL2Error(weight1, weight2, bins):
    weights = [weight1, weight2]
    total = [sum(w) for w in weights]
    densities = [[weight/width for weight, width in zip(w, bins)] for w in weights]
    fractions = [[density/t for density in d] for (t, d) in zip(total, densities)]
    cumul_fractions = [pl.cumsum(f) for f in fractions]
    cumul_error = [a - b for (a, b) in zip(*cumul_fractions)]
    cumul_squared_error = [a*a for a in cumul_error]
    cumul_squared_integral = [a*b for (a, b) in zip(cumul_squared_error, bins)]
    return pl.sqrt(sum(cumul_squared_integral))
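A short, hypothetical check of calcL2Error above, comparing two binned distributions over the same ten 18-degree bins; pl is assumed to be pylab, as elsewhere in these snippets, and the histogram values are illustrative.

# Hypothetical usage of calcL2Error (assumes `import pylab as pl`).
import pylab as pl

bin_widths = [18.0] * 10                                   # shared bin widths (degrees)
hist_a = [3., 5., 9., 14., 20., 20., 14., 9., 5., 3.]      # made-up bin weights
hist_b = [4., 6., 10., 15., 18., 18., 15., 10., 6., 4.]
err = calcL2Error(hist_a, hist_b, bin_widths)              # L2 distance between the two cumulative distributions
print(err)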
def DFA(data, npoints=None, degree=1, use_median=False):
    """
    computes the detrended fluctuation analysis
    returns the fluctuation F and the corresponding window length L

    :args:
        data (n-by-1 array): the data from which to compute the DFA
        npoints (int): the number of points to evaluate; if omitted the log(n)
            will be used
        degree (int): degree of the polynomial to use for detrending
        use_median (bool): use median instead of mean fluctuation

    :returns:
        F, L: the fluctuation F as function of the window length L

    """
    # max window length: n/4

    # 0th: compute integral
    integral = cumsum(data - mean(data))

    # 1st: compute different window lengths
    n_samples = npoints if npoints is not None else int(log(len(data)))
    lengths = sort(array(list(set(
        logspace(2, log(len(data)/4.), n_samples, base=exp(1)).astype(int)
    ))))

    #print lengths
    all_flucs = []
    used_lengths = []
    for wlen in lengths:
        # compute the fluctuation of residuals from a linear fit
        # according to Kantz&Schreiber, ddof must be the degree of polynomial,
        # i.e. 1 (or 2, if mean also counts? -> see in book)
        curr_fluc = []
        # rrt = 0
        for startIdx in arange(0, len(integral), wlen):
            pt = integral[startIdx:startIdx + wlen]
            if len(pt) > 3*(degree + 1):
                resids = pt - polyval(polyfit(arange(len(pt)), pt, degree),
                                      arange(len(pt)))
                # if abs(wlen - lengths[0]) < -1:
                #     print resids[:20]
                # elif rrt == 0:
                #     print "wlen", wlen, "l0", lengths[0]
                #     rrt += 1
                curr_fluc.append(std(resids, ddof=degree + 1))
        if len(curr_fluc) > 0:
            if use_median:
                all_flucs.append(median(curr_fluc))
            else:
                all_flucs.append(mean(curr_fluc))
            used_lengths.append(wlen)
    return array(all_flucs), array(used_lengths)
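A hypothetical driver for the DFA routine above: white noise should yield a scaling exponent near 0.5, estimated here as the slope of log F against log L. The star import mirrors the flat namespace (cumsum, polyfit, ...) that the function itself assumes; the variable names are illustrative.

# Hypothetical usage of DFA (assumes the same flat pylab namespace the function relies on).
from pylab import *

noise = randn(10000)                    # white noise: expect an exponent close to 0.5
F, L = DFA(noise, npoints=20)
alpha = polyfit(log(L), log(F), 1)[0]   # DFA exponent = slope in log-log coordinates
print("estimated DFA exponent: %.2f" % alpha)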
def find_factors(idat, odat, k=None):
    """
    A routine to compute the main predictors (linear combinations of
    coordinates) in idat to predict odat.

    *Parameters*
        idat: d x n data matrix, with n measurements, each of dimension d
        odat: q x n data matrix with n measurements, each of dimension q

    *Returns*
        **Depending on whether or not** *k* **is provided, the returned value
        is different**

        * if k is given, compute the first k regressors and return an
          orthogonal matrix that contains the regressors in its columns,
          i.e. reg[0,:] is the first regressor

        * if k is not given or None, return a d-dimensional vector v(k)
          explaining which fraction of the total predictable variance can be
          explained using only k regressors.

    **NOTE**

    #. idat and odat must have zero mean
    #. To interpret the regressors, it is advisable to have the columns of
       idat share the same variance
    """
    # transform into z-scores
    u, s, v = svd(idat, full_matrices=False)
    su = dot(diag(1./s), u.T)
    z = dot(su, idat)
    # ! Note that the covariance of z is *NOT* 1, but 1/n; z*z.T = 1 !

    # least-squares regression:
    A = dot(odat, pinv(z))
    uA, sigma_A, vA = svd(A, full_matrices=False)
    if k is None:
        vk = cumsum(sigma_A**2) / sum(sigma_A**2)
        return vk
    else:
        # choose k predictors
        sigma_A1 = sigma_A.copy()
        sigma_A1[k:] = 0
        A1 = reduce(dot, [uA, diag(sigma_A1), vA])
        B = dot(A1, su)
        uB, sigma_B, vB = svd(B, full_matrices=False)
        regs = vB[:k, :].T
        return regs
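A hypothetical call of find_factors above on centred random data, showing both return modes; the data shapes and names below are illustrative, and the flat namespace the routine itself relies on (svd, dot, diag, pinv, cumsum, reduce) must already be in scope where it is defined.

# Hypothetical usage of find_factors; only the imports needed by this driver are shown.
from numpy import dot
from numpy.random import randn

idat = randn(5, 200)                            # 5-dimensional input, 200 samples
odat = dot(randn(3, 5), idat)                   # 3-dimensional output driven by the input
idat = idat - idat.mean(axis=1, keepdims=True)  # the routine expects zero-mean data
odat = odat - odat.mean(axis=1, keepdims=True)

vk = find_factors(idat, odat)                   # cumulative fraction of predictable variance
regs = find_factors(idat, odat, k=2)            # first two regressors as columns of a 5x2 matrix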
def plot_upd_impz(self, fig, taps, a=1):
    if not signal:
        return self.plot_init_impz(fig)
    ax1, ax2 = fig.get_axes()
    l = len(taps)
    impulse = pylab.repeat(0., l)
    impulse[0] = 1.
    x = arange(0, l)
    response = signal.lfilter(taps, a, impulse)
    ax1.stem(x, response)
    step = pylab.cumsum(response)
    ax2.stem(x, step)
def unwrap(xs, min_value, max_value, in_place=False, jump_fraction=0.5):
    range_ = max_value - min_value
    jump_threshold = range_ * jump_fraction
    diffs = pl.diff(xs)
    octave_diffs = pl.zeros(len(xs) - 1, dtype=pl.int64)
    octave_diffs[diffs > jump_threshold] = -1
    octave_diffs[diffs < -jump_threshold] = 1
    octaves = pl.append(0, pl.cumsum(octave_diffs))
    if in_place:
        xs += octaves * range_
    else:
        return xs + octaves * range_
def generate_data(N, delta_true, pi_true, heterogeneity, bias, sigma_prior):
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    model = data_simulation.simple_model(N)
    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount='Moderately')
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true*age_weights)
    sum_wt = pl.cumsum(age_weights)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    n = mc.runiform(10000, 100000, size=N)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n

    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n * pl.exp(bias)

    emp_priors = {}
    emp_priors['p', 'mu'] = pi_age_true
    emp_priors['p', 'sigma'] = sigma_prior*pi_age_true
    model.emp_priors = emp_priors

    model.a = a
    model.pi_age_true = pi_age_true
    model.delta_true = delta_true

    return model
def plot_cascade(cascade, model):
    weights = []
    thresholds = []
    for i, stage in enumerate(cascade.stages):
        #print("stage:", i)
        if stage.feature_type == stage.Level2DecisionTree:
            weights.append(stage.weight)
            thresholds.append(stage.cascade_threshold)
        elif stage.feature_type == stage.Stumps:
            weights.append(stage.weight)
            thresholds.append(stage.cascade_threshold)
        else:
            raise Exception("Received an unhandled stage.feature_type")
    # end of "for each stage"

    for i, stage in enumerate(cascade.stages):
        #print("stage %i cascade threshold:" % i, stage.cascade_threshold)
        #print("stage %i weight:" % i, weights[i])
        pass

    if thresholds[0] < -1E5:
        print("The provided model seems not to have a soft cascade, "
              "skipping plot_cascade")
        return

    # create new figure
    #fig = pylab.figure()
    pylab.clf()  # clear the figure
    pylab.gcf().set_facecolor("w")  # set white background
    pylab.grid(True)
    #pylab.spectral()  # set the default color map

    # draw the figure
    max_scores = pylab.cumsum(pylab.absolute(weights))
    pylab.plot(max_scores, label="maximum possible score")
    pylab.plot(thresholds, label="cascade threshold")

    pylab.legend(loc="upper left", fancybox=True)
    pylab.xlabel("Cascade stage")
    pylab.ylabel("Detection score")

    title = "Soft cascade"
    if model:
        title = "Soft cascade for model '%s' over '%s' dataset" \
                % (model.detector_name, model.training_dataset_name)
    pylab.title(title)
    pylab.draw()

    return
def s(x, n):
    num = pl.zeros(n + 1)
    denom = pl.zeros(n + 1)
    result = pl.zeros(n + 1)
    for i in pl.arange(0, n + 1, 1):
        num[i] = x**i
        denom[i] = factorial(i)
        result[i] = num[i]/denom[i]
    partial = pl.cumsum(result)
    pl.plot(pl.arange(0, n + 1, 1), partial, 'bo')
    pl.xlabel('n')
    pl.ylabel('s(x,n)')
    pl.title('Partial Sums Graph of s(x,n)')
    pl.show()
    return partial
def plot(self, lw=2, fontsize=16, marker='o', logy=False):
    """plot the number of downloads

    The data used is the data stored in the :attr:`df` attribute.

    :param int lw: width of the curves
    :param int fontsize: fontsize used in titles
    :param marker:
    :param bool logy: set y-axis to logarithmic scale

    #. first plot shows the cumulative downloads
    #. second plot shows the individual downloads

    .. plot::

        from pypiview import PYPIView
        p = PYPIView("requests", verbose=False)
        p.plot()

    """
    times = self.df.index

    fig, (ax1, ax2) = pylab.subplots(2, 1, figsize=(12, 8))
    fig.autofmt_xdate()

    for this in self.df.columns:
        data = self.df[this].values
        ax1.plot(times, data, lw=lw, marker='o')
        ax2.plot(times, pylab.cumsum(data), lw=lw, marker='o')

    ax1.legend(list(self.df.columns), loc="upper left")
    ax1.set_title("Downloads of each release", fontsize=fontsize)
    ax1.grid(True)
    #ax1.xticks(rotation=45, fontsize=fontsize)

    ax2.legend(list(self.df.columns), loc="upper left")
    ax2.set_title("Cumulative downloads of each release", fontsize=fontsize)
    ax2.grid(True)
    #ax2.xticks(rotation=45, fontsize=fontsize)

    try:
        pylab.tight_layout()
    except:
        pass

    if logy:
        ax1.semilogy()
        ax2.semilogy()
def repartplotdata(data_in, _color='k', _xlabel='Rank',
                   _ylabel='Cumulative Percentage of Data',
                   _title='Repartition of values', _name='Data',
                   _lw=2, _fs='x-large', _fs_legend='medium',
                   _ls='-', _loc=0):
    "Plot the repartition of a data array"
    data = pylab.array(data_in, copy=True)
    data.sort()
    rank = pylab.arange(1, len(data) + 1)
    values = pylab.cumsum(data[::-1])
    pylab.plot(rank, 100 * values / values[-1], _color, lw=_lw,
               drawstyle='steps', label=_name, ls=_ls)
    pylab.xlabel(_xlabel, size=_fs)
    pylab.ylabel(_ylabel, size=_fs)
    pylab.title(_title, size=_fs)
    font = FontProperties(size=_fs_legend)
    pylab.legend(loc=_loc, prop=font)
def _generate_basis(self):
    """
    This method generates a basis of the linear manifold w.r.t. the
    canonical basis. Each basis vector lives in the ambient-space
    dimension, and the number of vectors is equal to the intrinsic
    dimension.
    """
    data = pl.stack([pl.ones(self.input_dimension), self.slope], 1).flatten().astype('float32')
    indices = pl.stack([
        pl.arange(self.input_dimension),
        pl.full(self.input_dimension, self.input_dimension)
    ], 1).flatten().astype('int32')
    indptr = pl.full(self.input_dimension + 1, 2, dtype=pl.float32)
    indptr[0] = 0
    basis = sp.csc_matrix(
        (data, indices, pl.cumsum(indptr)),
        shape=(self.ambiant_dimension, self.input_dimension))
    return basis
def age_standardize_approx(name, age_weights, mu_age, age_start, age_end, ages):
    """ Generate PyMC objects for approximating the integral of gamma from age_start[i] to age_end[i]

    Parameters
    ----------
    name : str
    age_weights : array, len == len(ages)
    mu_age : pymc.Node with values of PCGP
    age_start, age_end : array

    Results
    -------
    Returns dict of PyMC objects, including 'mu_interval',
    the approximate integral of gamma (data predicted stochastic)
    """
    cum_sum_weights = pl.cumsum(age_weights)

    @mc.deterministic(name='cum_sum_mu_%s' % name)
    def weighted_sum_mu(mu_age=mu_age, age_weights=age_weights):
        return pl.cumsum(mu_age * age_weights)

    age_start = age_start.__array__().clip(ages[0], ages[-1]) - ages[0]  # FIXME: Pandas bug, makes clip require __array__()
    age_end = age_end.__array__().clip(ages[0], ages[-1]) - ages[0]

    pl.seterr('ignore')

    @mc.deterministic(name='mu_interval_%s' % name)
    def mu_interval(weighted_sum_mu=weighted_sum_mu,
                    cum_sum_weights=cum_sum_weights,
                    mu_age=mu_age,
                    age_start=pl.array(age_start, dtype=int),
                    age_end=pl.array(age_end, dtype=int)):
        mu = (weighted_sum_mu[age_end] - weighted_sum_mu[age_start]) / (cum_sum_weights[age_end] - cum_sum_weights[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            mu[i] = mu_age[age_start[i]]

        return mu

    return dict(mu_interval=mu_interval)
def serialize(settings, out_fn='tmpdata'):
    scen, output = output_from_settings(settings)
    f_diagrams = fds(output.ml, scen)
    capacity, w, ff, j = zip(*f_diagrams)
    _counts, _dt, _flows = map(lambda fn: fn(settings), [counts, dt, flows])
    m = len(_counts[0])
    n = len(output.ml_lengths)
    Time = pylab.array([range(m)] * n) / 3600.0 * _dt
    Lengths = pylab.array([output.ml_lengths] * m).transpose() / 1000.0
    XLengths = pylab.array([pylab.cumsum(output.ml_lengths)] * m).transpose() / 1000.0
    Lanes = pylab.array([output.ml_lanes] * m).transpose()
    density = _counts[output.ml_idxs, :] / Lengths / Lanes
    flow = _flows[output.ml_idxs, :] / _dt * 3600.0 / Lanes
    velocity = pylab.nan_to_num(flow / density)
    # on-ramp queues plus source queues
    queues = pylab.array([_counts[idx, :] if idx is not None else [0.0] * m
                          for idx in output.or_idxs])
    srcs = pylab.array([_counts[idx, :] if idx is not None else [0.0] * m
                        for idx in output.src_idxs])
    queues += srcs
    ser_out = [
        output.ml_idxs, output.ml_lengths, output.ml_lanes, output.or_idxs,
        capacity, w, ff, j,
        Time, Lengths, XLengths, Lanes,
        density, flow, velocity, queues,
        _dt, n, m
    ]
    import pickle
    json = pickle
    with open(out_fn, 'w') as fn:
        json.dump(ser_out, fn)
def _test_unwrap():
    pl.seed(1)
    xs = pl.cumsum(scipy.stats.norm.rvs(scale=1000, size=10000))

    axes = pl.subplot(411)
    pl.plot(xs)

    xs %= 2**16
    pl.subplot(412, sharex=axes)
    pl.plot(xs)

    in_place = False
    if in_place:
        pl.subplot(413, sharex=axes)
        unwrap(xs, 0, 2**16, True)
        pl.plot(xs)

        pl.subplot(414, sharex=axes)
        pl.plot(xs)
    else:
        pl.subplot(413, sharex=axes)
        pl.plot(unwrap(xs, 0, 2**16))

        pl.subplot(414, sharex=axes)
        pl.plot(xs)

    pl.show()
def guessParam(self):
    """looks for 2 positive peaks of around the same width;
    the first one is simply searched for at the maximum of the curve"""
    self.fitterAnnex = FitterLorentz()
    self.fitterAnnex.Xdata = self.Xdata
    self.fitterAnnex.Ydata = self.Ydata
    self.fitterAnnex.fit(verboseLevel=0,
                         param_fixed={__X0_HZ__: self.Xdata[self.Ydata.argmax()]})
    self.residu = self.Ydata - self.fitterAnnex.Y
    self.residucum = pylab.cumsum(self.residu)

    # left = self.Xdata[self.residucum.argmax()]
    # right = self.Xdata[self.residucum.argmin()]

    ### find the largest increase of residucum on an interval of FWHM(first peak)
    x0 = self.Xdata[0]
    FWHM1 = self.fitterAnnex.param_fitted[__GAMMA_HZ__]
    for i, x in enumerate(self.Xdata):
        if abs(x - x0) > abs(FWHM1):
            discreteWidth = i
            break
    discreteWidth = discreteWidth / 2  # this might help when the guess is too wide
    if discreteWidth == 0:
        discreteWidth = 1
    vals = self.residucum[discreteWidth:] - self.residucum[:-discreteWidth]
    x0_2 = self.Xdata[vals.argmax() + discreteWidth / 2]
    FWHM2 = FWHM1
    # x0_2 = (left+right)/2
    # FWHM2 = abs(right-left)/5
    a2 = vals.max() * FWHM2 / 5
    param_guess = self.fitterAnnex.param_guess
    param_guess[__X0_HZ__] = self.fitterAnnex.param_fixed[__X0_HZ__]
    param_guess[__OFFSET__] = 0  # no offset
    param_guess[__GAMMA_2_HZ__] = param_guess[__GAMMA_HZ__]  # FWHM2
    param_guess[__X0_2__] = x0_2
    param_guess[__AREA_2__] = a2
    return param_guess
def view_filter(h, fp=None, fs=None):
    '''view filter'''
    w, H = signal.freqz(h, 1)
    H_phase = pl.unwrap([pl.degrees(cmath.phase(H[i])) for i in range(len(H))], 180)
    H = 20 * pl.log10(abs(H[:]))
    x = range(0, len(h))
    step = pl.cumsum(h)

    pl.figure(figsize=(16, 6.6), dpi=80)

    pl.subplot(221)
    pl.stem(x, h)
    pl.ylabel('Amplitude')
    pl.xlabel(r'n (samples)')
    pl.title(r'Impulse response')
    pl.text(0.2, 0.7, 'N_taps = {0}'.format(len(h)))

    pl.subplot(222)
    pl.stem(x, step)
    pl.ylabel('Amplitude')
    pl.xlabel(r'n (samples)')
    pl.title(r'Step response')

    pl.subplot(223)
    pl.plot(w / (2.0 * pl.pi), H)
    pl.ylabel('Magnitude (db)')
    pl.xlabel(r'Normalized Frequency (x$\pi$rad/sample)')
    pl.title(r'Frequency response')
    if fp != None:
        pl.axvline(fp, linewidth=1, color='k', ls='-')
    if fs != None:
        pl.axvline(fs, linewidth=1, color='k', ls='-')

    pl.subplot(224)
    pl.plot(w / (2.0 * pl.pi), H_phase)
    pl.ylabel('Phase (radians)')
    pl.xlabel(r'Normalized Frequency (Hz)')
    pl.title(r'Phase response')
def stoc_eqs(INP, ts):
    V = INP
    Rate = np.zeros((8))
    Change = np.zeros((8, 3))
    N = pl.sum(V[range(3)])
    Rate[0] = beta*V[0]*V[1]/N;  Change[0, :] = [-1, +1, 0]
    Rate[1] = gamma*V[1];        Change[1, :] = [0, -1, +1]
    Rate[2] = mu*N;              Change[2, :] = [+1, 0, 0]
    Rate[3] = mu*V[0];           Change[3, :] = [-1, 0, 0]
    Rate[4] = mu*V[1];           Change[4, :] = [0, -1, 0]
    Rate[5] = mu*V[2];           Change[5, :] = [0, 0, -1]
    Rate[6] = epsilon*V[0];      Change[6, :] = [-1, +1, 0]
    Rate[7] = delta;             Change[7, :] = [0, +1, 0]
    R1 = pl.rand()
    R2 = pl.rand()
    ts = -np.log(R2)/(np.sum(Rate))
    m = min(pl.find(pl.cumsum(Rate) >= R1*pl.sum(Rate)))
    V[range(3)] = V[range(3)] + Change[m, :]
    V[3] = 0
    V[4] = 0.
    if m == 6:
        V[3] = 1.
    if m == 7:
        V[4] = 1.
    return [V, ts]
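A hypothetical Gillespie-style driver for stoc_eqs above: it repeatedly applies the event step and advances time by the sampled waiting time. The end time ND, the initial state (S, I, R plus the two event flags) and the rate constants (beta, gamma, mu, epsilon, delta) are assumptions that must be defined elsewhere for stoc_eqs to run.

# Hypothetical driver for stoc_eqs; state layout and parameter values are assumptions.
import numpy as np

def stoc_iteration(INPUT, ND):
    ts = 0.0
    T = [0.0]
    RES = [np.array(INPUT, dtype=float)]
    while T[-1] < ND:
        V, ts = stoc_eqs(RES[-1].copy(), ts)  # one stochastic event
        RES.append(np.array(V))
        T.append(T[-1] + ts)                  # advance by the sampled waiting time
    return np.array(T), np.array(RES)

# e.g. t, res = stoc_iteration([500., 25., 475., 0., 0.], ND=10.)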
def qqplot(th, D, interval=(0, 1)):
    """Create a q-q plot of theta with respect to density D

    :Parameters:
        *th*
            a sample that might be from the density D
        *D*
            a density object
    """
    th = th.copy()
    th.sort()
    nq = len(th)
    q = (pl.arange(0, nq, dtype='d') + 1)/nq
    x = pl.mgrid[interval[0]:interval[1]:1j*4*nq]
    f = D(x)
    qD = pl.cumsum(f)*pl.diff(x)[0]/pl.trapz(f, x)

    th_ = []
    for q_ in q:
        i = pl.where(qD <= q_)[0]
        th_.append(x[i[-1]])

    pl.plot(th, th_, '.')
    pl.plot([th[0], th[-1]], [th[0], th[-1]], 'k:')
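A hypothetical usage of qqplot above; D can be any callable density on the interval, here a normal pdf, and pl/scipy.stats are assumed to be available as in the other snippets.

# Hypothetical usage of qqplot (assumes `import pylab as pl`).
import pylab as pl
import scipy.stats

sample = scipy.stats.norm.rvs(loc=0.5, scale=0.1, size=500)   # made-up sample
qqplot(sample, scipy.stats.norm(loc=0.5, scale=0.1).pdf, interval=(0, 1))
pl.show()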
pl.subplot(2, 2, 2)
for sigma in [2, 4]:
    for mode in [1, 2]:
        dc = compute_decay(N, sigma, mode, gamma, False)
        pl.plot(dc, color[sigma] + marker[mode] + "-")
        l.append("sigma=%d, start=%d" % (sigma, mode))
pl.legend(l)

N = 15
l = []
pl.subplot(2, 2, 3)
for sigma in [2, 4]:
    for mode in [1, 2]:
        dc = compute_decay(N, sigma, mode, gamma, False)
        pl.plot(pl.cumsum(dc), color[sigma] + marker[mode] + "-")
        l.append("sigma=%d, start=%d" % (sigma, mode))
pl.legend(l, loc="lower right")

N = 50
l = []
pl.subplot(2, 2, 4)
for sigma in [2, 4]:
    for mode in [1, 2]:
        dc = compute_decay(N, sigma, mode, gamma, False)
        pl.plot(pl.cumsum(dc), color[sigma] + marker[mode] + "-")
        l.append("sigma=%d, start=%d" % (sigma, mode))
pl.legend(l, loc="lower right")
non_simple_words = []  # sorted(list(set(words) - simple_english))
number_of_simple_words = 0  # len(words) - len(non_simple_words)
for w in words:
    if w in simple_english:
        number_of_simple_words += 1
    else:
        non_simple_words.append(w)
non_simple_words = Counter(non_simple_words)

toc = time()

#print "style analysis (or, writer invariant, or writeprint):"
print "total number of words {:d}".format(total_length)
print "{:3.2f}% simple english".format(number_of_simple_words * 100. / total_length)

sentences_length = map(len, [i.split(' ') for i in sentences])
print "average sentence length {:.2f}".format(pylab.mean(sentences_length))
cusum = pylab.cumsum(pylab.array(sentences_length) - pylab.mean(sentences_length))
print "average word length {:.2f}".format(pylab.mean(map(len, tokens)))
#print "noun usage frequency {:.2f}%".format(normalized_counts['NN'] + normalized_counts['NNP'] + normalized_counts['NNS'])
#print "verb usage frequency {:.2f}%".format(normalized_counts['VBD'] + normalized_counts['VBP'] + normalized_counts['VBZ'])

# lexical richness
# according to the paper http://link.springer.com/article/10.1023%2FA%3A1001749303137 (1998, 15 years ago, 203 citations),
# the Yule K function, Z, and D are more constant than other measures; currently I'm using the Yule K function
#vocabulary_size = len(set(tokens))
#print "lexical richness; type token ratio {:.3f}".format(vocabulary_size / float(len(tokens)))  # from http://nltk.org/book3/ch01.html, also known as type token ratio

# lexical richness; hapax legomena
#print "hapax legomena", nltk.probability.FreqDist(words).hapaxes()

# lexical richness; dis legomena
#print "lexical richness; dis legomena {:.3f}".format(VN(2)*1./vocabulary_size)

# lexical richness; yule K function
freqs = Counter(nltk.probability.FreqDist(words).values())
print(nerr)
Voldmesh = Vmesh
Vmesh = Vs.compute_vertex_values()

elapsed_time = time.time() - start_time
print(fx(0.66, 0.01))
print(fy(0.66, 0.01))
print(elapsed_time)

# Simulations
import pylab

T = 100
nsteps = 100000

# Noise Generator
rBM = lambda tv: pylab.cumsum(pylab.randn(tv.size) * pylab.sqrt(pylab.diff(pylab.append(0, tv))))
tv = pylab.linspace(0, T, nsteps + 1)
dt = tv[2] - tv[1]
Bx = rBM(tv)
By = rBM(tv)

# Define functions that are used in the population dynamics simulation
C = lambda x, y: Cmax * beta * x * y / (beta * x + Cmax)
fx1 = lambda x, y: r * x * (1 - x / K) - C(x, y) - fx(x, y) * x
fy1 = lambda x, y: epsilon * C(x, y) - dr * y - fy(x, y) * y
gx = lambda x: sigmax * x
gy = lambda y: sigmay * y

# profit functions for the prey and predator
profitX = lambda x, y: PrHer * sqrt(fx(x, y) * x)
profitY = lambda x, y: PrCod * sqrt(fy(x, y) * y)
def validate_age_integrating_model_sim(N=500, delta_true=.15, pi_true=quadratic): ## generate simulated data a = pl.arange(0, 101, 1) pi_age_true = pi_true(a) model = data_simulation.simple_model(N) #model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10) #model.parameters['p']['smoothness'] = dict(amount='Very') age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int) age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int) age_weights = pl.ones_like(a) sum_pi_wt = pl.cumsum(pi_age_true * age_weights) sum_wt = pl.cumsum(age_weights) p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start]) # correct cases where age_start == age_end i = age_start == age_end if pl.any(i): p[i] = pi_age_true[age_start[i]] n = mc.runiform(100, 10000, size=N) model.input_data['age_start'] = age_start model.input_data['age_end'] = age_end model.input_data['effective_sample_size'] = n model.input_data['true'] = p model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n ## Then fit the model and compare the estimates to the truth model.vars = {} model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None) model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100) graphics.plot_one_ppc(model.vars['p'], 'p') graphics.plot_convergence_diag(model.vars) graphics.plot_one_type(model, model.vars['p'], {}, 'p') pl.plot(a, pi_age_true, 'r:', label='Truth') pl.legend(fancybox=True, shadow=True, loc='upper left') pl.show() model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean'] model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats( )['standard deviation'] data_simulation.add_quality_metrics(model.input_data) model.delta = pandas.DataFrame(dict(true=[delta_true])) model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean() model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std() data_simulation.add_quality_metrics(model.delta) print 'delta' print model.delta print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % ( model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()) model.mu = pandas.DataFrame( dict(true=pi_age_true, mu_pred=model.vars['p']['mu_age'].stats()['mean'], sigma_pred=model.vars['p']['mu_age'].stats() ['standard deviation'])) data_simulation.add_quality_metrics(model.mu) model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[]) data_simulation.add_to_results(model, 'delta') data_simulation.add_to_results(model, 'mu') data_simulation.add_to_results(model, 'input_data') model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split()) print model.results return model
def run(self, initialize=True, do_plot=False, verbose=None, **kwargs): ''' Run the simulation ''' T = sc.tic() # Reset settings and results if verbose is None: verbose = self['verbose'] if initialize: self.initialize() # Create people, results, etc. # Main simulation loop self.stopped = False # We've just been asked to run, so ensure we're unstopped for t in range(self.npts): # Check timing and stopping function elapsed = sc.toc(T, output=True) if elapsed > self['timelimit']: print( f"Time limit ({self['timelimit']} s) exceeded; stopping..." ) self.stopped = { 'why': 'timelimit', 'message': 'Time limit exceeded at step {t}', 't': t } if self['stop_func']: self.stopped = self['stop_func']( self, t) # Feed in the current simulation object and the time # If this gets set, stop running -- e.g. if the time limit is exceeded if self.stopped: break # Zero counts for this time step. n_susceptible = 0 n_exposed = 0 n_deaths = 0 n_recoveries = 0 n_infectious = 0 n_infections = 0 n_symptomatic = 0 n_recovered = 0 # Extract these for later use. The values do not change in the person loop and the dictionary lookup is expensive. rand_popdata = (self['usepopdata'] == 'random') beta = self['beta'] asym_factor = self['asym_factor'] diag_factor = self['diag_factor'] cont_factor = self['cont_factor'] beta_pop = self['beta_pop'] # Print progress if verbose >= 1: string = f' Running day {t:0.0f} of {self.pars["n_days"]} ({elapsed:0.2f} s elapsed)...' if verbose >= 2: sc.heading(string) else: print(string) # Update each person, skipping people who are susceptible not_susceptible = filter(lambda p: not p.susceptible, self.people.values()) n_susceptible = len(self.people) for person in not_susceptible: n_susceptible -= 1 # If exposed, check if the person becomes infectious or develops symptoms if person.exposed: n_exposed += 1 if not person.infectious and t >= person.date_infectious: # It's the day they become infectious person.infectious = True sc.printv( f' Person {person.uid} became infectious!', 2, verbose) if not person.symptomatic and person.date_symptomatic is not None and t >= person.date_symptomatic: # It's the day they develop symptoms person.symptomatic = True sc.printv( f' Person {person.uid} developed symptoms!', 2, verbose) # If infectious, check if anyone gets infected if person.infectious: # Check for death died = person.check_death(t) n_deaths += died # Check for recovery recovered = person.check_recovery(t) n_recoveries += recovered # If the person didn't die or recover, check for onward transmission if not died and not recovered: n_infectious += 1 # Count this person as infectious # Calculate transmission risk based on whether they're asymptomatic/diagnosed/have been isolated thisbeta = beta * \ (asym_factor if person.symptomatic else 1.) * \ (diag_factor if person.diagnosed else 1.) * \ (cont_factor if person.known_contact else 1.) 
# Determine who gets infected if rand_popdata: # Flat contacts transmission_inds = cvu.bf(thisbeta, person.contacts) else: # Dictionary of contacts -- extra loop over layers transmission_inds = [] for ckey in self.contact_keys: layer_beta = thisbeta * beta_pop[ckey] transmission_inds.extend( cvu.bf(layer_beta, person.contacts[ckey])) # Loop over people who do for contact_ind in transmission_inds: target_person = self.get_person( contact_ind) # Stored by integer # This person was diagnosed last time step: time to flag their contacts if person.date_diagnosed is not None and person.date_diagnosed == t - 1: target_person.known_contact = True # Skip people who are not susceptible if target_person.susceptible: n_infections += target_person.infect( t, person) # Actually infect them sc.printv( f' Person {person.uid} infected person {target_person.uid}!', 2, verbose) # Count people who developed symptoms if person.symptomatic: n_symptomatic += 1 # Count people who recovered if person.recovered: n_recovered += 1 # End of person loop; apply interventions for intervention in self['interventions']: intervention.apply(self, t) if self['interv_func'] is not None: # Apply custom intervention function self = self['interv_func'](self, t) # Update counts for this time step self.results['n_susceptible'][t] = n_susceptible self.results['n_exposed'][t] = n_exposed self.results['deaths'][t] = n_deaths self.results['recoveries'][t] = n_recoveries self.results['n_infectious'][t] = n_infectious self.results['infections'][t] = n_infections self.results['n_symptomatic'][t] = n_symptomatic self.results['n_recovered'][t] = n_recovered # End of time loop; compute cumulative results outside of the time loop self.results['cum_exposed'].values = pl.cumsum( self.results['infections'].values) + self[ 'n_infected'] # Include initially infected people self.results['cum_tested'].values = pl.cumsum( self.results['tests'].values) self.results['cum_diagnosed'].values = pl.cumsum( self.results['diagnoses'].values) self.results['cum_deaths'].values = pl.cumsum( self.results['deaths'].values) self.results['cum_recoveries'].values = pl.cumsum( self.results['recoveries'].values) # Add in the results from the interventions for intervention in self['interventions']: intervention.finalize(self) # Execute any post-processing # Scale the results for reskey in self.reskeys: if self.results[reskey].scale: self.results[reskey].values *= self['scale'] # Perform calculations on results self.compute_doubling() self.compute_r_eff() self.likelihood() # Tidy up self.results_ready = True sc.printv(f'\nRun finished after {elapsed:0.1f} s.\n', 1, verbose) self.results['summary'] = self.summary_stats() if do_plot: self.plot(**kwargs) # Convert to an odict to allow e.g. sim.people[25] later, and results to an objdict to allow e.g. sim.results.diagnoses self.people = sc.odict(self.people) self.results = sc.objdict(self.results) return self.results
def raster_tuning(ax): fullbehaviorDir = behaviorDir + subject + '/' behavName = subject + '_tuning_curve_' + tuningBehavior + '.h5' tuningBehavFileName = os.path.join(fullbehaviorDir, behavName) tuning_bdata = loadbehavior.BehaviorData(tuningBehavFileName, readmode='full') freqEachTrial = tuning_bdata['currentFreq'] possibleFreq = np.unique(freqEachTrial) numberOfTrials = len(freqEachTrial) # -- The old way of sorting (useful for plotting sorted raster) -- sortedTrials = [] numTrialsEachFreq = [ ] #Used to plot lines after each group of sorted trials for indf, oneFreq in enumerate( possibleFreq ): #indf is index of this freq and oneFreq is the frequency indsThisFreq = np.flatnonzero( freqEachTrial == oneFreq) #this gives indices of this frequency sortedTrials = np.concatenate( (sortedTrials, indsThisFreq)) #adds all indices to a list called sortedTrials numTrialsEachFreq.append( len(indsThisFreq)) #finds number of trials each frequency has sortingInds = argsort( sortedTrials) #gives array of indices that would sort the sortedTrials # -- Load event data and convert event timestamps to ms -- tuning_ephysDir = os.path.join(settings.EPHYS_PATH, subject, tuningEphys) tuning_eventFilename = os.path.join(tuning_ephysDir, 'all_channels.events') tuning_ev = loadopenephys.Events( tuning_eventFilename) #load ephys data (like bdata structure) tuning_eventTimes = np.array( tuning_ev.timestamps ) / SAMPLING_RATE #get array of timestamps for each event and convert to seconds by dividing by sampling rate (Hz). matches with eventID and tuning_evID = np.array( tuning_ev.eventID ) #loads the onset times of events (matches up with eventID to say if event 1 went on (1) or off (0) tuning_eventOnsetTimes = tuning_eventTimes[ tuning_evID == 1] #array that is a time stamp for when the chosen event happens. #ev.eventChannel woul load array of events like trial start and sound start and finish times (sound event is 0 and trial start is 1 for example). There is only one event though and its sound start while (numberOfTrials < len(tuning_eventOnsetTimes)): tuning_eventOnsetTimes = tuning_eventOnsetTimes[:-1] ####################################################################################################### ###################THIS IS SUCH A HACK TO GET SPKDATA FROM EPHYSCORE################################### ####################################################################################################### thisCell = celldatabase.CellInfo( animalName=subject, ############################################ ephysSession=tuningEphys, tuningSession='DO NOT NEED THIS', tetrode=tetrode, cluster=cluster, quality=1, depth=0, tuningBehavior='DO NOT NEED THIS', behavSession=tuningBehavior) tuning_spkData = ephyscore.CellData(thisCell) tuning_spkTimeStamps = tuning_spkData.spikes.timestamps (tuning_spikeTimesFromEventOnset, tuning_trialIndexForEachSpike, tuning_indexLimitsEachTrial) = spikesanalysis.eventlocked_spiketimes( tuning_spkTimeStamps, tuning_eventOnsetTimes, tuning_timeRange) #print 'numTrials ',max(tuning_trialIndexForEachSpike)##################################### ''' Create a vector with the spike timestamps w.r.t. events onset. (spikeTimesFromEventOnset,trialIndexForEachSpike,indexLimitsEachTrial) = eventlocked_spiketimes(timeStamps,eventOnsetTimes,timeRange) timeStamps: (np.array) the time of each spike. eventOnsetTimes: (np.array) the time of each instance of the event to lock to. timeRange: (list or np.array) two-element array specifying time-range to extract around event. 
spikeTimesFromEventOnset: 1D array with time of spikes locked to event. o trialIndexForEachSpike: 1D array with the trial corresponding to each spike. The first spike index is 0. indexLimitsEachTrial: [2,nTrials] range of spikes for each trial. Note that the range is from firstSpike to lastSpike+1 (like in python slices) spikeIndices ''' tuning_sortedIndexForEachSpike = sortingInds[ tuning_trialIndexForEachSpike] #Takes values of trialIndexForEachSpike and finds value of sortingInds at that index and makes array. This array gives an array with the sorted index of each trial for each spike # -- Calculate tuning -- #nSpikes = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset,indexLimitsEachTrial,responseRange) #array of the number of spikes in range for each trial '''Count number of spikes on each trial in a given time range. spikeTimesFromEventOnset: vector of spikes timestamps with respect to the onset of the event. indexLimitsEachTrial: each column contains [firstInd,lastInd+1] of the spikes on a trial. timeRange: time range to evaluate. Spike times exactly at the limits are not counted. returns nSpikes ''' ''' meanSpikesEachFrequency = np.empty(len(possibleFreq)) #make empty array of same size as possibleFreq # -- This part will be replace by something like behavioranalysis.find_trials_each_type -- trialsEachFreq = [] for indf,oneFreq in enumerate(possibleFreq): trialsEachFreq.append(np.flatnonzero(freqEachTrial==oneFreq)) #finds indices of each frequency. Appends them to get an array of indices of trials sorted by freq # -- Calculate average firing for each freq -- for indf,oneFreq in enumerate(possibleFreq): meanSpikesEachFrequency[indf] = np.mean(nSpikes[trialsEachFreq[indf]]) ''' #clf() #if (len(tuning_spkTimeStamps)>0): #ax1 = plt.subplot2grid((4,4), (3, 0), colspan=1) #spikesorting.plot_isi_loghist(spkData.spikes.timestamps) #ax3 = plt.subplot2grid((4,4), (3, 3), colspan=1) #spikesorting.plot_events_in_time(tuning_spkTimeStamps) #samples = tuning_spkData.spikes.samples.astype(float)-2**15 #samples = (1000.0/tuning_spkData.spikes.gain[0,0]) *samples #ax2 = plt.subplot2grid((4,4), (3, 1), colspan=2) #spikesorting.plot_waveforms(samples) #ax4 = plt.subplot2grid((4,4), (0, 0), colspan=3,rowspan = 3) plot(tuning_spikeTimesFromEventOnset, tuning_sortedIndexForEachSpike, '.', ms=3) #axvline(x=0, ymin=0, ymax=1, color='r') #The cumulative sum of the list of specific frequency presentations, #used below for plotting the lines across the figure. numTrials = cumsum(numTrialsEachFreq) #Plot the lines across the figure in between each group of sorted trials for indf, num in enumerate(numTrials): ax.axhline(y=num, xmin=0, xmax=1, color='0.90', zorder=0) tickPositions = numTrials - mean(numTrialsEachFreq) / 2 tickLabels = [ "%0.2f" % (possibleFreq[indf] / 1000) for indf in range(len(possibleFreq)) ] ax.set_yticks(tickPositions) ax.set_yticklabels(tickLabels) ax.set_ylim([-1, numberOfTrials]) ylabel('Frequency Presented (kHz), {} total trials'.format(numTrials[-1])) #title(ephysSession+' T{}c{}'.format(tetrodeID,clusterID)) xlabel('Time (sec)') ''' ax5 = plt.subplot2grid((4,4), (0, 3), colspan=1,rowspan=3) ax5.set_xscale('log') plot(possibleFreq,meanSpikesEachFrequency,'o-') ylabel('Avg spikes in window {0}-{1} sec'.format(*responseRange)) xlabel('Frequency') ''' #show() '''
def plot(self, do_save=None, fig_args=None, plot_args=None, scatter_args=None, axis_args=None, as_days=True, font_size=18, use_grid=True, verbose=None): ''' Plot the results -- can supply arguments for both the figure and the plots. Parameters ---------- do_save : bool or str Whether or not to save the figure. If a string, save to that filename. fig_args : dict Dictionary of kwargs to be passed to pl.figure() plot_args : dict Dictionary of kwargs to be passed to pl.plot() as_days : bool Whether to plot the x-axis as days or time points Returns ------- Figure handle ''' if verbose is None: verbose = self['verbose'] if verbose: print('Plotting...') if fig_args is None: fig_args = {'figsize': (26, 16)} if plot_args is None: plot_args = {'lw': 3, 'alpha': 0.7} if scatter_args is None: scatter_args = {'s': 150, 'marker': 's'} if axis_args is None: axis_args = { 'left': 0.1, 'bottom': 0.05, 'right': 0.9, 'top': 0.97, 'wspace': 0.2, 'hspace': 0.25 } fig = pl.figure(**fig_args) pl.subplots_adjust(**axis_args) pl.rcParams['font.size'] = font_size res = self.results # Shorten since heavily used # Plot everything colors = sc.gridcolors(5) to_plot = sc.odict({ # TODO 'Total counts': sc.odict({'n_susceptible':'Number susceptible', 'n_exposed':'Number exposed', 'n_infectious':'Number infectious', 'cum_diagnosed':'Number diagnosed', }), 'Daily counts': sc.odict({'infections':'New infections', 'tests':'Number of tests', 'diagnoses':'New diagnoses', }), }) data_mapping = { 'cum_diagnosed': pl.cumsum(self.data['new_positives']), 'tests': self.data['new_tests'], 'diagnoses': self.data['new_positives'], } for p, title, keylabels in to_plot.enumitems(): pl.subplot(2, 1, p + 1) for i, key, label in keylabels.enumitems(): this_color = colors[i + p] y = res[key] pl.plot(res['t'], y, label=label, **plot_args, c=this_color) if key in data_mapping: pl.scatter(self.data['day'], data_mapping[key], c=[this_color], **scatter_args) pl.scatter(pl.nan, pl.nan, c=[(0, 0, 0)], label='Data', **scatter_args) pl.grid(use_grid) cv.fixaxis(self) pl.ylabel('Count') pl.xlabel('Days since index case') pl.title(title) # Ensure the figure actually renders or saves if do_save: if isinstance(do_save, str): filename = do_save # It's a string, assume it's a filename else: filename = 'covid_abm_results.png' # Just give it a default name pl.savefig(filename) pl.show() return fig
def run(self, seed_infections=1, verbose=None, calc_likelihood=False, do_plot=False, **kwargs): ''' Run the simulation ''' T = sc.tic() # Reset settings and results if verbose is None: verbose = self['verbose'] self.init_results() self.init_people( seed_infections=seed_infections) # Actually create the people daily_tests = self.data[ 'new_tests'] # Number of tests each day, from the data evacuated = self.data['evacuated'] # Number of people evacuated # Main simulation loop for t in range(self.npts): # Print progress if verbose >= 1: string = f' Running day {t:0.0f} of {self["n_days"]}...' if verbose >= 2: sc.heading(string) else: print(string) test_probs = { } # Store the probability of each person getting tested # Update each person for person in self.people.values(): # Count susceptibles if person.susceptible: self.results['n_susceptible'][t] += 1 continue # Don't bother with the rest of the loop # Handle testing probability if person.infectious: test_probs[person.uid] = self[ 'symptomatic'] # They're infectious: high probability of testing else: test_probs[person.uid] = 1.0 # If exposed, check if the person becomes infectious if person.exposed: self.results['n_exposed'][t] += 1 if not person.infectious and t >= person.date_infectious: # It's the day they become infectious person.infectious = True if verbose >= 2: print( f' Person {person.uid} became infectious!' ) # If infectious, check if anyone gets infected if person.infectious: # First, check for recovery if person.date_recovered and t >= person.date_recovered: # It's the day they become infectious person.exposed = False person.infectious = False person.recovered = True self.results['recoveries'][t] += 1 else: self.results['n_infectious'][ t] += 1 # Count this person as infectious n_contacts = cv.pt( person.contacts ) # Draw the number of Poisson contacts for this person contact_inds = cv.choose_people( max_ind=len(self.people), n=n_contacts) # Choose people at random for contact_ind in contact_inds: exposure = cv.bt(self['r_contact'] ) # Check for exposure per person if exposure: target_person = self.people[contact_ind] if target_person.susceptible: # Skip people who are not susceptible self.results['infections'][t] += 1 target_person.susceptible = False target_person.exposed = True target_person.date_exposed = t incub_pars = dict(dist='normal_int', par1=self['incub'], par2=self['incub_std']) dur_pars = dict(dist='normal_int', par1=self['dur'], par2=self['dur_std']) incub_dist = cv.sample(**incub_pars) dur_dist = cv.sample(**dur_pars) target_person.date_infectious = t + incub_dist target_person.date_recovered = target_person.date_infectious + dur_dist if verbose >= 2: print( f' Person {person.uid} infected person {target_person.uid}!' 
) # Count people who recovered if person.recovered: self.results['n_recovered'][t] += 1 # Implement testing -- this is outside of the loop over people, but inside the loop over time if t < len( daily_tests ): # Don't know how long the data is, ensure we don't go past the end n_tests = daily_tests.iloc[t] # Number of tests for this day if n_tests and not pl.isnan( n_tests): # There are tests this day self.results['tests'][ t] = n_tests # Store the number of tests test_probs = pl.array(list(test_probs.values())) test_probs /= test_probs.sum() test_inds = cv.choose_people_weighted(probs=test_probs, n=n_tests) uids_to_pop = [] for test_ind in test_inds: tested_person = self.people[test_ind] if tested_person.infectious and cv.bt( self['sensitivity'] ): # Person was tested and is true-positive self.results['diagnoses'][t] += 1 tested_person.diagnosed = True if self['evac_positives']: uids_to_pop.append(tested_person.uid) if verbose >= 2: print( f' Person {person.uid} was diagnosed!' ) for uid in uids_to_pop: # Remove people from the ship once they're diagnosed self.off_ship[uid] = self.people.pop(uid) # Implement quarantine if t == self['quarantine']: if verbose >= 1: print(f'Implementing quarantine on day {t}...') for person in self.people.values(): if 'quarantine_eff' in self.pars.keys(): quarantine_eff = self['quarantine_eff'] # Both else: if person.crew: quarantine_eff = self['quarantine_eff_c'] # Crew else: quarantine_eff = self['quarantine_eff_g'] # Guests person.contacts *= quarantine_eff # Implement testing change if t == self['testing_change']: if verbose >= 1: print(f'Implementing testing change on day {t}...') self['symptomatic'] *= self[ 'testing_symptoms'] # Reduce the proportion of symptomatic testing # Implement evacuations if t < len(evacuated): n_evacuated = evacuated.iloc[ t] # Number of evacuees for this day if n_evacuated and not pl.isnan( n_evacuated ): # There are evacuees this day # TODO -- refactor with n_tests if verbose >= 1: print(f'Implementing evacuation on day {t}') evac_inds = cv.choose_people(max_ind=len(self.people), n=n_evacuated) uids_to_pop = [] for evac_ind in evac_inds: evac_person = self.people[evac_ind] if evac_person.infectious and cv.bt( self['sensitivity']): self.results['evac_diagnoses'][t] += 1 uids_to_pop.append(evac_person.uid) for uid in uids_to_pop: # Remove people from the ship once they're diagnosed self.off_ship[uid] = self.people.pop(uid) # Compute cumulative results self.results['cum_exposed'] = pl.cumsum(self.results['infections']) self.results['cum_tested'] = pl.cumsum(self.results['tests']) self.results['cum_diagnosed'] = pl.cumsum(self.results['diagnoses']) # Compute likelihood if calc_likelihood: self.likelihood() # Tidy up self.results['ready'] = True elapsed = sc.toc(T, output=True) if verbose >= 1: print(f'\nRun finished after {elapsed:0.1f} s.\n') summary = self.summary_stats() print(f"""Summary: {summary['n_susceptible']:5.0f} susceptible {summary['n_exposed']:5.0f} exposed {summary['n_infectious']:5.0f} infectious """) if do_plot: self.plot(**kwargs) return self.results
pylab.ylabel(r"\textit{Freeway offset} (%s)" % length_units) def congestionDecrease(ttt, ff=totalFreeFlow, c=totalCongestion): return 100 * (1 - (ttt - ff) / c) adjoint_ttt = data["experiment"]["onerun"]["adjoint"] alinea_ttt = data["experiment"]["onerun"]["alinea"] running_time = map(list, zip(*data["experiment"]["adjoint"]["runtime"])) newfigure() max_rt = 2 * pylab.median(pylab.diff([0] + running_time[0])) running_time[0] = pylab.cumsum([min(max_rt, rt) for rt in pylab.diff([0] + running_time[0])]) pylab.plot( [0] + [x / 1000.0 for x in running_time[0]], [0] + [congestionDecrease(a) for a in running_time[1]], LineCycler.next(), label="Adjoint", ) pylab.hold(True) pylab.plot( [0] + [x / 1000.0 for x in running_time[0]], [congestionDecrease(alinea_ttt)] * (1 + len(running_time[0])), LineCycler.next(), label="Alinea", ) pylab.xlabel(r"\textit{Running time} (seconds)")
def thermodynamic_pathway_analysis(S, rids, fluxes, cids, thermodynamics, html_writer): Nr, Nc = S.shape # adjust the directions of the reactions in S to fit the fluxes fluxes = map(abs, fluxes) kegg = Kegg.getInstance() #kegg.write_reactions_to_html(html_writer, S, rids, fluxes, cids, show_cids=False) dG0_f = thermodynamics.GetTransformedFormationEnergies(cids) bounds = [thermodynamics.bounds.get(cid, (None, None)) for cid in cids] res = {} try: c_mid = thermodynamics.c_mid c_range = thermodynamics.c_range res['pCr'] = find_pCr(S, dG0_f, c_mid=c_mid, ratio=3.0, bounds=bounds) #res['PCR2'] = find_unfeasible_concentrations(S, dG0_f, c_range, c_mid=c_mid, bounds=bounds) res['MTDF'] = find_mtdf(S, dG0_f, c_range=c_range, bounds=bounds) #path = pathway_modelling.Pathway(S, dG0_f) #res['pCr_regularized'] = path.FindPcr_OptimizeConcentrations( # c_mid=c_mid, ratio=3.0, bounds=bounds) #res['pCr_regularized (dGr < -2.7)'] = path.FindPcr_OptimizeConcentrations( # c_mid=c_mid, ratio=3.0, bounds=bounds, max_reaction_dg=-2.7) #res['MTDF_regularized'] = path.FindMTDF_OptimizeConcentrations( # c_range=c_range, bounds=bounds, c_mid=c_mid) #costs = [] #for max_dg in pylab.arange(0.0,-4.25,-0.25): # c = path.FindPcrEnzymeCost(c_mid=c_mid, # ratio=3.0, # bounds=bounds, # max_reaction_dg=max_dg, # fluxes=fluxes) # costs.append(str(c)) #print ', '.join(costs) except LinProgNoSolutionException: html_writer.write( '<b>No feasible solution found, cannot calculate the Margin</b>') # plot the profile graph pylab.rcParams['text.usetex'] = False pylab.rcParams['legend.fontsize'] = 10 pylab.rcParams['font.family'] = 'sans-serif' pylab.rcParams['font.size'] = 12 pylab.rcParams['lines.linewidth'] = 2 pylab.rcParams['lines.markersize'] = 5 pylab.rcParams['figure.figsize'] = [8.0, 6.0] pylab.rcParams['figure.dpi'] = 100 # plot the thermodynamic profile in standard conditions profile_fig = pylab.figure() profile_fig.hold(True) pylab.title('Thermodynamic profile', figure=profile_fig) pylab.ylabel('cumulative dG [kJ/mol]', figure=profile_fig) pylab.xlabel('Reaction KEGG ID', figure=profile_fig) pylab.xticks(pylab.arange(1, Nr + 1), ['R%05d' % rids[i] for i in xrange(Nr)], fontproperties=FontProperties(size=8), rotation=30) dG0_r = pylab.zeros((Nr, 1)) for r in range(Nr): reactants = pylab.find(S[r, :]) dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants]) nan_indices = pylab.find(pylab.isnan(dG0_r)) finite_indices = pylab.find(pylab.isfinite(dG0_r)) if (len(nan_indices) > 0): dG0_r_finite = pylab.zeros((Nr, 1)) dG0_r_finite[finite_indices] = dG0_r[finite_indices] cum_dG0_r = pylab.cumsum( [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)]) else: cum_dG0_r = pylab.cumsum([0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)]) pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG0_r, figure=profile_fig, label='Standard [1M]') # plot the thermodynamic profile for the different optimization schemes pylab.grid(True, figure=profile_fig) for optimization in res.keys(): dG_f, conc, score = res[optimization] if score is None: continue dG_r = pylab.dot(S, dG_f) cum_dG_r = pylab.cumsum([0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)]) pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG_r, figure=profile_fig, label='%s = %.1f' % (optimization, score)) pylab.legend() html_writer.embed_matplotlib_figure(profile_fig, width=480, height=360) # plot the optimal metabolite concentrations for the different optimization schemes ind_nan = pylab.find(pylab.isnan(dG0_f)) for optimization in res.keys(): dG_f, conc, score = res[optimization] if score is 
None: continue dG_r = pylab.dot(S, dG_f) conc[ ind_nan] = thermodynamics.c_mid # give all compounds with unknown dG0_f the middle concentration value conc_fig = pylab.figure() conc_fig.suptitle('Concentrations (%s = %.1f)' % (optimization, score)) pylab.xscale('log', figure=conc_fig) pylab.ylabel('Compound KEGG ID', figure=conc_fig) pylab.xlabel('Concentration [M]', figure=conc_fig) pylab.yticks(range(Nc, 0, -1), ["C%05d" % cid for cid in cids], fontproperties=FontProperties(size=8)) pylab.plot(conc, range(Nc, 0, -1), '*b', figure=conc_fig) x_min = conc.min() / 10 x_max = conc.max() * 10 y_min = 0 y_max = Nc + 1 for c in range(Nc): pylab.text(conc[c, 0] * 1.1, Nc - c, kegg.cid2name(cids[c]), \ figure=conc_fig, fontsize=6, rotation=0) b_low, b_up = bounds[c] if b_low is None: b_low = x_min if b_up is None: b_up = x_max pylab.plot([b_low, b_up], [Nc - c, Nc - c], '-k', linewidth=0.4) if optimization.startswith('pCr'): c_range_opt = pC_to_range(score, c_mid=thermodynamics.c_mid, ratio=3.0) pylab.axvspan(c_range_opt[0], c_range_opt[1], facecolor='g', alpha=0.3, figure=conc_fig) else: pylab.axvspan(thermodynamics.c_range[0], thermodynamics.c_range[1], facecolor='r', alpha=0.3, figure=conc_fig) pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig) try: html_writer.embed_matplotlib_figure(conc_fig, width=420, height=360) except AttributeError: html_writer.write('<b>Failed to generate concentration figure</b>') # write all the results in tables as well for optimization in res.keys(): (dG_f, conc, score) = res[optimization] html_writer.write( '<p>Biochemical Compound Formation Energies (%s = %.1f)<br>\n' % (optimization, score)) html_writer.write('<table border="1">\n') html_writer.write(' ' + '<td>%s</td>' * 5 % ("KEGG CID", "Compound Name", "Concentration [M]", "dG'0_f [kJ/mol]", "dG'_f [kJ/mol]") + '\n') for c in range(Nc): cid = cids[c] name = kegg.cid2name(cid) if (pylab.isnan(dG0_f[c, 0])): html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' % \ (kegg.cid2link(cid), cid, name, "N/A", "N/A", "N/A")) else: html_writer.write('<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%.2g</td><td>%.2f</td><td>%.2f</td></tr>\n' % \ (kegg.cid2link(cid), cid, name, conc[c, 0], dG0_f[c, 0], dG_f[c, 0])) html_writer.write('</table></p>\n') html_writer.write( '<p>Biochemical Reaction Energies (%s = %.1f)<br>\n' % (optimization, score)) html_writer.write('<table border="1">\n') html_writer.write(' ' + '<td>%s</td>' * 3 % ("KEGG RID", "dG'0_r [kJ/mol]", "dG'_r [kJ/mol]") + '\n') dG_r = pylab.dot(S, dG_f) for r in range(Nr): rid = rids[r] if (pylab.isnan(dG0_r[r, 0])): html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%s</td><td>%.2f</td></tr>\n' % \ (kegg.rid2link(rid), kegg.rid2name(rid), rid, "N/A", dG_r[r, 0])) else: html_writer.write('<tr><td><a href="%s" title="%s">R%05d</a></td><td>%.2f</td><td>%.2f</td></tr>\n' % \ (kegg.rid2link(rid), kegg.rid2name(rid), rid, dG0_r[r, 0], dG_r[r, 0])) html_writer.write('</table></p>\n') return res
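# Hedged minimal sketch (hypothetical numbers, not from the pathway above): the cumulative
# thermodynamic profile plotted here is a running sum of per-reaction dG values weighted by
# flux, with a leading 0 so the curve starts at the origin.
import pylab

dG_r_example = [-10.0, 5.0, -20.0, -3.0]   # per-reaction dG [kJ/mol], made-up values
flux_example = [1, 1, 2, 1]                 # relative fluxes, made-up values
cum_dG_example = pylab.cumsum([0] + [d * f for d, f in zip(dG_r_example, flux_example)])
pylab.plot(pylab.arange(0.5, len(dG_r_example) + 1), cum_dG_example)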
def makecumulativenumbers():
    randomnumbers = pl.randn(50, 10)
    cumulativenumbers = pl.cumsum(abs(randomnumbers), axis=0)
    return cumulativenumbers
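# Hedged usage sketch (assumes pylab/matplotlib, as elsewhere in this file): each column of
# the array returned above is non-decreasing, because a cumulative sum of absolute values
# can only grow.
import pylab as pl

traces = makecumulativenumbers()   # shape (50, 10)
pl.plot(traces)                    # one monotone line per column
pl.xlabel('Step')
pl.ylabel('Cumulative |value|')
pl.show()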
def plot(self, to_plot=None, do_save=None, fig_path=None, fig_args=None, plot_args=None, scatter_args=None, axis_args=None, as_dates=True, interval=None, dateformat=None, font_size=18, font_family=None, use_grid=True, use_commaticks=True, do_show=True, verbose=None): ''' Plot the results -- can supply arguments for both the figure and the plots. Args: to_plot (dict): Nested dict of results to plot; see default_sim_plots for structure do_save (bool or str): Whether or not to save the figure. If a string, save to that filename. fig_path (str): Path to save the figure fig_args (dict): Dictionary of kwargs to be passed to pl.figure() plot_args (dict): Dictionary of kwargs to be passed to pl.plot() scatter_args (dict): Dictionary of kwargs to be passed to pl.scatter() axis_args (dict): Dictionary of kwargs to be passed to pl.subplots_adjust() as_dates (bool): Whether to plot the x-axis as dates or time points interval (int): Interval between tick marks dateformat (str): Date string format, e.g. '%B %d' font_size (int): Size of the font font_family (str): Font face use_grid (bool): Whether or not to plot gridlines use_commaticks (bool): Plot y-axis with commas rather than scientific notation do_show (bool): Whether or not to show the figure verbose (bool): Display a bit of extra information Returns: fig: Figure handle ''' if verbose is None: verbose = self['verbose'] sc.printv('Plotting...', 1, verbose) if to_plot is None: to_plot = default_sim_plots to_plot = sc.odict(to_plot) # In case it's supplied as a dict # Handle input arguments -- merge user input with defaults fig_args = sc.mergedicts({'figsize': (16, 12)}, fig_args) plot_args = sc.mergedicts({'lw': 3, 'alpha': 0.7}, plot_args) scatter_args = sc.mergedicts({'s': 150, 'marker': 's'}, scatter_args) axis_args = sc.mergedicts( { 'left': 0.1, 'bottom': 0.05, 'right': 0.9, 'top': 0.97, 'wspace': 0.2, 'hspace': 0.25 }, axis_args) fig = pl.figure(**fig_args) pl.subplots_adjust(**axis_args) pl.rcParams['font.size'] = font_size if font_family: pl.rcParams['font.family'] = font_family res = self.results # Shorten since heavily used # Plot everything colors = sc.gridcolors(max([len(tp) for tp in to_plot.values()])) # Define the data mapping. Must be here since uses functions if self.data is not None and len(self.data): data_mapping = { 'cum_exposed': pl.cumsum(self.data['new_infections']), 'cum_diagnosed': pl.cumsum(self.data['new_positives']), 'cum_tested': pl.cumsum(self.data['new_tests']), 'infections': self.data['new_infections'], 'tests': self.data['new_tests'], 'diagnoses': self.data['new_positives'], } else: data_mapping = {} for p, title, keylabels in to_plot.enumitems(): ax = pl.subplot(2, 1, p + 1) for i, key, label in keylabels.enumitems(): this_color = colors[i] y = res[key].values pl.plot(res['t'], y, label=label, **plot_args, c=this_color) if key in data_mapping: pl.scatter(self.data['day'], data_mapping[key], c=[this_color], **scatter_args) if self.data is not None and len(self.data): pl.scatter(pl.nan, pl.nan, c=[(0, 0, 0)], label='Data', **scatter_args) pl.grid(use_grid) cvu.fixaxis(self) if use_commaticks: sc.commaticks() pl.title(title) # Optionally reset tick marks (useful for e.g. 
plotting weeks/months) if interval: xmin, xmax = ax.get_xlim() ax.set_xticks(pl.arange(xmin, xmax + 1, interval)) # Set xticks as dates if as_dates: xticks = ax.get_xticks() xticklabels = self.inds2dates(xticks, dateformat=dateformat) ax.set_xticklabels(xticklabels) # Plot interventions for intervention in self['interventions']: intervention.plot(self, ax) # Ensure the figure actually renders or saves if do_save: if fig_path is None: # No figpath provided - see whether do_save is a figpath if isinstance(do_save, str): fig_path = do_save # It's a string, assume it's a filename else: fig_path = 'covasim.png' # Just give it a default name fig_path = sc.makefilepath( fig_path) # Ensure it's valid, including creating the folder pl.savefig(fig_path) if do_show: pl.show() else: pl.close(fig) return fig
from pylab import loadmat, array, concatenate, cumsum

A = loadmat('data/MOCAP')
data = A['batchdata']
seqlengths = A['seqlengths']
seqstarts = concatenate(([0], cumsum(seqlengths)))
seqprobs = seqlengths / float(seqlengths.sum())

# NOTE: `ifloor` and `true_multinomial` are not part of standard pylab; they are assumed to
# come from a project-specific pylab extension used by this script.
from pylab import rand, ifloor

def int_rand_range(a, b):
    return ifloor(rand() * (b - a) + a)

from pylab import true_multinomial, newaxis

def data_sample(batch_size):
    # pick a sequence with probability proportional to its length, then a random window
    seq_id = int(true_multinomial(seqprobs[newaxis, :]))
    seq_len = seqlengths[seq_id]
    seq_absolute_A = seqstarts[seq_id]
    seq_absolute_B = seqstarts[seq_id + 1]
    seq_pos_A = int_rand_range(seq_absolute_A, seq_absolute_B - batch_size)
    return data[seq_pos_A:seq_pos_A + batch_size]
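# Hedged usage sketch (assumes the custom pylab helpers imported above are available):
# draw a few fixed-length windows from the concatenated motion-capture sequences.
for _ in range(3):
    batch = data_sample(100)   # 100 consecutive frames from one randomly chosen sequence
    print(batch.shape)         # expected: (100, n_features)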
def raster_tuning(ax): fullbehaviorDir = behaviorDir+subject+'/' behavName = subject+'_tuning_curve_'+tuningBehavior+'.h5' tuningBehavFileName=os.path.join(fullbehaviorDir, behavName) tuning_bdata = loadbehavior.BehaviorData(tuningBehavFileName,readmode='full') freqEachTrial = tuning_bdata['currentFreq'] possibleFreq = np.unique(freqEachTrial) numberOfTrials = len(freqEachTrial) # -- The old way of sorting (useful for plotting sorted raster) -- sortedTrials = [] numTrialsEachFreq = [] #Used to plot lines after each group of sorted trials for indf,oneFreq in enumerate(possibleFreq): #indf is index of this freq and oneFreq is the frequency indsThisFreq = np.flatnonzero(freqEachTrial==oneFreq) #this gives indices of this frequency sortedTrials = np.concatenate((sortedTrials,indsThisFreq)) #adds all indices to a list called sortedTrials numTrialsEachFreq.append(len(indsThisFreq)) #finds number of trials each frequency has sortingInds = argsort(sortedTrials) #gives array of indices that would sort the sortedTrials # -- Load event data and convert event timestamps to ms -- tuning_ephysDir = os.path.join(settings.EPHYS_PATH, subject,tuningEphys) tuning_eventFilename=os.path.join(tuning_ephysDir, 'all_channels.events') tuning_ev=loadopenephys.Events(tuning_eventFilename) #load ephys data (like bdata structure) tuning_eventTimes=np.array(tuning_ev.timestamps)/SAMPLING_RATE #get array of timestamps for each event and convert to seconds by dividing by sampling rate (Hz). matches with eventID and tuning_evID=np.array(tuning_ev.eventID) #loads the onset times of events (matches up with eventID to say if event 1 went on (1) or off (0) tuning_eventOnsetTimes=tuning_eventTimes[tuning_evID==1] #array that is a time stamp for when the chosen event happens. #ev.eventChannel woul load array of events like trial start and sound start and finish times (sound event is 0 and trial start is 1 for example). There is only one event though and its sound start while (numberOfTrials < len(tuning_eventOnsetTimes)): tuning_eventOnsetTimes = tuning_eventOnsetTimes[:-1] ####################################################################################################### ###################THIS IS SUCH A HACK TO GET SPKDATA FROM EPHYSCORE################################### ####################################################################################################### thisCell = celldatabase.CellInfo(animalName=subject,############################################ ephysSession = tuningEphys, tuningSession = 'DO NOT NEED THIS', tetrode = tetrode, cluster = cluster, quality = 1, depth = 0, tuningBehavior = 'DO NOT NEED THIS', behavSession = tuningBehavior) tuning_spkData = ephyscore.CellData(thisCell) tuning_spkTimeStamps = tuning_spkData.spikes.timestamps (tuning_spikeTimesFromEventOnset,tuning_trialIndexForEachSpike,tuning_indexLimitsEachTrial) = spikesanalysis.eventlocked_spiketimes(tuning_spkTimeStamps,tuning_eventOnsetTimes,tuning_timeRange) #print 'numTrials ',max(tuning_trialIndexForEachSpike)##################################### ''' Create a vector with the spike timestamps w.r.t. events onset. (spikeTimesFromEventOnset,trialIndexForEachSpike,indexLimitsEachTrial) = eventlocked_spiketimes(timeStamps,eventOnsetTimes,timeRange) timeStamps: (np.array) the time of each spike. eventOnsetTimes: (np.array) the time of each instance of the event to lock to. timeRange: (list or np.array) two-element array specifying time-range to extract around event. 
spikeTimesFromEventOnset: 1D array with time of spikes locked to event. o trialIndexForEachSpike: 1D array with the trial corresponding to each spike. The first spike index is 0. indexLimitsEachTrial: [2,nTrials] range of spikes for each trial. Note that the range is from firstSpike to lastSpike+1 (like in python slices) spikeIndices ''' tuning_sortedIndexForEachSpike = sortingInds[tuning_trialIndexForEachSpike] #Takes values of trialIndexForEachSpike and finds value of sortingInds at that index and makes array. This array gives an array with the sorted index of each trial for each spike # -- Calculate tuning -- #nSpikes = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset,indexLimitsEachTrial,responseRange) #array of the number of spikes in range for each trial '''Count number of spikes on each trial in a given time range. spikeTimesFromEventOnset: vector of spikes timestamps with respect to the onset of the event. indexLimitsEachTrial: each column contains [firstInd,lastInd+1] of the spikes on a trial. timeRange: time range to evaluate. Spike times exactly at the limits are not counted. returns nSpikes ''' ''' meanSpikesEachFrequency = np.empty(len(possibleFreq)) #make empty array of same size as possibleFreq # -- This part will be replace by something like behavioranalysis.find_trials_each_type -- trialsEachFreq = [] for indf,oneFreq in enumerate(possibleFreq): trialsEachFreq.append(np.flatnonzero(freqEachTrial==oneFreq)) #finds indices of each frequency. Appends them to get an array of indices of trials sorted by freq # -- Calculate average firing for each freq -- for indf,oneFreq in enumerate(possibleFreq): meanSpikesEachFrequency[indf] = np.mean(nSpikes[trialsEachFreq[indf]]) ''' #clf() #if (len(tuning_spkTimeStamps)>0): #ax1 = plt.subplot2grid((4,4), (3, 0), colspan=1) #spikesorting.plot_isi_loghist(spkData.spikes.timestamps) #ax3 = plt.subplot2grid((4,4), (3, 3), colspan=1) #spikesorting.plot_events_in_time(tuning_spkTimeStamps) #samples = tuning_spkData.spikes.samples.astype(float)-2**15 #samples = (1000.0/tuning_spkData.spikes.gain[0,0]) *samples #ax2 = plt.subplot2grid((4,4), (3, 1), colspan=2) #spikesorting.plot_waveforms(samples) #ax4 = plt.subplot2grid((4,4), (0, 0), colspan=3,rowspan = 3) plot(tuning_spikeTimesFromEventOnset, tuning_sortedIndexForEachSpike, '.', ms=3) #axvline(x=0, ymin=0, ymax=1, color='r') #The cumulative sum of the list of specific frequency presentations, #used below for plotting the lines across the figure. numTrials = cumsum(numTrialsEachFreq) #Plot the lines across the figure in between each group of sorted trials for indf, num in enumerate(numTrials): ax.axhline(y = num, xmin = 0, xmax = 1, color = '0.90', zorder = 0) tickPositions = numTrials - mean(numTrialsEachFreq)/2 tickLabels = ["%0.2f" % (possibleFreq[indf]/1000) for indf in range(len(possibleFreq))] ax.set_yticks(tickPositions) ax.set_yticklabels(tickLabels) ax.set_ylim([-1,numberOfTrials]) ylabel('Frequency Presented (kHz), {} total trials'.format(numTrials[-1])) #title(ephysSession+' T{}c{}'.format(tetrodeID,clusterID)) xlabel('Time (sec)') ''' ax5 = plt.subplot2grid((4,4), (0, 3), colspan=1,rowspan=3) ax5.set_xscale('log') plot(possibleFreq,meanSpikesEachFrequency,'o-') ylabel('Avg spikes in window {0}-{1} sec'.format(*responseRange)) xlabel('Frequency') ''' #show() '''
def CompareMtdf(self, target_mtdf=None): n_pathways = len(self.pathways) for i, (name, pathway_data) in enumerate(self.pathways.iteritems()): logging.info('Analyzing pathway %s', name) self.html_writer.write('<div margin="20px"><div><b>%s</b></div>' % name) self.GetConditions(pathway_data) S, rids, fluxes, cids = self.GetReactions(name, pathway_data) self.WriteReactionsToHtml(S, rids, fluxes, cids, show_cids=False) # Bounds on concentrations. bounds = [ self.thermo.bounds.get(cid, (None, None)) for cid in cids ] # All fluxes are forwards fluxes = map(abs, fluxes) dG0_f = self.thermo.GetTransformedFormationEnergies(cids) c_mid = self.thermo.c_mid c_range = self.thermo.c_range path = pathway_modelling.Pathway(S, dG0_f) if target_mtdf is not None: _ln_conc, score = path.FindMtdf_Regularized( c_range, bounds, c_mid, min_mtdf=target_mtdf, max_mtdf=target_mtdf) else: _ln_conc, score = path.FindMTDF_OptimizeConcentrations( c_range, bounds, c_mid) if score is None: logging.error('No MTDF score for %s', name) continue Nr, Nc = S.shape profile_fig = pylab.figure() profile_fig.hold(True) pylab.title('Thermodynamic Profile', figure=profile_fig) pylab.ylabel('Cumulative dG [kJ/mol]', figure=profile_fig) pylab.xlabel('Reaction KEGG ID', figure=profile_fig) pylab.grid(True, figure=profile_fig) rids = ['%s' % rids[i] for i in xrange(Nr)] pylab.xticks(pylab.arange(1, Nr + 1), rids, fontproperties=FontProperties(size=8), rotation=30) dG0_r = pylab.zeros((Nr, 1)) for r in range(Nr): reactants = pylab.find(S[r, :]) dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants]) nan_indices = pylab.find(pylab.isnan(dG0_r)) finite_indices = pylab.find(pylab.isfinite(dG0_r)) if (len(nan_indices) > 0): dG0_r_finite = pylab.zeros((Nr, 1)) dG0_r_finite[finite_indices] = dG0_r[finite_indices] cum_dG0_r = pylab.cumsum( [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)]) else: cum_dG0_r = pylab.cumsum( [0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)]) pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG0_r, 'g--', label='Standard [1M]', figure=profile_fig) # plot the thermodynamic profile for the different optimization schemes dG_r = pylab.dot(S, dG_f) self.html_writer.write('<ol>') for i, dG in enumerate(dG_r): self.html_writer.write('<li>%s: %.2f' % (rids[i], dG)) self.html_writer.write('</ol>') cum_dG_r = pylab.cumsum( [0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)]) pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG_r, figure=profile_fig, label='%s MTDF = %.1f' % (name, score)) pylab.legend(['Standard conditions', 'MTDF'], 'lower left') fname = '%s-profile-fig' % name html_writer.embed_matplotlib_figure(profile_fig, width=640, height=480, name=fname) # Give all compounds with unknown dG0_f the middle concentration value conc[nan_indices] = self.thermo.c_mid unconstrained_cs = [] unconstrained_cids = [] for i, bound in enumerate(bounds): b_low, b_up = bound if b_low is None and b_up is None: unconstrained_cs.append(conc[i, 0]) unconstrained_cids.append(cids[i]) n_constrained = len(unconstrained_cs) conc_fig = pylab.figure() conc_fig.suptitle('Concentrations %s (MTDF = %.1f)' % (name, score)) pylab.xscale('log', figure=conc_fig) pylab.ylabel('Compound KEGG ID', figure=conc_fig) pylab.xlabel('Concentration [M]', figure=conc_fig) cids_names = ["C%05d" % cid for cid in unconstrained_cids] pylab.yticks(range(n_constrained, 0, -1), cids_names, fontproperties=FontProperties(size=8)) pylab.plot(unconstrained_cs, range(n_constrained, 0, -1), '*b', figure=conc_fig) x_min = self.thermo.c_range[0] / 10 x_max = self.thermo.c_range[1] * 
50 y_min = 0 y_max = n_constrained + 1 for i, concentration in enumerate(unconstrained_cs): pylab.text(concentration * 1.1, n_constrained - i, kegg.cid2name(unconstrained_cids[i]), figure=conc_fig, fontsize=6, rotation=0) y_val = n_constrained - i pylab.plot([x_min, x_max], [y_val, y_val], '-k', linewidth=0.4) pylab.axvspan(min(unconstrained_cs), max(unconstrained_cs), facecolor='g', alpha=0.3, figure=conc_fig) pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig) fname = '%s-mtdf-conc-fig' % name html_writer.embed_matplotlib_figure(conc_fig, width=640, height=480, name=fname) self.html_writer.write('</div>')
def signal_in_unit(self, _unit):
    """Return the cumulative sum of the signal, expressed in the unit `_unit`."""
    multiplier = self.signal_unit.expressInUnit(_unit)
    return pl.cumsum(self.signal) * multiplier
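# Hedged standalone sketch (hypothetical names and values; the real unit API is not shown here):
# the method above integrates the signal via a cumulative sum and rescales it into the target unit.
import pylab as pl

increments_native = [0.2, 0.5, 0.1, 0.4]   # per-sample increments in the signal's native unit
to_target_unit = 1.0 / 60.0                 # example multiplier, e.g. seconds -> minutes
cumulative_in_target = pl.cumsum(increments_native) * to_target_unit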
def validate_ai_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'): ## generate simulated data a = pl.arange(0, 101, 1) pi_age_true = pi_true(a) import dismod3 import simplejson as json model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json())) gbd_hierarchy = model.hierarchy model = data_simulation.simple_model(N) model.hierarchy = gbd_hierarchy model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10) model.parameters['p']['smoothness'] = dict(amount=smoothness) model.parameters['p']['heterogeneity'] = heterogeneity age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int) age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int) age_weights = pl.ones_like(a) sum_pi_wt = pl.cumsum(pi_age_true*age_weights) sum_wt = pl.cumsum(age_weights*1.) p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start]) # correct cases where age_start == age_end i = age_start == age_end if pl.any(i): p[i] = pi_age_true[age_start[i]] model.input_data['age_start'] = age_start model.input_data['age_end'] = age_end model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N) from validate_covariates import alpha_true_sim area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR']) alpha = alpha_true_sim(model, area_list, sigma_true) print alpha model.input_data['true'] = pl.nan model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)] for i, a in model.input_data['area'].iteritems(): model.input_data['true'][i] = p[i] * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha])) p = model.input_data['true'] n = model.input_data['effective_sample_size'] model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n ## Then fit the model and compare the estimates to the truth model.vars = {} model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None) #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100) model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100) graphics.plot_one_ppc(model.vars['p'], 'p') graphics.plot_convergence_diag(model.vars) graphics.plot_one_type(model, model.vars['p'], {}, 'p') pl.plot(range(101), pi_age_true, 'r:', label='Truth') pl.legend(fancybox=True, shadow=True, loc='upper left') pl.show() model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean'] model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation'] data_simulation.add_quality_metrics(model.input_data) model.delta = pandas.DataFrame(dict(true=[delta_true])) model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean() model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std() data_simulation.add_quality_metrics(model.delta) model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)]) model.alpha['true'] = pandas.Series(dict(alpha)) model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns) model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns) model.alpha = 
model.alpha.dropna() data_simulation.add_quality_metrics(model.alpha) model.sigma = pandas.DataFrame(dict(true=sigma_true)) model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']] model.sigma['sigma_pred']=[n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']] data_simulation.add_quality_metrics(model.sigma) print 'delta' print model.delta print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()) model.mu = pandas.DataFrame(dict(true=pi_age_true, mu_pred=model.vars['p']['mu_age'].stats()['mean'], sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation'])) data_simulation.add_quality_metrics(model.mu) data_simulation.initialize_results(model) data_simulation.add_to_results(model, 'delta') data_simulation.add_to_results(model, 'mu') data_simulation.add_to_results(model, 'input_data') data_simulation.add_to_results(model, 'alpha') data_simulation.add_to_results(model, 'sigma') data_simulation.finalize_results(model) print model.results return model
print "Proccessing logfile ",inputlogfile lines=open(inputlogfile,"r").readlines() metadata=[x for x in lines if x[0]=="#"] #select meta data fields=[x.split()[2:] for x in metadata if x.split()[1]=="Fields:"][0] print fields R_index=fields.index("R") Clock_index=fields.index("CLOCKTIME") q=[x for x in lines if x[0]!="#"] #remove meta data q=q[:-1] #last line might be malformed print "first line: ",q[0] R=P.array([float(x.split()[R_index]) for x in q]) Realtime=P.array([float(x.split()[Clock_index]) for x in q]) N=len(R) ns=range(1,N+1) avgR=P.cumsum(R)/ns n50=range(50,len(R)) l50=[P.mean(R[i-50:i]) for i in n50] P.plot(ns,avgR,"r") P.plot(n50,l50,"b") P.text(N/2, avgR[N/4]-1, "Cumulative Average reward",color="r") P.text(N/2, P.amax(l50) +1, "Average of last 50 rewards",color="b") ytime=P.amin(avgR[:20]) +1 for i in P.arange(60,P.amax(Realtime),60): ind=bisect.bisect(Realtime,i) P.text(ind,ytime,".\n%d minute"%(i/60)) P.xlabel("Timestep") P.savefig(inputlogfile+".pdf")