def cv_evaluation_range(model, n_fold, X_train, y_train, score_function):
    """Return a [low, high] score range from stratified k-fold cross-validation.

    For every fold the model is fitted on the training part and scored on the
    held-out part with ``score_function(y_true, y_pred)``.  The returned range
    is ``mean(scores) -/+ 1.96 * std(scores)``.

    Args:
        model: estimator exposing ``fit(X, y)`` (returning the fitted
            estimator) and ``predict(X)``.
        n_fold: number of folds.
        X_train: feature table supporting ``.iloc`` row selection.
        y_train: labels supporting ``.iloc`` selection.
        score_function: callable ``(y_true, y_pred) -> float``.

    Returns:
        A two-element list ``[lower, upper]``.
    """
    # NOTE(review): this is the legacy call form (labels passed to the
    # constructor, iterating the object yields index pairs) -- confirm it
    # matches the StratifiedKFold this file imports.
    cv = StratifiedKFold(y_train, n_folds=n_fold, random_state=1001)

    scores = []
    for train_idx, test_idx in cv:
        # Fold indices are treated as row positions, so select with .iloc;
        # label-based .loc only agrees with that on a default 0..n-1 index.
        fitted = model.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
        y_hat = fitted.predict(X_train.iloc[test_idx])
        # keep the held-out score of this fold
        scores.append(score_function(y_train.iloc[test_idx], y_hat))

    center = np.mean(scores)
    half_width = 1.96 * np.std(scores)
    return [center - half_width, center + half_width]
def normalize(self, data: np.ndarray) -> np.ndarray:
    """Standardize each column of a 2D array.

    Computes ``(data - mean) / standard_deviation`` column by column.

    Args:
        data: 2D numpy array of shape ``(sample_size, feature_size)``.

    Returns:
        A 2D numpy array of the same shape with every column shifted to zero
        mean and scaled by its standard deviation.  A column whose standard
        deviation is zero is only centered (it comes back as all zeros)
        instead of producing NaN from a 0/0 division.

    Raises:
        ValueError: if ``data`` is not two-dimensional.
    """
    if data.ndim != 2:
        raise ValueError(
            "normalize expects a 2D array, got %d dimension(s)" % data.ndim
        )

    mean = np.mean(data, axis=0)
    # An alternative scale would be the range: data.max(axis=0) - data.min(axis=0)
    standard_deviation = np.std(data, axis=0)

    # Avoid dividing by zero for constant features.
    safe_sd = np.where(standard_deviation == 0, 1.0, standard_deviation)
    return (data - mean) / safe_sd
def Ztest(perm_dict, out_fp):
    """Write a z-score and upper-tail p-value per entry of ``perm_dict``.

    Each value of ``perm_dict`` is a sequence whose first element is the
    observed coreness and whose remaining elements are permutation values.
    Results go to ``out_fp + "_stats.txt"``, one tab-separated line per key:

        key, observed coreness, permutation mean, permutation SD, z-score, p-value

    All numbers are formatted with ``"%.2E"``.

    Args:
        perm_dict: mapping of key -> ``[observed, perm_1, perm_2, ...]``.
        out_fp: output path prefix.

    Returns:
        None.
    """
    out_file = out_fp + "_stats.txt"
    # The context manager closes the file even if a computation below raises.
    with open(out_file, 'w') as f1:
        for otu, values in perm_dict.items():
            observed = values[0]      # observed coreness
            perms = values[1:]
            m = mean(perms)           # mean of the permutations
            s = sd(perms)             # standard deviation of the permutations
            if s == 0:
                # The z-score is undefined without spread; 1 is the
                # placeholder the original author chose.
                # Might need to develop a different solution for this.
                z = 1
            else:
                z = (float(observed) - float(m)) / float(s)
            p = norm.sf(z)  # upper tail of the cumulative distribution
            stats = "\t".join("%.2E" % x for x in [observed, m, s, z, p])
            f1.write("%s\t%s\n" % (otu, stats))
    return None
def metrics(wealth):
    """Plot a wealth series and print summary performance figures.

    Shows three figures (the wealth curve, the log-returns over time and a
    histogram of the log-returns), then prints the Sharpe ratio, computed as
    ``mean(log-returns) / std(log-returns)``, and the total cumulative return
    ``(last - first) / first``.

    Args:
        wealth: sequence of wealth values, one per time step.

    Returns:
        None.
    """
    n = len(wealth)

    plt.plot(range(n), wealth, c='blue')
    plt.title('Evolution of the wealth')
    plt.xlabel('Seconds')
    plt.ylabel('Dollars')
    plt.show()

    # Consecutive differences of log-wealth are the log-returns (n - 1 values).
    list_logreturns = np.diff(np.log(wealth))

    plt.plot(range(n - 1), list_logreturns, c='blue')
    plt.title('Evolution of the log-returns')
    plt.xlabel('Seconds')
    plt.show()

    plt.hist(list_logreturns, bins='auto')
    plt.title('Distribution of the log-returns')
    plt.show()

    # Maybe do montecarlo and compute VaR = np.percentile(montecarlo_logreturns, 5)
    sharpe = np.mean(list_logreturns) / np.std(list_logreturns)
    print('The Sharpe ratio is:', sharpe)

    cum_return = (wealth[n - 1] - wealth[0]) / wealth[0]
    print('The total cumulative return is:', cum_return)
    return
def roll_stats():
    """Summarize the values returned by ``all_values()``.

    Returns:
        A dict with keys:
            'avg': mean of the values,
            'median': median of the values,
            'probabilities': dict mapping ``str(value)`` to the string
                ``"<count>/<total>"``,
            'counts': a ``Counter`` of the values,
            'sd': standard deviation of the values.
    """
    values = all_values()
    counts = Counter(values)
    total = len(values)

    probabilities = {
        str(outcome): str(count) + '/' + str(total)
        for outcome, count in counts.items()
    }

    return {
        'avg': np.mean(values),
        'median': np.median(values),
        'probabilities': probabilities,
        'counts': counts,
        'sd': np.std(values),
    }
def negative_gradient(self, y, pred, **kargs):
    """Compute the residual (= negative gradient).

    Stores the indices and counts of the positive (``y == 1``) and negative
    (``y == 0``) samples on ``self`` (``ind1``/``n1``, ``ind0``/``n0``),
    rescales ``pred``, stores the positive-minus-negative differences in
    ``self.M0`` and feeds them to ``self.approx_grad``.

    Args:
        y: array of 0/1 labels.
        pred: array of predictions aligned with ``y``.
        **kargs: accepted and ignored.

    Returns:
        Array of length ``n0 + n1``: for positive samples the row sums of the
        ``approx_grad`` result, for negative samples its column sums.
    """
    self.ind1 = np.where(y == 1)[0]
    self.n1 = len(self.ind1)
    self.ind0 = np.where(y == 0)[0]
    self.n0 = len(self.ind0)

    # our predictions are between [0,1] but the algorithm expects [-1,1]
    pred = (pred - 0.5) / np.std(pred)

    # NOTE(review): np.repeat yields n1 * n0 values while pred[self.ind0] has
    # n0 -- confirm the intended shapes / broadcasting with approx_grad.
    self.M0 = np.repeat(pred[self.ind1], self.n0) - pred[self.ind0]
    M1 = self.approx_grad(self.M0)

    ng = np.empty(self.n0 + self.n1)
    ng[self.ind1] = np.sum(M1, axis=1)
    ng[self.ind0] = np.sum(M1, axis=0)
    return ng
def dg_from_dist(n_samples=25, offset=-10.0):
    """
    Generate some fake results for MMGBSA then return mean and standard
    error as an uncertainty estimate.

    Parameters
    ----------
    n_samples: integer
        Number of samples to draw
    offset: float
        Offset from 0 - generally MMGBSA results are more negative than
        experimental ones

    Returns
    -------
    tuple of (float, float)
        Mean of the drawn samples and ``std(sample) / sqrt(n_samples)``.
    """
    # Target value: a negated half-normal draw shifted by the offset.
    target = -1 * halfnorm.rvs(size=1)[0] + offset
    # Unit-variance normal noise around the target.
    sample = np.random.randn(n_samples) + target
    return np.mean(sample), np.std(sample) / np.sqrt(n_samples)
def con_general(infile):
    """Return the length of every processed constraint read from ``infile``.

    Each selected line is passed through ``cstrip`` and then ``ccondense``;
    the lengths of the results are returned in file order.
    """
    with open(infile, 'r') as f:
        # NOTE(review): the line filter is elided ("...") in this sketch.
        constraints = [ccondense(cstrip(line)) for line in f if ...]
    return [len(constraint) for constraint in constraints]


def cstrip(constraint):
    # NOTE(review): body elided in this sketch.
    ...


def ccondense(constraint):
    # or just len?
    # NOTE(review): body elided in this sketch.
    ...


import numpy

conlengths = con_general('...')
print(numpy.mean(conlengths))
print(numpy.std(conlengths))
# print distribution or at least hist
def sd(lst):
    """Return the standard deviation of ``lst`` as computed by ``np.std``."""
    return np.std(lst)
# NOTE(review): fragment of a larger script -- the enclosing loop(s) and the
# original nesting are not visible here, so the statements are laid out flat.
# Confirm the indentation against the full file.

# Sort the value into its area bucket based on the sample-name suffix.
if sample2.endswith(("S2", "S3")):
    area1.append(value2)
if sample2.endswith(("S7", "S8", "S10")):
    area2.append(value2)
if sample2.endswith(("S9", "S11")):
    area3.append(value2)
# NOTE(review): the else pairs only with the "S12" test, so every sample not
# ending in "S12" is also added to area4 (and a name ending in "S12" also
# ends in "S2") -- confirm this is intended.
if sample2.endswith("S12"):
    area3.append(value2)
else:
    area4.append(value2)

# Look up the stability values for the sample, falling back to 'NaN'.
if sample in Stab_Dictionary:
    stabval = Stab_Dictionary[sample]
    pval = stabval[0]
    qval = stabval[1]
    sigma = stabval[2]
else:
    pval = 'NaN'
    qval = 'NaN'
    sigma = 'NaN'

# One tab-separated row: sample, four area means, four area SDs, p, q, sigma.
areas = (area1, area2, area3, area4)
fields = [sample]
fields.extend(str(np.mean(area)) for area in areas)
fields.extend(str(np.std(area)) for area in areas)
fields.extend([pval, qval, sigma])
outfile.write("\t".join(fields) + "\n")
def standardized_returns(midprices):
    """Return the log-returns of ``midprices`` standardized to zero mean.

    The log-returns are the consecutive differences of ``log(midprices)``;
    they are centered on their mean and divided by their standard deviation.

    Args:
        midprices: sequence of positive prices.

    Returns:
        Array with one element fewer than ``midprices``.
    """
    logreturns = np.diff(np.log(midprices))
    return (logreturns - np.mean(logreturns)) / np.std(logreturns)
def song_calc(song_lst):
    """Return the mean and standard deviation of a playlist's values.

    Args:
        song_lst: sequence of numeric values, one per song.

    Returns:
        ``(mean, standard deviation)`` when the list has more than 10
        entries; otherwise a message string saying the playlist is too short.
    """
    if len(song_lst) > 10:
        return numpy.mean(song_lst), numpy.std(song_lst)
    return "This playlist is not long enough to use these measurements"
# NOTE(review): fragment of a larger script that starts mid-statement -- the
# first append presumably belongs to a conditional above this view, and the
# enclosing loop(s) are not visible.  Statements are laid out flat; confirm
# the nesting against the full file.
area1.append(value2)

# Sort the value into its area bucket based on the sample-name suffix.
if sample2.endswith(("S7", "S8", "S10")):
    area2.append(value2)
if sample2.endswith(("S9", "S11")):
    area3.append(value2)
# NOTE(review): the else pairs only with the "S12" test, so every sample not
# ending in "S12" is also added to area4 -- confirm this is intended.
if sample2.endswith("S12"):
    area3.append(value2)
else:
    area4.append(value2)

# Look up the stability values for the sample, falling back to "NaN".
if sample in Stab_Dictionary:
    stabval = Stab_Dictionary[sample]
    pval = stabval[0]
    qval = stabval[1]
    sigma = stabval[2]
else:
    pval = "NaN"
    qval = "NaN"
    sigma = "NaN"

# One tab-separated row: sample, four area means, four area SDs, p, q, sigma.
# The numeric statistics must be converted with str() before joining;
# concatenating them directly to strings raises TypeError.
areas = (area1, area2, area3, area4)
fields = [sample]
fields.extend(str(np.mean(area)) for area in areas)
fields.extend(str(np.std(area)) for area in areas)
fields.extend([pval, qval, sigma])
outfile.write("\t".join(fields) + "\n")
def _appointee_outcome(problem_weight, proposal_weight, ideal_weight):
    """Return "approve" or "reject" for one appointee from the three 1-counts."""
    if problem_weight > proposal_weight:
        # proposal reduces agency involvement: approved only if the appointee
        # prefers a decrease
        return "approve" if ideal_weight < problem_weight else "reject"
    if problem_weight < proposal_weight:
        # proposal increases agency involvement: approved only if the
        # appointee prefers an increase
        return "approve" if ideal_weight >= problem_weight else "reject"
    # proposal recommends no change: approved whichever way the appointee leans
    return "approve"


def _agent_group_stats(group):
    """Return (ideology mean, ideology SD, skill mean, skill SD) for agents."""
    ideologies = []
    skills = []
    for agent in group:
        chars = agent.getAgentChars()
        ideologies.append(chars['ideology'])
        skills.append(chars['skill'])
    return (numpy.mean(ideologies), numpy.std(ideologies),
            numpy.mean(skills), numpy.std(skills))


def modelRun(seedComplexity, seedSQ, seedSkillRange, seedDesignerN,
             seedManagerN, seedAppointeeN, seedIdeologyMean, seedIdeologySD):
    """Main control loop: run one model simulation and return its results.

    Args:
        seedComplexity: value in [0, 1]; mapped to a problem string length
            of ``50 + 200 * seedComplexity`` (rounded).
        seedSQ: value in [0.1, 0.6].
        seedSkillRange: pair ``(low, high)``.
        seedDesignerN, seedManagerN, seedAppointeeN: whole, non-zero agent
            counts.
        seedIdeologyMean, seedIdeologySD: ideology distribution parameters
            passed to ``genAgents``; an SD above 0.2 only triggers a warning.

    Returns:
        A dict of inputs and results (numeric values rounded to 4 digits),
        or None when an input parameter fails validation (a message is
        printed in that case).
    """
    ######
    # check input parameters for errors
    ######
    if seedComplexity < 0 or seedComplexity > 1:
        print("Please set seedComplexity between 0 and 1")
        return
    if seedSQ < 0.1 or seedSQ > 0.6:
        print("Please set seedSQ between 0.1 and 0.6")
        return
    agent_counts = (seedDesignerN, seedManagerN, seedAppointeeN)
    # every count must be whole, not just one of them
    if any(count % 1 != 0 for count in agent_counts):
        print("Please set the number of agents to a whole number")
        return
    if any(count == 0 for count in agent_counts):
        print("All agent types should have at least 1 representative")
        return
    if seedIdeologySD > 0.2:
        # getting up too high will just result in a highly bi-modal
        # distribution of ideology, which is fine, but should be noted in any
        # case (because of the truncated distribution)
        print("Ideology deviations greater than 0.2 may yield unbalanced results")

    # convert complexity to a string length
    complexity = int(round(50 + (200 * seedComplexity), 0))

    # storage for results from model run
    run_output = {
        "seedComplexity": seedComplexity,
        "seedSQ": seedSQ,
        "seedDesignerN": seedDesignerN,
        "seedManagerN": seedManagerN,
        "seedAppointeeN": seedAppointeeN,
        "seedIdeologyMean": seedIdeologyMean,
        "seedIdeologySD": seedIdeologySD,
        "seedSkillLow": seedSkillRange[0],
        "seedSkillHigh": seedSkillRange[1],
        "seedSkillRange": abs(seedSkillRange[1] - seedSkillRange[0]),
        # float() keeps this a true ratio under Python 2 integer division
        "spanOfControl": float(seedDesignerN) / seedManagerN,
    }

    print("gen problem part")
    # generate the problem to be solved
    problem = genProblem(seedComplexity, complexity, seedSQ)
    problem = ''.join([str(i) for i in problem])
    run_output['problem'] = problem

    print("gen agents part")
    # generate a list of agents and establish their basic characteristics
    agents = genAgents(seedSkillRange, seedDesignerN, seedManagerN,
                       seedAppointeeN, seedIdeologyMean, seedIdeologySD)
    for group in agents:
        for agent in group:
            print('define skill')
            agent.defineSkill(seedSkillRange)
            print('define ideal')
            agent.defineIdeal(complexity)
            print('define tolerance')
            # NOTE: defineTolerance(seedSQ) is intentionally not called --
            # the original author reported it hanging.

    # split into occupation class lists
    designers = agents[0]
    managers = agents[1]
    appointees = agents[2]

    # hill climbing deliberation structure
    print("starting deliberation")
    revised_proposal = designer_revisions_HC1(designers, managers, problem)
    print("finished deliberation")

    result_keys = ('proposal', 'dissatisfaction', 'iterations',
                   'pctchange_hd', 'rawchange_hd', 'pctimprove',
                   'pctchange_hw', 'rawchange_hw')
    for position, key in enumerate(result_keys):
        run_output[key] = revised_proposal[position]
    proposal = revised_proposal[0]

    # estimate proactivity prior to appointee
    problem_weight = problem.count("1")
    proposal_weight = proposal.count("1")
    # float() so the Hamming-distance share is not truncated to 0 on Python 2
    hd_proactivity = float(hamming_distance(problem, proposal)) / len(problem)
    sqWeight = seedSQ * len(problem)
    # largest possible move away from the status-quo weight, in either direction
    maxChange = max(len(problem) - sqWeight, sqWeight)
    hw_proactivity = abs(problem_weight - proposal_weight) / maxChange
    run_output['hd_proactivity'] = hd_proactivity
    run_output['hw_proactivity'] = hw_proactivity

    # appointee veto: check direction of change relative to each appointee ideal
    appointeeResults = []
    for appointee in appointees:
        ideal_weight = str(appointee.getIdeal()).count("1")
        appointeeResults.append(
            _appointee_outcome(problem_weight, proposal_weight, ideal_weight)
        )
    # only the first appointee's decision is recorded
    run_output['appointeeOutcome'] = appointeeResults[0]

    # add in the agent aggregate statistics, one fresh computation per group
    stat_suffixes = ('IdeoMean', 'IdeoSD', 'SkillMean', 'SkillSD')
    groups = (('designer', designers), ('manager', managers),
              ('appointee', appointees))
    for prefix, group in groups:
        for suffix, stat in zip(stat_suffixes, _agent_group_stats(group)):
            run_output[prefix + suffix] = stat

    # if it's acceptable, check political situation
    # if it's politically acceptable, enact and decide

    # list() so the dict can be updated safely while walking its items
    for key, value in list(run_output.items()):
        # if numeric, round off to 4 digits
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            run_output[key] = round(value, 4)
        print(key + ": " + str(value))

    return run_output