def plotZeroCount(self, figurename, **kwargs):
    """Plot a histogram of the number of zero entries per matrix row.

    Rows consisting entirely of zeros (zero count equal to the matrix
    dimension) are excluded from the histogram.

    Parameters
    ----------
    figurename : str
        Output filename for the histogram figure.
    **kwargs
        Extra options forwarded to histogram().
    """
    dim = len(self.matrix)
    per_row = (len(np.flatnonzero(row == 0)) for row in self.matrix)
    # drop rows that are all zeros
    zeroCount = [count for count in per_row if count != dim]
    histogram(figurename,
              zeroCount,
              int(dim / 100),
              xlab='# of Zeros',
              ylab='Frequency',
              **kwargs)
def CalculateMeanCoverage(Contigs, Information, param):
    """Estimate the mean contig coverage and its standard deviation.

    Uses the (up to) 1000 longest contigs, then iteratively removes
    extreme observations with RemoveOutliers() until none remain.

    Parameters:
        Contigs     -- dict mapping contig name to objects exposing
                       .length and .coverage
        Information -- open file-like object used for progress logging
        param       -- parameter object; .plots and .bamfile are read

    Returns:
        (mean_cov, std_dev) tuple of floats.
    """
    # (length, name) tuples, sorted longest first
    list_of_cont_tuples = [(Contigs[contig].length, contig) for contig in Contigs]
    list_of_cont_tuples = sorted(list_of_cont_tuples, key=lambda t: t[0], reverse=True)
    # coverages of the (up to) 1000 longest contigs
    longest_contigs = list_of_cont_tuples[:1000]
    cov_of_longest_contigs = [Contigs[contig[1]].coverage for contig in longest_contigs]

    # NOTE(review): an empty Contigs dict would raise here (division by
    # zero / IndexError below) -- presumably callers guarantee at least
    # one contig; verify upstream.
    n = float(len(cov_of_longest_contigs))
    mean_cov = sum(cov_of_longest_contigs) / n
    std_dev = _sample_std_dev(cov_of_longest_contigs, mean_cov)

    print >> Information, "Mean coverage before filtering out extreme observations = ", mean_cov
    print >> Information, "Std dev of coverage before filtering out extreme observations= ", std_dev

    ## SMOOTH OUT THE MEAN HERE by removing extreme observations ##
    extreme_obs_occur = True
    while extreme_obs_occur:
        extreme_obs_occur, filtered_list = RemoveOutliers(mean_cov, std_dev, cov_of_longest_contigs)
        n = float(len(filtered_list))
        try:
            mean_cov = sum(filtered_list) / n
        except ZeroDivisionError:
            # everything was filtered out -- keep the previous estimates
            break
        # BUGFIX: the original divided by (n - 1) unguarded here, so a
        # filtered list of length 1 crashed with ZeroDivisionError; the
        # helper applies the same n == 1 guard used before the loop.
        std_dev = _sample_std_dev(filtered_list, mean_cov)
        cov_of_longest_contigs = filtered_list

    print >> Information, "Mean coverage after filtering = ", mean_cov
    print >> Information, "Std coverage after filtering = ", std_dev
    print >> Information, "Length of longest contig in calc of coverage: ", longest_contigs[0][0]
    print >> Information, "Length of shortest contig in calc of coverage: ", longest_contigs[-1][0]

    if param.plots:
        plots.histogram(
            cov_of_longest_contigs,
            param,
            bins=50,
            x_label="coverage",
            y_label="frequency",
            title="BESST_cov_1000_longest_cont" + param.bamfile.split("/")[-1],
        )
    return (mean_cov, std_dev)


def _sample_std_dev(observations, mean_):
    """Sample standard deviation of observations around mean_.

    A single observation carries no spread information; as in the
    original code, n is bumped to 2 in that case so the result is the
    summed squared deviation (0.0 for one point) instead of a
    ZeroDivisionError.
    """
    n = float(len(observations))
    if n == 1:
        n += 1
    # sum((x - m)^2) is algebraically identical to the original
    # sum(x^2 - 2*x*m + m^2) but clearer and numerically no worse
    return (sum((x - mean_) ** 2 for x in observations) / (n - 1)) ** 0.5
def compareMatrix(m1,m2,figurename = 'comparison.png',**kwargs): """compare 2 matrixes, output correlation coefficient Parameters ---------- m1,m2 : contactmatrix instances must be the same dimensions figurename : str filename for columnwise pearson corr histogram, set None to escape this step """ if not (isinstance(m1,contactmatrix) and isinstance(m2,contactmatrix)): raise TypeError, "Invalid argument type, 2 contactmatrixes are required" if len(m1) != len(m2): raise TypeError, "Invalid argument, dimensions of matrixes must meet" from scipy.stats import spearmanr,pearsonr flat1 = m1.matrix.flatten() flat2 = m2.matrix.flatten() nonzeros = (flat1 > 0) * (flat2 > 0) flat1 = flat1[nonzeros] flat2 = flat2[nonzeros] print 'pearsonr:' print pearsonr(flat1,flat2) print 'spearmanr:' print spearmanr(flat1,flat2) del flat1 del flat2 if not (figurename is None): corr = [] for i in range(len(m1)): r = pearsonr(m1.matrix[i],m2.matrix[i]) #print r if not np.isnan(r[0]): corr.append(r[0]) histogram(figurename, corr, 100, xlab = 'Correlation Coefficient', ylab = 'Frequency', **kwargs)
def plotSum(self, figurename, outlier=False, line=None, **kwargs):
    """Plot the row-sum frequency histogram.

    Parameters:
    -----------
    figurename: string
        Name of the plot
    outlier: bool
        option to plot the outlier cutoff line (the upper Tukey fence,
        Q3 + 1.5*IQR); only honoured when 'line' is None
    line: float/array/list
        draw vertical lines at a list of positions
    """
    rowsum = self.rowsum()
    if line is None and outlier:
        q3 = np.percentile(rowsum, 75)
        iqr = q3 - np.percentile(rowsum, 25)
        line = iqr * 1.5 + q3
    histogram(figurename,
              rowsum[rowsum > 0],
              int(len(self.matrix) / 100),
              xlab='Row sums',
              ylab='Frequency',
              line=line,
              **kwargs)
def GiveScoreOnEdges(G, Scaffolds, small_scaffolds, Contigs, param, Information, plot):
    """Score every linked edge of scaffold graph G, mutating G in place.

    For each edge that carries link observations, a gap size is estimated
    and two [0, 1] sub-scores are combined:
      * std_dev_score -- how well the observed link standard deviation
        matches the model value from param_est.tr_sk_std_dev;
      * span_score    -- 1 minus the KS statistic between the
        (mean-centred) link-position distributions on the two ends.
    G[u][v]['gap'] and G[u][v]['score'] are written for each such edge;
    the score is their sum when both sub-scores exceed 0.5, else 0.
    Always returns an empty tuple.
    """
    span_score_obs = []
    std_dev_score_obs = []
    gap_obs = []
    nr_link_obs = []
    # NOTE(review): cnt_sign is never incremented, so the final log line
    # always reports 0 -- confirm whether counting was lost in an edit.
    cnt_sign = 0
    for edge in G.edges():
        mean_ = 0
        std_dev = 0
        # edges without link observations are left untouched
        if G[edge[0]][edge[1]]['nr_links'] != None:
            n = G[edge[0]][edge[1]]['nr_links']
            obs_squ = G[edge[0]][edge[1]]['obs_sq']
            mean_ = G[edge[0]][edge[1]]['obs'] / float(n)
            # naive gap estimate: insert size minus mean observed span
            data_observation = (n * param.mean_ins_size - G[edge[0]][edge[1]]['obs']) / float(n)
            # scaffold lengths; fall back to the small-scaffold table
            try:
                len1 = Scaffolds[ edge[0][0] ].s_length
            except KeyError:
                len1 = small_scaffolds[ edge[0][0] ].s_length
            try:
                len2 = Scaffolds[ edge[1][0] ].s_length
            except KeyError:
                len2 = small_scaffolds[ edge[1][0] ].s_length
            # use the ML gap estimator only when both scaffolds are long
            # enough (> 2 std devs of the insert size); otherwise keep
            # the naive estimate
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                gap = param_est.GapEstimator(param.mean_ins_size, param.std_dev_ins_size, param.read_len, mean_, len1, len2)
            else:
                gap = data_observation
            G[edge[0]][edge[1]]['gap'] = int(gap)
            # a negative gap larger than either scaffold is nonsense:
            # give the edge score 0 and move on
            if -gap > len1 or -gap > len2:
                G[edge[0]][edge[1]]['score'] = 0
                continue
            # model standard deviation; 2**32 acts as an "infinite"
            # sentinel when the scaffolds are too short for the model
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)
            else:
                std_dev_d_eq_0 = 2 ** 32
            # sample std dev of the observations from the sum of squares;
            # n == 1 would divide by zero, hence the sentinel fallback
            try:
                std_dev = ((obs_squ - n * mean_ ** 2) / (n - 1)) ** 0.5
            except ZeroDivisionError:
                std_dev = 2 ** 32
            # per-end lists of link positions, keyed by scaffold name
            try:
                l1 = G[edge[0]][edge[1]][Scaffolds[edge[0][0]].name]
            except KeyError:
                l1 = G[edge[0]][edge[1]][small_scaffolds[edge[0][0]].name]
            try:
                l2 = G[edge[0]][edge[1]][Scaffolds[edge[1][0]].name]
            except KeyError:
                l2 = G[edge[0]][edge[1]][small_scaffolds[edge[1][0]].name]
            l1.sort()
            # NOTE(review): n_obs = len(l1) is reused for l2 below --
            # assumes both ends record one position per link; verify.
            n_obs = len(l1)
            l1_mean = sum(l1) / float(n_obs)
            # centre both samples so the KS test compares shapes only
            l1 = map(lambda x: x - l1_mean, l1)
            max_obs2 = max(l2)
            l2.sort(reverse=True)
            # mirror l2 (distance from its maximum) so both samples are
            # oriented the same way before centring
            l2 = map(lambda x: abs(x - max_obs2), l2)
            l2_mean = sum(l2) / float(n_obs)
            l2 = map(lambda x: x - l2_mean, l2)
            KS_statistic, p_value = ks_2samp(l1, l2)
            # too few links for the KS test to be meaningful
            if len(l1) < 3:
                span_score = 0
            else:
                span_score = 1 - KS_statistic
            # ratio of observed to model std dev, symmetrised into [0, 1]
            try:
                std_dev_score = min(std_dev / std_dev_d_eq_0, std_dev_d_eq_0 / std_dev)
            except ZeroDivisionError:
                std_dev_score = 0
            sys.stderr.write(str(std_dev) + ' ' + str(std_dev_d_eq_0) + ' ' + str(span_score) + '\n')
            # both sub-scores must clear 0.5 for the edge to count
            G[edge[0]][edge[1]]['score'] = std_dev_score + span_score if std_dev_score > 0.5 and span_score > 0.5 else 0
            if param.plots:
                span_score_obs.append(span_score)
                std_dev_score_obs.append(std_dev_score)
                gap_obs.append(gap)
                nr_link_obs.append(n_obs)
    # diagnostic plots over all scored edges
    if param.plots:
        plots.histogram(span_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Dispersity_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.histogram(std_dev_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Standard_deviation_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, span_score_obs, param, x_label='std_dev_score_obs', y_label='span_score_obs', title='Score_correlation' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, gap_obs, param, x_label='std_dev_score_obs', y_label='estimated gap size', title='Gap_to_sigma' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, gap_obs, param, x_label='span_score_obs', y_label='estimated gap size', title='Gap_to_span' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, nr_link_obs, param, x_label='span_score_obs', y_label='Number links', title='Obs_to_span' + plot + '.' + param.bamfile.split('/')[-1])
    # sanity check: every linked edge should have received a score
    for edge in G.edges():
        if G[edge[0]][edge[1]]['nr_links'] != None:
            try:
                G[edge[0]][edge[1]]['score']
            except KeyError:
                sys.stderr.write(str(G[edge[0]][edge[1]]) + ' ' + str(Scaffolds[edge[0][0]].s_length) + ' ' + str(Scaffolds[edge[1][0]].s_length))
    print >> Information, 'Number of significantly spurious edges:', cnt_sign
    return()
def GiveScoreOnEdges(G, Scaffolds, small_scaffolds, Contigs, param, Information, plot):
    """Score every linked edge of scaffold graph G, mutating G in place.

    Combines a standard-deviation sub-score (observed link std dev vs the
    param_est.tr_sk_std_dev model value) with a span sub-score
    (1 - KS statistic between the centred link-position distributions on
    the two ends). Writes G[u][v]['gap'] and G[u][v]['score'] for each
    linked edge and returns an empty tuple.

    NOTE(review): this is a near-duplicate of another GiveScoreOnEdges
    definition in this source; if both live in the same module the later
    binding shadows the earlier one -- verify which is intended.
    """
    span_score_obs = []
    std_dev_score_obs = []
    gap_obs = []
    nr_link_obs = []
    # NOTE(review): cnt_sign is never incremented; the final log line
    # always prints 0.
    cnt_sign = 0
    for edge in G.edges():
        mean_ = 0
        std_dev = 0
        # only edges carrying link observations are scored
        if G[edge[0]][edge[1]]['nr_links'] != None:
            n = G[edge[0]][edge[1]]['nr_links']
            obs_squ = G[edge[0]][edge[1]]['obs_sq']
            mean_ = G[edge[0]][edge[1]]['obs'] / float(n)
            # naive gap estimate: insert size minus mean observed span
            data_observation = (n * param.mean_ins_size - G[edge[0]][edge[1]]['obs']) / float(n)
            # scaffold lengths, falling back to the small-scaffold table
            try:
                len1 = Scaffolds[ edge[0][0] ].s_length
            except KeyError:
                len1 = small_scaffolds[ edge[0][0] ].s_length
            try:
                len2 = Scaffolds[ edge[1][0] ].s_length
            except KeyError:
                len2 = small_scaffolds[ edge[1][0] ].s_length
            # ML gap estimator only when both scaffolds are long enough;
            # otherwise keep the naive estimate
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                gap = param_est.GapEstimator(param.mean_ins_size, param.std_dev_ins_size, param.read_len, mean_, len1, len2)
            else:
                gap = data_observation
            G[edge[0]][edge[1]]['gap'] = int(gap)
            # a negative gap exceeding either scaffold length is nonsense
            if -gap > len1 or -gap > len2:
                G[edge[0]][edge[1]]['score'] = 0
                continue
            # model std dev; 2**32 is an "infinite" sentinel for short scaffolds
            if 2 * param.std_dev_ins_size < len1 and 2 * param.std_dev_ins_size < len2:
                std_dev_d_eq_0 = param_est.tr_sk_std_dev(param.mean_ins_size, param.std_dev_ins_size, param.read_len, len1, len2, gap)
            else:
                std_dev_d_eq_0 = 2 ** 32
            # sample std dev from the sum of squares; n == 1 divides by zero
            try:
                std_dev = ((obs_squ - n * mean_ ** 2) / (n - 1)) ** 0.5
            except ZeroDivisionError:
                std_dev = 2 ** 32
            # per-end link position lists, keyed by scaffold name
            try:
                l1 = G[edge[0]][edge[1]][Scaffolds[edge[0][0]].name]
            except KeyError:
                l1 = G[edge[0]][edge[1]][small_scaffolds[edge[0][0]].name]
            try:
                l2 = G[edge[0]][edge[1]][Scaffolds[edge[1][0]].name]
            except KeyError:
                l2 = G[edge[0]][edge[1]][small_scaffolds[edge[1][0]].name]
            l1.sort()
            # NOTE(review): n_obs = len(l1) is reused for l2 below --
            # assumes one recorded position per link on each end; verify.
            n_obs = len(l1)
            l1_mean = sum(l1) / float(n_obs)
            # centre l1 on its mean
            l1 = map(lambda x: x - l1_mean, l1)
            max_obs2 = max(l2)
            l2.sort(reverse=True)
            # mirror l2 (distance from its maximum) so both samples have
            # the same orientation, then centre on its mean
            l2 = map(lambda x: abs(x - max_obs2), l2)
            l2_mean = sum(l2) / float(n_obs)
            l2 = map(lambda x: x - l2_mean, l2)
            KS_statistic, p_value = ks_2samp(l1, l2)
            # (a large block of commented-out alternative span-score
            # computations based on observation spans was removed here)
            # too few links for the KS test to be meaningful
            if len(l1) < 3:
                span_score = 0
            else:
                span_score = 1 - KS_statistic
            # symmetrised ratio of observed to model std dev, in [0, 1]
            try:
                std_dev_score = min(std_dev / std_dev_d_eq_0, std_dev_d_eq_0 / std_dev)
            except ZeroDivisionError:
                std_dev_score = 0
            sys.stderr.write(str(std_dev) + ' ' + str(std_dev_d_eq_0) + ' ' + str(span_score) + '\n')
            # both sub-scores must clear 0.5 for the edge to count
            G[edge[0]][edge[1]]['score'] = std_dev_score + span_score if std_dev_score > 0.5 and span_score > 0.5 else 0
            if param.plots:
                span_score_obs.append(span_score)
                std_dev_score_obs.append(std_dev_score)
                gap_obs.append(gap)
                nr_link_obs.append(n_obs)
    # diagnostic plots over all scored edges
    if param.plots:
        plots.histogram(span_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Dispersity_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.histogram(std_dev_score_obs, param, bins=20, x_label='score', y_label='frequency', title='Standard_deviation_score_distribuion' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, span_score_obs, param, x_label='std_dev_score_obs', y_label='span_score_obs', title='Score_correlation' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(std_dev_score_obs, gap_obs, param, x_label='std_dev_score_obs', y_label='estimated gap size', title='Gap_to_sigma' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, gap_obs, param, x_label='span_score_obs', y_label='estimated gap size', title='Gap_to_span' + plot + '.' + param.bamfile.split('/')[-1])
        plots.dot_plot(span_score_obs, nr_link_obs, param, x_label='span_score_obs', y_label='Number links', title='Obs_to_span' + plot + '.' + param.bamfile.split('/')[-1])
    # sanity check: every linked edge should have received a score
    for edge in G.edges():
        if G[edge[0]][edge[1]]['nr_links'] != None:
            try:
                G[edge[0]][edge[1]]['score']
            except KeyError:
                sys.stderr.write(str(G[edge[0]][edge[1]]) + ' ' + str(Scaffolds[edge[0][0]].s_length) + ' ' + str(Scaffolds[edge[1][0]].s_length))
    print >> Information, 'Number of significantly spurious edges:', cnt_sign
    return()
print 'start hyperparameter selection', time.time() best_regressor = regressors[hyperparameter_selection( regressors, train_x, train_y, 5)] print best_regressor.C, best_regressor.epsilon, best_regressor.kernel, best_regressor.degree, best_regressor.gamma else: best_regressor = SVR(C=0.25, epsilon=0.25, kernel='linear') best_regressor.fit(train_x, train_y) prediction = best_regressor.predict(test_x) np.save('prediction.npy', prediction) print 'RMSE, MAE, MRE (all):', mean_squared_error(test_y, prediction)**0.5, \ mean_absolute_error(test_y, prediction), \ mean_relative_error(test_y, prediction) # determine error if only best 24 players are selected indices = [] for index in range(len(test_x)): if test[index, 0] in test_players.keys(): indices.append(index) print 'RMSE, MAE, MRE (24 best):', mean_squared_error(test_y[indices], prediction[indices])**0.5, \ mean_absolute_error(test_y[indices], prediction[indices]), \ mean_relative_error(test_y[indices], prediction[indices]) print zip(test_y[indices], prediction[indices]) if HISTOGRAM: histogram(test_y, prediction)
def levels(E, ket, epsilon=1e-8, colors=''):
    """Return the degenerate subspace index and optionally the colormap.

    Groups sorted eigenvalues E into degenerate levels (consecutive
    eigenvalues whose relative spacing is <= epsilon), assigns each state
    an irreducible-representation label, and writes diagnostic plots and
    a log file ('log.txt') as side effects.

    Parameters:
        E       -- 1D array of eigenvalues (assumed sorted ascending --
                   relative spacings are taken from np.diff; verify)
        ket     -- eigenstates aligned with E; ket[i][1] is read as the
                   quantum number n2
        epsilon -- relative-spacing threshold separating levels
        colors  -- optional color cycle; when non-empty, a per-state
                   colormap is also returned

    Returns ir_reps (np.uint8 array over states) or
    (ir_reps, colormap) when colors is non-empty.
    """
    # irreducible representations
    # 0 - unidimensional symmetric representation (reuns)
    # 1 - unidimensional anti-symmetric representation (reuna)
    # 2 - bidimensional representation (rebde)
    ir_reps = np.zeros([E.size], dtype=np.uint8)
    return_colors = len(colors)
    if return_colors:
        colormap = [''] * E.size    # colors used
    # Group energy levels such that a level contains all the eigenvalues with
    # the same value
    delta = np.diff(E)
    avgSpacing = (E[-1] - E[0]) / E.size
    relsp = delta / avgSpacing
    print('levels epsilon:', epsilon)
    print('avgSpacing:', avgSpacing)
    # split wherever the relative spacing exceeds epsilon
    levels = np.split(E, np.where(relsp > epsilon)[0] + 1)
    states = np.split(ket, np.where(relsp > epsilon)[0] + 1)
    # Energy difference (between two consecutive levels) histogram
    histogram(delta, xlabel=r'$\Delta E$', xscale='log',
              bins=np.pad(np.logspace(-15, 1, 17), (1, 0), mode='constant'),
              ylabel='No. of levels', fname='hist_delta.pdf', figsize=(5.8, 3))
    # Relative spacing histogram
    histogram(relsp, xscale='log', ylabel='No. of levels',
              bins=np.pad(np.logspace(-13, 1, 15), (1, 0), mode='constant'),
              fname='hist_relsp.pdf', xlabel='$s$', figsize=(2.8, 3))
    # Energy difference bar plot
    bar_plot(delta, figsize=(5.8, 3), ylabel=r'$\Delta E$', yscale='log',
             xlabel='index', fname='bar_delta.pdf', dpi=720)
    # Relative spacing bar plot
    bar_plot(relsp, figsize=(5.8, 3), yscale='log', fname='relsp.pdf', dpi=720,
             label=r'$\varepsilon=' + latex_float(epsilon) + '$',
             axhline_y=epsilon, ylabel='$s$', xlabel='index')
    # Check for bidimensional representation selection problems:
    # a bidimensional level should contain at most 2 eigenvalues, so any
    # group of 3+ must be split further
    levels_cp = list(levels)
    states_cp = list(states)
    log = open('log.txt', 'a')
    log.write('\n\nlevels epsilon: ' + str(epsilon))
    for i in range(len(levels_cp)):
        if levels_cp[i].size > 2:
            local_relsp = np.diff(levels_cp[i]) / avgSpacing
            log.write('\nInfo: Found ' + str(levels_cp[i].size) + ' levels ' +
                      'in the bidimensional representation with: \nenergy: ' +
                      str(levels_cp[i]) + '\ndelta: ' + str(np.diff(levels_cp[i])) +
                      '\nrelsp: ' + str(local_relsp))
            # Try to fix the problem
            if levels_cp[i].size > 3:
                # more than 3 clustered eigenvalues: no rule to split them
                log.write('\nError: Cannot choose where to split!')
                raise RuntimeError('Cannot choose where to split!')
            elif local_relsp[0] == local_relsp[1]:
                # 3 equally-spaced eigenvalues: decide by the parity of
                # the quantum number n2 (second component of each state)
                log.write('\nWarning: 3 consecutive levels with identical ' +
                          'relative spacings')
                n2 = np.array([states_cp[i][j][1] for j in range(3)])
                log.write('\nn2: ' + str(n2))
                # Find the dominant parity
                unique, counts = np.unique(n2 % 2, return_counts=True)
                log.write('\nDominant parity: ' +
                          ('odd' if unique[np.argmax(counts)] else 'even'))
                # Find the current position (levels has shifted relative
                # to the levels_cp snapshot as earlier groups were split)
                j = [np.array_equal(levels_cp[i], k) for k in levels].index(True)
                # Select the levels with different parity for the bidimensional
                # representation
                dominant = n2 % 2 == unique[np.argmax(counts)]
                different = n2 % 2 != unique[np.argmax(counts)]
                # Bidimensional representation levels
                bd_l = [levels_cp[i][dominant][0]]
                # Bidimensional representation states
                bd_st = [states_cp[i][dominant][0]]
                if counts[0] < 3 and counts[1] < 3:
                    # mixed parities: pair the first dominant-parity level
                    # with the different-parity one
                    bd_l.append(levels_cp[i][different][0])
                    bd_st.append(states_cp[i][different][0])
                else:
                    logging.warning('3 consecutive quantum numbers with ' +
                                    'the same parity!')
                    bd_l.append(levels_cp[i][dominant][2])
                    bd_st.append(states_cp[i][dominant][2])
                # Unidimensional representation levels
                u_l = [levels_cp[i][dominant][1]]
                # Unidimensional representation states
                u_st = [states_cp[i][dominant][1]]
                # insert the two new groups at j, then drop the original
                # (now shifted to j + 2)
                levels[j:j] = [np.array(bd_l), np.array(u_l)]
                states[j:j] = [np.array(bd_st), np.array(u_st)]
                del levels[j + 2]
                del states[j + 2]
                log.write('\nresult: ' + str(levels[j]) + str(levels[j + 1]) +
                          '\nwith: ' + str(states[j]) + str(states[j + 1]))
            else:
                # Find the current position
                j = [np.array_equal(levels_cp[i], k) for k in levels].index(True)
                # Split at the maximum relative spacing
                levels[j:j] = np.split(
                    levels_cp[i],
                    np.where(local_relsp == local_relsp.max())[0] + 1)
                states[j:j] = np.split(
                    states_cp[i],
                    np.where(local_relsp == local_relsp.max())[0] + 1)
                del levels[j + 2]
                del states[j + 2]
                log.write('\nresult: ' + str(levels[j]) + str(levels[j + 1]))
    # Label each state: i + j + k maps (level i, member j) to the flat
    # state index, with k accumulating the extra members of earlier
    # multi-state levels
    k = 0
    for i in range(len(levels)):
        for j in range(levels[i].size):
            if return_colors:
                colormap[i + j + k] = colors[i % len(colors)]
            if levels[i].size > 1:
                # degenerate subspace -> rebde
                ir_reps[i + j + k] = 2
            else:
                if states[i][0][1] % 2:
                    # n2 odd -> reuna
                    ir_reps[i + j + k] = 1
                else:
                    # n2 even -> reuns
                    ir_reps[i + j + k] = 0
        k += levels[i].size - 1
    log.close()
    if return_colors:
        return ir_reps, colormap
    return ir_reps
numBins = 10 # number of bins in histograms figWidth = 14.4 # width of figure in inches nrow = 2 # number of subplot rows ncol = 3 # number of subplot columns nplot = 1 # current plot number # create single figure with subplots for all plots fig = plt.figure() fig.set_size_inches(figWidth, figWidth / 1.6) # histogram of population ax = fig.add_subplot(nrow, ncol, nplot) plots.histogram(dfPop, fig=fig, ax=ax, numBins=numBins, title='Population Distribution', xlabel=['height'], ylabel=['count']) # create array of x values for calculating pdf values xmin = dfPop.loc[:, 'height'].min() xmax = dfPop.loc[:, 'height'].max() x = np.linspace(xmin, xmax, 500) # plot normal probability density function with population mean and variance pdf = pdfnorm(x, mu, sigma) ax = ax.twinx() plots.scatter(x, pdf, fig=fig,
else: best_regressor = SVR(C=0.25, epsilon=0.25, kernel='linear') best_regressor.fit(train_x, train_y) prediction = best_regressor.predict(test_x) np.save('prediction.npy', prediction) print 'RMSE, MAE, MRE (all):', mean_squared_error(test_y, prediction)**0.5, \ mean_absolute_error(test_y, prediction), \ mean_relative_error(test_y, prediction) # determine error if only best 24 players are selected indices = [] for index in range(len(test_x)): if test[index, 0] in test_players.keys(): indices.append(index) print 'RMSE, MAE, MRE (24 best):', mean_squared_error(test_y[indices], prediction[indices])**0.5, \ mean_absolute_error(test_y[indices], prediction[indices]), \ mean_relative_error(test_y[indices], prediction[indices]) print zip(test_y[indices], prediction[indices]) if HISTOGRAM: histogram(test_y, prediction)