def check_replica(self):
    """Plot log2 experiment counts against log2 replica counts.

    Pairs in which either count is not strictly above ``self.count_filter``
    are discarded. ``self.experiment_values`` and ``self.replica_values``
    are then replaced by the log2 of the surviving counts, drawn as a point
    plot annotated with the square of ``utils.pearson`` for the two series,
    and handed to ``self._save_figure("check_replica")``.

    Returns:
        0 when matplotlib could not be imported or configured (nothing is
        plotted); otherwise None.
    """
    log_experiment = []
    log_replica = []
    # Finite sentinels for the running extremes. sys.maxsize is available on
    # Python 2.6+ and Python 3, whereas sys.maxint only exists on Python 2.
    min_value = sys.maxsize
    max_value = -sys.maxsize
    for i, replica_count in enumerate(self.replica_values):
        experiment_count = self.experiment_values[i]
        if experiment_count > self.count_filter and replica_count > self.count_filter:
            # Take each logarithm once and reuse it for the lists and the
            # axis limits.
            experiment_log = math.log(experiment_count, 2)
            replica_log = math.log(replica_count, 2)
            log_experiment.append(experiment_log)
            log_replica.append(replica_log)
            min_value = min(min_value, experiment_log, replica_log)
            max_value = max(max_value, experiment_log, replica_log)

    self.experiment_values = log_experiment
    self.replica_values = log_replica

    try:
        if self.postscript:
            # The backend has to be selected before pyplot is imported.
            import matplotlib
            matplotlib.use("PS")
        from matplotlib.pyplot import plot, xlabel, ylabel, clf, text, title, xlim, ylim
    except Exception:
        # Plotting is best effort, but KeyboardInterrupt / SystemExit must
        # not be swallowed the way a bare ``except:`` would.
        # NOTE(review): inside a class body Python mangles this
        # double-underscore name to ``_<Class>__matplotlibwarn`` -- confirm
        # the helper actually resolves where this method is defined.
        __matplotlibwarn(self)
        return 0

    clf()
    r_squared = utils.pearson(self.experiment_values, self.replica_values)**2
    # Place the annotation box near the top-left corner of the square plot.
    text(min_value+abs(max_value)*0.1, max_value-abs(max_value)*0.2,
         r'Pearson $R^2$= %s'%round(r_squared, 3), fontsize=18,
         bbox={'facecolor':'yellow', 'alpha':0.5, 'pad':10})
    xlabel("log2(%s)"%self.experiment_label, fontsize=18)
    ylabel("log2(%s)"%self.replica_label, fontsize=18)
    # Same range on both axes so that identical replicas fall on the diagonal.
    xlim(min_value, max_value)
    ylim(min_value, max_value)
    title(self.title_label, fontsize=24)
    plot(self.experiment_values, self.replica_values, '.')
    self._save_figure("check_replica")
def evaluate(model, loader):
    """Run ``model`` over every batch in ``loader`` and score its predictions.

    The model is switched to evaluation mode with ``model.eval()``. Each
    batch must unpack into seven items; the fourth one (``feats``) is not
    passed to the model. The model is expected to return a pair whose second
    element holds the predictions.

    Returns:
        A 4-tuple ``(rmse, mae, sd, pearson)``, each computed from the
        flattened targets and the flattened predictions, in that argument
        order.
    """
    model.eval()

    predicted = []
    expected = []
    for a2a_g, b2a_g, b2b_gl, feats, types, counts, target in loader:
        _, output = model(a2a_g, b2a_g, b2b_gl, types, counts)
        predicted.extend(output.tolist())
        expected.extend(target.tolist())

    # Flatten both collections to 1-D arrays before computing the metrics.
    y_hat = np.array(predicted).reshape(-1)
    y = np.array(expected).reshape(-1)

    return (
        rmse(y, y_hat),
        mae(y, y_hat),
        sd(y, y_hat),
        pearson(y, y_hat),
    )
def check_replica_correlation(self):
    """Box-plot the per-region correlation between experiment and replica.

    Not used for now.

    For every line of ``self.region_path`` a region is built with
    ``self._region_from_sline`` and the clusters overlapping it are fetched
    from both the experiment and the replica readers. Regions whose combined
    cluster count exceeds 20 contribute one ``utils.pearson`` value computed
    from the two regions' arrays. The mean of the values (negative ones
    counted as 0) is printed, then all collected values are printed, drawn
    as a box plot and passed to ``self._save_figure("check_replica")``.

    Returns:
        0 when no region had enough tags or when matplotlib could not be
        imported or configured (nothing is plotted); otherwise None.
    """
    min_tags = 20
    experiment_reader = utils.read_fetcher(
        self.current_experiment_path, self.experiment_format,
        cached=self.cached, logger=self.logger,
        use_samtools=self.use_samtools,
        access_sequential=self.access_sequential)
    replica_reader = utils.read_fetcher(
        self.current_replica_path, self.experiment_format,
        cached=self.cached, logger=self.logger,
        use_samtools=self.use_samtools,
        access_sequential=self.access_sequential)

    correlations_acum = 0
    num_correlations = 0
    # Collected across ALL regions: creating this list inside the loop would
    # discard every value except the one from the last region.
    correlations = []
    # The context manager closes the region file even if a reader raises.
    with open(self.region_path) as region_file:
        for region_line in region_file:
            sline = region_line.split()
            region_experiment = self._region_from_sline(sline)
            region_replica = region_experiment.copy()
            tags_experiment = experiment_reader.get_overlaping_clusters(region_experiment, overlap=1)
            tags_replica = replica_reader.get_overlaping_clusters(region_experiment, overlap=1)
            if len(tags_experiment) + len(tags_replica) > min_tags:
                region_experiment.add_tags(tags_experiment, clusterize=True)
                region_replica.add_tags(tags_replica, clusterize=True)
                num_correlations += 1
                correlation = utils.pearson(region_experiment.get_array(), region_replica.get_array())
                # Negative correlations count as zero in the running mean.
                correlations_acum += max(0, correlation)
                correlations.append(correlation)

    if not num_correlations:
        # No region passed the tag threshold: there is no mean to print
        # (it would divide by zero) and nothing to plot.
        return 0

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(correlations_acum/num_correlations)

    try:
        if self.postscript:
            # The backend has to be selected before pyplot is imported.
            import matplotlib
            matplotlib.use("PS")
        from matplotlib.pyplot import boxplot
    except Exception:
        # Plotting is best effort, but KeyboardInterrupt / SystemExit must
        # not be swallowed the way a bare ``except:`` would.
        # NOTE(review): inside a class body Python mangles this
        # double-underscore name to ``_<Class>__matplotlibwarn`` -- confirm
        # the helper actually resolves where this method is defined.
        __matplotlibwarn(self)
        return 0

    print(correlations)
    boxplot(correlations)
    self._save_figure("check_replica")
import numpy as np
from datasets import read_arcene
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.svm import SVC
from svm import SVM
from optim import GD
from utils import (pearson, spearman, chi2)

# Feature-scoring criteria addressable by name. Each entry is called with a
# feature column and the target vector and returns a sortable score.
metrics = {
    'pearson': pearson,
    'spearman': spearman,
    'chi2': chi2,
    # Product of the three individual criteria.
    'together': lambda x, y: pearson(x, y) * spearman(x, y) * chi2(x, y),
}


def sort_features(met, n):
    """Rank the first ``n`` feature indices under each requested criterion.

    Args:
        met: iterable of keys into ``metrics``.
        n: number of feature columns to rank (indices ``0 .. n-1``).

    Returns:
        A list with one NumPy array per entry of ``met``; each array holds
        the feature indices ordered from highest to lowest score.

    Note:
        The scores are computed from the module-level names ``X`` and ``y``,
        which are looked up when the function is called.
        NOTE(review): presumably ``X`` is the 2-D feature matrix and ``y``
        the label vector -- confirm where they are assigned.
    """
    candidates = np.arange(n)

    def ranking(criterion):
        # Order the candidate columns by descending criterion score.
        def score(column):
            return criterion(X[:, column], y)

        return np.array(sorted(candidates, key=score, reverse=True))

    return [ranking(metrics[name]) for name in met]