Esempio n. 1
0
def check_replica(self):
    """Scatter-plot log2 experiment counts against log2 replica counts.

    Pairs in which either count is not strictly greater than
    ``self.count_filter`` are discarded.  ``self.experiment_values`` and
    ``self.replica_values`` are then overwritten with the log2 values of the
    surviving pairs, so the raw counts are no longer available on the
    instance afterwards.

    The plot is annotated with the squared value of ``utils.pearson`` for
    the two log2 lists and is written out through
    ``self._save_figure("check_replica")``.

    Returns 0 if matplotlib could not be set up (after calling
    ``__matplotlibwarn``); otherwise falls through and returns None.
    """
    log_experiment = []
    log_replica = []
    # Sentinels: the first kept pair replaces both of them.
    min_value = sys.maxint
    max_value = -sys.maxint
    # Walk replica_values and index experiment_values so that a shorter
    # experiment list still raises IndexError instead of being truncated.
    for i, replica_count in enumerate(self.replica_values):
        experiment_count = self.experiment_values[i]
        if experiment_count > self.count_filter and replica_count > self.count_filter:
            # Compute each logarithm once; it feeds both the plotted lists
            # and the shared axis bounds.
            experiment_log = math.log(experiment_count, 2)
            replica_log = math.log(replica_count, 2)
            log_experiment.append(experiment_log)
            log_replica.append(replica_log)
            min_value = min(min_value, experiment_log, replica_log)
            max_value = max(max_value, experiment_log, replica_log)
    self.experiment_values = log_experiment
    self.replica_values = log_replica
    try:
        if self.postscript:
            import matplotlib
            matplotlib.use("PS")
        from matplotlib.pyplot import plot, xlabel, ylabel, clf, text, title, xlim, ylim
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate while any matplotlib setup failure is
        # reported and turned into the 0 return value.
        # NOTE(review): if this def sits inside a class body, Python's
        # private-name mangling rewrites this call to
        # _ClassName__matplotlibwarn -- confirm the helper resolves.
        __matplotlibwarn(self)
        return 0
    clf()
    r_squared = utils.pearson(self.experiment_values, self.replica_values)**2
    # Place the annotation near the lower-left / upper area of the square
    # plotting window defined by min_value and max_value.
    text(min_value+abs(max_value)*0.1, max_value-abs(max_value)*0.2, r'Pearson $R^2$= %s'%round(r_squared, 3), fontsize=18, bbox={'facecolor':'yellow', 'alpha':0.5, 'pad':10})
    xlabel("log2(%s)"%self.experiment_label, fontsize=18)
    ylabel("log2(%s)"%self.replica_label, fontsize=18)
    # Same limits on both axes so the diagonal reads as perfect agreement.
    xlim(min_value, max_value)
    ylim(min_value, max_value)
    title(self.title_label, fontsize=24)
    plot(self.experiment_values, self.replica_values, '.')

    self._save_figure("check_replica")
Esempio n. 2
0
def evaluate(model, loader):
    """Score ``model`` on every batch yielded by ``loader``.

    The model is switched to evaluation mode, then called once per batch.
    Each batch is unpacked into seven items; the fourth one (``feats``) is
    not forwarded to the model.  The second element of the model's output
    is taken as the prediction.

    Predictions and targets from all batches are flattened to 1-D arrays
    and the tuple ``(rmse, mae, sd, pearson)`` computed on
    ``(targets, predictions)`` is returned.
    """
    model.eval()
    predictions, targets = [], []
    for a2a_g, b2a_g, b2b_gl, _feats, types, counts, y in loader:
        _, y_hat = model(a2a_g, b2a_g, b2b_gl, types, counts)
        predictions.extend(y_hat.tolist())
        targets.extend(y.tolist())

    y_pred = np.array(predictions).reshape(-1)
    y_true = np.array(targets).reshape(-1)
    return (
        rmse(y_true, y_pred),
        mae(y_true, y_pred),
        sd(y_true, y_pred),
        pearson(y_true, y_pred),
    )
Esempio n. 3
0
def check_replica_correlation(self):
    """Box-plot the per-region correlation between experiment and replica.

    Not used, for now.

    For every line of ``self.region_path`` a region is built with
    ``self._region_from_sline`` and the clusters overlapping it are fetched
    from both the experiment and the replica reader.  Regions whose combined
    cluster count exceeds ``min_tags`` contribute one ``utils.pearson``
    value.  The mean of those values (negative ones counted as 0) is
    printed, followed by the list itself, and the list is box-plotted and
    saved through ``self._save_figure("check_replica")``.

    Returns 0 when no figure is produced: either no region qualified or
    matplotlib could not be set up (after calling ``__matplotlibwarn``).
    Otherwise falls through and returns None.
    """
    min_tags = 20
    experiment_reader = utils.read_fetcher(self.current_experiment_path, self.experiment_format, cached=self.cached, logger=self.logger, use_samtools=self.use_samtools, access_sequential=self.access_sequential)
    replica_reader = utils.read_fetcher(self.current_replica_path, self.experiment_format, cached=self.cached, logger=self.logger, use_samtools=self.use_samtools, access_sequential=self.access_sequential)
    correlations_acum = 0
    # Accumulated across ALL regions.  It must be created outside the loop:
    # resetting it per region would leave only the last region's value for
    # the box plot, and would leave it undefined for an empty region file.
    correlations = []
    # Context manager so the region file is closed even if a reader raises.
    with open(self.region_path) as region_file:
        for region_line in region_file:
            sline = region_line.split()
            region_experiment = self._region_from_sline(sline)
            region_replica = region_experiment.copy()
            # Both readers are queried with the same coordinates.
            tags_experiment = experiment_reader.get_overlaping_clusters(region_experiment, overlap=1)
            tags_replica = replica_reader.get_overlaping_clusters(region_experiment, overlap=1)
            count_experiment = len(tags_experiment)
            count_replica = len(tags_replica)
            if count_experiment+count_replica > min_tags:
                region_experiment.add_tags(tags_experiment, clusterize=True)
                region_replica.add_tags(tags_replica, clusterize=True)
                correlation = utils.pearson(region_experiment.get_array(), region_replica.get_array())
                # Negative correlations count as 0 towards the mean but are
                # kept as-is in the plotted list.
                correlations_acum += max(0, correlation)
                correlations.append(correlation)

    num_correlations = len(correlations)
    if not num_correlations:
        # Nothing to average or plot; avoids dividing by zero below.
        return 0

    # Single-argument print(...) emits the same text as the Python 2 print
    # statement and also parses on Python 3.
    print(correlations_acum/num_correlations)
    try:
        if self.postscript:
            import matplotlib
            matplotlib.use("PS")
        from matplotlib.pyplot import boxplot
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate.
        __matplotlibwarn(self)
        return 0

    print(correlations)
    boxplot(correlations)
    self._save_figure("check_replica")
Esempio n. 4
0
import numpy as np
from datasets import read_arcene
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.svm import SVC
from svm import SVM
from optim import GD
from utils import (pearson, spearman, chi2)

# Feature-scoring criteria, looked up by name.  Each entry is called as
# criterion(x, y) with the two imported scoring functions' argument order.
metrics = dict(
    pearson=pearson,
    spearman=spearman,
    chi2=chi2,
    # Product of the three individual scores for the same (x, y) pair.
    together=lambda x, y: pearson(x, y) * spearman(x, y) * chi2(x, y),
)


def sort_features(met, n):
    """Rank the feature indices ``0 .. n-1`` once per requested criterion.

    ``met`` is an iterable of keys into the module-level ``metrics`` table.
    For each key, column ``i`` of the module-level ``X`` is scored against
    the module-level ``y`` with that criterion, and the indices are ordered
    from highest to lowest score.

    Returns a list with one NumPy index array per entry of ``met``, in the
    same order as ``met``.
    """
    candidate_indices = np.arange(n)

    def ranking(criterion):
        # Highest-scoring column first; sorted() keeps ties in index order.
        ordered = sorted(
            candidate_indices,
            key=lambda column: criterion(X[:, column], y),
            reverse=True,
        )
        return np.array(ordered)

    return [ranking(metrics[name]) for name in met]