Esempio n. 1
0
    def _pr_rc_curve_r(observations, predictions, FDRth=0.05):
        """
        Build a precision-recall curve with the R package PRROC.

        ``predictions`` is handed to ``PRROC.pr_curve`` as ``scores.class0`` and
        ``observations`` as ``weights.class0``; both are converted with
        ``numpy2ri.py2ri`` first.

        :param observations: known truth set
        :param predictions: all data
        :param FDRth: false-discovery-rate cut-off; only curve points with
            precision >= ``1 - FDRth`` are selected
        :return: tuple ``(df, auc, SENS, FDR5percTh)`` where ``df`` is the curve
            as a DataFrame with columns recall / precision / threshold, ``auc``
            is PRROC's ``auc.integral`` value, ``SENS`` is the recall at the
            lowest-index selected row (row 0 when nothing is selected) and
            ``FDR5percTh`` is the negated minimum threshold of the selected
            rows (NaN when nothing is selected)
        """
        obs_rtbl = numpy2ri.py2ri(observations)
        prd_rtbl = numpy2ri.py2ri(predictions)
        curve_prm = {'scores.class0': prd_rtbl, 'weights.class0': obs_rtbl, 'curve': True, 'sorted': True}
        prc = PRROC.pr_curve(**curve_prm)
        auc = prc.rx2('auc.integral')[0]
        curve = numpy2ri.ri2py(prc.rx2('curve'))
        df = pd.DataFrame(curve, columns=['recall', 'precision', 'threshold'])

        # Rows meeting the precision target; computed once and reused below.
        passing = df[df.precision >= (1 - FDRth)]
        FDR5percTh = -passing['threshold'].min()
        # min() over an empty selection is NaN; fall back to the first row then.
        if np.isnan(FDR5percTh):
            index_min = 0
        else:
            index_min = min(passing.index.tolist())

        SENS = df.at[index_min, 'recall']

        return df, auc, SENS, FDR5percTh
Esempio n. 2
0
    def plot_qc_metrics(self, output_dir):
        """Plot QC results from ENmix pipeline and possible minfi. Still experimental.

        Writes ``dist.jpg`` (a 3x2 grid of multidensity / multifreqpoly plots for
        ``self.beta`` and for its Infinium I and Infinium II subsets) and
        ``qcReport.pdf`` into ``output_dir``. Also calls ENmix ``plotCtrl`` and
        minfi ``mdsPlot`` / ``densityPlot`` on ``self.RGset``.

        Parameters
        ----------
        output_dir
            Where to store plots."""
        self.enmix.plotCtrl(self.RGset)
        grdevice = importr("grDevices")
        # The returned handle is not used here; presumably the package must be
        # loaded for the ENmix plotting calls -- confirm before removing.
        importr("geneplotter")
        base = importr('base')
        anno = self.minfi.getAnnotation(self.RGset)
        anno_py = pandas2ri.ri2py(robjects.r['as'](anno, 'data.frame'))
        beta_py = pandas2ri.ri2py(self.beta)
        # Split the beta values by the annotation's probe "Type" column.
        beta1 = numpy2ri.py2ri(beta_py[anno_py["Type"] == "I"])
        beta2 = numpy2ri.py2ri(beta_py[anno_py["Type"] == "II"])
        grdevice.jpeg("{}/dist.jpg".format(output_dir), height=900, width=600)
        try:
            base.par(mfrow=robjects.vectors.IntVector([3, 2]))
            self.enmix.multidensity(self.beta, main="Multidensity")
            self.enmix.multifreqpoly(self.beta, xlab="Beta value")
            self.enmix.multidensity(beta1, main="Multidensity: Infinium I")
            self.enmix.multifreqpoly(beta1, main="Multidensity: Infinium I", xlab="Beta value")
            self.enmix.multidensity(beta2, main="Multidensity: Infinium II")
            self.enmix.multifreqpoly(beta2, main="Multidensity: Infinium II", xlab="Beta value")
        finally:
            # Always close the JPEG device, even when a plotting call fails,
            # so the graphics device is not left open.
            grdevice.dev_off()
        self.minfi.qcReport(self.RGset, pdf="{}/qcReport.pdf".format(output_dir))
        self.minfi.mdsPlot(self.RGset)
        self.minfi.densityPlot(self.RGset, main='Beta', xlab='Beta')
Esempio n. 3
0
def vs_sample_vec(y_arr, dat_arr, w=None, p=0.5):
    """Sample Variogram Score; vectorized version

    Compute the variogram score VS(*y_arr*, *dat_arr*), where *y_arr* is a series of
    *d*-dimensional observations and *dat_arr* is a series of
    samples of multivariate forecasts.
    For details, see Scheuerer, M. and Hamill, T.M. (2015). Variogram-based
    proper scoring rules for probabilistic forecasts of multivariate quantities.
    Monthly Weather Review, 143, 1321-1334.

    Args:
        *y_arr* (np.array): Series of observations of
        shape (*d*, *n*), where *d* is the dimension of the observations,
        and *n* the number of observation. Hence each column contains a single
        *d*-dimensional realization.

        *dat_arr* (np.array): Forecast sample
        of shape (*d*, *m*, *n*), where
        *d* is the dimension of the realized values, *m* the number of
        samples, and *n* the number of realizations.

        *p* (float): Order of variogram score. Standard choices include *p* = 1 and
        *p* = 0.5 (default).

        *w* (np.array):  Numeric array of weights for *dat* used in the variogram
          score.  If no weights are specified, constant weights with *w*
          = 1 are used.

    Returns:
        *np.array*: Variogram score of each forecast-observation pair.

    Raises:
        ValueError: If the inputs cannot be converted to arrays / a float
        ('Input has wrong format.') or if their shapes are inconsistent
        ('Parameters have wrong dimension.').

    """
    try:
        # Insert a length-1 "sample" axis so observations line up with dat_arr.
        y_arr = np.expand_dims(np.array(y_arr), 1)
        dat_arr = np.array(dat_arr)
        p_r = float(p)
        if w is not None:
            w = np.array(w)
    except (TypeError, ValueError) as err:
        # Fail loudly instead of printing and continuing with unbound names.
        raise ValueError('Input has wrong format.') from err

    if (y_arr.ndim != 3
        or dat_arr.ndim != 3
        or y_arr.shape[0] != dat_arr.shape[0]
        or y_arr.shape[2] != dat_arr.shape[2]
    ):
        raise ValueError('Parameters have wrong dimension.')

    # R-side conversion happens only after the inputs have been validated.
    w_r = rpy2.robjects.NULL if w is None else np2ri.py2ri(w)

    # Column 1 along the sample axis is the observation, the rest the forecast.
    df = np.concatenate((y_arr, dat_arr), axis=1)
    rpy2.robjects.globalenv['df'] = np2ri.py2ri(df)
    rpy2.robjects.globalenv['p'] = p_r
    rpy2.robjects.globalenv['w'] = w_r

    vscr_r = rpy2.robjects.r('apply(df, c(3), function(x) vs_sample(x[,1], x[,-1], w, p))')
    return np.array(vscr_r)
Esempio n. 4
0
    def _roc_curve_r(observations, predictions, FDRth=0.05):
        """
        Build a ROC curve with the R package pROC.

        Both inputs are converted with ``numpy2ri.py2ri`` and passed to
        ``pROC.roc`` with ``direction='>'``; the coordinates of all points are
        then fetched with ``pROC.coords``.

        :param observations: known truth set
        :param predictions: all data
        :param FDRth: false-discovery-rate cut-off; only points with
            ppv >= ``1 - FDRth`` are selected
        :return: tuple ``(df, auc, SENS, FDR5percTh)`` where ``df`` has the
            columns threshold / ppv / sensitivity / specificity, ``auc`` is the
            first element of pROC's ``auc`` entry, ``FDR5percTh`` is the largest
            threshold among the selected points (NaN when none is selected) and
            ``SENS`` is the sensitivity at the lowest-index row whose threshold
            is <= ``FDR5percTh`` (row 0 when ``FDR5percTh`` is NaN)
        """
        obs_rtbl = numpy2ri.py2ri(observations)
        prd_rtbl = numpy2ri.py2ri(predictions)
        RES = pROC.roc(obs_rtbl, prd_rtbl, direction='>')
        auc = pandas2ri.ri2py(RES.rx2('auc'))[0]

        # One list drives both the R request and the DataFrame column labels.
        columns = ['threshold', 'ppv', 'sensitivity', 'specificity']
        COORS = pROC.coords(RES, 'all', ret=r.c(*columns))
        cords = numpy2ri.ri2py(COORS)
        # Transposed so that each requested coordinate becomes a column.
        df = pd.DataFrame(cords.T, columns=columns)

        FDR5percTh = df[df.ppv >= (1 - FDRth)]['threshold'].max()
        # max() over an empty selection is NaN; fall back to the first row then.
        if np.isnan(FDR5percTh):
            index_min = 0
        else:
            index_min = min(df[df.threshold <= FDR5percTh].index.tolist())

        SENS = df.at[index_min, 'sensitivity']

        return df, auc, SENS, FDR5percTh
Esempio n. 5
0
def pd_py2ri(o):
    """Convert a pandas object (or anything else) to an R object.

    Dispatch order:

    * ``pd.Series`` is first wrapped in a ``pd.DataFrame`` keeping its index,
      then handled as a DataFrame.
    * ``pd.DataFrame`` with a ``DatetimeIndex`` goes through
      ``rconv.convert_df_to_xts``; any other DataFrame through
      ``rcom.convert_to_r_dataframe``.
    * ``pd.DatetimeIndex`` goes through ``rconv.convert_datetime_index``.
    * ``pd.Timestamp`` goes through ``rconv.convert_timestamp``.
    * Everything else (or a ``None`` result from the above) is passed to
      ``numpy2ri.py2ri``, falling back to
      ``robjects.default_converter.py2ri`` if that raises.
    """
    res = None
    if isinstance(o, pd.Series):
        o = pd.DataFrame(o, index=o.index)

    if isinstance(o, pd.DataFrame):
        if isinstance(o.index, pd.DatetimeIndex):
            res = rconv.convert_df_to_xts(o)
        else:
            res = rcom.convert_to_r_dataframe(o)
    elif isinstance(o, pd.DatetimeIndex):
        res = rconv.convert_datetime_index(o)
    elif isinstance(o, pd.Timestamp):
        res = rconv.convert_timestamp(o)

    if res is None:
        try:
            res = numpy2ri.py2ri(o)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are not swallowed by the fallback.
            res = robjects.default_converter.py2ri(o)

    return res
Esempio n. 6
0
    def _infer_network(self, data):
        """
        Run GENIE3 through rpy2 and store the result as ``self.graph``.

        Args:
            data (pd.DataFrame): data to be used for the inference.
        """
        # switch on the implicit pandas -> R conversion
        pandas2ri.activate()
        genie3_pkg = importr('GENIE3')
        for dependency in ('foreach', 'doParallel'):
            importr(dependency)

        # publish the transposed values and their labels in the R global
        # environment, then attach the labels on the R side
        globalenv['r_matrix'] = numpy2ri.py2ri(data.T.values)
        globalenv['r_rows'] = data.columns
        globalenv['r_cols'] = data.index
        r('''
        rownames(r_matrix) <- c(r_rows)
        colnames(r_matrix) <- c(r_cols)
        ''')

        # call GENIE3 on the labelled matrix
        r_weights = genie3_pkg.GENIE3(
            globalenv['r_matrix'],
            self.regulators,
            self.targets,
            self.tree_method,
            self.k,
            self.n_trees,
            self.n_cores,
            self.verbose,
        )
        labels = data.columns
        adjacency = pd.DataFrame(numpy2ri.ri2py(r_weights),
                                 columns=labels,
                                 index=labels)
        self.graph = Graph(adjacency=adjacency)
        logger.debug('inferred with {}'.format(self.method))
Esempio n. 7
0
def pd_py2ri(o):
    """Convert a pandas object (or anything else) to an R object.

    A ``pd.Series`` is wrapped in a ``pd.DataFrame`` (keeping its index) and
    then treated like a DataFrame. DataFrames with a ``DatetimeIndex`` are
    converted with ``rconv.convert_df_to_xts``, other DataFrames with
    ``rcom.convert_to_r_dataframe``, a ``pd.DatetimeIndex`` with
    ``rconv.convert_datetime_index`` and a ``pd.Timestamp`` with
    ``rconv.convert_timestamp``.

    When none of these applies, or the chosen converter returned ``None``, the
    object is passed to ``numpy2ri.py2ri``; if that raises, it is passed to
    ``robjects.default_converter.py2ri`` instead.
    """
    res = None
    if isinstance(o, pd.Series):
        o = pd.DataFrame(o, index=o.index)

    if isinstance(o, pd.DataFrame):
        if isinstance(o.index, pd.DatetimeIndex):
            res = rconv.convert_df_to_xts(o)
        else:
            res = rcom.convert_to_r_dataframe(o)
    elif isinstance(o, pd.DatetimeIndex):
        res = rconv.convert_datetime_index(o)
    elif isinstance(o, pd.Timestamp):
        res = rconv.convert_timestamp(o)

    if res is None:
        try:
            res = numpy2ri.py2ri(o)
        except Exception:
            # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit;
            # only ordinary conversion failures should trigger the fallback.
            res = robjects.default_converter.py2ri(o)

    return res
Esempio n. 8
0
def es_sample(y, dat):
    """Sample Energy Score

    Compute the energy score ES(*y*, *dat*), where *y* is a vector of a
    *d*-dimensional observation and dat is a multivariate ensemble
    forecast.
    For details, see Gneiting, T., Stanberry, L.I., Grimit, E.P.,
    Held, L. and Johnson, N.A. (2008). Assessing probabilistic forecasts of
    multivariate quantities, with an application to ensemble predictions of
    surface winds. Test, 17, 211-235.

    Args:
        *y* (np.array): Realized values (numeric vector of length *d*).

        *dat* (np.array): Forecast sample of shape (*d*, *m*), where
        *d* is the dimension of the realization and
        *m* the number of sample members. Each of the *m* columns corresponds
        to the *d*-dimensional forecast of one ensemble member.

    Returns:
        float: Energy score of the forecast-observation pair.

    Raises:
        ValueError: If the inputs cannot be converted ('Input has wrong format.').

    """
    try:
        y_r = rpy2.robjects.FloatVector(np.array(y))
        dat_r = np2ri.py2ri(np.array(dat))
    except (TypeError, ValueError) as err:
        # Previously the error was only printed and execution continued with
        # y_r / dat_r unbound; raise so the caller sees the real problem.
        raise ValueError('Input has wrong format.') from err

    return srl.es_sample(y_r, dat_r)[0]
Esempio n. 9
0
def vs_sample(y, dat, w=None, p=0.5):
    """Sample Variogram Score

    Compute the variogram score VS(*y*, *dat*) of order *p*, where *y* is a
    *d*-dimensional observation and dat is a multivariate ensemble
    forecast.
    For details, see Scheuerer, M. and Hamill, T.M. (2015). Variogram-based
    proper scoring rules for probabilistic forecasts of multivariate quantities.
    Monthly Weather Review, 143, 1321-1334.

    Args:
        *y* (np.array): Observation (numeric vector of length *d*).

        *dat* (np.array): Forecast sample of shape (*d*, *m*), where
        *d* is the dimension of the realization and
        *m* the number of sample members.

        *p* (float): Order of variogram score. Standard choices include *p* = 1 and
        *p* = 0.5 (default).

        *w* (np.array):  Numeric array of weights for *dat* used in the variogram
          score.  If no weights are specified, constant weights with *w*
          = 1 are used.


    Returns:
        float: Variogram score of the forecast-observation pair.

    Raises:
        ValueError: If the inputs cannot be converted ('Input has wrong format.').

    """
    try:
        # Pure-Python/numpy coercions first, R conversions afterwards.
        y = np.array(y)
        dat = np.array(dat)
        p_r = float(p)
        if w is None:
            w_r = rpy2.robjects.NULL
        else:
            w_r = np2ri.py2ri(np.array(w))
        y_r = rpy2.robjects.FloatVector(y)
        dat_r = np2ri.py2ri(dat)
    except (TypeError, ValueError) as err:
        # Previously the error was only printed and execution continued with
        # unbound names; raise so the caller sees the real problem.
        raise ValueError('Input has wrong format.') from err

    return srl.vs_sample(y=y_r, dat=dat_r, w=w_r, p=p_r)[0]
Esempio n. 10
0
def es_sample_vec(y_arr, dat_arr):
    """Sample Energy Score; vectorized version

    Compute the energy score ES(*y_arr*, *dat_arr*), where *y_arr* is a series of
    *d*-dimensional observations and *dat_arr* is a series of
    samples of multivariate forecasts.
    For details, see Gneiting, T., Stanberry, L.I., Grimit, E.P.,
    Held, L. and Johnson, N.A. (2008). Assessing probabilistic forecasts of
    multivariate quantities, with an application to ensemble predictions of
    surface winds. Test, 17, 211-235.

    Args:
        *y_arr* (np.array): Series of observations of
        shape (*d*, *n*), where *d* is the dimension of the observations,
        and *n* the number of observation. Hence each column contains a single
        *d*-dimensional realization.

        *dat_arr* (np.array): Forecast sample
        of shape (*d*, *m*, *n*), where
        *d* is the dimension of the realized values, *m* the number of
        samples, and *n* the number of realizations.

    Returns:
        np.array: Energy score of each forecast-observation pair.

    Raises:
        ValueError: If the inputs cannot be converted to arrays
        ('Input has wrong format.') or if their shapes are inconsistent
        ('Parameters have wrong dimension.').

    """
    try:
        # Insert a length-1 "sample" axis so observations line up with dat_arr.
        y_arr = np.expand_dims(np.array(y_arr), 1)
        dat_arr = np.array(dat_arr)
    except (TypeError, ValueError) as err:
        # Fail loudly instead of printing and continuing with bad data.
        raise ValueError('Input has wrong format.') from err

    if (y_arr.ndim != 3
        or dat_arr.ndim != 3
        or y_arr.shape[0] != dat_arr.shape[0]
        or y_arr.shape[2] != dat_arr.shape[2]
    ):
        raise ValueError('Parameters have wrong dimension.')

    # Column 1 along the sample axis is the observation, the rest the forecast.
    df = np.concatenate((y_arr, dat_arr), axis=1)
    rpy2.robjects.globalenv['df'] = np2ri.py2ri(df)

    escr_r = rpy2.robjects.r('apply(df, c(3), function(x) es_sample(x[,1], x[,-1]))')
    return np.array(escr_r)
Esempio n. 11
0
def crps_sample_vec(y_arr, dat_arr):
    """Sample Continuous Ranked Probability Score (CRPS); vectorized version

    Compute CRPS(*y_arr*, *dat_arr*), where *y_arr* is a series of
    univariate observations and *dat_arr* is a series of
    ensemble forecasts.
    For details, see Matheson, J.E. and Winkler, R.L. (1976). Scoring rules for
    continuous probability distributions. Management Science, 22, 1087-1096.

    Args:
        *y_arr* (np.array): Series of observations of
        length *n*, where *n* is the number of observations.

        *dat_arr* (np.array): Ensemble forecasts
        of shape (*m*, *n*), where *m* is the number of ensemble members,
        and *n* the number of observation.

    Returns:
        np.array: CRPS of each forecast-observation pair.

    Raises:
        ValueError: If the inputs cannot be converted
        ('Input has wrong format.') or if their shapes are inconsistent
        ('Parameters have wrong dimension.').

    """
    try:
        y_arr = np.array(y_arr)
        dat_arr = np.array(dat_arr)
    except (TypeError, ValueError) as err:
        # Fail loudly instead of printing and continuing with unbound names.
        raise ValueError('Input has wrong format.') from err

    if (y_arr.ndim != 1
        or dat_arr.ndim != 2
        or y_arr.shape[0] != dat_arr.shape[1]
    ):
        raise ValueError('Parameters have wrong dimension.')

    # Convert to R objects only once the shapes are known to be consistent.
    try:
        y_r = rpy2.robjects.FloatVector(y_arr)
        dat_r = np2ri.py2ri(dat_arr)
    except (TypeError, ValueError) as err:
        raise ValueError('Input has wrong format.') from err

    rpy2.robjects.globalenv['obs'] = y_r
    rpy2.robjects.globalenv['forc'] = dat_r

    # Row 1 of rbind(obs, forc) is the observation, the remaining rows the ensemble.
    crps_r = rpy2.robjects.r('apply(rbind(obs,forc), 2, function(x) crps_sample(x[1], x[-1]))')
    return np.array(crps_r)
Esempio n. 12
0
def Rdeepnet(train, train_class, test, hidden_N, nepoch):
    """Train a ``deepnet`` neural network in R and predict on ``test``.

    Defines an R function ``dp`` that seeds R's RNG with 1, fits
    ``nn.train`` on ``train`` / ``train_class`` with ``hidden = hidden_N`` and
    ``numepochs = nepoch``, predicts on ``test`` with ``nn.predict`` and rounds
    the predictions. The R function also prints ``hidden_N`` and the elapsed time.

    Note: this activates the global pandas2ri and numpy2ri conversions.

    Args:
        train: training features; converted with ``pandas2ri.py2ri``.
        train_class: training targets; converted with ``pandas2ri.py2ri``.
        test: features to predict on; converted with ``pandas2ri.py2ri``.
        hidden_N: hidden-layer specification; wrapped in ``np.array`` and
            converted with ``numpy2ri.py2ri``.
        nepoch: value passed to ``nn.train`` as ``numepochs``.

    Returns:
        The rounded predictions, converted with ``pandas2ri.ri2py_dataframe``.
    """
    import rpy2.robjects as robjects
    from rpy2.robjects import numpy2ri
    from rpy2.robjects import pandas2ri

    robjects.r('''
    dp <- function(hidden_N,nepoch,train,train_class,test) #train,class,test)
    {
        a<- Sys.time()
        print(hidden_N)
        labelNames = c("Shopping","Food")
        predictions <- matrix(0,nrow=nrow(test), ncol=length(labelNames))
        predictions <- data.frame(predictions)
        #colnames(predictions) <- labelNames
        set.seed(1)
        library(deepnet)
        nn <- nn.train(as.matrix(train),as.matrix(train_class),hidden = hidden_N,numepochs = nepoch)
        predictions <- nn.predict(nn,test)
        predictions <- round(predictions)
        b<- Sys.time()
        print(b-a)
        return(predictions)
    }
    ''')
    r_f = robjects.r['dp']

    # pandas inputs -> R data frames
    pandas2ri.activate()
    train = pandas2ri.py2ri(train)
    test = pandas2ri.py2ri(test)
    train_class = pandas2ri.py2ri(train_class)

    # Python sequence -> R vector
    numpy2ri.activate()
    hidden_N = numpy2ri.py2ri(np.array(hidden_N))

    predictions = pandas2ri.ri2py_dataframe(
        r_f(hidden_N, nepoch, train, train_class, test))
    return predictions
Esempio n. 13
0
        remained = x[-top[i, :] > -1]
        for j in range(len(remained)):
            combination = np.zeros(bioN)
            tmpselected = np.append(selected, remained[j])
            combination[tmpselected - 1] = 1
            comb = np.concatenate(([combination], comb))
    return comb


## implement iteration
thresholdN = 10
iterationT = 2
# iterationT is bumped at the top of every pass, so the body runs with
# iterationT = 3 .. thresholdN.
while iterationT < thresholdN:
    iterationT += 1
    comb = prepareCombination(topres, bioN)
    # Hand the combination matrix to R under the name `rcomb`.
    rcomb = numpy2ri.py2ri(comb)
    rcomb = robjects.Matrix(rcomb)
    robjects.globalenv['rcomb'] = rcomb
    # Evaluate func_bycb on the combinations inside R; the result is left in
    # the R variable `resC` and the elapsed time in `ctimeC`.
    rscript_calC = '''
    rcomb <- data.frame(rcomb)
    starttimeC<-Sys.time()
    resC<-func_bycb(rcomb)
    endtimeC<-Sys.time()
    ctimeC<-endtimeC-starttimeC
    '''
    robjects.r(rscript_calC)
    npresC = np.array(robjects.r['resC'])
    # Shape is passed positionally: the `newshape` keyword of np.reshape is
    # deprecated in recent NumPy releases.
    npresC = np.reshape(npresC, (npresC.shape[0], npresC.shape[1]))
    npresC = np.transpose(npresC)
    # Append this pass's results below the accumulated ones.
    npres = np.concatenate((npres, npresC), axis=0)