Example #1
def ri2py_listvector(obj):
    if 'data.frame' in obj.rclass:
        items = zip(obj.do_slot('names'), (numpy2ri.ri2py(x) for x in obj))
        res = PandasDataFrame.from_items(items)
    else:
        res = numpy2ri.ri2py(obj)
    return res
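A minimal usage sketch for the converter above, assuming an rpy2 2.x environment (where numpy2ri.ri2py exists) and a pandas version that still provides DataFrame.from_items; the R data.frame is an illustrative input:

import rpy2.robjects as robjects

r_df = robjects.r('data.frame(x = 1:3, y = c(4.0, 5.0, 6.0))')
py_df = ri2py_listvector(r_df)  # -> pandas DataFrame with columns 'x' and 'y'
print(py_df)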
Example #2
def ri2py_listvector(obj):        
    if 'data.frame' in obj.rclass:
        items = zip(obj.do_slot('names'), (numpy2ri.ri2py(x) for x in obj))
        res = PandasDataFrame.from_items(items)
    else:
        res = numpy2ri.ri2py(obj)
    return res
Example #3
def ri2py_dataframe(obj):
    # use the numpy converter
    recarray = numpy2ri.ri2py(obj)
    try:
        idx = numpy2ri.ri2py(obj.do_slot('row.names'))
    except LookupError as le:
        idx = None
    res = PandasDataFrame.from_records(recarray, index=idx)
    return res
Example #4
def ri2py_dataframe(obj):
    # use the numpy converter
    recarray = numpy2ri.ri2py(obj)
    try:
        idx = numpy2ri.ri2py(obj.do_slot('row.names'))
    except LookupError as le:
        idx = None
    res = PandasDataFrame.from_records(recarray,
                                       index=idx)
    return res
Example #5
def pval_grenander_fit(pvals):
    fdrtool = importr('fdrtool')
    pval_vec = ro.FloatVector(pvals)
    pval_ecdf = ecdf_pkg.ecdf_pval(pval_vec)
    gre_fit = fdrtool.grenander(pval_ecdf, type='decreasing')
    x_knots = rpyn.ri2py(gre_fit.rx2('x.knots'))
    f_knots = rpyn.ri2py(gre_fit.rx2('f.knots'))
    if len(f_knots) == 0:
        x_knots = np.array([0, 1])
        f_knots = np.array([1, 1])
    assert len(f_knots) == len(x_knots)
    return x_knots, f_knots
Example #6
        def predict(self, X, eval_MSE=False):
            """
            X should be a dataframe
            """
            X = self._check_X(X)
            _ = self.pkg.predict_randomForest(self.rf, X, predict_all=eval_MSE)

            if eval_MSE:
                y_hat = numpy2ri.ri2py(_[0])
                mse = std(numpy2ri.ri2py(_[1]), axis=1, ddof=1) ** 2.
                return y_hat, mse
            else:
                return numpy2ri.ri2py(_)
Example #7
def r_spectrum(signal, r_periodogram):
    filtered_signal = detrend(signal, type='linear')
    filtered_signal = recursive_filter_function(filtered_signal)
    filtered_signal = demean(filtered_signal)
    res = r_periodogram(numpy2ri.numpy2ri(filtered_signal),
                        pad=0.3,
                        tap=0.3,
                        span=2,
                        plot=False,
                        detrend=True,
                        demean=True)
    freq = numpy2ri.ri2py(res.rx2('freq'))
    psd = numpy2ri.ri2py(res.rx2('spec'))
    return 1 / freq, psd
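The r_periodogram callable is supplied by the caller; one plausible way to obtain such a handle (an assumption, not stated in this snippet) is R's stats::spec.pgram via importr, which translates the dot in the R name to an underscore:

from rpy2.robjects.packages import importr

r_stats = importr('stats')
# period, psd = r_spectrum(my_signal, r_stats.spec_pgram)  # hypothetical call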
Example #8
    def _infer_network(self, data):
        """
        Infer the network.

        Args:
            data (pd.DataFrame): data to be used for the inference.
        """
        # activate implicit conversion from pandas to R objects
        pandas2ri.activate()
        genie3 = importr('GENIE3')
        importr('foreach')
        importr('doParallel')
        # transform pandas dataframe into GENIE3 input format
        globalenv['r_matrix'] = numpy2ri.py2ri(data.T.values)
        globalenv['r_rows'] = data.columns
        globalenv['r_cols'] = data.index
        r('''
        rownames(r_matrix) <- c(r_rows)
        colnames(r_matrix) <- c(r_cols)
        ''')
        expr_matrix = globalenv['r_matrix']
        # run GENIE3
        values = numpy2ri.ri2py(
            genie3.GENIE3(expr_matrix, self.regulators, self.targets,
                          self.tree_method, self.k, self.n_trees, self.n_cores,
                          self.verbose))
        weight_matrix = pd.DataFrame(values,
                                     columns=data.columns,
                                     index=data.columns)
        self.graph = Graph(adjacency=weight_matrix)
        logger.debug('inferred with {}'.format(self.method))
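The numpy2ri.py2ri/ri2py calls above use the rpy2 2.x names; under rpy2 >= 3.0 the converters were renamed, and the same matrix hand-off could be sketched as follows (illustrative 3x4 random matrix with made-up row and column names):

import numpy as np
from rpy2.robjects import globalenv, r
from rpy2.robjects.numpy2ri import numpy2rpy, rpy2py

globalenv['r_matrix'] = numpy2rpy(np.random.rand(3, 4))
r('rownames(r_matrix) <- paste0("gene", 1:3)')
r('colnames(r_matrix) <- paste0("sample", 1:4)')
values = rpy2py(globalenv['r_matrix'])  # back to a numpy array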
Example #9
    def _pr_rc_curve_r(observations, predictions, FDRth=0.05):
        """
        :param observations: known truth set
        :param predictions: all data
        :param FDRth:
        :return:
        """
        obs_rtbl = numpy2ri.py2ri(observations)
        prd_rtbl = numpy2ri.py2ri(predictions)
        curve_prm = {'scores.class0': prd_rtbl, 'weights.class0': obs_rtbl, 'curve': True, 'sorted': True}
        prc = PRROC.pr_curve(**curve_prm)
        auc = prc.rx2('auc.integral')[0]
        curve = numpy2ri.ri2py(prc.rx2('curve'))
        cols = ['recall', 'precision', 'threshold']
        df = pd.DataFrame(curve, columns=cols)
        FDR5percTh = - (df[df.precision >= (1 - FDRth)])['threshold'].min()
        if not np.isnan(FDR5percTh):
            index_min = min(df[df.precision >= (1 - FDRth)].index.tolist())
        else:
            index_min = 0

        SENS = df.at[index_min, 'recall']
        threshold = -FDR5percTh

        return df, auc, SENS, FDR5percTh
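The PRROC handle used above is presumably created with importr, which maps the R function pr.curve to the attribute pr_curve (dots become underscores); a sketch, assuming the PRROC R package is installed:

from rpy2.robjects.packages import importr

PRROC = importr('PRROC')
# prc = PRROC.pr_curve(**{'scores.class0': prd_rtbl, 'weights.class0': obs_rtbl, 'curve': True})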
Example #10
    def _roc_curve_r(observations, predictions, FDRth=0.05):
        """
        :param observations: known truth set
        :param predictions: all data
        :param FDRth:
        :return:
        """
        obs_rtbl = numpy2ri.py2ri(observations)
        prd_rtbl = numpy2ri.py2ri(predictions)
        roc_prm = {'direction': '>'}
        RES = pROC.roc(obs_rtbl, prd_rtbl, **roc_prm)
        auc = pandas2ri.ri2py(RES.rx2('auc'))[0]
        columns = ['threshold', 'ppv', 'sensitivity', 'specificity']
        coor_prm = {'ret': r.c('threshold', 'ppv', 'sensitivity', 'specificity')}
        COORS = pROC.coords(RES, 'all', **coor_prm)
        cords = numpy2ri.ri2py(COORS)
        df = pd.DataFrame(cords.T, columns=columns)
        FDR5percTh = (df[df.ppv >= (1 - FDRth)])['threshold'].max()
        if not np.isnan(FDR5percTh):
            index_min = min(df[df.threshold <= FDR5percTh].index.tolist())
        else:
            index_min = 0

        threshold = df.at[index_min, 'threshold']
        SENS = df.at[index_min, 'sensitivity']
        SPEC = df.at[index_min, 'specificity']

        return df, auc, SENS, FDR5percTh
Example #11
    def _exec_r_module(self):
        try:
            import rpy2.robjects
            from rpy2.robjects import numpy2ri
            from rpy2.robjects import pandas2ri
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError('R module cannot be run, because '
                              '"rpy2" package is not installed.')
        module_name = os.path.splitext(os.path.basename(self.source_file))[0]
        logger.debug('import module "%s" from source file: %s',
                     module_name, self.source_file)
        logger.debug('source module: "%s"', self.source_file)
        rpy2.robjects.r('source("{0}")'.format(self.source_file))
        module = rpy2.robjects.r[module_name]
        version = module.get('VERSION')[0]
        if version != self.handles.version:
            raise PipelineRunError(
                'Version of source and handles is not the same.')
        func = module.get('main')
        numpy2ri.activate()  # enables use of numpy arrays
        pandas2ri.activate()  # enable use of pandas data frames
        kwargs = self.keyword_arguments
        logger.debug('evaluate main() function with INPUTS: "%s"',
                     '", "'.join(kwargs.keys()))
        # R doesn't have unsigned integer types
        for k, v in kwargs.iteritems():
            if isinstance(v, np.ndarray):
                if v.dtype == np.uint16 or v.dtype == np.uint8:
                    logging.debug(
                        'module "%s" input argument "%s": '
                        'convert unsigned integer data type to integer',
                        self.name, k)
                    kwargs[k] = v.astype(int)
            elif isinstance(v, pd.DataFrame):
                # TODO: We may have to translate pandas data frames explicitly
                # into the R equivalent.
                # pandas2ri.py2ri(v)
                kwargs[k] = v
        args = rpy2.robjects.ListVector({k: v for k, v in kwargs.iteritems()})
        base = importr('base')
        r_out = base.do_call(func, args)

        for handle in self.handles.output:
            # NOTE: R functions are supposed to return a list. Therefore
            # we can extract the output argument using rx2().
            # The R equivalent would be indexing the list with "[[]]".
            if isinstance(r_out.rx2(handle.name),
                          rpy2.robjects.vectors.DataFrame):
                handle.value = pandas2ri.ri2py(r_out.rx2(handle.name))
                # handle.value = pd.DataFrame(r_out.rx2(handle.name))
            else:
                # NOTE: R doesn't have an unsigned integer data type, so the
                # values coming back from R are cast to uint16 on the Python side.
                handle.value = numpy2ri.ri2py(r_out.rx2(handle.name)).astype(
                    np.uint16)
                # handle.value = np.array(r_out.rx2(handle.name), np.uint16)

        return self.handles.output
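A small self-contained sketch of the base::do.call pattern used above, with a throwaway R function standing in for the module's main():

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

base = importr('base')
r_add = robjects.r('function(a, b) a + b')
args = robjects.ListVector({'a': 2.0, 'b': 3.0})
print(base.do_call(r_add, args)[0])  # 5.0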
Example #12
def ri2py_intvector(obj):
    # special case for factors
    if 'factor' in obj.rclass:
        res = pandas.Categorical.from_codes(numpy.asarray(obj) - 1,
                                            categories=obj.do_slot('levels'),
                                            ordered='ordered' in obj.rclass)
    else:
        res = numpy2ri.ri2py(obj)
    return res
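A minimal usage sketch for the factor branch, with a small illustrative R factor (rpy2 2.x conversion API assumed, as in the surrounding examples):

import rpy2.robjects as robjects

r_factor = robjects.r('factor(c("a", "b", "a"), levels = c("a", "b"))')
cat = ri2py_intvector(r_factor)  # -> pandas.Categorical with categories ['a', 'b']
print(cat)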
Example #13
def ri2py_intvector(obj):
    # special case for factors
    if 'factor' in obj.rclass:
        res = pandas.Categorical.from_codes(numpy.asarray(obj) - 1,
                                            categories = obj.do_slot('levels'),
                                            ordered = 'ordered' in obj.rclass)
    else:
        res = numpy2ri.ri2py(obj)
    return res
Example #14
    def fit(self, X, Y):
        numpy2ri.activate()
        rPMA = importr('PMA')
        typex, typez = _check_penalty_type(self.penalty)
        X, x_mean, x_std = _center_data(X)
        Y, y_mean, y_std = _center_data(Y)
        if self.n_component is None:
            self.n_component = np.min([X.shape[1], Y.shape[1]])
        out = rPMA.CCA(x=X, z=Y, K=self.n_component, \
                niter=self.n_iter, standardize=False, \
                typex=typex, typez=typez, \
                penaltyx=self.C[0], penaltyz=self.C[1], \
                trace=False)

        self.u = numpy2ri.ri2py(out[0])
        self.v = numpy2ri.ri2py(out[1])
        self._x_score, self._y_score = self.transform(X, Y)
        self._cancorr = _cancorr(X, Y, self.u, self.v)
        numpy2ri.deactivate()
        return self
Example #15
def get_distance(phyloseq_d, dist_method):
    R_phyloseq = importr('phyloseq')
    R_base = importr('base')
        
    distances = R_phyloseq.distance(phyloseq_d, method=dist_method)
    distance_mat = R_base.as_matrix(distances)
    distance_df = pd.DataFrame(numpy2ri.ri2py(distance_mat),
                               index=pandas2ri.ri2py(R_phyloseq.sample_names(phyloseq_d)),
                               columns=pandas2ri.ri2py(R_phyloseq.sample_names(phyloseq_d)))
    return distance_df
Example #16
def get_wunifrac_distance(phyloseq_d):
    R_phyloseq = importr('phyloseq')
    R_base = importr('base')
        
    distances = R_phyloseq.UniFrac(phyloseq_d, weighted=True, normalized=True, fast=True, parallel=False)
    distance_mat = R_base.as_matrix(distances)
    distance_df = pd.DataFrame(numpy2ri.ri2py(distance_mat),
                               index=pandas2ri.ri2py(R_phyloseq.sample_names(phyloseq_d)),
                               columns=pandas2ri.ri2py(R_phyloseq.sample_names(phyloseq_d)))
    return distance_df
Example #17
def processData(data,prompts):
    expandedMatrix = buildMatrices(data)
    competencies = {}
    eigenRatios = {}
    meanCompetencies = {}
    negativeCompetencies = {}
    for dataSet, matrix in expandedMatrix.items():
        sys.stderr.write("Processing " + dataSet + "\n")

        prompt = prompts['Answer.prompt_' + dataSet.split('_')[1]]

        # Calculate competencies for this scale
        factors = CCT(matrix, prompt)
        competencies[dataSet] = np2ri.ri2py((factors.rx('loadings')[0])).flatten()
        eigen = np2ri.ri2py((factors.rx('values')[0])).flatten()

        eigenRatios[dataSet] = eigen[0]/eigen[1]
        meanCompetencies[dataSet] = competencies[dataSet].mean()
        negativeCompetencies[dataSet] = (competencies[dataSet] < 0).sum() * 1.0/len(competencies[dataSet])

    return (competencies,eigenRatios,meanCompetencies,negativeCompetencies)
Example #18
        def fit(self, X, y):
            self.X = self._check_X(X)
            y = array(y).astype(float)

            self.columns = numpy2ri.ri2py(self.X.colnames)
            n_sample, self.n_feature = self.X.nrow, self.X.ncol

            if isinstance(self.param['mtry'], basestring):
                p = self.n_feature
                self.param['mtry'] = eval(self.param['mtry'])

            self.rf = self.pkg.randomForest(x=self.X, y=y, **self.param)
            return self
Example #19
def processData(dataFrame):
    originalCompetencies = {}
    competencies = {}
    eigenRatios = {}
    matrices = buildMatrices(dataFrame)

    for dataSet, matrix in matrices.items():
        factors = CCT(matrix)

        results = np2ri.ri2py((factors.rx('loadings')[0])).flatten()
        originalCompetencies[dataSet] = results

        matrix = reverseNegatives(matrix,results)

        factors = CCT(matrix)
        results = np2ri.ri2py((factors.rx('loadings')[0])).flatten()
        competencies[dataSet] = results

        eigen = np2ri.ri2py((factors.rx('values')[0])).flatten()
        eigenRatios[dataSet] = eigen

    return (competencies, eigenRatios, originalCompetencies)
Example #20
def fit_betabinom_ab(n, k, weights=None):
    assert np.all((k >= 0) & (k <= n))
    n_r = ro.FloatVector(n)
    k_r = ro.FloatVector(k)
    if weights is not None:
        assert len(weights) == len(k)
        assert len(weights) == len(n)
        weights_r = ro.FloatVector(weights)
        result_r = bbfit.fit_betabinom_w(n_r, k_r, weights_r)
    else:
        result_r = bbfit.fit_betabinom(n_r, k_r)
    result = rpyn.ri2py(result_r)
    log_a, log_b = result.flatten()
    return np.exp(log_a), np.exp(log_b)
Example #21
def ri2py_floatvector(obj):
    # special case for POSIXct date objects
    if 'POSIXct' in obj.rclass:
        tzone_name = obj.do_slot('tzone')[0]
        if tzone_name == '':
            # R is implicitly using the local timezone, while Python time libraries
            # will assume UTC.
            tzone = get_timezone()
        else:
            tzone = pytz.timezone(tzone_name)
        foo = (tzone.localize(datetime.fromtimestamp(x)) for x in obj)
        res = pandas.to_datetime(tuple(foo))
    else:
        res = numpy2ri.ri2py(obj)
    return res
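A minimal usage sketch with illustrative UTC timestamps; the helper get_timezone and the pytz, pandas and datetime imports are assumed to come from the surrounding module:

import rpy2.robjects as robjects

r_times = robjects.r('as.POSIXct(c("2020-01-01 12:00:00", "2020-01-02 12:00:00"), tz = "UTC")')
idx = ri2py_floatvector(r_times)  # -> tz-aware pandas DatetimeIndex
print(idx)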
Example #22
def run_auto_arima(window, horizons, args):
    numpy2ri.activate()
    forecast = importr('forecast')
    _, y_train, _, _ = window
    extra_param = args['extras']
    fit = forecast.auto_arima(y_train,
                              stationary=extra_param.get('stationary', False),
                              seasonal=extra_param.get('seasonal', True),
                              stepwise=extra_param.get('stepwise', False))

    order = list(fit[6])
    arima_str = 'ARIMA({0},{5},{1}) ({2},{6},{3})_{4}'
    print arima_str.format(*order)
    result = forecast.forecast(fit, h=max_h, level=np.array([0.95]))
    y_pred = numpy2ri.ri2py(result[3])
    y_arima = y_pred[np.array(horizons) - 1]
    return y_arima
Example #23
    def output_pheno_beta(self, meffil=False):
        """Get pheno and beta dataframe objects stored as attributes for input to MethylationArray object.

        Parameters
        ----------
        meffil
            True if the meffil pipeline was run."""
        self.pheno_py=pandas2ri.ri2py(robjects.r['as'](self.pheno,'data.frame'))
        if not meffil:
            self.beta_py = pd.DataFrame(
                pandas2ri.ri2py(self.beta_final),
                index=numpy2ri.ri2py(robjects.r("featureNames")(self.RSet)),
                columns=numpy2ri.ri2py(robjects.r("sampleNames")(self.RSet))).transpose()
            self.pheno_py['Sample_Name']=np.vectorize(lambda x: x.split('/')[-1])(self.pheno_py['Basename'])
            self.pheno_py = self.pheno_py.set_index('Sample_Name').loc[self.beta_py.index,:]
        else:
            self.beta_py = pd.DataFrame(
                pandas2ri.ri2py(self.beta_final),
                index=robjects.r("rownames")(self.beta_final),
                columns=robjects.r("colnames")(self.beta_final)).transpose()
            print(self.beta_py)
            print(self.beta_py.index)
            print(self.pheno_py)
            self.pheno_py = self.pheno_py.set_index('Sample_Name').loc[self.beta_py.index,:]
Example #24
    def convert_r_response_to_python(response,
                                     convert_scalars=True,
                                     to_list=False,
                                     native_to_json=False):
        """Converts rpy2 types to Python objects.

        The output is either a Python built-in, NumPy or Pandas object.

        Note:
            rpy2 returns scalars as length-1 vectors. This conversion turns them back
            into scalars, which may not always be desirable; the behavior can be
            switched off via convert_scalars.

        Args:
            response (rpy2 type): object from the R session to be converted
            convert_scalars (bool): convert vectors of length 1 to scalars
            to_list (bool): convert other R vectors to Python lists
            native_to_json (bool): for data frames, serialize to JSON records and
                replace R's integer NA sentinel (-2147483648) with null

        Returns:
            Converted object
        """
        original_response_type = type(response)

        new_res = response
        if issubclass(original_response_type, rpy2.robjects.vectors.Array):
            new_res = numpy2ri.ri2py(response)
        elif issubclass(original_response_type,
                        rpy2.robjects.vectors.DataFrame):
            new_res = pandas2ri.ri2py(response)
            if native_to_json:
                json_res = new_res.to_json(orient='records')
                json_res = json_res.replace('-2147483648', 'null')
                new_res = json.loads(json_res)
        elif issubclass(original_response_type, rpy2.robjects.vectors.Vector):
            if convert_scalars and len(response) == 1:
                new_res = response[0]
            elif to_list:
                new_res = list(response)

        return new_res
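A minimal usage sketch; the method takes no self, so it is exercised here as a standalone function, with the module-level imports (rpy2.robjects, numpy2ri, pandas2ri, json) assumed to be in place:

import rpy2.robjects as robjects

scalar = robjects.r('42L')           # length-1 integer vector
vector = robjects.r('c(1.5, 2.5)')
print(convert_r_response_to_python(scalar))                 # 42
print(convert_r_response_to_python(vector, to_list=True))   # [1.5, 2.5]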
Example #25
 def testAtomicVectorToNumpy(self):
     v = robjects.vectors.IntVector((1, 2, 3))
     a = rpyn.ri2py(v)
     self.assertTrue(isinstance(a, numpy.ndarray))
     self.assertEqual(1, v[0])
Example #26
def ri2py_listvector(obj):        
    if 'data.frame' in obj.rclass:
        res = ri2py.registry[DataFrame](obj)
    else:
        res = numpy2ri.ri2py(obj)
    return res
Example #27
def ri2py_listvector(obj):
    if 'data.frame' in obj.rclass:
        res = ri2py.registry[DataFrame](obj)
    else:
        res = numpy2ri.ri2py(obj)
    return res
Example #28
def ri2py_vector(obj):
    res = numpy2ri.ri2py(obj)
    return res
Example #29
 def testAtomicVectorToNumpy(self):
     v = robjects.vectors.IntVector((1,2,3))
     a = rpyn.ri2py(v)
     self.assertTrue(isinstance(a, numpy.ndarray))
     self.assertEqual(1, v[0])
Example #30
from rpy2.robjects import r, Formula
from rpy2.robjects.numpy2ri import numpy2rpy as numpy2ri
from rpy2.robjects.numpy2ri import rpy2py as ri2py

#from rpy2.robjects.numpy2ri import numpy2ri, ri2py
from rpy2.robjects.packages import importr, data

import openturns as ot
# Requires the R packages 'stats' and 'faraway'

stats = importr("stats")
faraway = importr("faraway")
savings_data = data(faraway).fetch("savings")["savings"]
#data(faraway).fetch("savings")["savings"]

# Model 1: two parameters, no intercept
sr = ri2py(savings_data)["sr"]
r.assign('sr', numpy2ri(sr))
pop15 = ri2py(savings_data)["pop15"]
r.assign('pop15', numpy2ri(pop15))
pop75 = ri2py(savings_data)["pop75"]
r.assign('pop75', numpy2ri(pop75))

formula = Formula('sr ~ pop75 + pop15 - 1')
fit = stats.lm(formula)
summary = stats.summary_lm(fit)
"""
list(summary.names) provides
['call', 'terms', 'residuals', 'coefficients', 'aliased', 'sigma', 'df', 'r.squared', 'adj.r.squared', 'fstatistic', 'cov.unscaled']
 """
r2 = summary[7][0]
ar2 = summary[8][0]
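An equivalent, arguably clearer extraction pulls the components by name with rx2, using the names listed in the comment above:

r2 = summary.rx2('r.squared')[0]
ar2 = summary.rx2('adj.r.squared')[0]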
Example #31
def ri2py_vector(obj):
    res = numpy2ri.ri2py(obj)
    return res
Example #32
def ri2py_dataframe(obj):
    # use the numpy converter
    recarray = numpy2ri.ri2py(obj)
    res = PandasDataFrame.from_records(recarray)
    return res
Example #33
def risk_assessment_plot(X_binarized_norm, y, ROC_models):
    """
    Function that generates the risk assessment plot. Takes the matrix of metrics as input.
    :return: plot
    """

    # for recurrence problem
    lr_probabilities, rf_probabilities, mlp_probabilities, xgb_probabilities = me.best_models_ROC_curves(
        X_binarized_norm,
        y, ROC_models,
        False)

    # for recurrence problem
    output_list1 = me.risk_assessment_data(lr_probabilities, rf_probabilities, y)
    output_list2 = me.risk_assessment_data(mlp_probabilities, xgb_probabilities, y)

    import rpy2.robjects.numpy2ri as rpyn
    rpyn.activate()

    [xrf_sens, yrf_sens, xrf_spec, yrf_spec, xlr_sens, ylr_sens, xlr_spec, ylr_spec] = rpyn.ri2py(output_list1)
    [xmlp_sens, ymlp_sens, xmlp_spec, ymlp_spec, xxgb_sens, yxgb_sens, xxgb_spec, yxgb_spec] = rpyn.ri2py(
        output_list2)

    plt.figure()
    plt.rcParams["figure.figsize"] = (10, 6)

    # for recurrence problem
    plt.plot(xlr_sens, ylr_sens, color='b',
             label='Events LR',
             lw=2, alpha=.8)
    plt.plot(xlr_spec, ylr_spec, color='b', linestyle='--',
             label='Non-events LR',
             lw=2, alpha=.8)
    plt.plot(xrf_sens, yrf_sens, color='g',
             label='Events RF',
             lw=2, alpha=.8)
    plt.plot(xrf_spec, yrf_spec, color='g', linestyle='--',
             label=r'Non-events RF',
             lw=2, alpha=.8)
    plt.plot(xmlp_sens, ymlp_sens, color='k',
             label=r'Events MLP',
             lw=2, alpha=.8)
    plt.plot(xmlp_spec, ymlp_spec, color='k', linestyle='--',
             label=r'Non-events MLP',
             lw=2, alpha=.8)
    plt.plot(xxgb_sens, yxgb_sens, color='magenta',
             label=r'Events XGBoost',
             lw=2, alpha=.8)
    plt.plot(xxgb_spec, yxgb_spec, color='magenta', linestyle='--',
             label=r'Non-events XGBoost',
             lw=2, alpha=.8)

    plt.xlabel('Calculated risk')
    plt.ylabel('Sensitivity, 1-Specificity')
    plt.legend(loc="best")
    plt.show()
Example #34
def ri2py_vector(obj):
    # use the numpy converter first
    res = numpy2ri.ri2py(obj)
    if isinstance(res, recarray):
        res = PandasDataFrame.from_records(res)
    return res
Example #35
    def _exec_r_module(self):
        try:
            import rpy2.robjects
            from rpy2.robjects import numpy2ri
            from rpy2.robjects import pandas2ri
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError(
                'R module cannot be run, because '
                '"rpy2" package is not installed.'
            )
        module_name = os.path.splitext(os.path.basename(self.source_file))[0]
        logger.debug(
            'import module "%s" from source file: %s', module_name,
            self.source_file
        )
        logger.debug('source module: "%s"', self.source_file)
        rpy2.robjects.r('source("{0}")'.format(self.source_file))
        module = rpy2.robjects.r[module_name]
        version = module.get('VERSION')[0]
        if version != self.handles.version:
            raise PipelineRunError(
                'Version of source and handles is not the same.'
            )
        func = module.get('main')
        numpy2ri.activate()   # enables use of numpy arrays
        pandas2ri.activate()  # enable use of pandas data frames
        kwargs = self.keyword_arguments
        logger.debug(
            'evaluate main() function with INPUTS: "%s"',
            '", "'.join(kwargs.keys())
        )
        # R doesn't have unsigned integer types
        for k, v in kwargs.iteritems():
            if isinstance(v, np.ndarray):
                if v.dtype == np.uint16 or v.dtype == np.uint8:
                    logging.debug(
                        'module "%s" input argument "%s": '
                        'convert unsigned integer data type to integer',
                        self.name, k
                    )
                    kwargs[k] = v.astype(int)
            elif isinstance(v, pd.DataFrame):
                # TODO: We may have to translate pandas data frames explicitly
                # into the R equivalent.
                # pandas2ri.py2ri(v)
                kwargs[k] = v
        args = rpy2.robjects.ListVector({k: v for k, v in kwargs.iteritems()})
        base = importr('base')
        r_out = base.do_call(func, args)

        for handle in self.handles.output:
            # NOTE: R functions are supposed to return a list. Therefore
            # we can extract the output argument using rx2().
            # The R equivalent would be indexing the list with "[[]]".
            if isinstance(r_out.rx2(handle.name), rpy2.robjects.vectors.DataFrame):
                handle.value = pandas2ri.ri2py(r_out.rx2(handle.name))
                # handle.value = pd.DataFrame(r_out.rx2(handle.name))
            else:
                # NOTE: R doesn't have an unsigned integer data type, so the
                # values coming back from R are cast to uint16 on the Python side.
                handle.value = numpy2ri.ri2py(r_out.rx2(handle.name)).astype(
                    np.uint16
                )
                # handle.value = np.array(r_out.rx2(handle.name), np.uint16)

        return self.handles.output
Example #36
def ri2py_dataframe(obj):
    # use the numpy converter
    recarray = numpy2ri.ri2py(obj)
    res = PandasDataFrame.from_records(recarray)
    return res
Example #37
def approx_qei(X, model, maxima, x_pending = None,
               num_sampled_points = 5,
               num_batches_eval = 400,
               strategy_batch_selection = 'random'):
    """
    Use Mickael Binois' approximation to the qEI function.
    This is used to calculate the qEI score for batches.
    
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Values where the acquisition function should be computed.

    * `model` [sklearn estimator that implements predict with ``return_std``]:
        The fit estimator that approximates the function through the
        method ``predict``.
        It should have a ``return_std`` parameter that returns the standard
        deviation.
        
    * `maxima` [float, default 0]:
        Previous best value which we would like to improve upon.
    
    * `num_sampled_points` [int, default 5]:
        Number of points to sample in parallel
        
    * `num_batches_eval` [int, default 400]:
        Number of batches to evaluate
    
    * `strategy_batch_selection` [default 'random']:
        Strategy for selection of elements in batches
    
    Returns
    -------
    * `best_batch`: the batch with the highest qEI value
    * `batches`: list of all evaluated batches
    * `cc_vec`: [array-like, shape=(num_batches_eval,)]:
        qEI values for each batch
    * `max_qEI_val`: the maximum qEI value found
    """
    # Converting x_pending list to numpy array
    if x_pending is not None:  
        x_pending = np.array(x_pending)
    
    batches = []
    cc_vec = np.zeros(num_batches_eval)
    # Batch preparation
    for i in range(num_batches_eval):   
        if strategy_batch_selection == 'random':
            rel_ind = np.random.choice(X.shape[0], num_sampled_points, replace=False)
            b = X[rel_ind,:]
            if x_pending is not None:
                b = np.vstack([x_pending, b])
        else:
            raise ValueError("No such sampling strategy exists.")
        batches.append(b)
        mean, covar = model.predict(b, return_cov=True)
        #print ('covar')
        #print(np.isnan(covar).any())
        cc = qEI.qEI_approx(mean, covar, maxima)
        cc_num = rpyn.ri2py(cc)
        cc_vec[i] = cc_num
    #print(cc_vec)
    max_qEI_val = np.nanmax(cc_vec)
    print('max')
    print(max_qEI_val)
    max_qEI_val_ind = np.argmax(cc_vec)
    best_batch = batches[max_qEI_val_ind]
    
    return best_batch, batches, cc_vec, max_qEI_val
Example #38
    ################################################################################
    ################################################################################
    # code using samples generated from R objects: this makes it easier to compare the results directly in R

    print('')
    print('-' * 10)
    print('')
    rnorm = r['rnorm']
    set_seed = r['set.seed']

    ## generate regressor and dependent variable
    set_seed(0)
    X1 = rnorm(n_points)
    X2 = rnorm(n_points)
    err1 = rnorm(n_points)
    output = numpy2ri(ri2py(X1) + ri2py(X1) - ri2py(X2) + ri2py(err1) + 1)
    r.assign('X1', X1)
    r.assign('X2', X2)
    r.assign('output', output)

    ## perform Durbin-Watson test
    formula = Formula('output ~ X1 + X2')
    dw_test_1 = lmtest.dwtest(formula, alternative=hypothesis['R'], exact=True)
    print('Result from R :')
    print('p-value :', dw_test_1[3][0])
    print('Alternative hypothesis : ', dw_test_1[2][0])
    print('dw stat : ', dw_test_1[0][0])
    print('')

    # transformation into openturns objects
    firstSample = ot.Sample(np.column_stack((ri2py(X1), ri2py(X2))))