예제 #1
0
def logCountsWithFactors(counts, size_factors):
    """Log-normalize a counts matrix with given size factors using R/scran.

    Builds a SingleCellExperiment from the counts, attaches the size
    factors and calls normalize()/logcounts() in R.

    :param counts: pandas DataFrame of counts (genes as rows)
    :param size_factors: a vector of size factors (one per column/cell)
    :return: pandas DataFrame of normalized log counts (genes as rows)
    """
    columns = counts.columns
    indexes = counts.index
    pandas2ri.activate()
    try:
        r_counts = pandas2ri.py2ri(counts)
        # Imported only for the side effect of loading scran into the R
        # session (the original bound it to an unused name and the old
        # docstring wrongly said "scater").
        RimportLibrary("scran")
        r_call = """
            function(counts, size_factors){
              sce = SingleCellExperiment(assays=list(counts=as.matrix(counts)))
              sizeFactors(sce) = size_factors
              sce = normalize(sce)
              norm_counts = logcounts(sce)
              return(as.data.frame(norm_counts))
            }
        """
        r_func = r(r_call)
        r_norm_counts = r_func(r_counts, size_factors)
        pandas_norm_counts = pandas2ri.ri2py(r_norm_counts)
        # Row/column labels can be lost in the R round-trip; restore them.
        pandas_norm_counts.index = indexes
        pandas_norm_counts.columns = columns
    finally:
        # Always restore the default converters, even if an R call fails.
        pandas2ri.deactivate()
    return pandas_norm_counts
 def testActivate(self):
     """activate()/deactivate() must swap the global py2ri converter."""
     # Start from a known state: the default converter.
     robjects.conversion.py2ri = robjects.default_py2ri
     self.assertNotEqual(rpyp.pandas2ri, robjects.conversion.py2ri)
     rpyp.activate()
     self.assertEqual(rpyp.pandas2ri, robjects.conversion.py2ri)
     rpyp.deactivate()
     # deactivate() must restore the default converter, not just any one.
     self.assertEqual(robjects.default_py2ri, robjects.conversion.py2ri)
def deaScranDESeq2(counts, conds, comparisons, alpha, scran_clusters=False):
    """Run a DESeq2 differential-expression analysis on a counts matrix,
    normalizing with scran deconvolution size factors.

    :param counts: pandas DataFrame of counts (genes as rows, cells as columns)
    :param conds: list of condition labels, one per column of counts
    :param comparisons: iterable of (A, B) condition pairs to contrast
    :param alpha: significance level passed to DESeq2 results()
    :param scran_clusters: if True, pre-cluster cells with scran::quickCluster
        before computing sum factors
    :return: list of pandas DataFrames, one DESeq2 result table per comparison
    """
    results = list()
    n_cells = len(counts.columns)
    pandas2ri.activate()
    try:
        RimportLibrary("DESeq2")
        scran = RimportLibrary("scran")
        multicore = RimportLibrary("BiocParallel")
        # Leave one CPU free for the main process.
        multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
        as_matrix = r["as.matrix"]
        # Create the R conditions and counts data
        r_counts = pandas2ri.py2ri(counts)
        cond = robjects.StrVector(conds)
        r_call = """
            function(r_counts) {
                sce = SingleCellExperiment(assays=list(counts=r_counts))
                return(sce)
            }
        """
        r_func = r(r_call)
        sce = r_func(as_matrix(r_counts))
        if scran_clusters:
            r_clusters = scran.quickCluster(as_matrix(r_counts), max(n_cells/10, 10))
            min_cluster_size = min(Counter(r_clusters).values())
            sizes = list(set([round((min_cluster_size/2) / i) for i in [5,4,3,2,1]]))
            sce = scran.computeSumFactors(sce, clusters=r_clusters, sizes=sizes, positive=True)
        else:
            sizes = list(set([round((n_cells/2) * i) for i in [0.1,0.2,0.3,0.4,0.5]]))
            sce = scran.computeSumFactors(sce, sizes=sizes, positive=True)
        sce = r.normalize(sce)
        dds = r.convertTo(sce, type="DESeq2")
        r_call = """
            function(dds, conditions){
                colData(dds)$conditions = as.factor(conditions)
                design(dds) = formula(~ conditions)
                return(dds)
            }
        """
        r_func = r(r_call)
        dds = r_func(dds, cond)
        dds = r.DESeq(dds)
        # Perform the comparisons and store results in list
        for A, B in comparisons:
            result = r.results(dds, contrast=r.c("conditions", A, B), alpha=alpha)
            result = r['as.data.frame'](result)
            genes = r['rownames'](result)
            result = pandas2ri.ri2py_dataframe(result)
            # There seems to be a problem parsing the rownames from R to pandas
            # so we do it manually
            result.index = genes
            results.append(result)
    finally:
        # The original wrapped everything in `except Exception as e: raise e`,
        # which re-raised without cleanup; use finally so the converters are
        # always restored.
        pandas2ri.deactivate()
    return results
예제 #4
0
 def fit(self, dfx: pd.DataFrame, outcome_col, covariate_cols,
         teacher_id_col, **argv):
     """Fit an lfe::felm fixed-effects model in R via rpy2.

     Stores the fixed-effect estimates in ``self.effect`` and two residual
     series (with and without fixed effects) aligned to ``dfx.index``.
     Rows with NA in any used column are dropped before fitting.
     """
     # Covariates that are not themselves fixed-effect columns.
     covariate_cols_except_fixed = [
         x for x in covariate_cols if x not in self.fixed_effect_cols
     ]
     fixed_effect_cols_plus_tid = [teacher_id_col] + self.fixed_effect_cols
     dropna_subset_cols = [outcome_col
                           ] + covariate_cols + fixed_effect_cols_plus_tid
     formula = create_felm_formula(outcome_col, covariate_cols_except_fixed,
                                   fixed_effect_cols_plus_tid,
                                   self.factor_cols)
     pandas2ri.activate()
     df_use = dfx.dropna(subset=dropna_subset_cols)
     # Push the data into the R session and fit the model there.
     _res1 = self.r.assign("r_df", pandas2ri.py2rpy(df_use))
     _res2 = self.r(
         "res <- lfe::felm({formula}, r_df)".format(formula=formula))
     bb = self.r("lfe::getfe(res)")
     self.effect = bb
     # Residuals are re-indexed onto the full dfx index; dropped rows stay NaN.
     self.residuals_without_fixed = pd.Series(index=dfx.index)
     self.residuals_without_fixed.loc[df_use.index, ] = self.r(
         "res$r.residuals")[:, 0]
     self.residuals_with_fixed = pd.Series(index=dfx.index)
     self.residuals_with_fixed.loc[df_use.index, ] = self.r(
         "res$residuals")[:, 0]
     pandas2ri.deactivate()
예제 #5
0
def computeMnnBatchCorrection(counts):
    """Computes batch correction to a list of batches (data frames)
    where each data frame represents a batch (animal for instance).
    The batch correction is computed using Scran::mnnCorrect()
    from Marioni et al.
    :param counts: a list of matrices of counts
    :return returns a list of batch corrected matrices of counts
    """
    pandas2ri.activate()
    as_matrix = r["as.matrix"]
    # Remember each batch's index/columns so they can be restored after
    # the R round-trip, which loses them.
    meta = [(x.index,x.columns) for x in counts]
    r_counts = [as_matrix(pandas2ri.py2ri(x)) for x in counts]
    # Imported for the side effect of loading scran into the R session.
    RimportLibrary("scran")
    r_call = """
        function(counts) {
           norm_counts = do.call(mnnCorrect, c(counts, cos.norm.out=FALSE));
           return(lapply(norm_counts$corrected, as.data.frame))
        }
    """
    r_func = r(r_call)
    norm_counts = list()
    # The R function returns one corrected data frame per input batch,
    # in the same order as `counts`.
    for i,x in enumerate(r_func(r_counts)):
        norm_c = pandas2ri.ri2py(x)
        norm_c.index = meta[i][0]
        norm_c.columns = meta[i][1]
        norm_counts.append(norm_c)
    pandas2ri.deactivate()
    return norm_counts
예제 #6
0
def slingshot(adata, start, n_pcs=5, cl=None):
    """Run Slingshot pseudotime inference on an AnnData object via rpy2.

    Uses the first ``n_pcs`` PCs from ``adata.obsm['X_pca']`` and cluster
    labels from ``adata.obs[cl]``; writes one pseudotime column per lineage
    into ``adata.obs`` and the lineages into ``adata.uns['slingshot']``.

    :param adata: AnnData with obsm['X_pca'] and obs[cl] present
    :param start: starting cluster label (passed as start.clus)
    :param n_pcs: number of principal components to use
    :param cl: name of the obs column holding cluster labels
    :return: the modified AnnData object
    """
    import numpy as np
    import pandas as pd
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.packages import importr
    importr('slingshot')
    numpy2ri.activate()
    pandas2ri.activate()
    ro.r.assign('pca', adata.obsm['X_pca'][:, :n_pcs])
    ro.r.assign('cl', adata.obs[cl])
    ro.reval('sds <- newSlingshotDataSet(pca, cl)')
    ro.reval(f'sce <- slingshot(sds, cl, start.clus="{start}")')
    pt = pd.DataFrame(np.asarray(ro.reval('slingPseudotime(sce)')),
                      index=adata.obs_names)
    pt.columns = [f'{cl}_lineage_{c}' for c in pt.columns]
    # Drop any previous pseudotime columns so concat does not duplicate them.
    try:
        adata.obs = adata.obs.drop(pt.columns, axis=1)
    except KeyError:
        print('PT keys not dropped in obs dataframe: Not found.')
    adata.obs = pd.concat([adata.obs, pt], axis=1)
    adata.uns['slingshot'] = {}
    adata.uns['slingshot']['lineages'] = {}
    lineages = np.asarray(np.asarray(ro.reval('sce@lineages')))
    for i, l in enumerate(lineages):
        adata.uns['slingshot']['lineages'][i] = list(np.asarray(l))
    numpy2ri.deactivate()
    pandas2ri.deactivate()
    return adata
예제 #7
0
def Rtsne(counts,
          dimensions,
          theta=0.5,
          dims=50,
          perplexity=30,
          max_iter=1000):
    """Perform t-SNE dimensionality reduction with the R package Rtsne.

    :param counts: pandas DataFrame (rows are observations)
    :param dimensions: number of output dimensions (Rtsne's `dims`)
    :param theta: Barnes-Hut speed/accuracy trade-off
    :param dims: number of PCA dimensions kept before t-SNE (initial_dims)
    :param perplexity: t-SNE perplexity
    :param max_iter: maximum number of iterations
    :return: the embedded coordinates (the 'Y' slot of the Rtsne result)
    """
    pandas2ri.activate()
    try:
        r_counts = pandas2ri.py2ri(counts)
        tsne = RimportLibrary("Rtsne")
        multicore = RimportLibrary("BiocParallel")
        # Leave one CPU free for the main process.
        multicore.register(
            multicore.MulticoreParam(multiprocessing.cpu_count() - 1))
        as_matrix = r["as.matrix"]
        # Pass the converted R object; the original converted `counts` to
        # `r_counts` but then passed the pandas object, leaving r_counts
        # unused (it only worked via the activated automatic conversion).
        tsne_out = tsne.Rtsne(as_matrix(r_counts),
                              dims=dimensions,
                              theta=theta,
                              check_duplicates=False,
                              pca=True,
                              initial_dims=dims,
                              perplexity=perplexity,
                              max_iter=max_iter,
                              verbose=False)
        pandas_tsne_out = pandas2ri.ri2py(tsne_out.rx2('Y'))
    finally:
        # Always restore the default converters, even on error.
        pandas2ri.deactivate()
    return pandas_tsne_out
예제 #8
0
def oneWay_rmAnova(DV, ID, IV):
    '''
    One-way repeated-measures ANOVA via R's afex::aov_ez, followed by
    Holm-adjusted pairwise contrasts from emmeans.

    Parameters
    ----------
    DV : list/array
        Dependent variable as a singular list/array (will make a df-longside)
    ID : list/array
        Repeated measure: list/array of assigned identities
    IV : list/array
        The independent variable (condition) you are testing across as a list/array

    Returns
    -------
    The R summary object of the pairwise contrasts.
    '''
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()

    # Long-format data frame: one row per (ID, condition) observation.
    df = pd.DataFrame({'DV': DV, 'ID': ID, 'IV': IV})
    r_df = pandas2ri.py2ri(df)

    afex = importr('afex')
    model = afex.aov_ez('ID', 'DV', r_df, within='IV')
    print(R.summary(model))

    #    esm = importr("emmeans", on_conflict="warn")
    esm = importr("emmeans")

    # Pairwise contrasts over the IV levels, Holm multiple-comparison correction.
    pairwise = esm.lsmeans(model, "IV", contr="pairwise", adjust="holm")
    print(R.summary(pairwise))

    pandas2ri.deactivate()
    return R.summary(pairwise)
예제 #9
0
 def testSeries(self):
     """A float pandas Series must convert to an R FloatSexpVector."""
     Series = pandas.core.series.Series
     s = Series(numpy.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
     rpyp.activate()
     rp_s = robjects.conversion.py2ri(s)
     rpyp.deactivate()
     self.assertEqual(rinterface.FloatSexpVector, type(rp_s))
 def testSeries(self):
     """A float pandas Series must convert to an R FloatSexpVector."""
     Series = pandas.core.series.Series
     s = Series(numpy.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
     rpyp.activate()
     rp_s = robjects.conversion.py2ri(s)
     rpyp.deactivate()
     self.assertEqual(rinterface.FloatSexpVector, type(rp_s))
예제 #11
0
 def testActivate(self):
     """activate()/deactivate() must swap the global py2ri converter."""
     # Start from a known state: the default converter.
     robjects.conversion.py2ri = robjects.default_py2ri
     self.assertNotEqual(rpyp.pandas2ri, robjects.conversion.py2ri)
     rpyp.activate()
     self.assertEqual(rpyp.pandas2ri, robjects.conversion.py2ri)
     rpyp.deactivate()
     # deactivate() must restore the default converter, not just any one.
     self.assertEqual(robjects.default_py2ri, robjects.conversion.py2ri)
예제 #12
0
def apply_transferFunction_metric(r_stream1, r_stream2, evalresp1, evalresp2):
    """
    Invoke the IRISMustangMetrics::transferFunctionMetric R function and
    convert the R dataframe result into a pandas dataframe.
    :param r_stream1: an r_stream object
    :param r_stream2: an r_stream object
    :param evalresp1: pandas dataframe of evalresp FAP for r_stream1
    :param evalresp2: pandas dataframe of evalresp FAP for r_stream2
    :return: pandas dataframe of metric values
    """
    R_function = robjects.r('IRISMustangMetrics::transferFunctionMetric')

    # NOTE:  Conversion of dataframes only works if you activate but we don't want conversion
    # NOTE:  to always be automatic so we deactivate() after we're done converting.
    pandas2ri.activate()
    r_evalresp1 = pandas2ri.py2ri_pandasdataframe(evalresp1)
    r_evalresp2 = pandas2ri.py2ri_pandasdataframe(evalresp2)
    pandas2ri.deactivate()

    # TODO:  Can we just activate/deactivate before/after R_function() without converting
    # TODO:  r_evalresp1/2 ahead of time?

    # Calculate the metric
    r_metriclist = R_function(r_stream1, r_stream2, r_evalresp1, r_evalresp2)
    r_dataframe = _R_metricList2DF(r_metriclist)
    pandas2ri.activate()
    df = pandas2ri.ri2py_dataframe(r_dataframe)
    pandas2ri.deactivate()

    # Convert columns from R POSIXct to python UTCDateTime
    df.starttime = df.starttime.apply(UTCDateTime)
    df.endtime = df.endtime.apply(UTCDateTime)
    return df
예제 #13
0
def logCountsWithFactors(counts, size_factors):
    """Log-normalize a counts matrix with given size factors using R/scran.

    Builds a SingleCellExperiment from the counts, attaches the size
    factors and calls normalize()/logcounts() in R.

    :param counts: pandas DataFrame of counts (genes as rows)
    :param size_factors: a vector of size factors (one per column/cell)
    :return: pandas DataFrame of normalized log counts (genes as rows)
    """
    columns = counts.columns
    indexes = counts.index
    pandas2ri.activate()
    try:
        r_counts = pandas2ri.py2ri(counts)
        # Imported only for the side effect of loading scran into the R
        # session (the original bound it to an unused name and the old
        # docstring wrongly said "scater").
        RimportLibrary("scran")
        r_call = """
            function(counts, size_factors){
              sce = SingleCellExperiment(assays=list(counts=as.matrix(counts)))
              sizeFactors(sce) = size_factors
              sce = normalize(sce)
              norm_counts = logcounts(sce)
              return(as.data.frame(norm_counts))
            }
        """
        r_func = r(r_call)
        r_norm_counts = r_func(r_counts, size_factors)
        pandas_norm_counts = pandas2ri.ri2py(r_norm_counts)
        # Row/column labels can be lost in the R round-trip; restore them.
        pandas_norm_counts.index = indexes
        pandas_norm_counts.columns = columns
    finally:
        # Always restore the default converters, even if an R call fails.
        pandas2ri.deactivate()
    return pandas_norm_counts
예제 #14
0
def computeSumFactors(counts, scran_clusters=True):
    """ Compute normalization factors
    using the deconvolution method
    described in Marioni et al.
    Returns the computed size factors as a vector.
    :param counts: a matrix of counts (genes as rows)
    :param scran_clusters: if True, pre-cluster cells with quickCluster
        before the deconvolution
    :return returns the normalization factors a vector
    """
    n_cells = len(counts.columns)
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts)
    scran = RimportLibrary("scran")
    multicore = RimportLibrary("BiocParallel")
    # Leave one CPU free for the main process.
    multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
    as_matrix = r["as.matrix"]
    if scran_clusters:
        # Second positional arg is presumably quickCluster's min.size —
        # TODO(review): confirm against the scran version in use.
        r_clusters = scran.quickCluster(as_matrix(r_counts), max(n_cells/10, 10))
        min_cluster_size = min(Counter(r_clusters).values())
        # Pool sizes derived from the smallest cluster; set() removes duplicates.
        sizes = list(set([round((min_cluster_size/2) / i) for i in [5,4,3,2,1]]))
        dds = scran.computeSumFactors(as_matrix(r_counts), 
                                      clusters=r_clusters, sizes=sizes, positive=True)
    else:
        sizes = list(set([round((n_cells/2) * i) for i in [0.1,0.2,0.3,0.4,0.5]]))
        dds = scran.computeSumFactors(as_matrix(r_counts), sizes=sizes, positive=True)        
    pandas_sf = pandas2ri.ri2py(dds)
    pandas2ri.deactivate()
    return pandas_sf
예제 #15
0
    def compute_pairwise_information(cls, data, method, kwargs=None):
        """Compute a pairwise information matrix over the rows of ``data``.

        Only 'mutual-information' is supported: it calls the R package
        minet's build_mim() on ``data.T`` via rpy2.

        :param data: pandas DataFrame (its rows become both matrix axes)
        :param method: information method name; only 'mutual-information'
        :param kwargs: optional dict; 'estimator', 'disc' and 'nbins' are
            popped with defaults, the rest is forwarded to minet.build_mim
        :return: pandas DataFrame indexed and columned by ``data.index``
        :raises ValueError: if ``method`` is not supported
        """
        logger = get_logger(__name__)
        if method == 'mutual-information':
            minet = importr('minet')
            pandas2ri.activate()
            if kwargs is None:
                kwargs = {}

            estimator = kwargs.pop('estimator', 'mi.shrink')
            disc = kwargs.pop('disc', 'equalwidth')
            nbins = kwargs.pop('nbins', np.sqrt(len(data.columns)))

            logger.debug('Running minet.build_mim(estimator={!r}, '
                         'disc={!r}, nbins={!r})'.format(
                             estimator, disc, nbins))
            r_info = minet.build_mim(data.T,
                                     estimator=estimator,
                                     disc=disc,
                                     nbins=nbins,
                                     **kwargs)
            info = np.asarray(r_info)

            # Free the R-side objects before deactivating the converter.
            del r_info, minet
            gc.collect()
            pandas2ri.deactivate()
        else:
            raise ValueError(
                'Unsupported information method: {!r}'.format(method))

        info = pd.DataFrame(info, index=data.index, columns=data.index)

        return info
    def generate_solutions_tables(self):
        ''' code from Adam use rpy2 to execute rcode which reads out a solutions file to pandas '''
        # Column names of the ABSOLUTE mode.tab matrix — order matters and
        # must match the R object's columns.
        col_names = [
            'alpha', 'tau', 'AT', 'b', 'delta', 'LL', 'mode_curv',
            'genome mass', 'sigma.h.hat', 'theta.z.hat', 'sigma.A.hat',
            'theta.Q.hat', 'lambda.hat', 'theta.0', 'frac.het', 'SCNA_LL',
            'entropy', 'Kar_LL', 'WGD', 'combined_LL', 'SSNV_LL',
            'SCNA_Theta_integral', 'dens'
        ]

        # Build R function to be used as a python package
        load_RData_func_str = """
                       load_RData <- function(file_path) {
                          load(file_path)
                          head_name <- ls()[1]
                          file_name <- names(`segobj.list`)[1]
                          r_data <- `segobj.list`[[file_name]]$mode.res$mode.tab
                          return(r_data)
                      }
                      """
        # Pack the function above as a package
        r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str,
                                                     "r_pack")
        # NOTE: Python 2 print statements — this snippet is py2-only.
        print 'Generating absolute tables for ' + str(len(
            self.data_table)) + ' samples'
        pandas2ri.activate()
        for index, row in self.data_table.iterrows():
            # Progress every 100 samples.
            if np.mod(index, 100) == 0:
                print str(index) + '/' + str(len(self.data_table))
            r_data = r_pack.load_RData(row['absolute_summary_data'])
            abs_table = pd.DataFrame(pandas2ri.ri2py(r_data),
                                     columns=col_names)
            self.pp_modes_tables[row['pair_id']] = abs_table
        pandas2ri.deactivate()
예제 #17
0
def computeSumFactors(counts, scran_clusters=True):
    """ Compute normalization factors
    using the deconvolution method
    described in Marioni et al.
    Returns the computed size factors as a vector.
    :param counts: a matrix of counts (genes as rows)
    :param scran_clusters: if True (and there are at least 50 cells),
        pre-cluster cells with quickCluster before the deconvolution
    :return returns the normalization factors a vector
    """
    n_cells = len(counts.columns)
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts)
    scran = RimportLibrary("scran")
    as_matrix = r["as.matrix"]
    # Clustering is only attempted with enough cells to be meaningful.
    if scran_clusters and n_cells >= 50:
        # Second positional arg is presumably quickCluster's min.size —
        # TODO(review): confirm against the scran version in use.
        r_clusters = scran.quickCluster(as_matrix(r_counts),
                                        min(n_cells/10, 10),
                                        method="igraph")
        min_cluster_size = min(Counter(r_clusters).values())
        # Pool sizes stepped by 5, capped by the smallest cluster.
        sizes = list(range(min(int(min_cluster_size/4), 10), 
                           min(int(min_cluster_size/2), 50), 5))
        dds = scran.computeSumFactors(as_matrix(r_counts), 
                                      clusters=r_clusters, sizes=sizes)
    else:
        sizes = list(range(min(int(n_cells/4), 10), 
                           min(int(n_cells/2), 50), 5))
        dds = scran.computeSumFactors(as_matrix(r_counts), sizes=sizes)        
    pandas_sf = pandas2ri.ri2py(dds)
    pandas2ri.deactivate()
    return pandas_sf
예제 #18
0
def apply_PSD_plot(r_stream, filepath, evalresp=None):
    """
    Create a PSD plot which will be written to a .png file.
    :param r_stream: an r_stream object
    :param filepath: file path for png output
    :param evalresp: (optional) pandas dataframe of FAP from evalresp (freq,amp,phase)
    :return: True on success
    :raises Exception: when psdList() returns no PSDs
    """
    result = robjects.r('grDevices::png')(filepath)
    r_psdList = robjects.r('IRISSeismic::psdList')(r_stream)

    if len(r_psdList) == 0:
        raise Exception("No PSDs returned")

    pandas2ri.activate()

    # convert pandas df to R df as parameter automatically
    if evalresp is not None:
        r_evalresp = pandas2ri.py2ri(evalresp)  # convert to R dataframe
        result = robjects.r('IRISSeismic::psdPlot')(r_psdList,
                                                    style='pdf',
                                                    evalresp=r_evalresp)
    else:
        result = robjects.r('IRISSeismic::psdPlot')(r_psdList, style='pdf')

    pandas2ri.deactivate()

    # Close the png device so the plot is flushed to disk.
    result = robjects.r('grDevices::dev.off')()

    return True
예제 #19
0
def convert_rdata_to_dataframe(filename):
    """Transform an RData file into a dict of pandas DataFrames.

    The file is loaded once. Metadata (column/row names) is read BEFORE
    pandas2ri is activated, while the object still has its R-native type;
    the data itself is re-fetched AFTER activation so the active converter
    yields numpy/pandas types. Currently there is no value-error checking.

    :param filename: path to the .RData file
    :return: dict mapping integer position -> pandas DataFrame
    """
    from rpy2.robjects import r as R
    from rpy2.robjects import pandas2ri
    import rpy2.robjects as ro

    print('WARNING THIS PROGRAM NEED VALUE ERROR CHECKING')
    # Load once; R.load returns the names of the objects it created.
    # (The original called R.load on the same file twice.)
    rd = R.load(filename)
    # Fetched before activate(): R-native wrapper type (Matrix / list).
    r_obj = R[rd[0]]
    if 'matrix' in str(type(r_obj)).lower():
        column_names = [r_obj.colnames]
        index_names = [r_obj.rownames]
    else:
        column_names = [[c for c in _rd_.colnames] for _rd_ in r_obj]
        index_names = [[i for i in _rd_.rownames] for _rd_ in r_obj]

    pandas2ri.activate()
    try:
        # Re-fetch after activate(): the same underlying R object now
        # converts to numpy/pandas types.
        py_obj = R[rd[0]]
        if 'ndarray' in str(type(py_obj)).lower():
            raw_df_l = [py_obj]
        else:
            raw_df_l = [rdf for rdf in ro.vectors.DataFrame(py_obj)]
        full_df_dict = {}
        for i_, (raw_df, colnames, rownames) in enumerate(
                zip(raw_df_l, column_names, index_names)):
            full_df_dict[i_] = pd.DataFrame(raw_df, columns=colnames,
                                            index=rownames)
    finally:
        pandas2ri.deactivate()
    return full_df_dict
예제 #20
0
def apply_transferFunction_metric(r_stream1, r_stream2, evalresp1, evalresp2):
    """
    Invoke the IRISMustangMetrics::transferFunctionMetric R function and
    convert the R dataframe result into a pandas dataframe.
    :param r_stream1: an r_stream object
    :param r_stream2: an r_stream object
    :param evalresp1: pandas DataFrame of evalresp FAP for r_stream1
    :param evalresp2: pandas DataFrame of evalresp FAP for r_stream2
    :return: pandas DataFrame of metric values
    """
    R_function = robjects.r('IRISMustangMetrics::transferFunctionMetric')

    # NOTE:  Conversion of dataframes only works if you activate but we don't want conversion
    # NOTE:  to always be automatic so we deactivate() after we're done converting.
    pandas2ri.activate()
    r_evalresp1 = pandas2ri.py2ri_pandasdataframe(evalresp1)
    r_evalresp2 = pandas2ri.py2ri_pandasdataframe(evalresp2)
    pandas2ri.deactivate()

    # TODO:  Can we just activate/deactivate before/after R_function() without converting
    # TODO:  r_evalresp1/2 ahead of time?

    # Calculate the metric
    r_metriclist = R_function(r_stream1, r_stream2, r_evalresp1, r_evalresp2)
    r_dataframe = _R_metricList2DF(r_metriclist)
    pandas2ri.activate()
    df = pandas2ri.ri2py_dataframe(r_dataframe)
    pandas2ri.deactivate()

    # Convert columns from R POSIXct to python UTCDateTime
    df.starttime = df.starttime.apply(UTCDateTime)
    df.endtime = df.endtime.apply(UTCDateTime)
    return df
    def fit_and_predict(self, train, horizon):
        """Fit a TBATS model (R 'forecast' package) on ``train`` and forecast.

        Uses msts() when ``self.frequency`` holds multiple seasonal periods,
        otherwise a plain ts().

        :param train: training series passed into R
        :param horizon: number of periods to forecast
        :return: tuple (fitted_values, forecast) converted from R data frames
        """
        r_string = """
            function(data, frequency, horizon){
                library(forecast)

                if(length(frequency) == 1){
                    ts_data <- ts(data, frequency=frequency)
                }else{
                    ts_data <- msts(data, seasonal.periods=frequency)
                }

                fit <- tbats(ts_data)
                fitted_df <- data.frame(fit$fitted.values)

                forecast <- forecast(fit, h = horizon)
                forecast_df <- data.frame(forecast)

                output <- list(fitted_df, forecast_df)
                return(output)
            }
        """

        r_func = robjects.r(r_string)

        pandas2ri.activate()
        output_list = r_func(train, robjects.IntVector(self.frequency),
                             horizon)
        # output_list[0] = in-sample fit, output_list[1] = forecast table.
        fit = pandas2ri.ri2py(output_list[0])
        forecast = pandas2ri.ri2py(output_list[1])
        pandas2ri.deactivate()

        return fit, forecast
    def fit_and_predict(self, train, horizon):
        """Fit a Holt-Winters model (R 'forecast' package) on ``train``.

        :param train: training series passed into R
        :param horizon: number of periods to forecast
        :return: tuple (fitted_values, forecast) converted from R data frames
        """
        r_string = """
            function(data, frequency, horizon){
                library(forecast)
                ts_data <- ts(data, frequency=frequency)

                fit <- HoltWinters(ts_data)
                fitted_df <- data.frame(fit$fitted)

                forecast <- forecast(fit, h = horizon)
                forecast_df <- data.frame(forecast)

                output <- list(fitted_df, forecast_df)
                return(output)
            }
        """

        r_func = robjects.r(r_string)

        # Run R
        pandas2ri.activate()
        output_list = r_func(train, self.frequency, horizon)
        # output_list[0] = in-sample fit, output_list[1] = forecast table.
        fit = pandas2ri.ri2py(output_list[0])
        forecast = pandas2ri.ri2py(output_list[1])
        pandas2ri.deactivate()

        return fit, forecast
예제 #23
0
파일: ml.py 프로젝트: pombredanne/gramex
def r(code=None, path=None, rel=True, conda=True, convert=True,
      repo='https://cran.microsoft.com/', **kwargs):
    '''
    Runs the R script and returns the result.

    :arg str code: R code to execute.
    :arg str path: R script path. Cannot be used if code is specified
    :arg bool rel: True treats path as relative to the caller function's file
    :arg bool conda: True overrides R_HOME to use the Conda R
    :arg bool convert: True converts R objects to Pandas and vice versa
    :arg str repo: CRAN repo URL

    All other keyword arguments as passed as parameters
    '''
    # Validate mutually-exclusive arguments up front: the original raised
    # this only after mutating os.environ and the R globalenv.
    if code and path:
        raise RuntimeError('Use r(code=) or r(path=...), not both')

    # Use Conda R if possible
    if conda:
        r_home = _conda_r_home()
        if r_home:
            os.environ['R_HOME'] = r_home

    # Import the global R session
    try:
        from rpy2.robjects import r, pandas2ri, globalenv
    except ImportError:
        app_log.error('rpy2 not installed. Run "conda install rpy2"')
        raise
    except RuntimeError:
        app_log.error('Cannot find R. Set R_HOME env variable')
        raise

    # Set a repo so that install.packages() need not ask for one
    r('local({r <- getOption("repos"); r["CRAN"] <- "%s"; options(repos = r)})' % repo)

    # Activate or de-activate automatic conversion
    # https://pandas.pydata.org/pandas-docs/version/0.22.0/r_interface.html
    if convert:
        pandas2ri.activate()
    else:
        pandas2ri.deactivate()

    # Pass all other kwargs as global environment variables
    for key, val in kwargs.items():
        globalenv[key] = val

    if path:
        # if rel=True, load path relative to parent directory
        if rel:
            stack = inspect.getouterframes(inspect.currentframe(), 2)
            folder = os.path.dirname(os.path.abspath(stack[1][1]))
            path = os.path.join(folder, path)
        result = r.source(path, chdir=True)
        # source() returns a withVisible: $value and $visible. Use only the first
        result = result[0]
    else:
        result = r(code)

    return result
예제 #24
0
파일: conv.py 프로젝트: ivirshup/anndata2ri
def full_converter() -> conversion.Converter:
    """Build a converter stacking anndata rules on top of pandas2ri's.

    activate()/deactivate() is used only so that ``conversion.converter``
    temporarily includes the pandas rules while it is copied as the template.
    """
    pandas2ri.activate()
    new_converter = conversion.Converter("anndata conversion", template=conversion.converter)
    pandas2ri.deactivate()

    overlay_converter(converter, new_converter)

    return new_converter
예제 #25
0
    def fit(
        self,
        x: Optional[np.ndarray] = None,
        y: Optional[np.ndarray] = None,
        w: Optional[np.ndarray] = None,
        **kwargs,
    ) -> "GamMGCVModel":
        """
        Fit the model.

        Params
        ------
        x
            Independent variables.
        y
            Dependent variables.
        w
            Weights of :paramref:`x`.
        kwargs
            Keyword arguments.

        Returns
        -------
        :class:`cellrank.ul.models.GamMGCVModel`
            Return fitted self.
        """

        from rpy2 import robjects
        from rpy2.robjects import pandas2ri, Formula
        from rpy2.robjects.packages import importr

        super().fit(x, y, w, **kwargs)

        # Fit only on the positively-weighted observations.
        use_ixs = np.where(self.w > 0)[0]
        self._x = self.x[use_ixs]
        self._y = self.y[use_ixs]
        self._w = self.w[use_ixs]

        n_splines = kwargs.pop("n_splines", self._n_splines)

        mgcv = importr("mgcv")
        pandas2ri.activate()

        # Two-column design frame (x, y) for the R-side GAM fit.
        df = pandas2ri.py2rpy(
            pd.DataFrame(np.c_[self.x, self.y][use_ixs, :], columns=["x",
                                                                     "y"]))
        # Cubic regression spline with k=n_splines knots; sp/weights are
        # forwarded to mgcv::gam.
        self._model = mgcv.gam(
            Formula(f'y ~ s(x, k={n_splines}, bs="cr")'),
            data=df,
            sp=self._sp,
            family=robjects.r.gaussian,
            weights=pd.Series(self.w[use_ixs]),
        )

        pandas2ri.deactivate()

        return self
예제 #26
0
 def testActivate(self):
     """activate() adds converters to the registry; deactivate() removes them."""
     #FIXME: is the following still making sense ?
     self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
     l = len(robjects.conversion.py2ri.registry)
     k = set(robjects.conversion.py2ri.registry.keys())
     rpyp.activate()
     self.assertTrue(len(conversion.py2ri.registry) > l)
     rpyp.deactivate()
     # The registry must be restored exactly: same size AND same keys.
     self.assertEqual(l, len(conversion.py2ri.registry))
     self.assertEqual(k, set(conversion.py2ri.registry.keys()))
 def testRi2pandas(self):
     """ri2py must convert an R data.frame into a pandas DataFrame."""
     rdataf = robjects.r('data.frame(a=1:2, b=I(c("a", "b")), c=c("a", "b"))')
     rpyp.activate()
     pandas_df = robjects.conversion.ri2py(rdataf)
     rpyp.deactivate()
     self.assertIsInstance(pandas_df, pandas.DataFrame)
     # assertEquals is a deprecated alias (removed in Python 3.12);
     # use assertEqual.
     self.assertEqual(('a', 'b', 'c'), tuple(pandas_df.keys()))
     self.assertEqual(pandas_df['a'].dtype, numpy.dtype('int32'))
     self.assertEqual(pandas_df['b'].dtype, numpy.dtype('O'))
     self.assertEqual(pandas_df['c'].dtype, numpy.dtype('O'))
 def testSeries_issue264(self):
     """Regression test for rpy2 issue #264: str() of a converted
     string Series with an Int64Index used to segfault."""
     Series = pandas.core.series.Series
     s = Series(('a', 'b', 'c', 'd', 'e'),
                index=pandas.Int64Index([0,1,2,3,4]))
     rpyp.activate()
     rp_s = robjects.conversion.py2ri(s)
     rpyp.deactivate()
     # segfault before the fix
     str(rp_s)
     self.assertEqual(rinterface.ListSexpVector, type(rp_s))
예제 #29
0
def convertRtoPandas(file_path):
    """Load an RData file through the module-level ``load_RData_func_str``
    R helper and return its table as a pandas DataFrame.

    NOTE(review): relies on module globals ``load_RData_func_str`` and
    ``col_names`` being defined elsewhere in this file.

    :param file_path: path to the .RData file
    :return: pandas DataFrame with columns ``col_names``
    """
    # Pack the function above as a package
    r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str, "r_pack")

    pandas2ri.activate()
    r_data = r_pack.load_RData(file_path)
    py_data = pd.DataFrame(pandas2ri.ri2py(r_data), columns=col_names)
    pandas2ri.deactivate()

    return py_data
예제 #30
0
파일: conv.py 프로젝트: theislab/anndata2ri
def full_converter() -> conversion.Converter:
    """Build a converter stacking scipy and anndata rules on pandas2ri's.

    activate()/deactivate() is used only so that ``conversion.converter``
    temporarily includes the pandas rules while it is copied as the template.
    """
    pandas2ri.activate()
    new_converter = conversion.Converter("anndata conversion", template=conversion.converter)
    pandas2ri.deactivate()

    overlay_converter(scipy2ri.converter, new_converter)
    # overwrite the scipy2ri Sexp4 converter and add our others
    overlay_converter(converter, new_converter)

    return new_converter
예제 #31
0
def rdf_to_pydf(x):
    """Convert an R dataframe to a python dataframe.

    The converter is activated and then deactivated around the single
    conversion call; there have been some reports of inconsistencies if
    the converter is left activated during import.
    """
    pandas2ri.activate()
    converted = pandas2ri.ri2py(x)
    pandas2ri.deactivate()
    return converted
예제 #32
0
 def testSeries_issue264(self):
     # Regression test for rpy2 issue #264: converting a pandas Series
     # carrying an explicit Int64Index used to segfault when the result
     # was rendered with str().
     Series = pandas.core.series.Series
     s = Series(('a', 'b', 'c', 'd', 'e'),
                index=pandas.Int64Index([0,1,2,3,4]))
     rpyp.activate()
     rp_s = robjects.conversion.py2ri(s)
     rpyp.deactivate()
     # segfault before the fix
     str(rp_s)
     self.assertEqual(rinterface.ListSexpVector, type(rp_s))
예제 #33
0
 def testActivate(self):
     #FIXME: is the following still making sense ?
     # Before activation the pandas converter must not be the active one.
     self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
     l = len(robjects.conversion.py2ri.registry)
     k = set(robjects.conversion.py2ri.registry.keys())
     rpyp.activate()
     # activate() registers additional py2ri conversion rules...
     self.assertTrue(len(conversion.py2ri.registry) > l)
     rpyp.deactivate()
     # ...and deactivate() restores the original registry exactly,
     # both in size and in registered key set.
     self.assertEqual(l, len(conversion.py2ri.registry))
     self.assertEqual(k, set(conversion.py2ri.registry.keys()))
예제 #34
0
 def testRi2pandas(self):
     """Round-trip an R data.frame to pandas and check the column dtypes."""
     rdataf = robjects.r('data.frame(a=1:2, b=I(c("a", "b")), c=c("a", "b"))')
     rpyp.activate()
     pandas_df = robjects.conversion.ri2py(rdataf)
     rpyp.deactivate()
     self.assertIsInstance(pandas_df, pandas.DataFrame)
     # assertEquals is a deprecated alias of assertEqual (removed in
     # Python 3.12); use the canonical name.
     self.assertEqual(('a', 'b', 'c'), tuple(pandas_df.keys()))
     # R integer vectors convert to 32-bit ints; the other two columns
     # (a string and an AsIs-wrapped string) come back as object dtype.
     self.assertEqual(pandas_df['a'].dtype, numpy.dtype('int32'))
     self.assertEqual(pandas_df['b'].dtype, numpy.dtype('O'))
     self.assertEqual(pandas_df['c'].dtype, numpy.dtype('O'))
예제 #35
0
 def testActivate(self):
     #FIXME: is the following still making sense ?
     # py2rpy is the rpy2 3.x name of the python-to-R conversion dispatch.
     assert rpyp.py2rpy != robjects.conversion.py2rpy
     l = len(robjects.conversion.py2rpy.registry)
     k = set(robjects.conversion.py2rpy.registry.keys())
     rpyp.activate()
     # activate() adds pandas-specific rules to the registry...
     assert len(conversion.py2rpy.registry) > l
     rpyp.deactivate()
     # ...and deactivate() removes them again, restoring the original
     # registry size and key set.
     assert len(conversion.py2rpy.registry) == l
     assert set(conversion.py2rpy.registry.keys()) == k
예제 #36
0
    def fit(self, X, y, **kwargs):
        """Fit an R mgcv GAM to (X, y).

        Builds an mgcv formula from the feature names (smooth terms for
        features with >2 unique values, linear terms otherwise), pushes the
        columns of X and the target y into the formula's R environment, and
        calls the configured mgcv fitting function.

        :param X: feature DataFrame; object-dtype columns are one-hot encoded
        :param y: target vector
        :param kwargs: accepted for interface compatibility; not used here
        """
        # Do one-hot encoding
        self.cat_columns = X.columns[X.dtypes == object].values.tolist()
        X = pd.get_dummies(X)

        import rpy2.robjects as ro
        from rpy2.robjects import pandas2ri, Formula

        assert not self.is_fit(), 'Call fit() twice!'

        if self.clean_feature_names is None:  # R can not accept weird symbols as names
            self.clean_feature_names = []
            for name in self.feature_names:
                # Replace every character R would reject in an identifier.
                the_name = name.replace('-', '_').replace(' ', '_').replace('$', '').replace('/', '_')\
                    .replace('>', '_big_').replace('(', '_lq_').replace(')', '_rq_').replace('?', '_ques_')\
                    .replace('.', '_dot_').replace('&', '_and_')
                # R identifiers may not start with an underscore.
                if the_name.startswith('_'):
                    the_name = 's_%s' % (the_name[1:])

                self.clean_feature_names.append(the_name)

        # Create the fitting string e.g. 'y~s(age)+s(BUN_level)+gender'
        formula_terms = []
        for feat_name, clean_feat_name in zip(self.feature_names,
                                              self.clean_feature_names):
            num_unique_x = len(self.X_values_counts[feat_name])
            # Constant features carry no information: skip them.
            if num_unique_x < 2:
                continue

            # Binary features enter linearly; others get a cubic-regression
            # smooth whose basis size is capped by maxk.
            term_str = "%s" % clean_feat_name if num_unique_x == 2 \
                else "s(%s, bs='cr', k=%d)" % (clean_feat_name, min(self.maxk, int(num_unique_x*2/3)))

            formula_terms.append(term_str)

        formula_str = 'y~%s' % ('+'.join(formula_terms))
        print('formula_str:', formula_str)
        formula = Formula(formula_str)

        pandas2ri.activate()

        # mgcv looks the variables up in the formula's environment, so the
        # data is injected there rather than passed as a data argument.
        env = formula.environment
        env['y'] = y
        for feat_name, clean_feat_name in zip(self.feature_names,
                                              self.clean_feature_names):
            env[clean_feat_name] = X[feat_name]

        # with Timer('Fitting the R mgcv model'):
        self.R_model = ro.r[self.model_to_use](formula,
                                               family=self.family,
                                               nthreads=self.nthreads,
                                               discrete=self.discrete,
                                               select=self.select)

        pandas2ri.deactivate()
예제 #37
0
def computeNClusters(counts, min_size=20):
    """Computes the number of clusters from the data
    using Scran::quickCluster (igraph method).

    :param counts: a counts matrix (genes as rows); it is transposed
        before being handed to quickCluster
    :param min_size: minimum cluster size forwarded to quickCluster
    :return: the number of distinct clusters found
    """
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts.transpose())
    scran = RimportLibrary("scran")
    as_matrix = r["as.matrix"]
    # NOTE(review): min_size is passed positionally; confirm it maps onto
    # quickCluster's min.size argument in the scran version in use.
    clusters = scran.quickCluster(as_matrix(r_counts), min_size, method="igraph")
    n_clust = len(set(clusters))
    pandas2ri.deactivate()
    return n_clust
예제 #38
0
파일: data.py 프로젝트: charliemarx/pmtools
def _save_data_as_rdata(file_name, data, cvindices):
    """Save a dataset and its cross-validation indices to disk as RData.

    Assigns the dataset fields, the design matrices and (if present) the
    test-set arrays into the embedded R session, then writes them all to
    one RData file.

    :param file_name: path of the RData file to write
    :param data: dict holding the dataset fields (format, X, Y, weights, ...)
    :param cvindices: cross-validation indices stored alongside the data
    :return: True on success
    """
    import rpy2.robjects as rn
    from .rpy2_helper import r_assign, r_save_to_disk
    from rpy2.robjects import pandas2ri
    data = set_defaults_for_data(data)
    assert check_data(data)

    fields_to_save = [
        "format", "Y", "sample_weights", "outcome_name", "variable_names"
    ]

    try:

        for k in fields_to_save:
            r_assign(data[k], k)

    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; keep the interactive debug shell on failure.
        from eqm.debug import ipsh
        ipsh()

    r_assign(cvindices, "cvindices")

    pandas2ri.activate()

    X_df = pd.DataFrame(data=data['X'])
    X_df.columns = data['variable_names']
    rn.r.assign('X', X_df)

    # test set
    has_test_set = ('X_test' in data) and ('Y_test'
                                           in data) and ('sample_weights_test'
                                                         in data)
    if has_test_set:
        X_test_df = pd.DataFrame(data=data['X_test'])
        X_test_df.columns = data['variable_names']
        rn.r.assign('X_test', pandas2ri.py2ri(X_test_df))
        r_assign(data['Y_test'], 'Y_test')
        r_assign(data['sample_weights_test'], 'sample_weights_test')
    else:
        # Empty placeholders so the saved RData always defines all variables.
        rn.reval("""
                X_test = matrix(data=NA, nrow = 0, ncol = ncol(X));
                Y_test = matrix(data=NA, nrow = 0, ncol = 1);
                sample_weights_test = matrix(data=1.0, nrow = 0, ncol = 1);
                """)

    pandas2ri.deactivate()
    variables_to_save = fields_to_save + [
        "cvindices", "X", "X_test", "Y_test", "sample_weights_test"
    ]
    r_save_to_disk(file_name, variables_to_save)
    return True
예제 #39
0
 def testRi2pandas_issue207(self):
     # Regression test for rpy2 issue #207: assigning an R DataFrame into
     # globalenv while the pandas converter is active used to raise a
     # ValueError.
     d = robjects.DataFrame({'x': 1})
     rpyp.activate()
     try:
         ok = True
         robjects.globalenv['d'] = d
     except ValueError:
         ok = False
     finally:
         # Always deactivate and clean the global environment, whatever
         # the assignment did.
         rpyp.deactivate()
         if 'd' in robjects.globalenv:
             del(robjects.globalenv['d'])
     self.assertTrue(ok)
 def testRi2pandas_issue207(self):
     # Regression test for rpy2 issue #207: assigning an R DataFrame into
     # globalenv while the pandas converter is active used to raise a
     # ValueError.
     d = robjects.DataFrame({'x': 1})
     rpyp.activate()
     try:
         ok = True
         robjects.globalenv['d'] = d
     except ValueError:
         ok = False
     finally:
         # Always deactivate and clean the global environment, whatever
         # the assignment did.
         rpyp.deactivate()
         if 'd' in robjects.globalenv:
             del(robjects.globalenv['d'])
     self.assertTrue(ok)
def computeNClusters(counts, min_size=20):
    """Computes the number of clusters from the data
    using Scran::quickCluster.

    :param counts: a counts matrix (genes as rows); it is transposed
        before being handed to quickCluster
    :param min_size: minimum cluster size forwarded to quickCluster
    :return: the number of distinct clusters found
    """
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts.transpose())
    scran = RimportLibrary("scran")
    # Register a multicore backend (all cores but one) for BiocParallel.
    multicore = RimportLibrary("BiocParallel")
    multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
    as_matrix = r["as.matrix"]
    clusters = scran.quickCluster(as_matrix(r_counts), min_size)
    n_clust = len(set(clusters))
    pandas2ri.deactivate()
    return n_clust
예제 #42
0
def opt_imp(
    data,
    methods,
    n_iter=10,
    freq=1440,
    measures=None):
    """Benchmark imputation methods on artificially added outages.

    Repeatedly injects synthetic outages (drawn from the data's own outage
    length distribution) into the longest outage-free stretch of `data`,
    imputes them with every method/parameter combination in `methods`, and
    scores each combination with `measures`. Results are averaged over the
    `n_iter` random repetitions.

    :param data: input time series (pandas Series)
    :param methods: list of dicts with keys 'name', 'alg' (R imputation
        callable) and 'opt' (dict of option lists to sweep)
    :param n_iter: number of random outage-injection repetitions
    :param freq: frequency passed to R's ts() constructor
    :param measures: mapping of score name -> scoring callable; defaults to
        SMAE/RMSE/SRMSE/SMAPE/MASE
    :return: DataFrame of scores averaged over the repetitions
    """
    if measures is None:
        # Built per call: a mutable default argument would be shared
        # between calls (classic Python pitfall).
        measures = {
            'SMAE': pf.smae,
            'RMSE': pf.rmse,
            'SRMSE': pf.srmse,
            'SMAPE': pf.smape,
            'MASE': partial(pf.mase, shift=60 * 24 * 7)
        }
    dist = out_dist(data)  # get the distribution of outage lengths
    data_lno = lno(data)  # get the longest no outage (LNO)
    ts = ro.r.ts  # R time series object
    pandas2ri.activate()  # activate connection
    results = []  # initialize empty list for results
    for i in range(
            n_iter
    ):  # repeat multiple times because of random nature of outage additions
        data_out = add_out(data=data_lno, dist=dist)  # add outages
        data_out_ts = ts(ro.FloatVector(data_out.values), frequency=freq
                         )  # construct time series object & estimate frequency
        result = pd.DataFrame()  # empty dataframe for scores
        for method in methods:  # for each method under consideration
            name = method['name']  # get name
            alg = method['alg']  # get algorithm
            opt = method['opt']  # get options
            for kwargs in dp.dol2lod(opt):  # for all combinations of kwargs
                print(str(i) + ':', kwargs)  # progress update
                data_imp = pd.Series(
                    index=data_out.index,
                    data=np.reshape(pandas2ri.ri2py(alg(data_out_ts,
                                                        **kwargs)),
                                    newshape=data_out.shape,
                                    order='C')
                )  # get results of imputation from R & construct DataFrame using original index and columns
                label = ','.join([name] + [
                    str(key) + ':' + str(kwargs[key]) for key in sorted(kwargs)
                ])  # build entry label from sorted keys
                pfm = pf.ev(pred=data_imp,
                            true=data_lno,
                            label=label,
                            measures=measures)  # evaluate performance
                result = pd.concat([result,
                                    pfm])  # append computed performance
        result.index.name = 'method'  # name index column
        results.append(result)  # add to results
    pandas2ri.deactivate()  # deactivate connection
    return sum(results) / n_iter
예제 #43
0
def computeNClusters(counts, min_size=20):
    """Estimate the number of clusters in the data with Scran::quickCluster.

    :param counts: a counts matrix (genes as rows); transposed before
        being handed to quickCluster
    :param min_size: minimum cluster size forwarded to quickCluster
    :return: the number of distinct clusters found
    """
    pandas2ri.activate()
    transposed_counts = pandas2ri.py2ri(counts.transpose())
    scran_pkg = RimportLibrary("scran")
    # Register a multicore backend (all cores but one) for BiocParallel.
    parallel_pkg = RimportLibrary("BiocParallel")
    n_workers = multiprocessing.cpu_count() - 1
    parallel_pkg.register(parallel_pkg.MulticoreParam(n_workers))
    to_matrix = r["as.matrix"]
    cluster_labels = scran_pkg.quickCluster(to_matrix(transposed_counts), min_size)
    num_clusters = len(set(cluster_labels))
    pandas2ri.deactivate()
    return num_clusters
예제 #44
0
def computeSizeFactors(counts):
    """Compute DESeq size factors for the given counts matrix
    (genes as rows and spots as columns).

    :param counts: a matrix of counts (genes as rows)
    :return: the normalization factors as a vector
    """
    pandas2ri.activate()
    converted_counts = pandas2ri.py2ri(counts)
    deseq_pkg = RimportLibrary("DESeq2")
    r_size_factors = deseq_pkg.estimateSizeFactorsForMatrix(converted_counts)
    size_factors = pandas2ri.ri2py(r_size_factors)
    pandas2ri.deactivate()
    return size_factors
예제 #45
0
def computeRLEFactors(counts):
    """ Compute normalization size factors
    using the RLE method described in EdgeR and returns then as a vector.
    :param counts: a matrix of counts (genes as rows)
    :return returns the normalization factors a vector
    """
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts)
    edger = RimportLibrary("edgeR")
    as_matrix = r["as.matrix"]
    dds = edger.calcNormFactors(as_matrix(r_counts), method="RLE")
    pandas_sf = pandas2ri.ri2py(dds)
    # Scale the edgeR factors by the per-column library sizes.
    # NOTE(review): colSums is called on the pandas `counts`, relying on the
    # active converter rather than the already-converted r_counts — confirm
    # both paths produce the same R matrix.
    pandas_cm = pandas2ri.ri2py(r.colSums(counts))
    pandas2ri.deactivate()
    return pandas_sf * pandas_cm
 def testRepr(self):
     # this should go to testVector, with other tests for repr()
     # Build a DataFrame mixing bool, int, float, bytes and unicode columns;
     # repr() of the converted R object used to fail with a TypeError.
     l = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
          ('i', numpy.array([1, 2, 3], dtype="i")),
          ('f', numpy.array([1, 2, 3], dtype="f")),
          ('s', numpy.array(["a", "b", "c"], dtype="S")),
          ('u', numpy.array([u"a", u"b", u"c"], dtype="U")))
     od = OrderedDict(l)
     pd_df = pandas.core.frame.DataFrame(od)
     rpyp.activate()
     rp_df = robjects.conversion.py2ri(pd_df)
     rpyp.deactivate()
     s = repr(rp_df) # used to fail with a TypeError
     s = s.split('\n')
     # String columns convert to R factors, numeric ones to plain arrays.
     self.assertEqual('[Array, Array, Array, FactorV..., FactorV...]', s[1].strip())
 def testDataFrame(self):
     # Convert a DataFrame with bool, int, float, bytes, unicode and
     # datetime columns and check the R result keeps the same shape.
     l = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
          ('i', numpy.array([1, 2, 3], dtype="i")),
          ('f', numpy.array([1, 2, 3], dtype="f")),
          ('s', numpy.array(["a", "b", "c"], dtype="S")),
          ('u', numpy.array([u"a", u"b", u"c"], dtype="U")),
          ('dates', [datetime(2012, 5, 2), 
                     datetime(2012, 6, 3), 
                     datetime(2012, 7, 1)]))
     od = OrderedDict(l)
     pd_df = pandas.core.frame.DataFrame(od)
     rpyp.activate()
     rp_df = robjects.conversion.py2ri(pd_df)
     rpyp.deactivate()
     # Dimensions must survive the conversion unchanged.
     self.assertEqual(pd_df.shape[0], rp_df.nrow)
     self.assertEqual(pd_df.shape[1], rp_df.ncol)
예제 #48
0
def deaDESeq2(counts, conds, comparisons, alpha, size_factors=None):
    """Makes a call to DESeq2 to
    perform D.E.A. in the given
    counts matrix with the given conditions and comparisons.
    Can be given size factors.
    Returns a list of DESeq2 results for each comparison.

    :param counts: a matrix of counts (genes as rows)
    :param conds: list of condition labels, one per column of counts
    :param comparisons: iterable of (A, B) condition pairs to contrast
    :param alpha: significance level forwarded to DESeq2::results
    :param size_factors: optional precomputed size factors; when given,
        dispersion estimation and the Wald test are run manually
    :return: a list of pandas DataFrames, one per comparison
    """
    results = list()
    pandas2ri.activate()
    # try/finally guarantees the converter is deactivated even if an R call
    # fails (the original skipped deactivate() on error and re-raised with
    # `raise e`, which rewrites the traceback for no benefit).
    try:
        deseq2 = RimportLibrary("DESeq2")
        multicore = RimportLibrary("BiocParallel")
        multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
        # Create the R conditions and counts data
        r_counts = pandas2ri.py2ri(counts)
        cond = robjects.DataFrame({"conditions": robjects.StrVector(conds)})
        design = r('formula(~ conditions)')
        dds = r.DESeqDataSetFromMatrix(countData=r_counts, colData=cond, design=design)
        if size_factors is None:
            dds = r.DESeq(dds, parallel=True, useT=True,
                          minmu=1e-6, minReplicatesForReplace=np.inf)
        else:
            assign_sf = r["sizeFactors<-"]
            dds = assign_sf(object=dds, value=robjects.FloatVector(size_factors))
            dds = r.estimateDispersions(dds)
            dds = r.nbinomWaldTest(dds)
        # Perform the comparisons and store results in list
        for A, B in comparisons:
            result = r.results(dds, contrast=r.c("conditions", A, B),
                               alpha=alpha, parallel=True)
            result = r['as.data.frame'](result)
            genes = r['rownames'](result)
            result = pandas2ri.ri2py_dataframe(result)
            # There seems to be a problem parsing the rownames from R to pandas
            # so we do it manually
            result.index = genes
            results.append(result)
    finally:
        pandas2ri.deactivate()
    return results
예제 #49
0
def Rtsne(counts, dimensions, theta=0.5, dims=50, perplexity=30, max_iter=1000):
    """Performs dimensionality reduction using the R package Rtsne.

    :param counts: matrix of values to embed (observations as rows)
    :param dimensions: dimensionality of the output embedding
    :param theta: Barnes-Hut accuracy/speed trade-off parameter
    :param dims: number of PCA dimensions kept before t-SNE (initial_dims)
    :param perplexity: t-SNE perplexity
    :param max_iter: maximum number of t-SNE iterations
    :return: the embedded coordinates as a numpy/pandas array (Y)
    """
    pandas2ri.activate()
    r_counts = pandas2ri.py2ri(counts)
    tsne = RimportLibrary("Rtsne")
    multicore = RimportLibrary("BiocParallel")
    multicore.register(multicore.MulticoreParam(multiprocessing.cpu_count()-1))
    as_matrix = r["as.matrix"]
    # Bug fix: the explicitly converted r_counts was computed but unused;
    # the original passed the raw pandas `counts`, relying on the active
    # converter and leaving a dead variable.
    tsne_out = tsne.Rtsne(as_matrix(r_counts),
                          dims=dimensions,
                          theta=theta,
                          check_duplicates=False,
                          pca=True,
                          initial_dims=dims,
                          perplexity=perplexity,
                          max_iter=max_iter,
                          verbose=False)
    pandas_tsne_out = pandas2ri.ri2py(tsne_out.rx2('Y'))
    pandas2ri.deactivate()
    return pandas_tsne_out
예제 #50
0
def apply_correlation_metric(r_stream1, r_stream2, metric_function_name, *args, **kwargs):
    """
    Invoke a named "correlation" R metric and convert the R dataframe result into
    a Pandas dataframe.
    :param r_stream1: an r_stream object
    :param r_stream2: an r_stream object
    :param metric_function_name: the name of the set of metrics
    :return: a pandas DataFrame of metric values with UTCDateTime start/end times
    """
    # Look up the metric function inside the IRISMustangMetrics R package.
    function = 'IRISMustangMetrics::' + metric_function_name + 'Metric'
    R_function = robjects.r(function)
    pandas2ri.activate()
    r_metriclist = R_function(r_stream1, r_stream2, *args, **kwargs)  # args and kwargs shouldn't be needed in theory
    pandas2ri.deactivate()
    r_dataframe = _R_metricList2DF(r_metriclist)
    df = pandas2ri.ri2py_dataframe(r_dataframe)
    
    # Convert columns from R POSIXct to python UTCDateTime
    df.starttime = df.starttime.apply(UTCDateTime)
    df.endtime = df.endtime.apply(UTCDateTime)
    return df
예제 #51
0
def calculate_prec(cross_df, automate= False):
    """
    function that calculates the prec_inf using R
    and returns a fully contructed plottable dataframe

    Args:
     cross_df: pandas dataframe containing the data
     automate: bool, a To do feature to automatically calculate the best fit

    Returns:
     dataframe contining the R added precision values to be
     received most always by the plotting commander.
    """
    import rpy2.robjects as ro
    from rpy2.robjects import pandas2ri
    from rpy2.robjects.packages import importr
    import rpy2.robjects.numpy2ri
    import rpy2.rinterface as rin


    stats = importr('stats')
    base = importr('base')
    # activate R environemnt in python
    rpy2.robjects.numpy2ri.activate()
    pandas2ri.activate()
    # read in the necessary columns into an x/y dataframe for the fit
    df = pd.DataFrame({'x': cross_df['Kpoints_atom_density'],
                       'y': cross_df['Energy']})
    ro.globalenv['dataframe']=df

    ### *** R used to obtain the fit on the data to calculate prec_inf *** ###
    # perform regression  - bokeh widgets can be used here to provide the inputs to the nls regression

    # some python to R translation of object names via the pandas - R dataframes
    y = df['y']
    x = df['x']
    l = len(y) - 1  # needed because R indexes list from 1 to len(list)

    # ***WIDGET inputs*** # OR AUTOMATE
    # the slider  inputs on starting point or can be automated also
    l1 = 3
    l2 = 0
    fitover = rin.SexpVector(list(range(l1,l-l2)), rin.INTSXP)

    # numeric entry widget for 'b' is plausible for user to choose best starting guess
    start_guess = {'a': y[l], 'b': 5}
    start=pandas2ri.py2ri(pd.DataFrame(start_guess,index=start_guess))

    # drop down list selection of model
    model = 'y~a*x/(b+x)'

    # Minimize function with weights and selection
    # NOTE(review): `x^2` is XOR in Python, not exponentiation — likely
    # x**2 was intended for the weights; confirm before relying on the fit.
    m = \
    stats.nls(model, start = start, algorithm = "port", subset = fitover, weights = x^2, data=base.as_symbol('dataframe'))

    # Estimation of goodness of fit
    g = stats.cor(y[l1:l-l2],stats.predict(m))

    # Report summary of fit, values and error bars
    print( base.summary(m).rx2('coefficients') )

    # Extrapolation value is given by a
    a = stats.coef(m)[1]

    # Calculation of precision
    prec = abs(y-a)

    # test print outs of the data ? how to render onto html like Shiny if necesary ?

    print("We learn that the converged value is: {0} and best precision achieved in the measurement is {1}".format(a, min(abs(prec))))

    cross_df['Energy_Prec_Inf'] = prec

    # close the R environments
    rpy2.robjects.numpy2ri.deactivate()
    pandas2ri.deactivate()

    return (cross_df)
 def testCategorical(self):
     # An R factor vector must convert to a pandas Categorical.
     factor = robjects.vectors.FactorVector(('a', 'b', 'a'))
     rpyp.activate()
     rp_c = robjects.conversion.ri2py(factor)
     rpyp.deactivate()
     self.assertEqual(pandas.Categorical, type(rp_c))