def zscrp(csvs):
    """Add a ``zpos`` column to every CSV in *csvs*, rewriting each file in place.

    Each file is read with pandas, passed to the R helper ``calc.zpos`` and
    the converted result is written back to the same path (without the
    index).  ``calc.zpos`` computes, per ``(rep, plt_nm)`` group, the mean and
    standard deviation of ``area`` over the rows whose ``condt`` equals
    ``"+ctrl"`` and sets ``zpos = (area - ctrlmean) / ctrlstdev``.

    Args:
        csvs: Iterable of CSV file paths.  The R code reads the columns
            ``rep``, ``plt_nm``, ``condt`` and ``area``.
    """
    # The helper does not depend on the file being processed, so define it
    # once instead of re-parsing the same R source on every iteration.
    # NOTE(review): the R code uses %>% / group_by / left_join, so dplyr must
    # already be attached in the R session -- confirm against the caller.
    robjects.r('''
        calc.zpos <- function(x) {
            ctrl.avgs <- x %>%
                group_by(rep, plt_nm) %>%
                filter(condt == "+ctrl") %>%
                dplyr::summarise(ctrlmean = mean(area), ctrlstdev = sd(area))
            x.zpos <- left_join(x, ctrl.avgs, by = c("rep", "plt_nm")) %>%
                mutate(zpos = (area - ctrlmean)/ctrlstdev) %>%
                select(-c(ctrlmean, ctrlstdev))
            return(x.zpos)
        }
    ''')
    calc_zpos = robjects.globalenv['calc.zpos']

    for path in csvs:
        frame = pd.read_csv(path)
        # NOTE(review): passing a pandas frame straight to an R function
        # presumably relies on pandas2ri conversion being active -- confirm.
        scored = calc_zpos(frame)
        # The former `r.data('res')` call was dropped: it asked R to load a
        # packaged dataset called "res" and had no effect on `scored`.
        pandas2ri.ri2py_dataframe(scored).to_csv(path, index=False)
def load_data() -> pd.DataFrame:
    """Load the ``chredlin`` dataset from the R package ``faraway``.

    Returns:
        The dataset as a pandas DataFrame indexed by the R row names, with an
        extra ``log_income`` column holding the natural log of ``income``.
    """
    importr('faraway')
    r.data('chredlin')

    r_frame = r.chredlin
    frame = pandas2ri.ri2py(r_frame)
    # Re-use the R row names as the pandas index.
    row_labels = pandas2ri.ri2py(r_frame.rownames)
    frame = frame.set_index(row_labels)
    frame['log_income'] = np.log(frame['income'])
    return frame
def load_R_dataset(name, package=None, convert=True):
    """Load the R dataset *name* and return it.

    Args:
        name: Name of the dataset handed to R's ``data()``.
        package: Optional R package imported before the dataset is loaded.
        convert: When true the R object is passed through ``_convert_robj``;
            otherwise the raw R object is returned.
    """
    if package:
        importr(package)
    r.data(name)

    dataset = r[name]
    return _convert_robj(dataset) if convert else dataset
def load_data(name, package=None, convert=True):
    """Load the R dataset *name*, optionally converting it.

    Args:
        name: Dataset name passed to R's ``data()``.
        package: Optional R package to import first.
        convert: If true, return ``convert_robj`` applied to the R object;
            if false, return the R object untouched.
    """
    if package:
        importr(package)
    r.data(name)

    raw = r[name]
    if not convert:
        return raw
    return convert_robj(raw)
def transform(df, df_len):
    """Return the first *df_len* rows of *df* with a leading ``ID`` column.

    The ``ID`` column holds the row index of the selected rows and is placed
    before all original columns.  *df* itself is not modified.

    Args:
        df: Source pandas DataFrame.
        df_len: Number of leading rows to keep (forwarded to ``DataFrame.head``).

    Returns:
        A new DataFrame with columns ``["ID", *original columns]``.
    """
    # Kept from the original: callers may rely on the converter being active.
    pandas2ri.activate()

    # Work on an explicit copy so that adding a column never writes through
    # to (or warns about) a slice of the caller's frame.
    pandas_df = df.head(df_len).copy()
    ordered_columns = ["ID", *pandas_df.columns]
    pandas_df["ID"] = pandas_df.index
    # The original also called reindex() once before the assignment and threw
    # the result away, and called r.data('df'), which does not touch `df`;
    # both no-ops were removed.
    return pandas_df.reindex(columns=ordered_columns)
def test_nnd_hotdeck_using_rpy2():
    """Smoke-test ``nnd_hotdeck_using_rpy2`` on R's ``iris`` dataset.

    The test is skipped (a message is printed) when ``rpy2`` is ``None``.
    It makes no assertions beyond requiring that the call returns two values.
    """
    if rpy2 is None:
        print('rpy2 is absent: skipping test')
        return

    r.data('iris')
    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])
    iris = r['iris']

    # Recipient data.frame: rows 1:15, 51:65 and 101:115, without Petal.Width.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: the remaining rows, without Petal.Length.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted "Petal.Length" from iris_rec here, leaving
    # the recipient without it and the donor with it -- the opposite of the
    # layout described below.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species" in common.
    # "Petal.Length" is available only in iris_rec,
    # "Petal.Width" is available only in iris_don.

    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".
    x, y = nnd_hotdeck_using_rpy2(
        receiver=iris_rec,
        donor=iris_don,
        donor_classes='Species',
        z_variables="Petal.Width",
        matching_variables=["Sepal.Length", "Sepal.Width"],
    )
def summr(csv):
    """Write per-(rep, plt_nm, condt) summary statistics of *csv* to a new CSV.

    The input is read with pandas and summarised by the R helper ``summr``
    (mean/median/sd of ``O.D.``, mean/sd of ``zneg`` and ``zpos``).  The
    result is written next to the input as ``<stem>_sumfile.csv``, where
    ``<stem>`` is the input file name up to its first dot.

    Args:
        csv: Path of the input CSV file.
    """
    stem = os.path.basename(csv).split('.')[0]
    out_dir = os.path.dirname(csv)
    frame = pd.read_csv(csv)

    # NOTE(review): the R code uses %>% / group_by, so dplyr must already be
    # attached in the R session -- confirm against the caller.
    robjects.r('''
        summr <- function(z) {
            foo <- z %>%
                group_by(rep, plt_nm, condt) %>%
                dplyr::summarise(mean_OD = mean(`O.D.`),
                                 median_OD = median(`O.D.`),
                                 sd_OD = sd(`O.D.`),
                                 mean_zneg = mean(zneg),
                                 sd_zneg = sd(zneg),
                                 mean_zpos = mean(zpos),
                                 sd_zpos = sd(zpos))
        }
    ''')
    r_summr = robjects.globalenv['summr']
    summary = r_summr(frame)
    # The former `r.data('raz')` call was dropped: it asked R to load a
    # packaged dataset called "raz" and had no effect on `summary`.
    pd_df = pandas2ri.ri2py_dataframe(summary)

    # os.path.join keeps the output beside the input even when `csv` has no
    # directory part; plain '' + '/' + name would point at the filesystem root.
    pd_df.to_csv(os.path.join(out_dir, stem + '_sumfile.csv'), index=False)
def summr_ov_cd(csv):
    """Write per-``condt`` summary statistics of *csv* to a new CSV.

    The input is read with pandas and summarised by the R helper
    ``summr_rp`` (mean/median/sd of ``area``, mean/sd of ``zneg`` and
    ``zpos``, grouped by ``condt``).  The result is written next to the input
    as ``<stem>_sum_ov_condt.csv``, where ``<stem>`` is the input file name up
    to its first dot.

    Args:
        csv: Path of the input CSV file.
    """
    stem = os.path.basename(csv).split('.')[0]
    out_dir = os.path.dirname(csv)
    frame = pd.read_csv(csv)

    # NOTE(review): the R code uses %>% / group_by, so dplyr must already be
    # attached in the R session -- confirm against the caller.
    robjects.r('''
        summr_rp <- function(z) {
            foo <- z %>%
                group_by(condt) %>%
                dplyr::summarise(mean_area = mean(area),
                                 median_area = median(area),
                                 sd_area = sd(area),
                                 mean_zneg = mean(zneg),
                                 sd_zneg = sd(zneg),
                                 mean_zpos = mean(zpos),
                                 sd_zpos = sd(zpos))
        }
    ''')
    r_summr_rp = robjects.globalenv['summr_rp']
    summary = r_summr_rp(frame)
    # The former `r.data('raz')` call was dropped: it asked R to load a
    # packaged dataset called "raz" and had no effect on `summary`.
    pd_df = pandas2ri.ri2py_dataframe(summary)

    # os.path.join keeps the output beside the input even when `csv` has no
    # directory part; plain '' + '/' + name would point at the filesystem root.
    pd_df.to_csv(os.path.join(out_dir, stem + '_sum_ov_condt.csv'), index=False)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from rpy2.robjects.packages import importr
from rpy2.robjects import r, pandas2ri
import math

# --- Load the dataset from R -------------------------------------------------
pandas2ri.activate()
utils = importr("utils")
AppliedPredictiveModeling = importr("AppliedPredictiveModeling", data=True)
r.data("segmentationOriginal")

# One-hot encode the R data set as fetched through the pandas converter.
dataset = pd.get_dummies(r["segmentationOriginal"])

# Features: every column except the first and the last three; target: last column.
X = dataset.iloc[:, 1:-3].values
y = dataset.iloc[:, -1].values

# --- Class frequencies -------------------------------------------------------
Label = pd.value_counts(y).to_frame().reset_index()
print(Label)

# --- SVM models --------------------------------------------------------------
ModelLinear = SVM(X, y)
ModelLinear.Linearparam()
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 10 12:06:35 2015

@author: Kaddabadda
"""
import pandas as pd
from rpy2.robjects import pandas2ri
pandas2ri.activate()
from rpy2.robjects import r

r.data('prediction')
# Fixed: look the object up by its name.  The original indexed with the bare
# identifier `prediction`, which is never defined and raised NameError.
df_iris = pandas2ri.ri2py(r['prediction'])

from rpy2 import robjects

Rdir = "I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda"
f = r'/Tx20130227.rda'
# NOTE(review): Rdir already ends with the file name, so `obj` repeats it
# (".../Tx20130227.rda/Tx20130227.rda") -- confirm the intended path.
obj = Rdir + f

m = robjects.r('matrix(1:6, nrow=2, ncol=3)')
# NOTE(review): `obj` is a file path, not R source code; loading an .rda file
# presumably needs R's load() instead -- confirm the intent.
m = robjects.reval(obj)

rdf = 'I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda'
# NOTE(review): this converts the path string itself, not the file contents,
# and the result is discarded -- confirm the intent.
pandas2ri.ri2py(rdf)
""" #Data sets from rpy2.robjects import r, pandas2ri def data1(name): return pandas2ri.ri2py(r[name]) df = data1('iris') df.describe() df = data1('mtcars') df df.describe() df2 = data1('mtcars') df2.describe() print(r.data()) df3=pandas2ri.ri2py(r('women')) df3 #$ pip install pydataset #then just load up any dataset you wish (currently around 757 datasets available) : from pydataset import data titanic = data('titanic') titanic from sklearn import datasets from sklearn import datasets iris = datasets.load_iris()
def test_reproduction():
    """Reproduce the NND hot-deck example from the StatMatch documentation.

    Runs ``StatMatch.NND_hotdeck`` and ``StatMatch.create_fused`` (with and
    without ``dup_x``) on R's ``iris`` dataset through rpy2.  The test returns
    immediately when ``rpy2`` is ``None`` and makes no assertions; it only
    checks that the calls complete.

    See https://cran.r-project.org/web/packages/StatMatch/StatMatch.pdf
    """
    if rpy2 is None:
        return

    r.data('iris')
    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])
    iris = r['iris']

    # Recipient data.frame: rows 1:15, 51:65 and 101:115, without Petal.Width.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: the remaining rows, without Petal.Length.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted "Petal.Length" from iris_rec here, leaving
    # the recipient without it and the donor with it -- the opposite of the
    # layout described below.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species" in common.
    # "Petal.Length" is available only in iris_rec,
    # "Petal.Width" is available only in iris_don.

    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".
    StatMatch = importr("StatMatch")
    match_vars = pd.Series(["Sepal.Length", "Sepal.Width"])

    out_NND = StatMatch.NND_hotdeck(
        data_rec=iris_rec,
        data_don=iris_don,
        match_vars=match_vars,
        don_class="Species",
    )

    # Create the synthetic data set without duplicating the matching variables.
    fused_0 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
        )
    )

    # Create the synthetic data set with the matching variables duplicated.
    fused_1 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
            dup_x=True,
            match_vars=pd.Series(["Sepal.Length", "Sepal.Width"]),
        )
    )
    del fused_0, fused_1
def load_data(name, package=None):
    """Load the R dataset *name* and return it passed through ``convert_robj``.

    Args:
        name: Dataset name handed to R's ``data()``.
        package: Optional R package imported before loading the dataset.
    """
    if package:
        importr(package)

    r.data(name)
    dataset = r[name]
    return convert_robj(dataset)
from rpy2.robjects import r
from rpy2.robjects.packages import importr

vars = importr('vars')
urca = importr('urca')


class RVAR(object):
    """Empty class; no behaviour is defined on it yet."""


if __name__ == '__main__':
    # Load the "Canada" data set into the R session and keep a handle to it.
    r.data("Canada")
    can = r['Canada']

    # Fit a VAR with p=1 and type="both" on the R side, then pull out its
    # coefficients and the entry named 'e'.
    p1ct = r('p1ct <- VAR(Canada, p=1, type="both")')
    coefs = r('coefs <- coef(p1ct)')
    ecoef = coefs.rx2('e')

    # R snippets kept for reference:
    # summary(Canada)
    # plot(Canada, nc=2, xlab="")
    # adf1 <- summary(ur.df(Canada[, "prod"], type = "trend", lags = 2))
    # adf1
    # adf2 <- summary(ur.df(diff(Canada[, "prod"]), type = "drift", lags = 1))
    # adf2
    # VARselect(Canada, lag.max = 8, type = "both")
# %% {"slideshow": {"slide_type": "fragment"}}
from rpy2.robjects import r

# Evaluate an R expression and inspect the Python type of the result.
x = r('c(1,2,3,4)')
type(x)

# %% {"slideshow": {"slide_type": "fragment"}}
v = r('seq(1:10)')
v

# %% {"slideshow": {"slide_type": "slide"}}
from rpy2.robjects import pandas2ri

pandas2ri.activate()
# Attach missMDA in the R session and load its `orange` data set.
r.library('missMDA')
r.data('orange')
orange = r('orange')

# %% {"slideshow": {"slide_type": "slide"}}
orange

# %% {"slideshow": {"slide_type": "slide"}, "language": "R"}
# library('missMDA')
# data(orange)
# estim_ncpPCA(orange)

# %% {"slideshow": {"slide_type": "fragment"}}
from rpy2.robjects.packages import importr

# Call imputePCA through the imported package wrapper instead of an R string.
miss_mda = importr('missMDA')
res = miss_mda.imputePCA(orange, ncp=2)
from numpy import *
import scipy as sp
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import rpy2
from rpy2.robjects import pandas2ri

pandas2ri.activate()

# Build a two-element vector `x` inside the R session, one statement at a time.
for r_statement in ('x=c()', 'x[1]=22', 'x[2]=44'):
    ro.r(r_statement)
print(ro.r('x'))

# mtcars: load the data set, first through an R string, then through r.data().
ro.r('data(mtcars)')
from rpy2.robjects import r

r.data('mtcars')
print(r['mtcars'].head())

# Fit a linear model of mpg on wt and cyl in R and print its summary.
ro.r('''fit=lm(mpg ~ wt + cyl, data=mtcars)''')
print(ro.r('summary(fit)'))
def load_data() -> pd.DataFrame:
    """Load the ``Prostate`` dataset from the R package ``lasso2``.

    Returns:
        The result of converting the R object with ``pandas2ri.ri2py``.
    """
    importr('lasso2')
    r.data('Prostate')
    return pandas2ri.ri2py(r.Prostate)
from rpy2.robjects import r
from rpy2.robjects.packages import importr

vars = importr('vars')
urca = importr('urca')


class RVAR(object):
    """Empty class; no behaviour is defined on it yet."""


if __name__ == '__main__':
    # Load the "Canada" data set into the R session and keep a handle to it.
    r.data("Canada")
    can = r['Canada']

    # Fit a VAR with p=1 and type="both" on the R side, then pull out its
    # coefficients and the entry named 'e'.
    p1ct = r('p1ct <- VAR(Canada, p=1, type="both")')
    coefs = r('coefs <- coef(p1ct)')
    ecoef = coefs.rx2('e')

    # R snippets kept for reference:
    # summary(Canada)
    # plot(Canada, nc=2, xlab="")
    # adf1 <- summary(ur.df(Canada[, "prod"], type = "trend", lags = 2))
    # adf1
    # adf2 <- summary(ur.df(diff(Canada[, "prod"]), type = "drift", lags = 1))
    # adf2
    # VARselect(Canada, lag.max = 8, type = "both")
    # Canada <- Canada[, c("prod", "e", "U", "rw")]
# -*- coding: utf-8 -*-
"""
Created on Thu Sep  6 12:18:34 2018

@author: LatizeExpress
"""
import pandas as pd
import numpy
import matplotlib
import matplotlib.pyplot as plt
import rpy2.robjects as robjects
from rpy2.robjects import r, pandas2ri

pandas2ri.activate()

path = "E:/Working/INNERJOIN_WORKING/RefinedData_groupby5.rds"
#path ="D:/NTUC/raw_data/rds_source/TransactionalData/customer1.rds"

# The former `r.data(path)` call was removed: R's data() loads named packaged
# data sets, not .rds files, so it contributed nothing to `df`.  readRDS below
# does the actual loading.
readRDS = robjects.r['readRDS']
df = readRDS(path)
df = pandas2ri.ri2py(df)

# Write the converted frame to CSV.
df.to_csv("income_groupby1.csv", index=False, encoding='utf8')
rplot = robjects.r['plot']
grdevices = importr('grDevices')

# First comparison: Deming regression on x / y, rendered to a PNG file.
result = rmcreg(x, y, method_reg="Deming", alpha=0.05)
grdevices.png(file="MethodComparison.png", width=512, height=512)
p = rplot(result)
grdevices.dev_off()

data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
#print(df.head())

r.data('iris')
pandas2ri.activate()
#r_dataframe = pandas2ri.py2ri(df)
grdevices = importr('grDevices')

# Second comparison: "PaBa" regression on two iris columns fetched from R.
petal_length = r['iris']["Petal.Length"]
petal_width = r['iris']["Petal.Width"]
result2 = rmcreg(petal_length, petal_width, method_reg="PaBa", alpha=0.05)
grdevices.png(file="iris.png", width=512, height=512)
p = rplot(result2, identity=False)
grdevices.dev_off()

print(r['iris']["Petal.Length"])