Ejemplo n.º 1
0
def zscrp(csvs):
    """Add a positive-control z-score column to each CSV file, in place.

    Every path in ``csvs`` is read with pandas, passed to the R helper
    ``calc.zpos`` and written back to the same path without the index.
    ``calc.zpos`` groups the rows by ``rep`` and ``plt_nm``, takes the mean
    and standard deviation of ``area`` over the rows whose ``condt`` equals
    ``"+ctrl"``, and adds ``zpos = (area - ctrlmean) / ctrlstdev``.

    Args:
        csvs: Iterable of CSV file paths. Each file is overwritten.

    Note:
        The R code uses ``%>%`` and dplyr verbs, and a pandas frame is handed
        straight to R; this assumes the R packages are attached and the
        pandas conversion is active elsewhere -- TODO confirm.
    """
    # The helper does not depend on the file being processed, so define it
    # once instead of on every iteration.
    robjects.r('''
         calc.zpos <- function(x) {
           ctrl.avgs <- x %>%
             group_by(rep, plt_nm) %>%
             filter(condt == "+ctrl") %>%
             dplyr::summarise(ctrlmean = mean(area),
                              ctrlstdev = sd(area))
           x.zpos <- left_join(x, ctrl.avgs, by = c("rep", "plt_nm")) %>%
             mutate(zpos = (area - ctrlmean)/ctrlstdev) %>%
             select(-c(ctrlmean, ctrlstdev))

           return(x.zpos)
         }
         ''')
    calc_zpos = robjects.globalenv['calc.zpos']

    for path in csvs:
        frame = pd.read_csv(path)
        scored = calc_zpos(frame)
        # The former r.data('res') call was dropped: it asked R for a packaged
        # dataset literally named "res", unrelated to the Python result.
        pd_df = pandas2ri.ri2py_dataframe(scored)
        pd_df.to_csv(path, index=False)
Ejemplo n.º 2
0
def load_data() -> pd.DataFrame:
    """Load the ``chredlin`` dataset of the R package ``faraway``.

    Returns:
        The dataset converted with ``pandas2ri.ri2py``, indexed by the R row
        names, with an added ``log_income`` column holding ``np.log`` of the
        ``income`` column.
    """
    importr('faraway')
    r.data('chredlin')

    frame = pandas2ri.ri2py(r.chredlin)
    row_labels = pandas2ri.ri2py(r.chredlin.rownames)
    frame = frame.set_index(row_labels)

    frame['log_income'] = np.log(frame['income'])
    return frame
Ejemplo n.º 3
0
def load_R_dataset(name, package=None, convert=True):
    """Fetch the R dataset called ``name``.

    Args:
        name: Dataset name passed to R's ``data()`` and used for the lookup.
        package: Optional R package imported (via ``importr``) beforehand.
        convert: When true, return ``_convert_robj`` applied to the R
            object; otherwise return the R object itself.
    """
    if package:
        importr(package)

    r.data(name)
    dataset = r[name]
    return _convert_robj(dataset) if convert else dataset
Ejemplo n.º 4
0
def load_data(name, package=None, convert=True):
    """Load the R dataset called ``name``.

    Args:
        name: Dataset name passed to R's ``data()`` and used for the lookup.
        package: Optional R package to import first so its datasets are
            visible to ``data()``.
        convert: When true, return ``convert_robj`` applied to the R object;
            otherwise return the R object unchanged.
    """
    if package:
        # Imported only for its side effect; the handle is not needed.
        importr(package)

    r.data(name)
    robj = r[name]

    if convert:
        return convert_robj(robj)
    return robj
Ejemplo n.º 5
0
def transform(df, df_len):
    """Return the first ``df_len`` rows of ``df`` with a leading ``ID`` column.

    The ``ID`` column holds the index values of the selected rows and is
    placed before the original columns.

    Args:
        df: Source pandas DataFrame.
        df_len: Number of leading rows to keep (passed to ``DataFrame.head``).

    Returns:
        A new DataFrame; ``df`` itself is left untouched.
    """
    # Kept from the original: turns on rpy2's global pandas conversion, which
    # callers may rely on.
    pandas2ri.activate()

    # Work on a copy so that adding a column never targets a slice of ``df``
    # (avoids SettingWithCopyWarning).
    pandas_df = df.head(df_len).copy()

    col_name = ["ID"] + pandas_df.columns.to_list()
    pandas_df["ID"] = pandas_df.index

    # reindex returns a new frame; the original also called it once and threw
    # the result away, which has been removed.
    return pandas_df.reindex(columns=col_name)
def test_nnd_hotdeck_using_rpy2():
    """Smoke-test ``nnd_hotdeck_using_rpy2`` on R's ``iris`` data.

    Builds a recipient frame (without "Petal.Width") and a donor frame
    (without "Petal.Length") from ``iris`` and runs the matching. Nothing is
    asserted beyond the call returning two values. The test prints a message
    and returns early when ``rpy2`` is None.
    """
    if rpy2 is None:
        print('rpy2 is absent: skipping test')
        return

    r.data('iris')

    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])

    iris = r['iris']

    # Recipient data.frame: rows 1-15, 51-65 and 101-115.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: all remaining rows.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted this column from iris_rec, which stripped
    # the recipient of both petal columns and left the donor with all of them.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species"
    # in common.
    # "Petal.Length" is available only in iris_rec
    # "Petal.Width" is available only in iris_don

    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".
    x, y = nnd_hotdeck_using_rpy2(
        receiver=iris_rec,
        donor=iris_don,
        donor_classes='Species',
        z_variables="Petal.Width",
        matching_variables=["Sepal.Length", "Sepal.Width"],
    )
def test_nnd_hotdeck_using_rpy2():
    """Smoke-test ``nnd_hotdeck_using_rpy2`` on R's ``iris`` data.

    Builds a recipient frame (without "Petal.Width") and a donor frame
    (without "Petal.Length") from ``iris`` and runs the matching. Nothing is
    asserted beyond the call returning two values. The test prints a message
    and returns early when ``rpy2`` is None.
    """
    if rpy2 is None:
        print('rpy2 is absent: skipping test')
        return

    r.data('iris')

    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])

    iris = r['iris']

    # Recipient data.frame: rows 1-15, 51-65 and 101-115.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: all remaining rows.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted this column from iris_rec, which stripped
    # the recipient of both petal columns and left the donor with all of them.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species"
    # in common.
    # "Petal.Length" is available only in iris_rec
    # "Petal.Width" is available only in iris_don

    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".
    x, y = nnd_hotdeck_using_rpy2(
        receiver=iris_rec,
        donor=iris_don,
        donor_classes='Species',
        z_variables="Petal.Width",
        matching_variables=["Sepal.Length", "Sepal.Width"],
    )
Ejemplo n.º 8
0
def summr(csv):
    """Write per-group summary statistics for one CSV file.

    The file is read with pandas and summarised in R, grouped by ``rep``,
    ``plt_nm`` and ``condt``: mean/median/sd of ``O.D.`` and mean/sd of
    ``zneg`` and ``zpos``. The result is written next to the input as
    ``<stem>_sumfile.csv``, where ``<stem>`` is the part of the base name
    before its first ``.``.

    Args:
        csv: Path of the input CSV file.

    Note:
        Assumes dplyr / ``%>%`` are attached in the R session and the pandas
        conversion is active -- TODO confirm.
    """
    stem = os.path.basename(csv).split('.')[0]
    out_dir = os.path.dirname(csv)
    frame = pd.read_csv(csv)

    robjects.r('''
    summr <- function(z) {
    foo <- z %>% group_by(rep, plt_nm, condt) %>%
    dplyr::summarise(mean_OD = mean(`O.D.`), median_OD = median(`O.D.`), sd_OD = sd(`O.D.`), mean_zneg = mean(zneg), sd_zneg = sd(zneg), mean_zpos = mean(zpos), sd_zpos = sd(zpos))
    }
    ''')
    r_summr = robjects.globalenv['summr']
    summary = r_summr(frame)
    # The former r.data('raz') call was dropped: it asked R for a packaged
    # dataset literally named "raz", unrelated to the Python result.

    pd_df = pandas2ri.ri2py_dataframe(summary)
    # os.path.join keeps the output beside the input even when ``csv`` has no
    # directory part; plain '/' concatenation pointed at the filesystem root.
    pd_df.to_csv(os.path.join(out_dir, stem + '_sumfile.csv'), index=False)
Ejemplo n.º 9
0
def summr_ov_cd(csv):
    """Write per-condition summary statistics for one CSV file.

    The file is read with pandas and summarised in R, grouped by ``condt``
    only: mean/median/sd of ``area`` and mean/sd of ``zneg`` and ``zpos``.
    The result is written next to the input as ``<stem>_sum_ov_condt.csv``,
    where ``<stem>`` is the part of the base name before its first ``.``.

    Args:
        csv: Path of the input CSV file.

    Note:
        Assumes dplyr / ``%>%`` are attached in the R session and the pandas
        conversion is active -- TODO confirm.
    """
    stem = os.path.basename(csv).split('.')[0]
    out_dir = os.path.dirname(csv)
    frame = pd.read_csv(csv)

    robjects.r('''
    summr_rp <- function(z) {
    foo <- z %>% group_by(condt) %>%
    dplyr::summarise(mean_area = mean(area), median_area = median(area), sd_area = sd(area), mean_zneg = mean(zneg), sd_zneg = sd(zneg), mean_zpos = mean(zpos), sd_zpos = sd(zpos))
    }
    ''')
    r_summr_rp = robjects.globalenv['summr_rp']
    summary = r_summr_rp(frame)
    # The former r.data('raz') call was dropped: it asked R for a packaged
    # dataset literally named "raz", unrelated to the Python result.

    pd_df = pandas2ri.ri2py_dataframe(summary)
    # os.path.join keeps the output beside the input even when ``csv`` has no
    # directory part; plain '/' concatenation pointed at the filesystem root.
    pd_df.to_csv(os.path.join(out_dir, stem + '_sum_ov_condt.csv'), index=False)
Ejemplo n.º 10
0
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from rpy2.robjects.packages import importr
from rpy2.robjects import r, pandas2ri

import math

# Importing the dataset

# Turn on rpy2's pandas conversion before fetching the R data frame.
pandas2ri.activate()
utils = importr("utils")
AppliedPredictiveModeling = importr("AppliedPredictiveModeling", data=True)

r.data("segmentationOriginal")
dataset = r["segmentationOriginal"]
dataset = pd.get_dummies(dataset)

# Features: every column except the first and the last three.
# Target: the last column.
X = dataset.iloc[:, 1:-3].values
y = dataset.iloc[:, -1].values

# See the frequency of each class.
# pd.value_counts() is deprecated; Series.value_counts() is the supported
# spelling and yields the same table.
Label = pd.Series(y).value_counts().to_frame().reset_index()
print(Label)

# SVM models

ModelLinear = SVM(X, y)
ModelLinear.Linearparam()
Ejemplo n.º 11
0
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 10 12:06:35 2015

@author: Kaddabadda
"""
import pandas as pd

from rpy2.robjects import pandas2ri
pandas2ri.activate()

from rpy2.robjects import r
r.data('prediction')
# The dataset is looked up by its name as a string. The original indexed with
# the bare name `prediction`, which is not defined in Python (NameError).
df_iris = pandas2ri.ri2py(r['prediction'])



from rpy2 import robjects

# NOTE(review): Rdir already ends with the file name "Tx20130227.rda", so
# appending f below repeats that name in obj -- confirm the intended path.
Rdir  = "I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda"
f = r'/Tx20130227.rda'

obj = Rdir + f

# A 2x3 matrix built in R; m is overwritten by the next statement.
m=robjects.r('matrix(1:6, nrow=2, ncol=3)')

# NOTE(review): obj is a file path, not R source; presumably the intent was
# to load the .rda file -- confirm before relying on this call.
m = robjects.reval(obj)

rdf = 'I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda'

# NOTE(review): this passes the path string itself to ri2py (not an R object
# loaded from the file) and discards the return value.
pandas2ri.ri2py(rdf)
Ejemplo n.º 12
0
"""

#Data sets
from rpy2.robjects import r, pandas2ri
def data1(name):
    """Return the R object called ``name`` converted with ``pandas2ri.ri2py``."""
    r_object = r[name]
    return pandas2ri.ri2py(r_object)

# Interactive-style walkthrough: bare expressions such as `df` or
# `df.describe()` only show a value in a REPL/notebook; run as a script they
# are evaluated and discarded.
df = data1('iris')
df.describe()

df = data1('mtcars')
df
df.describe()
# Same dataset fetched a second time under another name.
df2 = data1('mtcars')
df2.describe()
# Calling data() with no arguments: prints whatever R returns for it.
print(r.data())
# Here the dataset is obtained by evaluating its name as R code.
df3=pandas2ri.ri2py(r('women'))
df3

# Alternative source of datasets: install with `pip install pydataset`,
# then load any dataset it ships (around 757 according to the original note).

from pydataset import data

titanic = data('titanic')
titanic

from sklearn import datasets

# NOTE(review): duplicate of the import just above.
from sklearn import datasets
iris = datasets.load_iris()
def test_reproduction():
    """Reproduce the NND hot-deck example from the StatMatch documentation.

    Builds a recipient frame (without "Petal.Width") and a donor frame
    (without "Petal.Length") from R's ``iris`` data, runs
    ``StatMatch.NND_hotdeck`` and then ``StatMatch.create_fused`` both
    without and with duplication of the matching variables. Nothing is
    asserted; the test only checks that the calls complete. It returns
    immediately when ``rpy2`` is None.
    """
    if rpy2 is None:
        return

    # Reproducing examples from the StatMatch documentation
    # https://cran.r-project.org/web/packages/StatMatch/StatMatch.pdf

    r.data('iris')

    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])

    iris = r['iris']

    # Recipient data.frame: rows 1-15, 51-65 and 101-115.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: all remaining rows.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted this column from iris_rec, which stripped
    # the recipient of both petal columns and left the donor with all of them.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species"
    # in common.
    # "Petal.Length" is available only in iris_rec
    # "Petal.Width" is available only in iris_don
    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".

    StatMatch = importr("StatMatch")
    match_vars = pd.Series(["Sepal.Length", "Sepal.Width"])

    out_NND = StatMatch.NND_hotdeck(
        data_rec=iris_rec,
        data_don=iris_don,
        match_vars=match_vars,
        don_class="Species",
    )

    # Create the synthetic data set, without the
    # duplication of the matching variables.
    fused_0 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
        )
    )

    # Create the synthetic data set, with the "duplication"
    # of the matching variables.
    fused_1 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
            dup_x=True,
            match_vars=match_vars,
        )
    )
    del fused_0, fused_1
Ejemplo n.º 14
0
def load_data(name, package=None):
    """Load the R dataset called ``name`` and return it via ``convert_robj``.

    Args:
        name: Dataset name passed to R's ``data()`` and used for the lookup.
        package: Optional R package to import first so its datasets are
            visible to ``data()``.
    """
    if package:
        # Imported only for its side effect; the handle is not needed.
        importr(package)

    r.data(name)
    return convert_robj(r[name])
Ejemplo n.º 15
0
from rpy2.robjects import r
from rpy2.robjects.packages import importr

# R packages loaded through importr and bound to module-level names.
# NOTE(review): the name `vars` shadows Python's built-in vars() here.
vars = importr('vars')
urca = importr('urca')


class RVAR(object):
    """Empty placeholder class; it defines no attributes or methods."""


if __name__ == '__main__':
    # Load the "Canada" dataset into the R session and fetch it.
    r.data("Canada")
    can = r['Canada']
    # Fit VAR(Canada, p=1, type="both") inside R, then extract coefficients.
    # The R-side assignments keep p1ct and coefs defined in the R session so
    # the second call can refer to p1ct.
    p1ct = r('p1ct <- VAR(Canada, p=1, type="both")')
    coefs = r('coefs <- coef(p1ct)')

    # Element named 'e' of the coefficient object.
    ecoef = coefs.rx2('e')

    # The remaining lines are R snippets kept for reference (not executed).

    # summary(Canada)

    # plot(Canada, nc=2, xlab="")

    # adf1 <- summary(ur.df(Canada[, "prod"], type = "trend", lags = 2))
    # adf1

    # adf2 <- summary(ur.df(diff(Canada[, "prod"]), type = "drift", lags = 1))
    # adf2

    # VARselect(Canada, lag.max = 8, type = "both")
Ejemplo n.º 16
0
# %% {"slideshow": {"slide_type": "fragment"}}
from rpy2.robjects import r
# Evaluate an R expression given as a string; the bare `type(x)` only shows
# a value when the cell is run interactively.
x = r('c(1,2,3,4)')
type(x)

# %% {"slideshow": {"slide_type": "fragment"}}
v = r('seq(1:10)')
v

# %% {"slideshow": {"slide_type": "slide"}}
from rpy2.robjects import pandas2ri

# Activate the pandas conversion, attach missMDA in R and load its
# 'orange' dataset, then fetch it by evaluating its name.
pandas2ri.activate()
r.library('missMDA')
r.data('orange')
orange = r('orange')

# %% {"slideshow": {"slide_type": "slide"}}
orange

# %% {"slideshow": {"slide_type": "slide"}, "language": "R"}
# library('missMDA')
# data(orange)
# estim_ncpPCA(orange)

# %% {"slideshow": {"slide_type": "fragment"}}
from rpy2.robjects.packages import importr

# Same package again, this time as a Python-side handle whose functions can
# be called directly.
miss_mda = importr('missMDA')
res = miss_mda.imputePCA(orange,ncp=2)
Ejemplo n.º 17
0
from numpy import *
import scipy as sp
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import rpy2

from rpy2.robjects import pandas2ri

pandas2ri.activate()

# Build a small R vector element by element, then print it.
ro.r('x=c()')
ro.r('x[1]=22')
ro.r('x[2]=44')
print(ro.r('x'))

# mtcars

# Load the dataset by evaluating R source.
ro.r('data(mtcars)')

from rpy2.robjects import r

# NOTE(review): loads mtcars a second time, now through the `r` object;
# looks redundant with the ro.r('data(mtcars)') call above.
r.data('mtcars')
print(r['mtcars'].head())

# Fit a linear model on mtcars inside R (mpg explained by wt and cyl) and
# print R's summary of the fit.
ro.r('''fit=lm(mpg ~ wt + cyl, data=mtcars)''')
print(ro.r('summary(fit)'))
Ejemplo n.º 18
0
def load_data() -> pd.DataFrame:
    """Load the ``Prostate`` dataset of the R package ``lasso2``.

    Returns:
        The dataset converted with ``pandas2ri.ri2py``.
    """
    importr('lasso2')
    r.data('Prostate')
    return pandas2ri.ri2py(r.Prostate)
Ejemplo n.º 19
0
from rpy2.robjects import r
from rpy2.robjects.packages import importr

# R packages loaded through importr and bound to module-level names.
# NOTE(review): the name `vars` shadows Python's built-in vars() here.
vars = importr('vars')
urca = importr('urca')

class RVAR(object):
    """Empty placeholder class; it defines no attributes or methods."""

if __name__ == '__main__':
    # Load the "Canada" dataset into the R session and fetch it.
    r.data("Canada")
    can = r['Canada']
    # Fit VAR(Canada, p=1, type="both") inside R, then extract coefficients.
    # The R-side assignments keep p1ct and coefs defined in the R session so
    # the second call can refer to p1ct.
    p1ct = r('p1ct <- VAR(Canada, p=1, type="both")')
    coefs = r('coefs <- coef(p1ct)')

    # Element named 'e' of the coefficient object.
    ecoef = coefs.rx2('e')

    # The remaining lines are R snippets kept for reference (not executed).

    # summary(Canada)

    # plot(Canada, nc=2, xlab="")

    # adf1 <- summary(ur.df(Canada[, "prod"], type = "trend", lags = 2))
    # adf1

    # adf2 <- summary(ur.df(diff(Canada[, "prod"]), type = "drift", lags = 1))
    # adf2

    # VARselect(Canada, lag.max = 8, type = "both")

    # Canada <- Canada[, c("prod", "e", "U", "rw")]
def test_reproduction():
    """Reproduce the NND hot-deck example from the StatMatch documentation.

    Builds a recipient frame (without "Petal.Width") and a donor frame
    (without "Petal.Length") from R's ``iris`` data, runs
    ``StatMatch.NND_hotdeck`` and then ``StatMatch.create_fused`` both
    without and with duplication of the matching variables. Nothing is
    asserted; the test only checks that the calls complete. It returns
    immediately when ``rpy2`` is None.
    """
    if rpy2 is None:
        return

    # Reproducing examples from the StatMatch documentation
    # https://cran.r-project.org/web/packages/StatMatch/StatMatch.pdf

    r.data('iris')

    pandas2ri.activate()
    # or explicitly do:
    # iris = pandas2ri.ri2py(r['iris'])

    iris = r['iris']

    # Recipient data.frame: rows 1-15, 51-65 and 101-115.
    iris_rec = pd.concat([
        iris.loc[1:15],
        iris.loc[51:65],
        iris.loc[101:115],
    ])
    del iris_rec["Petal.Width"]

    # Donor data.frame: all remaining rows.
    iris_don = pd.concat([
        iris.loc[16:50],
        iris.loc[66:100],
        iris.loc[116:150],
    ])
    # Fixed: the original deleted this column from iris_rec, which stripped
    # the recipient of both petal columns and left the donor with all of them.
    del iris_don["Petal.Length"]

    # Now iris_rec and iris_don have the variables
    # "Sepal.Length", "Sepal.Width" and "Species"
    # in common.
    # "Petal.Length" is available only in iris_rec
    # "Petal.Width" is available only in iris_don
    # Find the closest donors using NND hot deck;
    # distances are computed on "Sepal.Length" and "Sepal.Width".

    StatMatch = importr("StatMatch")
    match_vars = pd.Series(["Sepal.Length", "Sepal.Width"])

    out_NND = StatMatch.NND_hotdeck(
        data_rec=iris_rec,
        data_don=iris_don,
        match_vars=match_vars,
        don_class="Species",
    )

    # Create the synthetic data set, without the
    # duplication of the matching variables.
    fused_0 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
        )
    )

    # Create the synthetic data set, with the "duplication"
    # of the matching variables.
    fused_1 = pandas2ri.ri2py(
        StatMatch.create_fused(
            data_rec=iris_rec,
            data_don=iris_don,
            mtc_ids=out_NND[0],
            z_vars="Petal.Width",
            dup_x=True,
            match_vars=match_vars,
        )
    )
    del fused_0, fused_1
Ejemplo n.º 21
0
# -*- coding: utf-8 -*-
"""
Created on Thu Sep  6 12:18:34 2018

@author: LatizeExpress
"""
import pandas as pd
import numpy
import matplotlib
import matplotlib.pyplot as plt
import rpy2.robjects as robjects
from rpy2.robjects import r, pandas2ri

# Turn on rpy2's pandas conversion before reading the file.
pandas2ri.activate()
path = "E:/Working/INNERJOIN_WORKING/RefinedData_groupby5.rds"
#path ="D:/NTUC/raw_data/rds_source/TransactionalData/customer1.rds"

# An .rds file is read with readRDS. The former r.data(path) call was
# removed: R's data() looks up packaged datasets by name and cannot load a
# file path.
readRDS = robjects.r['readRDS']
df = readRDS(path)
df = pandas2ri.ri2py(df)

# Write the result to a CSV file.
df.to_csv("income_groupby1.csv", index=False, encoding='utf8')
Ejemplo n.º 22
0
rplot = robjects.r['plot']

grdevices = importr('grDevices')

# First comparison: Deming regression on x and y, plotted to a PNG file.
result = rmcreg(x, y, method_reg="Deming", alpha=0.05)
grdevices.png(file="MethodComparison.png", width=512, height=512)
try:
    p = rplot(result)
finally:
    # Always close the graphics device, even if plotting raises, so the file
    # is finalised and no device is left open in the R session.
    grdevices.dev_off()

data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)

r.data('iris')
pandas2ri.activate()

# Second comparison: "PaBa" method on two iris columns taken from R's copy
# of the dataset. grdevices is reused; re-importing it was redundant.
result2 = rmcreg(r['iris']["Petal.Length"],
                 r['iris']["Petal.Width"],
                 method_reg="PaBa",
                 alpha=0.05)
grdevices.png(file="iris.png", width=512, height=512)
try:
    p = rplot(result2, identity=False)
finally:
    grdevices.dev_off()

print(r['iris']["Petal.Length"])