Esempio n. 1
0
    def run(self, data_object):
        """Read a canned dataset from R into a pandas DataFrame.

        The dataset name comes from ``self.node_config["dataset"]``; it is
        loaded through R's ``data()`` mechanism, coerced to a data.frame on
        the R side, then converted column-by-column with ``list()``.

        Returns:
            data_object (DataObject): DataObject instance with the new frame added
            terminate (bool): should we terminate the DAG? True when the
                resulting DataFrame is empty.

        """
        dataset = self.node_config["dataset"]
        logging.info("Reading {} from R".format(dataset))

        try:
            from rpy2.robjects.packages import importr, data
        except ImportError:  # pragma: no cover
            raise ImportError(
                "This example needs Rpy2."
                "Please refer to the R requirements in the README"
            )
        datasets = importr("datasets")
        # data(...).fetch(...) loads the named dataset into R's environment;
        # only that side effect matters for the as.data.frame() call below,
        # so the returned environment is deliberately not kept (the original
        # bound it to an unused `r_env`).
        data(datasets).fetch(dataset)

        import rpy2.robjects as robjects

        # why we coerce with as.data.frame:
        # > data(euro)
        # > euro
        # ATS         BEF         DEM ...          (a named vector, not a frame)
        # > as.data.frame(euro)
        #        euro
        # ATS   13.760300
        # BEF   40.339900
        # DEM    1.955830
        #
        # Renamed from `data` to `r_frame`: the original shadowed the rpy2
        # `data` function imported above.
        r_frame = robjects.r("as.data.frame(%s)" % dataset)

        # at time of writing, rpy2's R dataframe to pandas dataframe was not
        # fully supported. However, as python list() seems to work for
        # FloatVector, StrVector, and FactorVector, let's use it
        from rpy2.robjects import r

        colnames = r.colnames(r_frame)
        pandas_data = {}
        # convert each column of the R dataframe in turn
        for i, colname in enumerate(colnames):
            pandas_data[colname] = list(r_frame[i])
        # Some datasets have rownames that should really be an ID column
        # (e.g. mtcars, where rownames are the car names). Best we can do:
        # pull them out as an additional column for every dataset.
        pandas_data["row_names"] = list(r_frame.rownames)

        df = pd.DataFrame(pandas_data)
        data_object.add(self, df)
        terminate = df.empty
        return data_object, terminate
Esempio n. 2
0
    def _read_data(self):
        """Load the 'CreditCard' dataset from R's AER package into ``self.df``.

        Activates rpy2's pandas conversion layer, imports AER, fetches the
        dataset environment, and converts the R data.frame to pandas.
        """
        pandas2ri.activate()
        aer_pkg = importr('AER')
        fetched = data(aer_pkg).fetch('CreditCard')
        self.df = pandas2ri.ri2py(fetched['CreditCard'])
Esempio n. 3
0
def import_useeior_mastercrosswalk():
    """Load useeior's MasterCrosswalk2012, which links BEA data to NAICS.

    :return: the MasterCrosswalk2012 table fetched from the useeior R package
    """
    pandas2ri.activate()
    # useeior is an R package; its bundled datasets are exposed via data()
    useeior_pkg = packages.importr('useeior')
    crosswalk_env = packages.data(useeior_pkg)
    return crosswalk_env.fetch('MasterCrosswalk2012')['MasterCrosswalk2012']
Esempio n. 4
0
def import_useeior_mastercrosswalk():
    """Load useeior's MasterCrosswalk2012 (BEA <-> NAICS) and save it as CSV.

    :return: None; writes datapath + "NAICS_to_BEA_Crosswalk.csv"
    """
    pandas2ri.activate()
    # useeior is an R package; its bundled datasets are exposed via data()
    useeior_pkg = packages.importr('useeior')
    fetched = packages.data(useeior_pkg).fetch('MasterCrosswalk2012')
    crosswalk = fetched['MasterCrosswalk2012']

    # persist as csv, without the pandas index column
    crosswalk.to_csv(datapath + "NAICS_to_BEA_Crosswalk.csv", index=False)
Esempio n. 5
0
  def build(self):
    """Build the plot for this spec: load the data (CSV file or canned R
    dataset), map the requested aesthetics, render a ggplot2 scatter plot
    to the PNG at self.sfilename, and return self.cfilename."""
    ##print grdevices.palette()
    if self.spec['type'] == 'csv' :
        df = robjects.DataFrame.from_csvfile('./data/' + self.spec['name'] + '.csv')
    else :
        # Fixed: Python 2 print statement -> print() call, so this file can
        # at least be parsed by Python 3.
        print(type(self.spec['name']))
        # NOTE(review): under Python 3, encode() yields bytes while fetch()
        # likely expects str -- confirm if this path runs on Python 3.
        samplename = self.spec['name'].encode('ascii','ignore')
        df = data(datasets).fetch(samplename)[samplename]

    #print df
    grdevices.png(file=self.sfilename, width=700, height=400)
    pp = ggplot2.ggplot(df)

    # Collect only the aesthetics the spec actually provides (empty string
    # means "not requested").
    ppargs = {}

    if len(self.spec['viz[xaxis]']) != 0 :
        ppargs['x'] = self.spec['viz[xaxis]']

    if len(self.spec['viz[yaxis]']) != 0 :
        ppargs['y'] = self.spec['viz[yaxis]']

    if len(self.spec['viz[color]']) != 0 :
        ppargs['colour'] = self.spec['viz[color]']

    if len(self.spec['viz[shape]']) != 0 :
        ppargs['shape'] = self.spec['viz[shape]']

    # Optional extra layers -- parsed but currently unused below.
    player1 = self.spec['viz[layer1]'] if len(self.spec['viz[layer1]']) != 0 else None
    player2 = self.spec['viz[layer2]'] if len(self.spec['viz[layer2]']) != 0 else None

    pp = pp + ggplot2.aes_string(**ppargs)
    ##pp = pp + ggplot2.geom_bar(stat="identity", fill="white", colour="darkgreen")
    ##pp = pp + ggplot2.scale_fill_brewer(palette="blues")
    pp = pp + ggplot2.geom_point(size=5)
    pp.plot()
    grdevices.dev_off()
    return self.cfilename
Esempio n. 6
0
import pytest

from rpy2.robjects import packages

try:
    from rpy2.robjects.lib import tidyr
    has_tidyr = True
    msg = ''
except packages.PackageNotInstalledError as error:
    has_tidyr = False
    msg = str(error)

from rpy2 import rinterface
from rpy2.robjects import vectors
datasets = packages.importr('datasets')
mtcars = packages.data(datasets).fetch('mtcars')['mtcars']


@pytest.mark.skipif(not has_tidyr, reason=msg)
class TestTidyr(object):
    def test_dataframe(self):
        dataf = tidyr.DataFrame({
            'x':
            vectors.IntVector((1, 2, 3, 4, 5)),
            'labels':
            vectors.StrVector(('a', 'b', 'b', 'b', 'a'))
        })
        assert isinstance(dataf, tidyr.DataFrame)
        assert sorted(['x', 'labels']) == sorted(list(dataf.colnames))

    def test_spread(self):
Esempio n. 7
0
    W_3 = cur_param[(2*J+J*K+K):(2*J+J*K+2*K)]
    b_3 = cur_param[2*J+J*K+2*K]
    nn = neural_net(X,W_1,b_1,W_2,b_2,W_3,b_3)
    est_q[i,] = np.reshape((nn+1)*(max_y-min_y)/2+min_y,(len(X),))

# Average the per-repetition quantile estimates into a single curve
est_q = np.mean(est_q, axis = 0)

# Overlay the true quantile curve and its estimate on the sample points
plt.plot(X_s,y_s,'k.')
plt.plot(X_s,q_true,'r-',label='True')
plt.plot(X_s,est_q,'b-',label='Estimate')
plt.legend()

# Pull the motorcycle-crash dataset (mcycle) from R's MASS package
utils = importr("utils")
utils.install_packages("MASS")
MASS = importr("MASS")
motor = data(MASS).fetch('mcycle')['mcycle']
# NOTE(review): pandas2ri.ri2py was removed in rpy2 3.x -- this requires rpy2 2.x
motor = pandas2ri.ri2py(motor)

plt.plot('times', 'accel', '.', data = motor)
plt.xlabel('Time')
plt.ylabel('Acceleration')

# Reshape times/accel into (n, 1) column vectors for the model code
X = np.array(motor['times'])
X = np.reshape(X,(len(X),1))
y = np.array(motor['accel'])
y = np.reshape(y,(len(y),1))

# Ranges used later for min-max scaling of inputs and targets
max_X = np.max(X)
min_X = np.min(X)
max_y = np.max(y)
min_y = np.min(y)
Esempio n. 8
0
def get_brms_data(dataset_name:str):
    """Fetch a dataset bundled with the R 'brms' package as a pandas DataFrame."""
    converter = default_converter + pandas2ri.converter + numpy2ri.converter
    with localconverter(converter):
        fetched = rpackages.data(brms).fetch(dataset_name)[dataset_name]
        return pd.DataFrame(fetched)
Esempio n. 9
0
    def __init__(self):
        """Eagerly load every canned R dataset named in ``dataset_names``.

        Each dataset is fetched from R via ``data(datasets)`` and cached in
        the per-instance ``self._data`` mapping, keyed by dataset name.
        """
        # Fixed: the original never initialized `_data` here, so it relied on
        # `_data` being defined elsewhere (likely a shared class attribute),
        # which would either raise AttributeError or share state across
        # instances. A fresh per-instance dict avoids both.
        self._data = {}
        for name in dataset_names:
            onedataset = data(datasets).fetch(name)[name]
            self._data[name] = onedataset
Esempio n. 10
0
import pytest
import pandas as pd
from anndata import AnnData
from rpy2.robjects import r
from rpy2.robjects.packages import importr, data

import anndata2ri
from anndata2ri.test_utils import conversions_rpy2py

# R packages needed to build and convert the test fixtures below
se = importr("SummarizedExperiment")
sce = importr("SingleCellExperiment")
# data(importr(...)) exposes the scRNAseq package's bundled datasets for fetch()
sc_rna_seq_data = data(importr("scRNAseq"))
# R's methods::as, bound under a Python-safe name
as_ = getattr(importr("methods"), "as")


def check_allen(adata):
    """Sanity-check the converted 'allen' dataset: exact uns keys, and the
    obs table must contain (at least) the expected QC columns."""
    expected_uns = {"SuppInfo", "which_qc"}
    required_obs = {"NREADS", "NALIGNED", "Animal.ID", "passes_qc_checks_s"}
    assert adata.uns.keys() == expected_uns
    assert set(adata.obs.keys()) > required_obs


def check_example(adata):
    """Sanity-check the converted example dataset: exactly the PCA and tSNE
    embeddings are present, and the PCA matrix is 100 cells x 5 components."""
    embeddings = set(adata.obsm.keys())
    assert embeddings == {"X_pca", "X_tsne"}
    pca = adata.obsm["X_pca"]
    assert pca.shape == (100, 5)


# Pull the 'allen' SummarizedExperiment out of the environment returned by fetch()
sumex_allen = sc_rna_seq_data.fetch("allen")["allen"]
code_example = """
local({
    ncells <- 100
Esempio n. 11
0
import pytest
from rpy2.robjects.packages import importr, data, PackageNotInstalledError

try:
    from rpy2.robjects.lib import dplyr
    has_dplyr = True
    msg = ''
except PackageNotInstalledError as error:
    has_dplyr = False
    msg = str(error)

# Load R's bundled 'datasets' package and extract the mtcars data.frame
# (fetch() returns an R environment; index it by the dataset's name)
datasets = importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']

@pytest.mark.skipif(not has_dplyr, reason=msg)
class TestDplyr(object):
    """Smoke tests for the rpy2 dplyr wrapper, driven by the mtcars dataset."""

    def test_dataframe(self):
        """Wrapping an R data.frame yields a dplyr.DataFrame instance."""
        wrapped = dplyr.DataFrame(mtcars)
        # FIXME: not testing much at the moment...
        assert isinstance(wrapped, dplyr.DataFrame)

    def test_filter_nofilter_method(self):
        """Calling .filter() with no predicate keeps every row."""
        wrapped = dplyr.DataFrame(mtcars)
        unfiltered = wrapped.filter()
        assert unfiltered.nrow == wrapped.nrow

    def test_filter_nofilter_function(self):
        """The module-level dplyr.filter() with no predicate keeps every row."""
        wrapped = dplyr.DataFrame(mtcars)
        unfiltered = dplyr.filter(wrapped)
        assert unfiltered.nrow == wrapped.nrow
Esempio n. 12
0
    @staticmethod
    def from_formula(formula,
                     data=rinterface.MissingArg,
                     family=rinterface.MissingArg,
                     subset=rinterface.MissingArg,
                     weights=rinterface.MissingArg):
        """Build an LmList by fitting the given formula.

        All keyword arguments default to R's missing-argument sentinel and
        are forwarded unchanged to the underlying lmList fit.
        """
        fit = LmList._lmfit_from_formula(formula,
                                         data=data,
                                         family=family,
                                         subset=subset,
                                         weights=weights)
        return LmList(fit)


#-- LmList-end

#-- buildLmList-begin
#-- buildLmList-begin
# Fetch lme4's sleepstudy dataset and fit reaction-time-by-day per subject
sleepstudy = data(lme4).fetch('sleepstudy')['sleepstudy']
formula = robjects.Formula('Reaction ~ Days | Subject')
lml1 = LmList.from_formula(formula, data=sleepstudy)
#-- buildLmList-end

#-- buildLmListBetterCall-begin
sleepstudy = data(lme4).fetch('sleepstudy')['sleepstudy']
formula = robjects.Formula('Reaction ~ Days | Subject')

lml1 = LmList.from_formula(formula, data=sleepstudy)
#-- buildLmListBetterCall-end
# Compatibility shim: rpy2 3.x renamed numpy2ri -> numpy2rpy and
# ri2py -> rpy2py; alias the new names to the old ones when needed.
try:
    from rpy2.robjects.numpy2ri import numpy2ri, ri2py
except ImportError:
    from rpy2.robjects.numpy2ri import numpy2rpy as numpy2ri
    from rpy2.robjects.numpy2ri import rpy2py as ri2py

#from rpy2.robjects.numpy2ri import numpy2ri, ri2py
from rpy2.robjects.packages import importr, data

import openturns as ot
# Require

# Fit a linear model on the 'savings' dataset from R's faraway package
stats = importr("stats")
faraway = importr("faraway")
savings_data = data(faraway).fetch("savings")["savings"]
#data(faraway).fetch("savings")["savings"]

# Model 1 : 2 param, non intercept
# Copy each column into the R global environment so the formula below can
# resolve sr/pop15/pop75 by name.
sr = ri2py(savings_data)["sr"]
r.assign('sr', numpy2ri(sr))
pop15 = ri2py(savings_data)["pop15"]
r.assign('pop15', numpy2ri(pop15))
pop75 = ri2py(savings_data)["pop75"]
r.assign('pop75', numpy2ri(pop75))

# '- 1' drops the intercept term
formula = Formula('sr ~ pop75 + pop15 - 1')
fit = stats.lm(formula)
summary = stats.summary_lm(fit)
"""
list(summary.names) provides
Esempio n. 14
0
from rpy2.robjects.vectors import DataFrame
from rpy2.robjects.packages import importr, data
# Basic R interop demo: summary stats, a stem-and-leaf plot, and a
# histogram + density overlay of the Old Faithful eruption data.
r_base = importr('base')

datasets = importr('datasets')
faithful_data = data(datasets).fetch('faithful')['faithful']

# Named summary statistics (Min., 1st Qu., Median, ...) of eruption durations
edsummary = r_base.summary(faithful_data.rx2("eruptions"))
for k, v in edsummary.items():
   print("%s: %.3f\n" %(k, v))

graphics = importr('graphics')

print("Stem-and-leaf plot of Old Faithful eruption duration data")
graphics.stem(faithful_data.rx2("eruptions"))

grdevices = importr('grDevices')
stats = importr('stats')
# Render the histogram + density overlay to a PNG file
grdevices.png('faithful_histogram.png', width = 733, height = 550)
ed = faithful_data.rx2("eruptions")
graphics.hist(ed, r_base.seq(1.6, 5.2, 0.2),
              prob = True, col = "lightblue",
              main = "Old Faithful eruptions", xlab = "Eruption duration (seconds)")
graphics.lines(stats.density(ed,bw=0.1), col = "orange")
graphics.rug(ed)
grdevices.dev_off()

from rpy2.robjects.vectors import FloatVector

# Keep only the "long" eruptions (duration > 3)
long_ed = FloatVector([x for x in ed if x > 3])
Esempio n. 15
0
if not have_tutorial_packages:
    # import R's utility package
    utils = importr('utils')
    # select a mirror for R packages
    utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

    # R vector of strings
    from rpy2.robjects.vectors import StrVector
    # install only the tutorial packages that are not already present
    packnames_to_install = [x for x in packnames if not isinstalled(x)]
    if len(packnames_to_install) > 0:
        utils.install_packages(StrVector(packnames_to_install))

# pi = robjects.r['pi']
# print(pi[0])

mlr = importr('mlr')
datasets = importr('datasets')
# data() loads 'iris' into an R environment; pull the data.frame out by name
iris_env = data(datasets).fetch('iris')
iris = iris_env['iris']
# print(iris)
# Drive the mlr workflow entirely on the R side: classification task,
# LDA learner, 2/3-1/3 train/test split, then train and predict.
robjects.r('task = makeClassifTask(data = iris, target = "Species")')
robjects.r('lrn = makeLearner("classif.lda")')
robjects.r('n = nrow(iris)')
robjects.r('train.set = sample(n, size = 2/3*n)')
robjects.r('test.set =setdiff(1:n, train.set)')
robjects.r('model = train(lrn, task, subset = train.set)')
robjects.r('pred = predict(model, task = task, subset = test.set)')
performance = robjects.r('performance(pred, measures = list(mmce, acc))')
# Fixed: Python 2 print statement -> print() call (a syntax error on Python 3)
print(performance)
Esempio n. 16
0
# bupaR using SVG animations.https://github.com/bupaverse/processanimateR
# Unleash the value of PROCESS MINING https://towardsdatascience.com/unleash-the-value-of-process-mining-4e3b5af4e9d8
# https://www.win.tue.nl/bpi/dorky.php?id=2012:challenge
# https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0207806

import rpy2.robjects.packages as rpackages
utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

# One-time install of the bupaR ecosystem (left commented out on purpose):
#packnames = ('bupaR', 'eventdataR','edeaR','processmapR','processmonitR','xesreadR','petrinetR')
#from rpy2.robjects.vectors import StrVector
#utils.install_packages(StrVector(packnames))

from rpy2.robjects.packages import importr, data
eventdataR = importr('eventdataR')
patients = data(eventdataR).fetch('patients')[
    'patients']  #fetch dataset from library

# Build the process map for the 'patients' event log
process_map = importr("processmapR")
pm = process_map.process_map(patients)

# from rpy2.interactive import process_revents
# process_revents.start()
# grdevices = importr('grDevices')
# grdevices.png(file="file.png", width=512, height=512)
# rpy2.rinterface.NULL
# pm

import os

# Directory that would contain file.png if it were written (result unused)
os.path.dirname(os.path.abspath('file.png'))
#'C:\\Windows\\system32'
Esempio n. 17
0
#%%
import cloudpickle as cp
from urllib.request import urlopen
# Load a pickled demo object straight from the GitHub raw URL
pko = cp.load(urlopen('https://github.com/resourcesbookvisual/data/raw/master/demo.pkl'))

#%%
import pickle
from urllib.request import urlopen
linkRepo='https://github.com/resourcesbookvisual/data/'
linkDemo="raw/master/demo.pkl" # RDATA file!
fullLink=linkRepo+linkDemo

respk22 = pickle.load(urlopen(fullLink))
#%%
link="https://github.com/EvansDataScience/data/raw/master/crime.RData"

from numpy import *
import scipy as sp
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
# NOTE(review): pandas.rpy was removed from pandas long ago; this import
# only works on very old pandas versions.
import pandas.rpy.common as com

# NOTE(review): 'link' inside the R string is an *R* symbol, not the Python
# variable above; this only works if a variable named link also exists in
# the R global environment (e.g. ro.globalenv['link'] = link) -- confirm.
ro.r('load(link)')

#%%

# Fixed: the original import ended in ", oad" -- a typo that raised
# ImportError (rpy2.robjects.packages exports no name 'oad').
from rpy2.robjects.packages import importr, data
datasets = importr('datasets')
# NOTE(review): fetch() expects a dataset *name*; passing a URL here looks
# wrong, and the environment is then indexed with 'crime' -- verify intent.
mtcars_env = data(datasets).fetch(fullLink)
mtcars = mtcars_env['crime']
Esempio n. 18
0
import unittest

# Try to load R dplyr package, and see if it works
from rpy2.rinterface import RRuntimeError
# Tri-state flag: None until the import attempt below resolves it
has_dplyr = None
try:
    from rpy2.robjects.lib import dplyr
    has_dplyr = True
except RRuntimeError:
    has_dplyr = False

from rpy2.robjects.packages import importr, data
datasets = importr('datasets')
# fetch() returns an R environment; index by name to get the data.frame
mtcars = data(datasets).fetch('mtcars')['mtcars']

@unittest.skipUnless(has_dplyr, 'dplyr package not available in R')
class DplyrTestCase(unittest.TestCase):

    def testSetup(self):
        pass

    def tearDown(self):
        pass

    def testDataFrame(self):
        dataf = dplyr.DataFrame(mtcars)
        # FIXME: no testing much at the moment...
        self.assertTrue(isinstance(dataf, dplyr.DataFrame))

    def testFilter_NoFilter(self):
        dataf = dplyr.DataFrame(mtcars)
Esempio n. 19
0
    X3, Y3 = make_moons(n_samples=500, noise=0.1)
    optics3 = OPTICS(epsilon=0.85, MinPts=15)
    Ordered, ReachDist = optics3.fit(X3)
    optics3.plt_show(X3, Y3, ReachDist, Ordered, name=3)
    plt.savefig("/Volumes/PXWIN/datadepth/DepthbasedClustering/pic3.png")
'''
#flea = data(mclust).fetch('flea')['flea']
from rpy2 import robjects
from rpy2.robjects import Formula, Environment
from rpy2.robjects.vectors import IntVector, FloatVector
from rpy2.robjects.lib import grid
from rpy2.robjects.packages import importr, data
from rpy2.rinterface import RRuntimeError
import warnings

# The R 'print' function
rprint = robjects.globalenv.get("print")
stats = importr('stats')
grdevices = importr('grDevices')
base = importr('base')
datasets = importr('datasets')

grid.activate()
import math, datetime
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
base = importr('base')

# Fixed: `mclust` was used below but never imported, which raised NameError.
mclust = importr('mclust')
# NOTE(review): the variable is named mtcars but actually holds mclust's
# 'banknote' dataset -- kept as-is because later code may reference mtcars.
mtcars = data(mclust).fetch('banknote')['banknote']
Esempio n. 20
0
datasets = importr('datasets')

grid.activate()

lattice = importr('lattice')


# Render the lattice wireframe plot to test.png
grdevices.png(file="test.png", width=512, height=512)
# plotting code here
#r = robjects.r

#x = robjects.IntVector(range(10))
#y = r.rnorm(10)
#r.plot(r.runif(10), y, xlab="runif", ylab="foo/bar", col="red")

# data() loads 'volcano' into a temporary environment; index it by name
tmpenv = data(datasets).fetch("volcano")
volcano = tmpenv["volcano"]

# Shaded 3-D surface of the volcano elevation matrix
p = lattice.wireframe(volcano, shade = True,
                      zlab = "",
                      aspect = FloatVector((61.0/87, 0.4)),
                      light_source = IntVector((10,0,10)))
# lattice plots render only when explicitly print()-ed
rprint(p)

grdevices.dev_off()


#r = robjects.r

#x = robjects.IntVector(range(10))
#y = r.rnorm(10)
Esempio n. 21
0
#-- setupxyplot-begin
xyplot = lattice.xyplot
#-- setupxyplot-end

#-- dataset-begin
# 300 N(0,1) draws followed by 100 N(3,1) draws ('+' on rpy2 vectors
# concatenates, like R's c()); the 'mean' column labels each group.
rnorm = stats.rnorm
dataf_rnorm = robjects.DataFrame({'value': rnorm(300, mean=0) + rnorm(100, mean=3),
                                  'other_value': rnorm(300, mean=0) + rnorm(100, mean=3),
                                  'mean': IntVector([0, ]*300 + [3, ] * 100)})
#-- dataset-end

grdevices.png('../../_static/graphics_lattice_xyplot_1.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- xyplot1-begin
datasets = importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']
# Bind mpg and wt into the formula's environment so lattice can resolve them
formula = Formula('mpg ~ wt')
formula.getenvironment()['mpg'] = mtcars.rx2('mpg')
formula.getenvironment()['wt'] = mtcars.rx2('wt')

p = lattice.xyplot(formula)
# lattice plots render only when explicitly print()-ed
rprint(p)
#-- xyplot1-end
grdevices.dev_off()

grdevices.png('../../_static/graphics_lattice_xyplot_2.png',
    width = 612, height = 612, antialias="subpixel", type="cairo")
#-- xyplot2-begin
# Same scatter, grouped (colored) by cylinder count
p = lattice.xyplot(formula, groups = mtcars.rx2('cyl'))
rprint(p)
#-- xyplot2-end
Esempio n. 22
0
    IntVector([
        0,
    ] * 300 + [
        3,
    ] * 100)
})
#-- dataset-end

# First scatter plot: mpg vs wt from mtcars
grdevices.png('../../_static/graphics_lattice_xyplot_1.png',
              width=612,
              height=612,
              antialias="subpixel",
              type="cairo")
#-- xyplot1-begin
datasets = importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']
# Bind mpg and wt into the formula's environment so lattice can resolve them
formula = Formula('mpg ~ wt')
formula.getenvironment()['mpg'] = mtcars.rx2('mpg')
formula.getenvironment()['wt'] = mtcars.rx2('wt')

p = lattice.xyplot(formula)
# lattice plots render only when explicitly print()-ed
rprint(p)
#-- xyplot1-end
grdevices.dev_off()

# Second plot: same formula, grouped (colored) by cylinder count
grdevices.png('../../_static/graphics_lattice_xyplot_2.png',
              width=612,
              height=612,
              antialias="subpixel",
              type="cairo")
#-- xyplot2-begin
Esempio n. 23
0
  def __init__(self):
    """Eagerly load every canned R dataset named in ``dataset_names`` into
    the per-instance ``self._data`` cache, keyed by dataset name."""
    # Fixed: the original never initialized `_data` here, so it relied on
    # `_data` being defined elsewhere (likely a shared class attribute),
    # which would either raise AttributeError or share state across
    # instances. A fresh per-instance dict avoids both.
    self._data = {}
    for name in dataset_names:
      onedataset = data(datasets).fetch(name)[name]
      self._data[name] = onedataset
from rpy2.robjects.packages import importr, data
import numpy as np

# PimaIndiansDiabetes2 lives in the R 'mlbench' package
spep = importr("mlbench")
dataset = data(spep).fetch('PimaIndiansDiabetes2')

# Collect every entry as a float64 numpy array.
# NOTE(review): `dataset` is an R environment; items() yields (name, R object)
# pairs and this assumes each value converts cleanly to float -- factor
# columns would not. Confirm against the actual fetch() result.
array = []
for k,v in dataset.items():
    print(v)
    array.append(np.array(v).astype(np.float64))

# Stack columns into rows, then drop any row containing NaN/Inf
array = np.array(array).T.squeeze()
trimmed = []
for row in array:
    if np.all(np.isfinite(row)):
        trimmed.append(row)
trimmed = np.array(trimmed)

np.save("PimaIndiansDiabetes2", trimmed)
def get_insilico_symptom_descriptions():
    """Map InSilicoVA probbase indicator codes (INDIC.C.10) to their question
    descriptions (QDESC.C.70), returned as a plain dict."""
    insilico_pkg = importr('InSilicoVA')
    to_data_frame = globalenv.get('as.data.frame')
    raw_probbase = data(insilico_pkg).fetch('probbase')['probbase']
    frame = ri2py(to_data_frame(raw_probbase))
    return frame.set_index('INDIC.C.10')['QDESC.C.70'].to_dict()