def run(self, data_object):
    """Read a canned dataset from R into a pandas dataframe.

    Args:
        data_object (DataObject): pipeline data container the resulting
            dataframe is added to.

    Returns:
        data_object (DataObject): the same DataObject instance
        terminate (bool): should we terminate the DAG? True when the
            resulting dataframe is empty.
    """
    dataset = self.node_config["dataset"]
    # lazy %-style args: the message is only formatted if the record is emitted
    logging.info("Reading %s from R", dataset)
    try:
        from rpy2.robjects.packages import importr, data
    except ImportError:  # pragma: no cover
        raise ImportError(
            "This example needs Rpy2."
            "Please refer to the R requirements in the README"
        )
    datasets = importr("datasets")
    # data(...).fetch() loads the named dataset into the R session so the
    # as.data.frame() call below can resolve it by name.
    r_env = data(datasets).fetch(dataset)
    import rpy2.robjects as robjects
    # why we do this:
    # > data(euro)
    # > euro
    #        ATS        BEF        DEM        ESP        FIM        FRF        IEP         ITL        LUF        NLG        PTE
    #  13.760300  40.339900   1.955830 166.386000   5.945730   6.559570   0.787564 1936.270000  40.339900   2.203710 200.482000
    #
    # > as.data.frame(euro)
    #            euro
    # ATS   13.760300
    # BEF   40.339900
    # DEM    1.955830
    #
    # BUG FIX: the R dataframe used to be bound to a local named `data`,
    # shadowing the `data` function imported above; renamed to `r_df`.
    r_df = robjects.r("as.data.frame(%s)" % dataset)
    # at time of writing, rpy2's R dataframe to pandas dataframe was not fully supported
    # However, as python list() seems to work for FloatVector, StrVector, and FactorVector, let's use it
    from rpy2.robjects import r
    colnames = r.colnames(r_df)
    pandas_data = {}
    # convert each column of the R dataframe in turn
    for i, colname in enumerate(colnames):
        pandas_data[colname] = list(r_df[i])
    # Unfortunately, some datasets have rownames that should be an ID column
    # (e.g., see mtcars where rownames=names of the cars).
    # This is the best we can do: pull it out as an additional column for each and every dataset
    pandas_data["row_names"] = list(r_df.rownames)
    df = pd.DataFrame(pandas_data)
    data_object.add(self, df)
    terminate = df.empty
    return data_object, terminate
def _read_data(self):
    """Load the AER ``CreditCard`` dataset into ``self.df``.

    Activate R to Pandas, import package from R and extract dataset.
    convert to pandas dataframe
    Input: None
    Output: CreditCard data read to dataframe in Object
    """
    pandas2ri.activate()
    aer_pkg = importr('AER')
    dataset_env = data(aer_pkg).fetch('CreditCard')
    self.df = pandas2ri.ri2py(dataset_env['CreditCard'])
def import_useeior_mastercrosswalk():
    """
    Load USEEIOR's MasterCrosswalk that links BEA data to NAICS
    :return: the MasterCrosswalk2012 data object from the useeior package
    """
    pandas2ri.activate()
    # import the useeior package (r package)
    r_useeior = packages.importr('useeior')
    # the crosswalk ships as bundled package data; fetch() returns an
    # environment-like mapping keyed by dataset name
    crosswalk = packages.data(r_useeior).fetch(
        'MasterCrosswalk2012')['MasterCrosswalk2012']
    return crosswalk
def import_useeior_mastercrosswalk():
    """
    Load USEEIOR's MasterCrosswalk that links BEA data to NAICS and
    save it as a CSV under ``datapath``.
    :return: None
    """
    pandas2ri.activate()
    # import the useeior package (r package)
    r_useeior = packages.importr('useeior')
    # pull the bundled MasterCrosswalk2012 data object out of the package
    crosswalk = packages.data(r_useeior).fetch(
        'MasterCrosswalk2012')['MasterCrosswalk2012']
    # save as csv
    crosswalk.to_csv(datapath + "NAICS_to_BEA_Crosswalk.csv", index=False)
def build(self):
    """Render the configured plot with ggplot2 and save it as a PNG.

    Reads the plot specification from ``self.spec`` (data source, axes,
    colour/shape aesthetics), writes the plot to ``self.sfilename`` and
    returns ``self.cfilename``.

    NOTE(review): Python 2 code (`print` statement, `str.encode` to strip
    non-ascii) — do not port piecemeal.
    """
    ##print grdevices.palette()
    # data source: either a local CSV or a canned R dataset by name
    if self.spec['type'] == 'csv' :
        df = robjects.DataFrame.from_csvfile('./data/' + self.spec['name'] + '.csv')
    else :
        print type(self.spec['name'])
        # R dataset names must be plain ascii strings
        samplename = self.spec['name'].encode('ascii','ignore')
        df = data(datasets).fetch(samplename)[samplename]
    #print df
    grdevices.png(file=self.sfilename, width=700, height=400)
    pp = ggplot2.ggplot(df)
    # collect only the aesthetics that are actually set in the spec
    ppargs = {}
    if len(self.spec['viz[xaxis]']) != 0 :
        ppargs['x'] = self.spec['viz[xaxis]']
    if len(self.spec['viz[yaxis]']) != 0 :
        ppargs['y'] = self.spec['viz[yaxis]']
    if len(self.spec['viz[color]']) != 0 :
        ppargs['colour'] = self.spec['viz[color]']
    if len(self.spec['viz[shape]']) != 0 :
        ppargs['shape'] = self.spec['viz[shape]']
    # NOTE(review): player1/player2 are computed but never used below — confirm intent
    player1 = self.spec['viz[layer1]'] if len(self.spec['viz[layer1]']) != 0 else None
    player2 = self.spec['viz[layer2]'] if len(self.spec['viz[layer2]']) != 0 else None
    pp = pp + ggplot2.aes_string(**ppargs)
    ##pp = pp + ggplot2.geom_bar(stat="identity", fill="white", colour="darkgreen")
    ##pp = pp + ggplot2.scale_fill_brewer(palette="blues")
    ##pp = pp + ggplot2.geom_point()
    pp = pp + ggplot2.geom_point(size=5)
    pp.plot()
    # flush the PNG device to disk
    grdevices.dev_off()
    return self.cfilename
import pytest from rpy2.robjects import packages try: from rpy2.robjects.lib import tidyr has_tidyr = True msg = '' except packages.PackageNotInstalledError as error: has_tidyr = False msg = str(error) from rpy2 import rinterface from rpy2.robjects import vectors datasets = packages.importr('datasets') mtcars = packages.data(datasets).fetch('mtcars')['mtcars'] @pytest.mark.skipif(not has_tidyr, reason=msg) class TestTidyr(object): def test_dataframe(self): dataf = tidyr.DataFrame({ 'x': vectors.IntVector((1, 2, 3, 4, 5)), 'labels': vectors.StrVector(('a', 'b', 'b', 'b', 'a')) }) assert isinstance(dataf, tidyr.DataFrame) assert sorted(['x', 'labels']) == sorted(list(dataf.colnames)) def test_spread(self):
W_3 = cur_param[(2*J+J*K+K):(2*J+J*K+2*K)] b_3 = cur_param[2*J+J*K+2*K] nn = neural_net(X,W_1,b_1,W_2,b_2,W_3,b_3) est_q[i,] = np.reshape((nn+1)*(max_y-min_y)/2+min_y,(len(X),)) est_q = np.mean(est_q, axis = 0) plt.plot(X_s,y_s,'k.') plt.plot(X_s,q_true,'r-',label='True') plt.plot(X_s,est_q,'b-',label='Estimate') plt.legend() utils = importr("utils") utils.install_packages("MASS") MASS = importr("MASS") motor = data(MASS).fetch('mcycle')['mcycle'] motor = pandas2ri.ri2py(motor) plt.plot('times', 'accel', '.', data = motor) plt.xlabel('Time') plt.ylabel('Acceleration') X = np.array(motor['times']) X = np.reshape(X,(len(X),1)) y = np.array(motor['accel']) y = np.reshape(y,(len(y),1)) max_X = np.max(X) min_X = np.min(X) max_y = np.max(y) min_y = np.min(y)
def get_brms_data(dataset_name: str):
    """Return one of the example datasets bundled with brms as a pandas DataFrame."""
    # run the R->Python conversion under a scoped converter so pandas/numpy
    # conversion rules apply only inside this call
    converter = default_converter + pandas2ri.converter + numpy2ri.converter
    with localconverter(converter) as cv:
        r_dataset = rpackages.data(brms).fetch(dataset_name)[dataset_name]
        return pd.DataFrame(r_dataset)
def __init__(self):
    """Eagerly fetch every canned R dataset listed in ``dataset_names``
    and cache each one under its name.

    NOTE(review): assumes ``self._data`` already exists as a mapping
    (e.g. a class attribute) — confirm in the enclosing class.
    """
    for dataset_name in dataset_names:
        fetched = data(datasets).fetch(dataset_name)[dataset_name]
        self._data[dataset_name] = fetched
import pytest import pandas as pd from anndata import AnnData from rpy2.robjects import r from rpy2.robjects.packages import importr, data import anndata2ri from anndata2ri.test_utils import conversions_rpy2py se = importr("SummarizedExperiment") sce = importr("SingleCellExperiment") sc_rna_seq_data = data(importr("scRNAseq")) as_ = getattr(importr("methods"), "as") def check_allen(adata): assert adata.uns.keys() == {"SuppInfo", "which_qc"} assert set(adata.obs.keys()) > { "NREADS", "NALIGNED", "Animal.ID", "passes_qc_checks_s" } def check_example(adata): assert set(adata.obsm.keys()) == {"X_pca", "X_tsne"} assert adata.obsm["X_pca"].shape == (100, 5) sumex_allen = sc_rna_seq_data.fetch("allen")["allen"] code_example = """ local({ ncells <- 100
import pytest

from rpy2.robjects.packages import importr, data, PackageNotInstalledError

# dplyr's rpy2 wrapper is only importable when the R package is installed;
# keep the failure message so the skip reason is informative.
try:
    from rpy2.robjects.lib import dplyr
    has_dplyr = True
    msg = ''
except PackageNotInstalledError as error:
    has_dplyr = False
    msg = str(error)

datasets = importr('datasets')
# mtcars ships with R's 'datasets' package; fetch() returns an
# environment-like mapping keyed by dataset name.
mtcars = data(datasets).fetch('mtcars')['mtcars']


@pytest.mark.skipif(not has_dplyr, reason=msg)
class TestDplyr(object):
    """Smoke tests for the dplyr DataFrame wrapper."""

    def test_dataframe(self):
        dataf = dplyr.DataFrame(mtcars)
        # FIXME: no testing much at the moment...
        assert isinstance(dataf, dplyr.DataFrame)

    def test_filter_nofilter_method(self):
        # filter() with no predicate should keep every row
        dataf = dplyr.DataFrame(mtcars)
        dataf_filter = dataf.filter()
        assert dataf.nrow == dataf_filter.nrow

    def test_filter_nofilter_function(self):
        # same contract as above, but via the module-level filter function
        dataf = dplyr.DataFrame(mtcars)
        dataf_filter = dplyr.filter(dataf)
        assert dataf.nrow == dataf_filter.nrow
@staticmethod
def from_formula(formula,
                 data=rinterface.MissingArg,
                 family=rinterface.MissingArg,
                 subset=rinterface.MissingArg,
                 weights=rinterface.MissingArg):
    """ Build an LmList from a formula

    Parameters default to R's missing-argument sentinel so the underlying
    R fitter applies its own defaults when they are not supplied.
    """
    res = LmList._lmfit_from_formula(formula, data=data,
                                     family=family,
                                     subset=subset,
                                     weights=weights)
    # wrap the raw R fit result in the LmList class
    res = LmList(res)
    return res

# The '#-- name-begin/end' markers below delimit literalinclude snippets
# used by the documentation build — keep them intact.
#-- LmList-end

#-- buildLmList-begin
# sleepstudy ships with the lme4 R package
sleepstudy = data(lme4).fetch('sleepstudy')['sleepstudy']
formula = robjects.Formula('Reaction ~ Days | Subject')
lml1 = LmList.from_formula(formula, data=sleepstudy)
#-- buildLmList-end

#-- buildLmListBetterCall-begin
sleepstudy = data(lme4).fetch('sleepstudy')['sleepstudy']
formula = robjects.Formula('Reaction ~ Days | Subject')
lml1 = LmList.from_formula(formula, data=sleepstudy)
#-- buildLmListBetterCall-end
try: from rpy2.robjects.numpy2ri import numpy2ri, ri2py except ImportError: from rpy2.robjects.numpy2ri import numpy2rpy as numpy2ri from rpy2.robjects.numpy2ri import rpy2py as ri2py #from rpy2.robjects.numpy2ri import numpy2ri, ri2py from rpy2.robjects.packages import importr, data import openturns as ot # Require stats = importr("stats") faraway = importr("faraway") savings_data = data(faraway).fetch("savings")["savings"] #data(faraway).fetch("savings")["savings"] # Model 1 : 2 param, non intercept sr = ri2py(savings_data)["sr"] r.assign('sr', numpy2ri(sr)) pop15 = ri2py(savings_data)["pop15"] r.assign('pop15', numpy2ri(pop15)) pop75 = ri2py(savings_data)["pop75"] r.assign('pop75', numpy2ri(pop75)) formula = Formula('sr ~ pop75 + pop15 - 1') fit = stats.lm(formula) summary = stats.summary_lm(fit) """ list(summary.names) provides
from rpy2.robjects.vectors import DataFrame
from rpy2.robjects.packages import importr, data

# Explore the Old Faithful dataset via rpy2: summary, stem-and-leaf plot,
# and a histogram with a density overlay saved to PNG.
r_base = importr('base')
datasets = importr('datasets')
# fetch() returns an environment-like mapping keyed by dataset name
faithful_data = data(datasets).fetch('faithful')['faithful']
# R summary() of the eruption durations; iterates as (statistic, value) pairs
edsummary = r_base.summary(faithful_data.rx2("eruptions"))
for k, v in edsummary.items():
    print("%s: %.3f\n" %(k, v))
graphics = importr('graphics')
print("Stem-and-leaf plot of Old Faithful eruption duration data")
graphics.stem(faithful_data.rx2("eruptions"))
grdevices = importr('grDevices')
stats = importr('stats')
grdevices.png('faithful_histogram.png', width = 733, height = 550)
ed = faithful_data.rx2("eruptions")
# histogram with fixed breaks plus a kernel-density curve and rug marks
graphics.hist(ed, r_base.seq(1.6, 5.2, 0.2),
              prob = True, col = "lightblue",
              main = "Old Faithful eruptions", xlab = "Eruption duration (seconds)")
graphics.lines(stats.density(ed,bw=0.1), col = "orange")
graphics.rug(ed)
grdevices.dev_off()
from rpy2.robjects.vectors import FloatVector
# keep only the long eruptions (> 3)
long_ed = FloatVector([x for x in ed if x > 3])
# Install missing tutorial R packages, then run an mlr classification
# example (LDA on iris) entirely through the embedded R session.
# NOTE(review): Python 2 code (`print performance` statement at the end).
if not have_tutorial_packages:
    # import R's utility package
    utils = importr('utils')
    # select a mirror for R packages
    utils.chooseCRANmirror(ind=1)  # select the first mirror in the list
    # R vector of strings
    from rpy2.robjects.vectors import StrVector
    # file
    packnames_to_install = [x for x in packnames if not isinstalled(x)]
    if len(packnames_to_install) > 0:
        utils.install_packages(StrVector(packnames_to_install))
# pi = robjects.r['pi']
# print(pi[0])
mlr = importr('mlr')
datasets = importr('datasets')
# fetch() returns an environment-like mapping keyed by dataset name
iris_env = data(datasets).fetch('iris')
iris = iris_env['iris']
# print(iris)
# the mlr workflow is driven via R statements; 'iris' is resolved inside
# the R global environment where data() loaded it
robjects.r('task = makeClassifTask(data = iris, target = "Species")')
robjects.r('lrn = makeLearner("classif.lda")')
robjects.r('n = nrow(iris)')
# 2/3 train, 1/3 test split
robjects.r('train.set = sample(n, size = 2/3*n)')
robjects.r('test.set =setdiff(1:n, train.set)')
robjects.r('model = train(lrn, task, subset = train.set)')
robjects.r('pred = predict(model, task = task, subset = test.set)')
# mean misclassification error and accuracy
performance = robjects.r('performance(pred, measures = list(mmce, acc))')
print performance
# bupaR using SVG animations.https://github.com/bupaverse/processanimateR # Unleash the value of PROCESS MINING https://towardsdatascience.com/unleash-the-value-of-process-mining-4e3b5af4e9d8 # https://www.win.tue.nl/bpi/dorky.php?id=2012:challenge # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0207806 import rpy2.robjects.packages as rpackages utils = rpackages.importr('utils') utils.chooseCRANmirror(ind=1) # select the first mirror in the list #packnames = ('bupaR', 'eventdataR','edeaR','processmapR','processmonitR','xesreadR','petrinetR') #from rpy2.robjects.vectors import StrVector #utils.install_packages(StrVector(packnames)) from rpy2.robjects.packages import importr, data eventdataR = importr('eventdataR') patients = data(eventdataR).fetch('patients')[ 'patients'] #fetch dataset from library process_map = importr("processmapR") pm = process_map.process_map(patients) # from rpy2.interactive import process_revents # process_revents.start() # grdevices = importr('grDevices') # grdevices.png(file="file.png", width=512, height=512) # rpy2.rinterface.NULL # pm import os os.path.dirname(os.path.abspath('file.png')) #'C:\\Windows\\system32'
#%%
# Load a pickled demo object from GitHub (two equivalent approaches), then
# attempt to read an RData file through the embedded R session.
import cloudpickle as cp
from urllib.request import urlopen

pko = cp.load(urlopen('https://github.com/resourcesbookvisual/data/raw/master/demo.pkl'))

#%%
import pickle
from urllib.request import urlopen

linkRepo = 'https://github.com/resourcesbookvisual/data/'
linkDemo = "raw/master/demo.pkl"  # RDATA file!
fullLink = linkRepo + linkDemo
respk22 = pickle.load(urlopen(fullLink))

#%%
link = "https://github.com/EvansDataScience/data/raw/master/crime.RData"
from numpy import *
import scipy as sp
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
# NOTE(review): pandas.rpy was removed from pandas long ago — confirm environment
import pandas.rpy.common as com

# NOTE(review): 'link' is a Python name, not visible inside R, so this call
# likely fails — interpolate the URL into the R expression instead.
ro.r('load(link)')

#%%
# BUG FIX: the original import line read "import importr, data,oad" — 'oad'
# is not a name in rpy2.robjects.packages and raises ImportError (apparently
# a mangled 'load'); removed.
from rpy2.robjects.packages import importr, data

datasets = importr('datasets')
# NOTE(review): fetch() expects a dataset name, not a URL, and 'crime' is not
# part of R's 'datasets' package — confirm intent.
mtcars_env = data(datasets).fetch(fullLink)
mtcars = mtcars_env['crime']
import unittest # Try to load R dplyr package, and see if it works from rpy2.rinterface import RRuntimeError has_dplyr = None try: from rpy2.robjects.lib import dplyr has_dplyr = True except RRuntimeError: has_dplyr = False from rpy2.robjects.packages import importr, data datasets = importr('datasets') mtcars = data(datasets).fetch('mtcars')['mtcars'] @unittest.skipUnless(has_dplyr, 'dplyr package not available in R') class DplyrTestCase(unittest.TestCase): def testSetup(self): pass def tearDown(self): pass def testDataFrame(self): dataf = dplyr.DataFrame(mtcars) # FIXME: no testing much at the moment... self.assertTrue(isinstance(dataf, dplyr.DataFrame)) def testFilter_NoFilter(self): dataf = dplyr.DataFrame(mtcars)
X3, Y3 = make_moons(n_samples=500, noise=0.1) optics3 = OPTICS(epsilon=0.85, MinPts=15) Ordered, ReachDist = optics3.fit(X3) optics3.plt_show(X3, Y3, ReachDist, Ordered, name=3) plt.savefig("/Volumes/PXWIN/datadepth/DepthbasedClustering/pic3.png") ''' #flea = data(mclust).fetch('flea')['flea'] from rpy2 import robjects from rpy2.robjects import Formula, Environment from rpy2.robjects.vectors import IntVector, FloatVector from rpy2.robjects.lib import grid from rpy2.robjects.packages import importr, data from rpy2.rinterface import RRuntimeError import warnings # The R 'print' function rprint = robjects.globalenv.get("print") stats = importr('stats') grdevices = importr('grDevices') base = importr('base') datasets = importr('datasets') grid.activate() import math, datetime import rpy2.robjects.lib.ggplot2 as ggplot2 import rpy2.robjects as ro from rpy2.robjects.packages import importr base = importr('base') mtcars = data(mclust).fetch('banknote')['banknote']
# Render the built-in 'volcano' elevation matrix as a shaded lattice
# wireframe and save it to test.png.
datasets = importr('datasets')
grid.activate()
lattice = importr('lattice')

grdevices.png(file="test.png", width=512, height=512)
# plotting code here
#r = robjects.r
#x = robjects.IntVector(range(10))
#y = r.rnorm(10)
#r.plot(r.runif(10), y, xlab="runif", ylab="foo/bar", col="red")
# fetch() returns an environment-like mapping; the matrix lives inside it
tmpenv = data(datasets).fetch("volcano")
volcano = tmpenv["volcano"]
p = lattice.wireframe(volcano, shade = True,
                      zlab = "",
                      aspect = FloatVector((61.0/87, 0.4)),
                      light_source = IntVector((10,0,10)))
# lattice plots must be print()ed explicitly to render onto the device
rprint(p)
grdevices.dev_off()
#r = robjects.r
#x = robjects.IntVector(range(10))
#y = r.rnorm(10)
#-- setupxyplot-begin xyplot = lattice.xyplot #-- setupxyplot-end #-- dataset-begin rnorm = stats.rnorm dataf_rnorm = robjects.DataFrame({'value': rnorm(300, mean=0) + rnorm(100, mean=3), 'other_value': rnorm(300, mean=0) + rnorm(100, mean=3), 'mean': IntVector([0, ]*300 + [3, ] * 100)}) #-- dataset-end grdevices.png('../../_static/graphics_lattice_xyplot_1.png', width = 612, height = 612, antialias="subpixel", type="cairo") #-- xyplot1-begin datasets = importr('datasets') mtcars = data(datasets).fetch('mtcars')['mtcars'] formula = Formula('mpg ~ wt') formula.getenvironment()['mpg'] = mtcars.rx2('mpg') formula.getenvironment()['wt'] = mtcars.rx2('wt') p = lattice.xyplot(formula) rprint(p) #-- xyplot1-end grdevices.dev_off() grdevices.png('../../_static/graphics_lattice_xyplot_2.png', width = 612, height = 612, antialias="subpixel", type="cairo") #-- xyplot2-begin p = lattice.xyplot(formula, groups = mtcars.rx2('cyl')) rprint(p) #-- xyplot2-end
IntVector([ 0, ] * 300 + [ 3, ] * 100) }) #-- dataset-end grdevices.png('../../_static/graphics_lattice_xyplot_1.png', width=612, height=612, antialias="subpixel", type="cairo") #-- xyplot1-begin datasets = importr('datasets') mtcars = data(datasets).fetch('mtcars')['mtcars'] formula = Formula('mpg ~ wt') formula.getenvironment()['mpg'] = mtcars.rx2('mpg') formula.getenvironment()['wt'] = mtcars.rx2('wt') p = lattice.xyplot(formula) rprint(p) #-- xyplot1-end grdevices.dev_off() grdevices.png('../../_static/graphics_lattice_xyplot_2.png', width=612, height=612, antialias="subpixel", type="cairo") #-- xyplot2-begin
from rpy2.robjects.packages import importr, data
import numpy as np

# Export mlbench's PimaIndiansDiabetes2 dataset to a .npy file, keeping
# only rows with no missing/non-finite values.
spep = importr("mlbench")
# NOTE(review): unlike the usual .fetch(name)[name] idiom, the environment
# itself is kept and iterated below — confirm this yields the intended
# column layout.
dataset = data(spep).fetch('PimaIndiansDiabetes2')
array = []
for k,v in dataset.items():
    print(v)
    array.append(np.array(v).astype(np.float64))
# transpose so rows correspond to observations, then drop the extra axis
array = np.array(array).T.squeeze()
# keep only fully-finite rows (mlbench encodes missing values as NA)
trimmed = []
for row in array:
    if np.all(np.isfinite(row)):
        trimmed.append(row)
trimmed = np.array(trimmed)
np.save("PimaIndiansDiabetes2", trimmed)
def get_insilico_symptom_descriptions():
    """Map InSilicoVA probbase symptom indicators to their text descriptions.

    Returns a dict keyed by the 'INDIC.C.10' column with 'QDESC.C.70'
    values, taken from the probbase dataset bundled with the InSilicoVA
    R package.
    """
    insilico_r = importr('InSilicoVA')
    as_data_frame = globalenv.get('as.data.frame')
    # coerce the raw R object to a data.frame before converting to pandas
    raw_probbase = data(insilico_r).fetch('probbase')['probbase']
    probbase_df = ri2py(as_data_frame(raw_probbase))
    indexed = probbase_df.set_index('INDIC.C.10')
    return indexed['QDESC.C.70'].to_dict()