def __fill_rpy2r__(self):
    """ Populate the attribute _rpy2r.

    Each symbol name found in self._env is given a Python-side name: the
    entry in self._translation when there is one, otherwise the R name
    with every '.' replaced by '_'. The mapping Python name -> R name is
    stored in self._rpy2r, and the object converted with
    conversion.ri2py is stored in the instance __dict__ under the
    Python name.

    :raises LibraryError: when a name obtained by replacing dots is
      already in _rpy2r, or when the Python name is '__dict__' or is
      already a key of the instance __dict__.
    """
    package_name = self.__rname__
    for rname in self._env:
        if rname in self._translation:
            # Explicit translations are taken as they are.
            rpyname = self._translation[rname]
        elif '.' in rname:
            rpyname = rname.replace('.', '_')
            if rpyname in self._rpy2r:
                raise LibraryError(('Conflict in converting R symbol'
                                    ' to a Python symbol '
                                    '(%s -> %s while there is already'
                                    ' %s)') % (rname, rpyname, rpyname))
        else:
            rpyname = rname

        if rpyname == '__dict__' or rpyname in self.__dict__:
            raise LibraryError('The symbol ' + rname +
                               ' in the package ' + package_name +
                               ' is conflicting with '
                               'a Python object attribute')

        self._rpy2r[rpyname] = rname
        converted = conversion.ri2py(self._env[rname])
        converted.__rname__ = rname
        #FIXME: shouldn't the original R name be also in the __dict__ ?
        self.__dict__[rpyname] = converted
def plot_clustering_agreement(target, source, env):
    """ Plot, for each feature set, its clustering agreement with the others.

    Every entry of ``source`` except the last one is a file whose path is
    ``.../<count>/<feature>.<ext>``: the second-to-last path component is
    parsed as an integer and the file stem is used as the feature name.
    Each file is loaded with ``numpy.load``, converted with ``ri2py`` and
    its "cluster" element is kept. The last entry of ``source`` provides
    the arguments (``read()``); its "TYPE" key selects the ``r_clv``
    function ``clv_<TYPE>`` used as agreement measure.

    One subplot is drawn per feature set, with one curve per other
    feature set, and the figure is written to ``target[0]``.

    :param target: sequence whose first item gives the output path (rstr())
    :param source: clustering files followed by the arguments object
    :param env: unused
    :returns: None
    """
    args = source[-1].read()

    # (count, feature name, clustering) for every clustering file,
    # ordered by count and then by feature name.
    entries = []
    for node in source[0:-1]:
        path = node.rstr()
        parts = path.split("/")
        entries.append((int(parts[-2]),
                        os.path.splitext(parts[-1])[0],
                        ri2py(numpy.load(path)).rx2("cluster")))
    entries.sort()

    # Group the clusterings by feature name, keeping the sorted order.
    features = {}
    for _, feature, clustering in entries:
        features.setdefault(feature, []).append(clustering)

    # Freeze one ordering of the names: it indexes rows, layers and subplots.
    names = list(features.keys())
    print(names)
    data = numpy.empty(shape=(len(names), len(features[names[0]]), len(names)))
    print(data.shape)

    measure = getattr(r_clv, "clv_%s" % args["TYPE"])
    for r, name_a in enumerate(names):
        for c in range(data.shape[1]):
            for l, name_b in enumerate(names):
                vals = r_clv.std_ext(features[name_a][c], features[name_b][c])
                data[r, c, l] = measure(vals)[0]

    pyplot.figure(figsize=(15 * 2, 7 * len(features) + 1))
    n_rows = len(data) // 2 + 1
    tick_positions = list(range(0, data.shape[1], 2))
    # Tick labels are offset by 2 from the positions
    # (presumably the smallest number of clusters is 2 -- TODO confirm).
    tick_labels = [pos + 2 for pos in tick_positions]
    for i, (name_a, feat) in enumerate(zip(names, data)):
        # Subplot indices start at 1, enumerate() starts at 0.
        pyplot.subplot(n_rows, 2, i + 1)
        for j, (name_b, vals) in enumerate(zip(names, feat.T)):
            if name_b == name_a:
                continue
            if j > 6:
                # Dashed lines from the eighth curve on.
                pyplot.plot(vals, label=name_b, ls="--")
            else:
                pyplot.plot(vals, label=name_b)
        pyplot.legend(prop={"size" : 7}, numpoints=2)
        pyplot.title(name_a)
        pyplot.xticks(tick_positions, tick_labels)
        pyplot.xlabel("Clusters")
        pyplot.ylabel(args["TYPE"])

    pyplot.savefig(target[0].rstr(), bbox_inches="tight")
    pyplot.cla()
    return None
def plot_clusterings(target, source, env):
    """
    Plot items with clustering from first file, using 2-d coordinates
    from second file.

    The arguments read from the last source may define the expressions
    GET_CLUSTERS and GET_COORDS (each defaults to "x"). Each expression is
    evaluated with ``x`` bound to the object loaded from the corresponding
    file; the results are used as a mapping from item name to cluster
    (after ``ri2py`` and extraction of "cluster") and from item name to
    coordinates, respectively. The figure is saved to ``target[0]``.
    """
    pyplot.rcdefaults()
    pyplot.figure(figsize=(10, 10))
    args = source[-1].read()
    # rcdefaults()

    # NOTE(review): both expressions go through eval(); they must only
    # ever come from trusted build configuration.
    get_clusters = eval("lambda x : %s" % args.get("GET_CLUSTERS", "x"))
    raw_clusters = get_clusters(numpy.load(source[0].rstr()))
    clusts = dict(ri2py(raw_clusters).rx2("cluster").iteritems())

    get_coords = eval("lambda x : %s" % args.get("GET_COORDS", "x"))
    coords = get_coords(numpy.load(source[1].rstr()))
    labels = coords.keys()

    #if args.get("NORMALIZE", False):
    #    for i in [0, 1]:
    #        ttcoords[:, i] = (ttcoords[:, i] - ttcoords[:, i].min()) / numpy.max(ttcoords[:, i] - ttcoords[:, i].min())

    # One marker per item; shape and colour are picked by cluster.
    for label in labels:
        point = coords[label]
        pyplot.scatter(point[0], point[1], label=label, s=64,
                       marker=shapes[clusts[label]],
                       color=colors[clusts[label]])

    # Put each item name slightly above its marker (1/40 of the y range).
    ymin, ymax = pyplot.ylim()
    inc = (ymax - ymin) / 40.0
    for label in labels:
        point = coords[label]
        pyplot.text(point[0], point[1] + inc, label, fontsize=12, ha="center")

    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlabel("First principal component")
    pyplot.ylabel("Second principal component")
    pyplot.savefig(target[0].rstr(), bbox_inches="tight")
    pyplot.cla()
    return None
def colnames(self):
    """ Column names, as returned by the R function ``colnames``.

    :rtype: SexpVector (after conversion.ri2py)
    """
    r_colnames = baseNameSpaceEnv["colnames"]
    return conversion.ri2py(r_colnames(self))
def rownames(self):
    """ Row names, as returned by the R function ``rownames``.

    :rtype: SexpVector (after conversion.ri2py)
    """
    r_rownames = baseNameSpaceEnv["rownames"]
    return conversion.ri2py(r_rownames(self))
def __call__(self, *args, **kwargs):
    """ Call the underlying function.

    Every positional and named argument goes through conversion.py2ri
    before the call, and the result goes through conversion.ri2py.
    """
    converted_args = [conversion.py2ri(arg) for arg in args]
    converted_kwargs = dict((key, conversion.py2ri(val))
                            for key, val in kwargs.iteritems())
    outcome = super(RFunction, self).__call__(*converted_args,
                                              **converted_kwargs)
    return conversion.ri2py(outcome)
def get(self, item):
    """ Get an object from its R name/symbol.

    :param item: string (name/symbol)
    :rtype: object (as returned by :func:`conversion.ri2py`)
    """
    found = super(REnvironment, self).get(item)
    return conversion.ri2py(found)
def conver():
    """ Print the head of R's ``mtcars`` dataset after conversion.ri2py. """
    # pd is not referenced below; the import itself is kept.
    import pandas as pd
    import rpy2.robjects as ro
    import rpy2.robjects.conversion as conversion
    from rpy2.robjects import pandas2ri

    pandas2ri.activate()
    mtcars = conversion.ri2py(ro.r['mtcars'])
    print(mtcars.head())
def assign(self, index, value):
    """ Assign a given value to a given index position in the vector.

    The call is made through R's ``[<-`` with the arguments
    (self, index..., value); its result is returned after
    conversion.ri2py.
    """
    if isinstance(index, (rlc.TaggedList, rlc.ArgsDict)):
        # NOTE: the container passed by the caller is converted in place
        # and then reused (and extended) as the argument list.
        for pos in xrange(len(index)):
            index[pos] = conversion.py2ro(index[pos])
        call_args = index
    else:
        call_args = rlc.TaggedList([conversion.py2ro(index), ])

    call_args.append(conversion.py2ro(value))
    call_args.insert(0, self)
    outcome = r["[<-"].rcall(call_args.items())
    return conversion.ri2py(outcome)
def eval(x, envir=ri.globalenv):
    """ Evaluate R code.

    A string (str or unicode) is parsed first; anything else is taken to
    be an R expression and evaluated directly. Evaluation happens in
    R's global environment unless another environment is given. The
    result is returned after conversion.ri2py. """
    # Keep the two separate isinstance() tests: `unicode` is only looked
    # up when x is not a str.
    is_text = isinstance(x, str) or isinstance(x, unicode)
    expression = _parse(x) if is_text else x
    return conversion.ri2py(_reval(expression, envir=envir))
def eval(x, envir = ri.globalenv):
    """ Evaluate R code.

    Strings (str or unicode) are parsed before being evaluated; any
    other input is evaluated as it is. The evaluation is performed in
    R's global environment by default, or in the environment given.
    The result goes through conversion.ri2py before being returned. """
    # `unicode` is only looked up when x is not a str (short-circuit).
    if isinstance(x, str) or isinstance(x, unicode):
        x = _parse(x)
    evaluated = _reval(x, envir = envir)
    return conversion.ri2py(evaluated)
def _load_and_predict(data):
    """ Load all estimators of one base estimator and average their predictions.

    Every file in ``join(models_dir, base_estimator_name)`` is opened with
    gzip and unpickled. Estimators that are instances of
    ExternalREstimatorMixin get their ``model_`` attribute converted with
    ``ri2py``. The estimators are combined with EnsembleAverage and used
    to predict on ``X``.

    :param data: pair ``(idx, base_estimator_name)``
    :returns: pair ``(idx, pred)``; ``idx`` is passed through unchanged
    :raises ValueError: when the directory contains no estimator file
    """
    idx, base_estimator_name = data
    base_estimator_dir = join(models_dir, base_estimator_name)

    estimators = []
    for afile in os.listdir(base_estimator_dir):
        path = join(base_estimator_dir, afile)
        # NOTE(review): unpickling runs arbitrary code; the model
        # directory must only contain trusted files.
        with gzip.open(path, "rb") as fp:
            estimator = pickle.load(fp)
        if isinstance(estimator, ExternalREstimatorMixin):
            estimator.model_ = ri2py(estimator.model_)
        estimators.append(estimator)

    if not estimators:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError("No estimator found in %s" % base_estimator_dir)

    avg_estimator = EnsembleAverage(estimators, name=base_estimator_name)
    pred = avg_estimator.predict(X)
    # The last loaded estimator decides whether exp() is applied
    # (presumably all estimators of a directory are of the same kind,
    # and non-R ones predict on a log scale -- TODO confirm).
    if not isinstance(estimators[-1], ExternalREstimatorMixin):
        pred = numpy.exp(pred)
    return idx, pred
def default_py2ri(o):
    """ Convert an arbitrary Python object to a
    :class:`rpy2.rinterface.Sexp` object, creating an R object with the
    content of the Python object in the process (which means data
    copying). Objects that already are :class:`rpy2.rinterface.Sexp`
    instances are returned as they are.

    :param o: object
    :rtype: :class:`rpy2.rinterface.Sexp` (and subclasses)
    :raises ValueError: for an :class:`array.array` with an unsupported
      typecode, or for an object of an unsupported type
    """
    if isinstance(o, rinterface.Sexp):
        res = o
    elif isinstance(o, RObject):
        # Part of the chain, so that this result is not overwritten (or
        # followed by a ValueError) as it was with a free-standing `if`.
        res = rinterface.Sexp(o)
    elif isinstance(o, array.array):
        if o.typecode in ('h', 'H', 'i', 'I'):
            res = rinterface.SexpVector(o, rinterface.INTSXP)
        elif o.typecode in ('f', 'd'):
            res = rinterface.SexpVector(o, rinterface.REALSXP)
        else:
            raise ValueError("Nothing can be done for this array type at the moment.")
    elif isinstance(o, bool):
        # bool must be tested before int: bool is a subclass of int.
        res = rinterface.SexpVector([o, ], rinterface.LGLSXP)
    elif isinstance(o, int):
        res = rinterface.SexpVector([o, ], rinterface.INTSXP)
    elif isinstance(o, float):
        res = rinterface.SexpVector([o, ], rinterface.REALSXP)
    elif isinstance(o, str):
        res = rinterface.SexpVector([o, ], rinterface.STRSXP)
    elif isinstance(o, unicode):
        res = rinterface.SexpVector([o, ], rinterface.STRSXP)
    elif isinstance(o, list):
        # Elements are converted one by one, then assembled into an R list.
        res = r.list(*[conversion.ri2py(conversion.py2ri(x)) for x in o])
    elif isinstance(o, complex):
        res = rinterface.SexpVector([o, ], rinterface.CPLXSXP)
    else:
        raise ValueError("Nothing can be done for the type %s at the moment." %(type(o)))
    return res
def testDataFrameToNumpy(self):
    """ A two-column R data frame is converted to a numpy recarray. """
    columns = {'a': 1, 'b': 2}
    df = robjects.vectors.DataFrame(columns)
    rec = conversion.ri2py(df)
    self.assertEqual(numpy.recarray, type(rec))
    # Each column is reachable as an attribute of the record array.
    self.assertEqual(1, rec.a[0])
    self.assertEqual(2, rec.b[0])
trainer_scanvi.labelled_set.to_monitor = ['accuracy'] trainer_scanvi.full_dataset.to_monitor = ['entropy_batch_mixing'] trainer_scanvi.train(n_epochs=n_epochs_scanvi) if i == 0: print("Score UMI->nonUMI:", trainer_scanvi.unlabelled_set.accuracy()) else: print("Score nonUMI->UMI:", trainer_scanvi.unlabelled_set.accuracy()) elif model_type == 'Seurat': from scvi.harmonization.clustering.seurat import SEURAT SEURAT = SEURAT() seurat1 = SEURAT.create_seurat(UMI, 0) seurat2 = SEURAT.create_seurat(nonUMI, 1) latent, batch_indices,labels = SEURAT.combine_seurat(seurat1, seurat2) numpy2ri.activate() latent = ri2py(latent) batch_indices = ri2py(batch_indices) labels = ri2py(labels) keys,labels = np.unique(labels,return_inverse=True) latent = np.array(latent) batch_indices = np.array(batch_indices) labels = np.array(labels) elif model_type == 'Combat': from scvi.harmonization.clustering.combat import COMBAT COMBAT = COMBAT() # corrected = COMBAT.combat_correct(gene_dataset) latent = COMBAT.combat_pca(gene_dataset) latent = latent.T batch_indices = np.concatenate(gene_dataset.batch_indices) labels = np.concatenate(gene_dataset.labels) keys = gene_dataset.cell_types
if not ok: raise LibraryError("The R package %s could not be imported" %name) env = _as_env(rinterface.StrSexpVector(['package:'+name, ])) if signature_translation: pack = SignatureTranslatedPackage(env, name, translation = robject_translations) else: pack = Package(env, name, translation = robject_translations) return pack def wherefrom(symbol, startenv = rinterface.globalenv): """ For a given symbol, return the environment this symbol is first found in, starting from 'startenv' """ env = startenv obj = None tryagain = True while tryagain: try: obj = env[symbol] tryagain = False except LookupError, knf: env = env.enclos() if env.rsame(rinterface.emptyenv): tryagain = False else: tryagain = True return conversion.ri2py(env)
#fetching R datasets to use in python import rpy2.robjects as ro import rpy2.robjects.conversion as conversion from rpy2.robjects import pandas2ri pandas2ri.activate() import numpy as np import matplotlib.pyplot as plt import matplotlib.cm as cm %matplotlib inline R = ro.r df = conversion.ri2py(R['mtcars']) print(df.head()) from sklearn.cluster import KMeans df.columns #get data X = df[['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']] #import standard Scaler to scale the data from sklearn.preprocessing import StandardScaler scaler = StandardScaler() X_scaled = scaler.fit_transform( X ) #set the number of cluser range assumption
def getenvironment(self):
    """ Get the environment in which the formula is finding its symbols
    (the ".Environment" slot, after conversion.ri2py). """
    environment = self.do_slot(".Environment")
    return conversion.ri2py(environment)
def __getitem__(self, item):
    """ Look up *item* through rinterface.globalEnv.get and return it
    after conversion.ri2py. """
    found = rinterface.globalEnv.get(item)
    return conversion.ri2py(found)
def r2py(results, p_col=None):
    """ Convert *results* with ri2py and rename its columns.

    Columns are renamed with ``reformat_name``. When *p_col* is given
    (and truthy), a 'signif' column is added by applying ``pval`` to the
    column named ``reformat_name(p_col)``.

    :param results: object to convert
    :param p_col: original name of the column passed to ``pval``, or None
    :returns: the converted and renamed table
    """
    tbl = ri2py(results)
    tbl = tbl.rename(columns=reformat_name)
    if not p_col:
        return tbl
    renamed_p_col = reformat_name(p_col)
    tbl['signif'] = tbl[renamed_p_col].apply(pval)
    return tbl
def __getattr__(self, attr):
    """ Resolve a missing attribute as the slot named *attr*
    (``do_slot``), returned after conversion.ri2py. """
    slot = self.do_slot(attr)
    return conversion.ri2py(slot)
def __getitem__(self, item):
    """ Return the object stored under *item*, fetched by the parent
    class and passed through conversion.ri2py. """
    found = super(REnvironment, self).__getitem__(item)
    return conversion.ri2py(found)
# NOTE(review): the four defs below take ``self`` and one of them calls
# super(R, self); presumably they are methods of the class R, whose header
# lies outside this chunk -- keep them indented under that class.
def __getitem__(self, item):
    """ Look up *item* through rinterface.globalEnv.get and return it
    after conversion.ri2py. """
    found = rinterface.globalEnv.get(item)
    return conversion.ri2py(found)

#FIXME: check that this is properly working
def __cleanup__(self):
    """ End the embedded R process. """
    rinterface.endEmbeddedR()
    del(self)

def __str__(self):
    """ Parent string representation followed by one "name: value" line
    per element of the R object named "version". """
    header = super(R, self).__str__()
    version = self["version"]
    fields = [n+': '+val[0]
              for n, val in itertools.izip(version.getnames(), version)]
    return header + os.linesep + os.linesep.join(fields)

def __call__(self, string):
    """ Parse *string* (``self.parse``) and evaluate the result
    (``self.eval``). """
    parsed = self.parse(text=string)
    return self.eval(parsed)


r = R()

# Converted counterparts of the rinterface-level environments.
globalEnv = conversion.ri2py(rinterface.globalEnv)
baseNameSpaceEnv = conversion.ri2py(rinterface.baseNameSpaceEnv)
emptyEnv = conversion.ri2py(rinterface.emptyEnv)
def __getitem__(self, i):
    """ Return the element at *i*; it goes through conversion.ri2py
    only when it is an rinterface.Sexp. """
    element = super(RVector, self).__getitem__(i)
    if not isinstance(element, rinterface.Sexp):
        return element
    return conversion.ri2py(element)
def testDataFrameToNumpy(self):
    """ Converting a data frame with columns 'a' and 'b' gives a numpy
    recarray exposing both columns. """
    source = {'a': 1, 'b': 2}
    rec = conversion.ri2py(robjects.vectors.DataFrame(source))
    self.assertEqual(numpy.recarray, type(rec))
    self.assertEqual(1, rec.a[0])
    self.assertEqual(2, rec.b[0])
def getdim(self):
    """ Dimensions of the object, as given by R's ``dim`` and passed
    through conversion.ri2py. """
    dimensions = r.dim(self)
    return conversion.ri2py(dimensions)