Beispiel #1
0
 def __fill_rpy2r__(self):
     """ Populate ``self._rpy2r`` and expose each R symbol as an attribute.

     Every R name found in ``self._env`` is given a Python name: the
     entry in ``self._translation`` when there is one, otherwise the R
     name with each '.' replaced by '_'.  The pair is recorded in
     ``self._rpy2r`` (Python name -> R name) and the object converted by
     ``conversion.ri2py`` is stored in ``self.__dict__`` under the
     Python name, after being tagged with its R name (``__rname__``).

     :raises LibraryError: if a name derived by replacing dots is already
       in ``self._rpy2r``, or if an untranslated name collides with an
       existing instance attribute (or is ``'__dict__'`` itself)
     """
     package_name = self.__rname__
     for r_symbol in self._env:
         if r_symbol in self._translation:
             # Explicit translations are used as-is, without conflict checks.
             py_symbol = self._translation[r_symbol]
         else:
             if '.' in r_symbol:
                 py_symbol = r_symbol.replace('.', '_')
                 if py_symbol in self._rpy2r:
                     raise LibraryError(
                         'Conflict in converting R symbol to a Python symbol '
                         '(%s -> %s while there is already %s)'
                         % (r_symbol, py_symbol, py_symbol))
             else:
                 py_symbol = r_symbol
             if py_symbol == '__dict__' or py_symbol in self.__dict__:
                 raise LibraryError('The symbol ' + r_symbol +
                                    ' in the package ' + package_name +
                                    ' is conflicting with ' +
                                    'a Python object attribute')
         self._rpy2r[py_symbol] = r_symbol
         converted = conversion.ri2py(self._env[r_symbol])
         converted.__rname__ = r_symbol
         #FIXME: shouldn't the original R name be also in the __dict__ ?
         self.__dict__[py_symbol] = converted
Beispiel #2
0
def plot_clustering_agreement(target, source, env):
    """Plot, for each feature set, how well its clusterings agree with the others.

    Every entry of ``source`` except the last is a file whose path has the
    form ``.../<cluster count>/<feature name>.<ext>``; it is loaded with
    ``numpy.load`` and converted with ``ri2py``, and its ``"cluster"``
    element is used.  The last entry of ``source`` yields, through
    ``read()``, a mapping whose ``"TYPE"`` entry selects the ``r_clv``
    function ``clv_<TYPE>`` used as the agreement measure.

    One subplot is drawn per feature set, with one curve per other feature
    set, and the figure is saved to ``target[0]``.

    :param target: sequence whose first item provides the output path (``rstr()``)
    :param source: input files followed by the argument node
    :param env: unused here
    :return: None
    """
    args = source[-1].read()

    # (cluster count, feature name, clustering) for every input file,
    # ordered by cluster count and then by feature name.
    entries = []
    for node in source[0:-1]:
        path = node.rstr()
        parts = path.split("/")
        entries.append((int(parts[-2]),
                        os.path.splitext(parts[-1])[0],
                        ri2py(numpy.load(path)).rx2("cluster")))
    entries.sort()

    # Group clusterings by feature name, keeping the sorted order.
    features = {}
    for _, feat_name, clustering in entries:
        features.setdefault(feat_name, []).append(clustering)

    # Fix the feature order once so rows, curves and titles stay aligned.
    names = sorted(features)
    print(names)

    # assumes every feature has the same number of clusterings -- TODO confirm
    n_counts = len(features[names[0]])
    data = numpy.empty(shape=(len(names), n_counts, len(names)))
    print(data.shape)

    agreement = getattr(r_clv, "clv_%s" % args["TYPE"])
    for r, feat_a in enumerate(names):
        for c in range(n_counts):
            for l, feat_b in enumerate(names):
                vals = r_clv.std_ext(features[feat_a][c], features[feat_b][c])
                data[r, c, l] = agreement(vals)[0]

    pyplot.figure(figsize=(15 * 2, 7 * len(names) + 1))
    tick_positions = list(range(0, data.shape[1], 2))
    tick_labels = [pos + 2 for pos in tick_positions]
    for i, (name_a, feat) in enumerate(zip(names, data)):
        # Subplot indices are 1-based; passing 0 is rejected by matplotlib.
        pyplot.subplot(len(data) // 2 + 1, 2, i + 1)
        for j, (name_b, vals) in enumerate(zip(names, feat.T)):
            if name_b == name_a:
                continue
            if j > 6:
                # Dashed lines once the first curves have been drawn solid.
                pyplot.plot(vals, label=name_b, ls="--")
            else:
                pyplot.plot(vals, label=name_b)
        pyplot.legend(prop={"size" : 7}, numpoints=2)
        pyplot.title(name_a)
        pyplot.xticks(tick_positions, tick_labels)
        pyplot.xlabel("Clusters")
        pyplot.ylabel(args["TYPE"])
    pyplot.savefig(target[0].rstr(), bbox_inches="tight")
    pyplot.cla()
    return None
Beispiel #3
0
def plot_clusterings(target, source, env):
    """
    Plot items with clustering from first file, using 2-d coordinates from second file.

    The optional arguments GET_CLUSTERS and GET_COORDS (read from the last
    source) are Python expressions in the variable ``x``; each is applied to
    the object loaded from the corresponding file.  The clustering result is
    converted with ``ri2py`` and its ``"cluster"`` element becomes a mapping
    from item name to cluster number; the coordinate result must be a
    mapping from item name to a coordinate pair.  Both default to ``x``.

    Marker shape and colour are looked up in the module-level ``shapes`` and
    ``colors`` by cluster number.  The figure is saved to ``target[0]``.

    :param target: sequence whose first item provides the output path (``rstr()``)
    :param source: clustering file, coordinate file, then the argument node
    :param env: unused here
    :return: None
    """
    pyplot.rcdefaults()
    pyplot.figure(figsize=(10, 10))
    args = source[-1].read()

    # NOTE(security): the GET_* expressions are run through eval(); they must
    # come from trusted build configuration only.
    get_clusters = eval("lambda x : %s" % args.get("GET_CLUSTERS", "x"))
    clusts = dict(ri2py(get_clusters(numpy.load(source[0].rstr()))).rx2("cluster").iteritems())
    get_coords = eval("lambda x : %s" % args.get("GET_COORDS", "x"))
    coords = get_coords(numpy.load(source[1].rstr()))
    labels = coords.keys()

    for label in labels:
        cluster = clusts[label]
        pyplot.scatter(coords[label][0], coords[label][1], label=label, s=64,
                       marker=shapes[cluster], color=colors[cluster])

    # The label offset depends on the y-range, so it is computed only after
    # every point has been drawn.
    ymin, ymax = pyplot.ylim()
    inc = (ymax - ymin) / 40.0
    for label in labels:
        pyplot.text(coords[label][0], coords[label][1] + inc, label,
                    fontsize=12, ha="center")

    pyplot.xticks([], [])
    pyplot.yticks([], [])
    pyplot.xlabel("First principal component")
    pyplot.ylabel("Second principal component")
    pyplot.savefig(target[0].rstr(), bbox_inches="tight")
    pyplot.cla()
    return None
Beispiel #4
0
    def colnames(self):
        """ Return the column names of this object.

        The ``colnames`` entry of ``baseNameSpaceEnv`` is called on
        ``self`` and its result is converted with ``conversion.ri2py``.

        :rtype: SexpVector
        """
        r_colnames = baseNameSpaceEnv["colnames"]
        return conversion.ri2py(r_colnames(self))
Beispiel #5
0
 def rownames(self):
     """ Return the row names of this object.

     The ``rownames`` entry of ``baseNameSpaceEnv`` is called on
     ``self`` and its result is converted with ``conversion.ri2py``.

     :rtype: SexpVector
     """
     r_rownames = baseNameSpaceEnv["rownames"]
     return conversion.ri2py(r_rownames(self))
Beispiel #6
0
    def __call__(self, *args, **kwargs):
        """ Call the underlying R function with converted arguments.

        Each positional and keyword argument is passed through
        ``conversion.py2ri`` before the parent class' ``__call__`` is
        invoked; the result is passed through ``conversion.ri2py``.

        :param args: positional arguments for the R function
        :param kwargs: named arguments for the R function
        :return: the converted result of the call
        """
        new_args = [conversion.py2ri(a) for a in args]
        # items() works on both Python 2 and Python 3 (iteritems() does not).
        new_kwargs = dict((k, conversion.py2ri(v)) for k, v in kwargs.items())
        res = super(RFunction, self).__call__(*new_args, **new_kwargs)
        return conversion.ri2py(res)
Beispiel #7
0
 def get(self, item):
     """ Look up an object by its R name/symbol.

     The lookup is delegated to the parent class' ``get`` and the
     result is converted before being returned.

     :param item: string (name/symbol)
     :rtype: object (as returned by :func:`conversion.ri2py`)
     """
     found = super(REnvironment, self).get(item)
     return conversion.ri2py(found)
Beispiel #8
0
def conver():
    """Print the first rows of R's ``mtcars`` object after conversion.

    pandas conversion is activated through ``pandas2ri.activate()``, the
    ``mtcars`` object is fetched from the R session and converted with
    ``conversion.ri2py``, and the result of its ``head()`` is printed.
    """
    import pandas as pd
    import rpy2.robjects as ro
    import rpy2.robjects.conversion as conversion
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()

    r_session = ro.r
    mtcars = conversion.ri2py(r_session['mtcars'])
    print(mtcars.head())
Beispiel #9
0
 def assign(self, index, value):
     """ Assign a given value to a given index position in the vector.

     The assignment is carried out by calling R's ``[<-`` with ``self``,
     the index argument(s) and the value, all converted with
     ``conversion.py2ro``.

     Note: when ``index`` is an ``rlc.TaggedList`` or ``rlc.ArgsDict`` it
     is modified in place: its elements are replaced by their converted
     form, and ``self`` and the value are added to it.

     :param index: a single index, or a TaggedList/ArgsDict of indices
     :param value: the value to assign
     :return: the result of the R call, converted with ``conversion.ri2py``
     """
     if isinstance(index, (rlc.TaggedList, rlc.ArgsDict)):
         for pos in xrange(len(index)):
             index[pos] = conversion.py2ro(index[pos])
         call_args = index
     else:
         call_args = rlc.TaggedList([conversion.py2ro(index), ])
     call_args.append(conversion.py2ro(value))
     call_args.insert(0, self)
     outcome = r["[<-"].rcall(call_args.items())
     return conversion.ri2py(outcome)
Beispiel #10
0
def eval(x, envir=ri.globalenv):
    """ Evaluate R code and return the converted result.

    A string (``str`` or ``unicode``) is parsed with ``_parse`` first;
    any other object is taken to be an R expression and is evaluated
    as it is.

    The evaluation happens in R's global environment unless another
    environment is given through ``envir``."""
    is_source_text = isinstance(x, str) or isinstance(x, unicode)
    expression = _parse(x) if is_source_text else x
    return conversion.ri2py(_reval(expression, envir=envir))
Beispiel #11
0
def eval(x, envir = ri.globalenv):
    """ Evaluate R code and return the converted result.

    Strings (``str`` or ``unicode``) are turned into an R expression
    with ``_parse`` before evaluation; anything else is evaluated
    directly.

    ``envir`` is the environment in which the evaluation takes place;
    it defaults to R's global environment."""
    if isinstance(x, str) or isinstance(x, unicode):
        x = _parse(x)
    evaluated = _reval(x, envir = envir)
    return conversion.ri2py(evaluated)
Beispiel #12
0
def _load_and_predict(data):
    """Load every pickled estimator of one base model and average their predictions.

    All files in ``join(models_dir, base_estimator_name)`` are read as
    gzip-compressed pickles.  For estimators that are instances of
    ``ExternalREstimatorMixin`` the ``model_`` attribute is converted with
    ``ri2py``.  The estimators are combined in an ``EnsembleAverage`` which
    predicts on the module-level ``X``.  Unless the last loaded estimator is
    an ``ExternalREstimatorMixin``, ``numpy.exp`` is applied to the
    prediction.

    :param data: pair ``(idx, base_estimator_name)``
    :return: pair ``(idx, prediction)``
    :raises ValueError: if the model directory contains no files
    """
    idx, base_estimator_name = data
    base_estimator_dir = join(models_dir, base_estimator_name)
    estimators = []
    for afile in os.listdir(base_estimator_dir):
        path = join(base_estimator_dir, afile)
        # NOTE(security): unpickling runs arbitrary code; the model
        # directory must only contain trusted files.
        with gzip.open(path, "rb") as fp:
            estimator = pickle.load(fp)
        if isinstance(estimator, ExternalREstimatorMixin):
            estimator.model_ = ri2py(estimator.model_)
        estimators.append(estimator)

    if not estimators:
        # Fail with a clear message rather than on an unbound loop variable.
        raise ValueError("No estimator found in %s" % base_estimator_dir)

    avg_estimator = EnsembleAverage(estimators, name=base_estimator_name)
    pred = avg_estimator.predict(X)
    # presumably every estimator in a directory has the same type, so the
    # last one decides whether exp() is applied -- verify
    if not isinstance(estimators[-1], ExternalREstimatorMixin):
        pred = numpy.exp(pred)

    return idx, pred
Beispiel #13
0
def default_py2ri(o):
    """ Convert an arbitrary Python object to a :class:`rpy2.rinterface.Sexp`,
    creating an R object with the content of the Python object in the process
    (which means data copying).

    Objects that already are :class:`rpy2.rinterface.Sexp` instances are
    returned unchanged.

    :param o: object
    :rtype: :class:`rpy2.rinterface.Sexp` (and subclasses)
    :raises ValueError: if the type of `o` (or the typecode of an
      :class:`array.array`) is not handled

    """
    if isinstance(o, rinterface.Sexp):
        res = o
    elif isinstance(o, RObject):
        # Chained with the Sexp test so that this result is no longer
        # discarded by the branches below.
        res = rinterface.Sexp(o)
    elif isinstance(o, array.array):
        if o.typecode in ('h', 'H', 'i', 'I'):
            res = rinterface.SexpVector(o, rinterface.INTSXP)
        elif o.typecode in ('f', 'd'):
            res = rinterface.SexpVector(o, rinterface.REALSXP)
        else:
            raise ValueError("Nothing can be done for this array type at the moment.")
    elif isinstance(o, bool):
        # bool is tested before int because bool is a subclass of int.
        res = rinterface.SexpVector([o, ], rinterface.LGLSXP)
    elif isinstance(o, int):
        res = rinterface.SexpVector([o, ], rinterface.INTSXP)
    elif isinstance(o, float):
        res = rinterface.SexpVector([o, ], rinterface.REALSXP)
    elif isinstance(o, str):
        res = rinterface.SexpVector([o, ], rinterface.STRSXP)
    elif isinstance(o, unicode):
        res = rinterface.SexpVector([o, ], rinterface.STRSXP)
    elif isinstance(o, list):
        # Each element is converted on its own, then assembled into an R list.
        res = r.list(*[conversion.ri2py(conversion.py2ri(x)) for x in o])
    elif isinstance(o, complex):
        res = rinterface.SexpVector([o, ], rinterface.CPLXSXP)
    else:
        raise ValueError("Nothing can be done for the type %s at the moment." %(type(o)))
    return res
 def testDataFrameToNumpy(self):
     """ A converted DataFrame is a numpy recarray exposing its columns. """
     columns = {'a': 1, 'b': 2}
     df = robjects.vectors.DataFrame(columns)
     rec = conversion.ri2py(df)
     self.assertEqual(numpy.recarray, type(rec))
     for column, expected in (('a', 1), ('b', 2)):
         self.assertEqual(expected, getattr(rec, column)[0])
Beispiel #15
0
            trainer_scanvi.labelled_set.to_monitor = ['accuracy']
            trainer_scanvi.full_dataset.to_monitor = ['entropy_batch_mixing']
            trainer_scanvi.train(n_epochs=n_epochs_scanvi)

            if i == 0:
                print("Score UMI->nonUMI:", trainer_scanvi.unlabelled_set.accuracy())
            else:
                print("Score nonUMI->UMI:", trainer_scanvi.unlabelled_set.accuracy())
    elif model_type == 'Seurat':
        from scvi.harmonization.clustering.seurat import SEURAT
        SEURAT = SEURAT()
        seurat1 = SEURAT.create_seurat(UMI, 0)
        seurat2 = SEURAT.create_seurat(nonUMI, 1)
        latent, batch_indices,labels = SEURAT.combine_seurat(seurat1, seurat2)
        numpy2ri.activate()
        latent  = ri2py(latent)
        batch_indices  = ri2py(batch_indices)
        labels  = ri2py(labels)
        keys,labels = np.unique(labels,return_inverse=True)
        latent  = np.array(latent)
        batch_indices  = np.array(batch_indices)
        labels = np.array(labels)
    elif model_type == 'Combat':
        from scvi.harmonization.clustering.combat import COMBAT
        COMBAT = COMBAT()
    # corrected = COMBAT.combat_correct(gene_dataset)
        latent = COMBAT.combat_pca(gene_dataset)
        latent = latent.T
        batch_indices = np.concatenate(gene_dataset.batch_indices)
        labels = np.concatenate(gene_dataset.labels)
        keys = gene_dataset.cell_types
Beispiel #16
0
    if not ok:
        raise LibraryError("The R package %s could not be imported" %name)
    env = _as_env(rinterface.StrSexpVector(['package:'+name, ]))
    if signature_translation:
        pack = SignatureTranslatedPackage(env, name, 
                                          translation = robject_translations)
    else:
        pack = Package(env, name, translation = robject_translations)
        
    return pack


def wherefrom(symbol, startenv = rinterface.globalenv):
    """ For a given symbol, return the environment
    this symbol is first found in, starting from 'startenv'.

    The chain of enclosing environments is followed until the symbol is
    found.  If the empty environment is reached without finding it, the
    empty environment is what gets returned.

    :param symbol: name to look up
    :param startenv: environment in which the search starts
    :return: the environment, converted with ``conversion.ri2py``
    """
    env = startenv
    while True:
        try:
            env[symbol]
        except LookupError:
            # Not defined here: carry on with the enclosing environment.
            env = env.enclos()
            if env.rsame(rinterface.emptyenv):
                break
        else:
            break
    return conversion.ri2py(env)
Beispiel #17
0
# Fetch an R dataset (mtcars) for use in Python.
import rpy2.robjects as ro
import rpy2.robjects.conversion as conversion
from rpy2.robjects import pandas2ri
pandas2ri.activate()

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# IPython/Jupyter magic (not plain Python): show figures inline.
%matplotlib inline

R = ro.r

# Convert R's mtcars object and show its first rows.
df = conversion.ri2py(R['mtcars'])
print(df.head())

from sklearn.cluster import KMeans

# Bare expression: only displays something in an interactive session.
df.columns

# Select the columns used as input data.
X = df[['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']]

# Import StandardScaler and use it to scale the data.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform( X )

# Set the assumed range for the number of clusters.
Beispiel #18
0
 def getenvironment(self):
     """ Return the environment in which the formula looks up its symbols.

     This is the content of the ``.Environment`` slot, converted with
     ``conversion.ri2py``.
     """
     return conversion.ri2py(self.do_slot(".Environment"))
Beispiel #19
0
    def __getitem__(self, item):
        """ Fetch `item` from R's global environment.

        :param item: name looked up with ``rinterface.globalEnv.get``
        :return: the object found, converted with ``conversion.ri2py``
        """
        res = rinterface.globalEnv.get(item)
        return conversion.ri2py(res)
def r2py(results, p_col=None):
    """Convert `results` with ``ri2py`` and rename its columns.

    Column names are rewritten with ``reformat_name``.  When `p_col` is
    given (and truthy), a ``'signif'`` column is added by applying ``pval``
    to the column whose reformatted name corresponds to `p_col`.

    :param results: object to convert
    :param p_col: optional original name of the column passed to ``pval``
    :return: the converted, renamed table
    """
    tbl = ri2py(results).rename(columns=reformat_name)
    if not p_col:
        return tbl
    tbl['signif'] = tbl[reformat_name(p_col)].apply(pval)
    return tbl
Beispiel #21
0
 def __getattr__(self, attr):
     """ Expose the slot named `attr` as a Python attribute.

     The slot is read with ``do_slot`` and converted with
     ``conversion.ri2py``.
     """
     return conversion.ri2py(self.do_slot(attr))
Beispiel #22
0
 def __getitem__(self, item):
     """ Return the object stored under `item`, converted with
     ``conversion.ri2py``; the lookup itself is done by the parent class.
     """
     found = super(REnvironment, self).__getitem__(item)
     return conversion.ri2py(found)
Beispiel #23
0
    def __getitem__(self, item):
        """ Fetch `item` from R's global environment.

        :param item: name looked up with ``rinterface.globalEnv.get``
        :return: the object found, converted with ``conversion.ri2py``
        """
        res = rinterface.globalEnv.get(item)
        return conversion.ri2py(res)

    #FIXME: check that this is properly working
    def __cleanup__(self):
        """ Terminate the embedded R by calling ``rinterface.endEmbeddedR``. """
        # The former ``del(self)`` only unbound the local name and had no
        # effect on the instance, so it is not kept.
        rinterface.endEmbeddedR()

    def __str__(self):
        """ Return the parent class' string followed by R's version details.

        One ``name: value`` line is added for each entry of the R object
        ``version``, using the first element of each entry as the value.
        """
        header = super(R, self).__str__()
        version = self["version"]
        entries = [name + ': ' + value[0]
                   for name, value in itertools.izip(version.getnames(), version)]
        return header + os.linesep + os.linesep.join(entries)

    def __call__(self, string):
        """ Parse `string` with ``self.parse`` and return the result of
        evaluating the parsed object with ``self.eval``.
        """
        return self.eval(self.parse(text=string))

# Module-level instance of R, created when the module is imported.
r = R()

# rinterface's environments, passed through conversion.ri2py.
globalEnv = conversion.ri2py(rinterface.globalEnv)
baseNameSpaceEnv = conversion.ri2py(rinterface.baseNameSpaceEnv)
emptyEnv = conversion.ri2py(rinterface.emptyEnv)
Beispiel #24
0
 def __getitem__(self, i):
     """ Return the element at `i`.

     Elements that are ``rinterface.Sexp`` instances are converted with
     ``conversion.ri2py``; anything else is returned as it is.
     """
     element = super(RVector, self).__getitem__(i)
     if not isinstance(element, rinterface.Sexp):
         return element
     return conversion.ri2py(element)
 def testDataFrameToNumpy(self):
     """ Converting a DataFrame yields a numpy recarray with its columns. """
     source_frame = robjects.vectors.DataFrame({'a': 1, 'b': 2})
     converted = conversion.ri2py(source_frame)
     self.assertEqual(numpy.recarray, type(converted))
     first_a = converted.a[0]
     self.assertEqual(1, first_a)
     first_b = converted.b[0]
     self.assertEqual(2, first_b)
Beispiel #26
0
 def getdim(self):
     """ Return the result of R's ``dim`` applied to this object,
     converted with ``conversion.ri2py``.
     """
     return conversion.ri2py(r.dim(self))