Exemple #1
0
    def test_convert_r_dataframe(self):
        """Round-trip a DataFrame through ``com.convert_to_r_dataframe``.

        Verifies that row names, column names, an all-NaN column and the
        columns A-D come back unchanged, and that NaN maps to R's NA.
        """
        r_is_na = robj.baseenv.get("is.na")

        frame = pd.DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A'])
        # Column holding nothing but missing values.
        frame["E"] = [np.nan for _ in frame["A"]]
        # Column alternating between a string and a missing value.
        frame["F"] = [np.nan if i % 2 else "text" for i in range(30)]

        r_frame = com.convert_to_r_dataframe(frame)

        row_names = com.convert_robj(r_frame.rownames)
        assert np.array_equal(row_names, frame.index)
        col_names = com.convert_robj(r_frame.colnames)
        assert np.array_equal(col_names, frame.columns)
        assert all(r_is_na(value) for value in r_frame.rx2("E"))

        # Whole-column comparison for the columns taken from the series data.
        for name in frame[["A", "B", "C", "D"]]:
            r_column = r_frame.rx2(name)
            assert np.array_equal(com.convert_robj(r_column), frame[name])

        # Element-wise comparison, treating NaN and NA as equivalent.
        for name in frame[["D", "E"]]:
            for py_value, r_value in zip(frame[name], r_frame.rx2(name)):
                if not pd.isnull(py_value):
                    assert py_value == r_value
                else:
                    assert r_is_na(r_value)
Exemple #2
0
    def test_convert_r_dataframe(self):
        """Check that ``com.convert_to_r_dataframe`` preserves labels and data.

        The frame under test holds columns D, C, B, A built from
        ``tm.getSeriesData()``, an all-NaN column E and a text/NaN column F.
        """
        is_missing_in_r = robj.baseenv.get("is.na")

        series_by_name = tm.getSeriesData()
        source = pd.DataFrame(series_by_name, columns=["D", "C", "B", "A"])
        source["E"] = [np.nan for _unused in source["A"]]  # null data
        source["F"] = [  # some mixed type data
            "text" if position % 2 == 0 else np.nan for position in range(30)
        ]

        r_side = com.convert_to_r_dataframe(source)

        # Row and column labels must survive the conversion.
        assert np.array_equal(com.convert_robj(r_side.rownames), source.index)
        assert np.array_equal(com.convert_robj(r_side.colnames), source.columns)

        # Every entry of the all-NaN column must be NA on the R side.
        na_flags = [is_missing_in_r(entry) for entry in r_side.rx2("E")]
        assert all(na_flags)

        for label in source[["A", "B", "C", "D"]]:
            round_tripped = com.convert_robj(r_side.rx2(label))
            assert np.array_equal(round_tripped, source[label])

        for label in source[["D", "E"]]:
            pairs = zip(source[label], r_side.rx2(label))
            for expected, actual in pairs:
                if pd.isnull(expected):
                    assert is_missing_in_r(actual)
                    continue
                assert expected == actual
def SCCA_r(X, Y, n_components, pen):
    """Run R's ``CCA`` function on two data sets and return its loadings.

    ``X`` and ``Y`` are wrapped in DataFrames, converted to R matrices and
    stored in R's global environment under the names ``X`` and ``Y``; the
    result is stored there as ``out`` (existing R globals with these names
    are overwritten).

    Parameters
    ----------
    X, Y : array-like
        Anything accepted by ``pd.DataFrame``.
    n_components : int
        Passed to ``CCA`` as ``K``.
    pen : sequence of two floats
        ``pen[0]`` is passed as ``penaltyx`` and ``pen[1]`` as ``penaltyz``.

    Returns
    -------
    loadings : tuple
        ``(x_loadings, y_loadings)`` taken from the ``u`` and ``v`` entries
        of the R result.
    cors : numpy.ndarray
        The ``cors`` entry of the R result.
    """
    rmat_X = com.convert_to_r_matrix(pd.DataFrame(X))
    rmat_Y = com.convert_to_r_matrix(pd.DataFrame(Y))

    ri.globalenv['X'] = rmat_X
    ri.globalenv['Y'] = rmat_Y

    com.r(
        """
        out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE,
                   penaltyx = %f, penaltyz = %f)
        """ % (n_components, pen[0], pen[1]))

    # Convert the results back to dataframes and then to numpy arrays.
    # NOTE(review): the entries are addressed by position (1, 2, 16) and then
    # by name; this presumably matches the layout of the CCA result - confirm
    # against the installed R package version.
    df_u = com.convert_robj(com.r('out[1]'))['u']
    df_v = com.convert_robj(com.r('out[2]'))['v']
    cors = com.convert_robj(com.r('out[16]'))['cors']

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` gives
    # the same ndarray and exists in every pandas version.
    x_loadings = df_u.values
    y_loadings = df_v.values
    cors = np.array(cors)

    loadings = (x_loadings, y_loadings)

    return loadings, cors
Exemple #4
0
    def sample(self, niter, thin=1, variables=None, run_diagnostic=True):
        """
        Run the R sampling script and return the samples.

        niter, thin: substituted into the ``_R_sample_dic`` R script.
        variables: if None, use all as extracted with self.get_variables(which='unobserved')
        run_diagnostic: if true, compute ``gelman.diag`` on the samples, store
            it in ``self._gelmandiag_last_run`` and warn when any value in its
            first column exceeds 1.05; otherwise that attribute is set to None.

        Returns the converted result of ``as.matrix(pyjags_samp$samples)``.
        Also stores the converted ``pyjags_samp$dic`` in ``self._dic_last_run``.
        """
        if not self._burnin_ok:
            print("WARNING: you might want to run burnin() first")
        # Identity test on purpose: ``== None`` is evaluated element-wise for
        # array-like input and then fails in a boolean context.
        if variables is None:
            variables = self.get_variables(which='unobserved')
        robj.r.assign('pyjags_variables', np.array(variables))

        with capture_output():  # get rid of some remaining output
            robj.r(_R_sample_dic.format(niter=niter,
                                        thin=thin))

        ## temporarily disable numpy conversion
        rpy2.robjects.numpy2ri.deactivate()
        try:
            if run_diagnostic:
                robj.r('pyjags_gelman=gelman.diag(pyjags_samp$samples)$psrf')
                self._gelmandiag_last_run = com.convert_robj(robj.r('pyjags_gelman'))
                if np.any(self._gelmandiag_last_run.iloc[:, 0] > 1.05):
                    print("WARNING: there may be problems with your convergence (some R>1.05)")
            else:
                self._gelmandiag_last_run = None
            ms = com.convert_robj(robj.r('as.matrix(pyjags_samp$samples)'))
            self._dic_last_run = com.convert_robj(robj.r('pyjags_samp$dic'))
        finally:
            ## enable numpy conversion again, even if a conversion above failed
            rpy2.robjects.numpy2ri.activate()

        return ms
Exemple #5
0
    def test_convert_r_matrix(self):
        """Round-trip a DataFrame through ``com.convert_to_r_matrix``.

        Checks labels, the all-NaN column and columns A-D, then converts a
        frame with a text/NaN column, tolerating only ``TypeError``.
        """
        r_is_na = robj.baseenv.get("is.na")

        frame = pd.DataFrame(tm.getSeriesData(), columns=["D", "C", "B", "A"])
        frame["E"] = [np.nan for _ in frame["A"]]  # null data

        r_matrix = com.convert_to_r_matrix(frame)

        row_names = com.convert_robj(r_matrix.rownames)
        assert np.array_equal(row_names, frame.index)
        col_names = com.convert_robj(r_matrix.colnames)
        assert np.array_equal(col_names, frame.columns)
        assert all(r_is_na(cell) for cell in r_matrix.rx(True, "E"))

        for name in frame[["A", "B", "C", "D"]]:
            r_column = r_matrix.rx(True, name)
            assert np.array_equal(com.convert_robj(r_column), frame[name])

        # Pandas bug 1282
        frame["F"] = [np.nan if i % 2 else "text" for i in range(30)]

        # A TypeError is tolerated; any other exception propagates unchanged.
        try:
            com.convert_to_r_matrix(frame)
        except TypeError:
            pass
Exemple #6
0
    def _ccaPermute(self, X, Z, **params):
        """Performs CCA.permute from the PMA package to see which penalty values are better.

        X, Z are passed straight to R's ``CCA.permute``; ``params`` override
        the defaults ``typex="standard"``, ``typez="standard"``, ``trace=True``.
        The per-penalty results are printed as a table, followed by the best
        penalties. Nothing is returned.
        """
        # The handle is unused; presumably loading PMA is what makes
        # ``CCA.permute`` resolvable through ``ro.r`` below.
        importr("PMA")

        kwParams = {"typex": "standard", "typez": "standard", "trace": True}
        kwParams.update(params)

        print("\tCCA permute parameters:", kwParams)

        cca_permute = dict(ro.r['CCA.permute'](X, Z, **kwParams).items())

        # R result field -> column title shown in the printed table.
        header = [
            'penaltyxs', 'penaltyzs', 'zstats', 'pvals', 'cors', 'ft.corperms',
            'nnonzerous', 'nnonzerovs'
        ]
        header2 = [
            "X Penalty", "Z Penalty", "Z-Stat", "P-Value", "Cors", "FT(Cors)",
            "# U's Non-Zero", "# Vs Non-Zero"
        ]

        df = pd.DataFrame(
            {h: com.convert_robj(cca_permute[h])
             for h in header},
            columns=header)
        df.columns = header2
        # 1-based row labels sized from the data; a fixed range(1, 18) only
        # fits a grid of exactly 17 penalty values.
        df.index = range(1, len(df) + 1)

        print("\n", df)
        print()
        print("Best L1 bound for x: %.5f" %
              com.convert_robj(cca_permute["bestpenaltyx"])[0])
        print("Best L1 bound for z: %.5f" %
              com.convert_robj(cca_permute["bestpenaltyz"])[0])
Exemple #7
0
    def ccaOutcomesVsControls(self, penaltyX = None, penaltyZ = None, NAthresh = 4):
        """Performs CCA using controls and outcomes, no language"""
        # Returns (Xcomp_dict, Zcomp_dict, d_dict); see the comments near the
        # end of the method for their layout.
        (groups, allOutcomes, controls) = self.outcomeGetter.getGroupsAndOutcomes()
        # groups: set(group_ids)
        # allOutcomes: {outcome: {group_id: value}}
        # controls: {control: {group_id: value}}

        print "X: controls\nZ: outcomes"
        Zdict = allOutcomes
        Xdict = controls

        # R doesn't handle '$'s in column names
        Xdict = {k.replace('$','.'):v for k, v in Xdict.iteritems()}
        Zdict = {k.replace('$','.'):v for k, v in Zdict.iteritems()}

        Xdf = pd.DataFrame(data=Xdict)
        Zdf = pd.DataFrame(data=Zdict)

        # X, Z, Xfreqs, Zfreqs = self.prepMatrices(Xdf,Zdf, NAthresh = NAthresh, softImputeXtoo=True)
        X, Z, Xfreqs, Zfreqs = self.prepMatricesTogether(Xdf,Zdf, NAthresh = NAthresh)
        # Penalties are forwarded only when truthy, so None and 0 both leave
        # the choice to self._cca.
        kwParams = {}
        if penaltyX: kwParams['penaltyx'] = penaltyX
        if penaltyZ: kwParams['penaltyz'] = penaltyZ
        # kwParams['upos'] = True
        # kwParams['vneg'] = True

        cca = self._cca(X,Z, **kwParams)

        Xcomp = com.convert_robj(cca['u']) # Controls
        Zcomp = com.convert_robj(cca['v']) # Outcomes

        d = com.convert_robj(cca['d']) # Something
        # Keep the converted result on the instance.
        self.model = {
            'u': Xcomp,
            'v': Zcomp,
            'd': d,
        }

        # NOTE(review): str.strip("X") removes every leading AND trailing "X",
        # not a single prefix - presumably meant to undo a name prefix added on
        # the R side; verify for names that really start or end with "X".
        featureNames = X.columns
        Xcomp.index = [i.strip("X") for i in featureNames]
        Xfreqs = {k.strip("X"): v for k,v in Xfreqs.iteritems()}
        # Component columns are labelled "00_comp", "01_comp", ...
        Xcomp.columns = ["%.2d_comp" % i for i in xrange(Xcomp.shape[1])]

        outcomeNames = Z.columns
        Zcomp.index = [i.strip("X") for i in outcomeNames]
        Zfreqs = {k.strip("X"): v for k,v in Zfreqs.iteritems()}
        Zcomp.columns = ["%.2d_comp" % i for i in xrange(Zcomp.shape[1])]

        # Rows of Xcomp followed by rows of Zcomp.
        Zcomp2 = pd.concat([Xcomp, Zcomp])

        # {component: {row name: (value, 0.0 if value != 0 else 1,
        #                         cca["nGroups"], frequency)}}
        Xcomp_dict = {k: {i:(j,
                             0.0 if j != 0 else 1,
                             cca["nGroups"],
                             Xfreqs[i]) for i, j in v.iteritems()} for k, v in Xcomp.to_dict().iteritems()}
        # Same layout, built from the stacked frame; the frequency comes from
        # Zfreqs when the row name is present there, otherwise from Xfreqs.
        Zcomp_dict = {k: {i:(j,0.0 if j != 0 else 1,cca["nGroups"],
                             Zfreqs[i] if i in Zfreqs.keys() else Xfreqs[i]
                         ) for i, j in v.iteritems()} for k, v in Zcomp2.to_dict().iteritems()}

        # Pair each component column label with the matching entry of d.
        d_dict = dict(zip(Zcomp.columns,d))
        return Xcomp_dict, Zcomp_dict, d_dict
Exemple #8
0
    def ccaOutcomesVsControls(self, groupFreqThresh = 0, penaltyX = None, penaltyZ = None, NAthresh = 4):
        """Performs CCA using controls and outcomes, no language"""
        # groupFreqThresh is forwarded to outcomeGetter.getGroupsAndOutcomes.
        # Returns (Xcomp_dict, Zcomp_dict, d_dict); their layout is described
        # next to the comprehensions that build them.
        (groups, allOutcomes, controls) = self.outcomeGetter.getGroupsAndOutcomes(groupFreqThresh)
        # groups: set(group_ids)
        # allOutcomes: {outcome: {group_id: value}}
        # controls: {control: {group_id: value}}

        print "X: controls\nZ: outcomes"
        Zdict = allOutcomes
        Xdict = controls

        # R doesn't handle '$'s in column names
        Xdict = {k.replace('$','.'):v for k, v in Xdict.iteritems()}
        Zdict = {k.replace('$','.'):v for k, v in Zdict.iteritems()}

        Xdf = pd.DataFrame(data=Xdict)
        Zdf = pd.DataFrame(data=Zdict)

        # X, Z, Xfreqs, Zfreqs = self.prepMatrices(Xdf,Zdf, NAthresh = NAthresh, softImputeXtoo=True)
        X, Z, Xfreqs, Zfreqs = self.prepMatricesTogether(Xdf,Zdf, NAthresh = NAthresh)
        # Only truthy penalties are forwarded (None and 0 are both skipped).
        kwParams = {}
        if penaltyX: kwParams['penaltyx'] = penaltyX
        if penaltyZ: kwParams['penaltyz'] = penaltyZ
        # kwParams['upos'] = True
        # kwParams['vneg'] = True

        cca = self._cca(X,Z, **kwParams)

        Xcomp = com.convert_robj(cca['u']) # Controls
        Zcomp = com.convert_robj(cca['v']) # Outcomes

        d = com.convert_robj(cca['d']) # Something
        # The converted pieces are also stored on the instance.
        self.model = {
            'u': Xcomp,
            'v': Zcomp,
            'd': d,
        }

        # NOTE(review): strip("X") drops all leading and trailing "X"
        # characters, not just one prefix - confirm this is intended for
        # names that legitimately begin or end with "X".
        featureNames = X.columns
        Xcomp.index = [i.strip("X") for i in featureNames]
        Xfreqs = {k.strip("X"): v for k,v in Xfreqs.iteritems()}
        # Columns become "00_comp", "01_comp", ... (zero-based, two digits).
        Xcomp.columns = ["%.2d_comp" % i for i in xrange(Xcomp.shape[1])]

        outcomeNames = Z.columns
        Zcomp.index = [i.strip("X") for i in outcomeNames]
        Zfreqs = {k.strip("X"): v for k,v in Zfreqs.iteritems()}
        Zcomp.columns = ["%.2d_comp" % i for i in xrange(Zcomp.shape[1])]

        # Stack the control rows on top of the outcome rows.
        Zcomp2 = pd.concat([Xcomp, Zcomp])

        # Per component column: row name -> (value, 0.0 if value != 0 else 1,
        # cca["nGroups"], frequency of that row name).
        Xcomp_dict = {k: {i:(j,
                             0.0 if j != 0 else 1,
                             cca["nGroups"],
                             Xfreqs[i]) for i, j in v.iteritems()} for k, v in Xcomp.to_dict().iteritems()}
        # Built from the stacked frame; frequency lookup prefers Zfreqs and
        # falls back to Xfreqs for row names missing there.
        Zcomp_dict = {k: {i:(j,0.0 if j != 0 else 1,cca["nGroups"],
                             Zfreqs[i] if i in Zfreqs.keys() else Xfreqs[i]
                         ) for i, j in v.iteritems()} for k, v in Zcomp2.to_dict().iteritems()}

        # Component column label -> corresponding entry of d.
        d_dict = dict(zip(Zcomp.columns,d))
        return Xcomp_dict, Zcomp_dict, d_dict
Exemple #9
0
    def _ccaPermute(self, X, Z, **params):
        """Performs CCA.permute from the PMA package to see which penalty values are better.

        X and Z are handed to R's ``CCA.permute`` together with the keyword
        defaults below (overridable through ``params``). A table of the
        per-penalty results and the best penalties are printed; there is no
        return value.
        """
        # Handle unused; presumably the import is needed so that
        # ``CCA.permute`` can be looked up through ``ro.r``.
        importr("PMA")

        kwParams = {"typex": "standard",
                    "typez": "standard",
                    "trace": True}
        kwParams.update(params)

        # Single pre-formatted argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("\tCCA permute parameters: %s" % (kwParams,))

        cca_permute = dict(ro.r['CCA.permute'](X, Z, **kwParams).items())
        header = ['penaltyxs', 'penaltyzs', 'zstats', 'pvals', 'cors',
                  'ft.corperms', 'nnonzerous', 'nnonzerovs']
        header2 = ["X Penalty", "Z Penalty", "Z-Stat", "P-Value", "Cors",
                   "FT(Cors)", "# U's Non-Zero", "# Vs Non-Zero"]

        df = pd.DataFrame({h: com.convert_robj(cca_permute[h]) for h in header},
                          columns=header)
        df.columns = header2
        # Size the 1-based index from the data; the former fixed 1..17 range
        # raised a length mismatch for any other penalty grid.
        df.index = range(1, len(df) + 1)

        print("\n%s" % (df,))
        print("")
        print("Best L1 bound for x: %.5f" % com.convert_robj(cca_permute["bestpenaltyx"])[0])
        print("Best L1 bound for z: %.5f" % com.convert_robj(cca_permute["bestpenaltyz"])[0])
Exemple #10
0
    def sample(self, niter, thin=1, variables=None, run_diagnostic=True):
        """
        Draw samples by running the ``_R_sample_dic`` R script.

        niter, thin: formatted into the R script.
        variables: if None, use all as extracted with self.get_variables(which='unobserved')
        run_diagnostic: when true, ``gelman.diag`` is evaluated on the samples,
            kept in ``self._gelmandiag_last_run``, and a warning is printed if
            any entry of its first column is above 1.05; when false the
            attribute is reset to None.

        Returns the converted ``as.matrix(pyjags_samp$samples)``; the converted
        ``pyjags_samp$dic`` is kept in ``self._dic_last_run``.
        """
        if not self._burnin_ok:
            print("WARNING: you might want to run burnin() first")
        # ``is None`` rather than ``== None``: the latter compares array-like
        # arguments element-wise and cannot be used as a condition.
        if variables is None:
            variables = self.get_variables(which='unobserved')
        robj.r.assign('pyjags_variables', np.array(variables))

        with capture_output():  # get rid of some remaining output
            robj.r(_R_sample_dic.format(niter=niter, thin=thin))

        ## temporarily disable numpy conversion
        rpy2.robjects.numpy2ri.deactivate()
        try:
            if run_diagnostic:
                robj.r('pyjags_gelman=gelman.diag(pyjags_samp$samples)$psrf')
                gelman = com.convert_robj(robj.r('pyjags_gelman'))
                self._gelmandiag_last_run = gelman
                if np.any(gelman.iloc[:, 0] > 1.05):
                    print("WARNING: there may be problems with your convergence (some R>1.05)")
            else:
                self._gelmandiag_last_run = None
            ms = com.convert_robj(robj.r('as.matrix(pyjags_samp$samples)'))
            self._dic_last_run = com.convert_robj(robj.r('pyjags_samp$dic'))
        finally:
            ## always enable numpy conversion again, also after an error
            rpy2.robjects.numpy2ri.activate()

        return ms
Exemple #11
0
    def test_convert_r_matrix(self):
        """Check that ``com.convert_to_r_matrix`` preserves labels and values.

        A final conversion of a frame containing a text/NaN column is allowed
        to raise ``TypeError`` (see pandas bug 1282) but nothing else.
        """
        is_missing_in_r = robj.baseenv.get("is.na")

        series_by_name = tm.getSeriesData()
        source = pd.DataFrame(series_by_name, columns=['D', 'C', 'B', 'A'])
        # Null data
        source["E"] = [np.nan for _unused in source["A"]]

        as_r_matrix = com.convert_to_r_matrix(source)

        assert np.array_equal(com.convert_robj(as_r_matrix.rownames), source.index)
        assert np.array_equal(com.convert_robj(as_r_matrix.colnames), source.columns)

        na_flags = [is_missing_in_r(entry) for entry in as_r_matrix.rx(True, "E")]
        assert all(na_flags)

        for label in source[["A", "B", "C", "D"]]:
            round_tripped = com.convert_robj(as_r_matrix.rx(True, label))
            assert np.array_equal(round_tripped, source[label])

        # Pandas bug 1282
        source["F"] = [
            "text" if position % 2 == 0 else np.nan for position in range(30)
        ]

        try:
            com.convert_to_r_matrix(source)
        except TypeError:
            # Accepted outcome; every other exception type propagates.
            pass
Exemple #12
0
 def test_fit_with_pandas_data(self, Model, dataframe):
     """``fit`` must hand X, y and the extra keyword on to the R function."""
     features, target = dataframe
     fitted = Model(scriptname='myscript', funcname='myfunc', some='kwarg')
     fitted.fit(features, target)

     # ``call_args`` is indexed as [0] for positionals and [1] for keywords.
     call = fitted.r['myfunc'].call_args
     passed_features = convert_robj(call[0][0])
     assert (passed_features == features).all().all()
     passed_target = convert_robj(call[0][1])
     assert (passed_target == target).all()
     assert call[1]['some'] == 'kwarg'
Exemple #13
0
def to_py(o, skip_list=False):
    """
        Converts to python object if possible.
        Otherwise wraps in ROBjectWrapper

        Conversion is attempted in this order: class-specific converters
        (``xts``, ``POSIXct``, ``logical``), ``DataFrame``, ``ListVector``
        (unless ``skip_list`` is true), then the generic
        ``rcommon.convert_robj`` fallback. A result of length 1 is unwrapped
        to its first element. If nothing converted, a ``SexpVector`` is
        wrapped in ``RObjectWrapper`` and anything else is returned as is.
    """
    # Class names can come from the "class" slot and from ``rclass``.
    try:
        slot_classes = list(o.do_slot("class"))
    except LookupError:
        slot_classes = []

    # ``except Exception`` instead of a bare ``except`` throughout, so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        rclass = list(o.rclass)
    except Exception:
        rclass = []

    classes = rclass + slot_classes

    res = None
    if isinstance(o, SexpVector) and classes:
        if 'xts' in classes:
            res = rconv.convert_xts_to_df(o)
        elif 'POSIXct' in classes:
            res = rconv.convert_posixct_to_index(o)
        elif 'logical' in classes:
            res = rcommon._convert_vector(o)

    if res is None and isinstance(o, DataFrame):
        res = rcommon.convert_robj(o)

    if res is None and isinstance(o, ListVector) and not skip_list:
        res = convert_ListVector(o)

    if res is None:
        try:
            res = rcommon.convert_robj(o)  # fallback to pandas
        except Exception:
            # Best effort: an unconvertible object is handled further down.
            pass

    # Unwrap single-element results. Objects without a length, or without
    # an element 0, are left untouched.
    try:
        if len(res) == 1:
            return res[0]
    except Exception:
        pass

    if res is None and isinstance(o, SexpVector):
        res = RObjectWrapper(o)

    if res is None:
        res = o

    return res
Exemple #14
0
def _hw_column(r_expression, field):
    """Evaluate *r_expression* in R and collect *field* from each entry.

    Returns a one-column DataFrame named *field*, indexed by the
    integer-cast keys of the converted R result; element 0 of each entry's
    *field* is used as the value.
    """
    per_row = com.convert_robj(ro.r(r_expression))
    labels = []
    values = []
    for key, stats in per_row.items():
        labels.append(int(key))
        values.append(stats[field][0])
    return pd.DataFrame(values, index=labels, columns=[field])


def runHW(param, meta):
    """Return *meta* extended with ``pval`` and ``f`` columns.

    A cached result is read from ``param['dspath'] + param['dsname'] +
    '.hw.df'`` when that pickle can be loaded. Otherwise the columns listed
    in ``param['biallele']`` are sent to R, ``HWExact`` and ``HWChisq`` (from
    the HardyWeinberg package) are applied row-wise, their ``pval`` and ``f``
    results are left-merged into *meta* on the index, and the merged frame
    is pickled to the cache path before being returned.
    """
    cache_file = param['dspath'] + param['dsname'] + '.hw.df'
    try:
        return pd.read_pickle(cache_file)
    except Exception:
        # Cache missing or unreadable: recompute below. ``Exception`` rather
        # than a bare ``except`` so Ctrl-C is not mistaken for a cache miss.
        pass

    geno = meta[param['biallele']]
    ro.r('library(HardyWeinberg)')
    ro.globalenv['geno'] = com.convert_to_r_dataframe(geno)

    for r_function, field in (('HWExact', 'pval'), ('HWChisq', 'f')):
        r_expression = 'apply(geno,1,function(x) %s(as.numeric(x)))' % r_function
        column = _hw_column(r_expression, field)
        meta = pd.merge(meta, column, left_index=True, right_index=True, how='left')

    meta.to_pickle(cache_file)
    return meta
Exemple #15
0
    def transform(self, method="vst", inplace=True):
        """
        Perform a transformation on the counts table.

        Current methods are:
         - "vst": DESeq2 variance stabilising transformation
         - "rlog": DESeq2 rlog transformation

        With ``inplace`` true, ``self.table`` is replaced and nothing is
        returned; otherwise a clone carrying the transformed table is
        returned and ``self.table`` is left unchanged.
        """

        # Message fixed: the implicit concatenation used to yield
        # "method must be one of[vst, rlog]".
        assert method in ["vst", "rlog"], "method must be one of [vst, rlog]"

        method2function = {"vst": "varianceStabilizingTransformation", "rlog": "rlog"}

        # Only the R function name is substituted into the script; an explicit
        # mapping avoids exposing every local through ``locals()``.
        transform = R(
            """
        function(df){

        suppressMessages(library('DESeq2'))

        design = data.frame(row.names = colnames(df),
                            condition = seq(1, length(colnames(df))))

        dds <- suppressMessages(DESeqDataSetFromMatrix(
                 countData= df, colData = design, design = ~condition))

        transformed <- suppressMessages(%(t_function)s(dds))
        transformed_df <- as.data.frame(assay(transformed))

        return(transformed_df)
        }"""
            % {"t_function": method2function[method]}
        )

        r_counts = com.convert_to_r_dataframe(self.table)
        r_df = com.convert_robj(transform(r_counts))
        # losing rownames for some reason during the conversion?!
        r_df.index = self.table.index

        transformed_table = com.convert_robj(r_df)
        # R replaces "-" in column names with ".". Revert back!
        transformed_table.columns = [
            x.replace(".", "-") for x in transformed_table.columns
        ]

        if inplace:
            self.table = transformed_table
        else:
            tmp_counts = self.clone()
            tmp_counts.table = transformed_table
            return tmp_counts
Exemple #16
0
    def test_convert_matrix(self):
        """A converted R matrix must keep its row and column labels."""
        r_matrix = self._test_matrix()
        frame = com.convert_robj(r_matrix)

        expected_rows = ['a', 'b', 'c']
        expected_cols = ['one', 'two', 'three']
        assert np.array_equal(frame.index, expected_rows)
        assert np.array_equal(frame.columns, expected_cols)
Exemple #17
0
def runComBat(infiles, outfile):
    """Run the R ``runComBat`` function on an expression table and save it.

    ``infiles`` is a pair ``(expression file, annotation file)``; the result
    is written to ``outfile`` as a tab-separated table.
    """
    expression_path, annotation_path = infiles

    # Expression values indexed by gene symbol, without samples B8N and B10C.
    expression = pd.read_table(expression_path, index_col='gene_symbol')
    expression = expression.drop(['B8N', 'B10C'], axis=1)

    # Annotation rows restricted (and ordered) to the remaining samples.
    annotation = pd.read_table(annotation_path, index_col='sample_name')
    annotation = annotation.loc[expression.columns]

    r_expression = com.convert_to_r_dataframe(expression)
    r_annotation = com.convert_to_r_dataframe(annotation)
    corrected = r.runComBat(r_expression,
                            r_annotation,
                            covariateFormula='~treatment',
                            batchColumn='patient')

    # Back to pandas, then to disk.
    com.convert_robj(corrected).to_csv(
        outfile, sep='\t', index_label='gene_symbol')
Exemple #18
0
    def test_convert_nested_list(self):
        """A nested R list must convert to a nested dict of lists."""
        r_list = r('list(a=list(foo=1, bar=2))')
        inner = {'foo': [1], 'bar': [2]}
        tm.assert_dict_equal(com.convert_robj(r_list), {'a': inner})
Exemple #19
0
    def test_convert_list(self):
        """A flat named R list must convert to a dict of one-item lists."""
        r_list = r('list(a=1, b=2, c=3)')
        result = com.convert_robj(r_list)
        tm.assert_dict_equal(result, dict(a=[1], b=[2], c=[3]))
Exemple #20
0
 def RsoftImpute(self, X):
     """Fill in *X* using R's softImpute package and return the result.

     *X* is converted to an R data frame, scaled with ``biScale``
     (``maxit=100``), fitted with ``softImpute`` and completed with
     ``complete``; the completed object is converted back with
     ``com.convert_robj``.
     """
     soft_impute_pkg = importr("softImpute")
     r_frame = com.convert_to_r_dataframe(X)
     scaled = soft_impute_pkg.biScale(r_frame, maxit=100)
     fitted = soft_impute_pkg.softImpute(scaled)
     completed = soft_impute_pkg.complete(r_frame, fitted)
     return com.convert_robj(completed)
def runDESeq(infile, outfiles, outfileRoot):
    """Run the R ``runDESeq2`` function for every pair of sample types.

    The sample type of a column is the text after its last ``-``. Only types
    with at least five samples are compared. Each result is written to
    ``<outfileRoot><typeB>v<typeA>.txt`` as a tab-separated table.

    Parameters
    ----------
    infile : str
        Tab-separated count table with a ``gene_symbol`` column.
    outfiles
        Not used by this function; kept for the existing call signature.
    outfileRoot : str
        Prefix of every output path; missing directories are created.
    """
    # Report
    print('Doing ' + infile + '...')

    # Read dataframe
    countDataframe = pd.read_table(infile, index_col='gene_symbol')

    # One annotation row per sample column.
    annotationDataframe = pd.DataFrame([
        {'sample_id': x, 'sample_type': x.split('-')[-1]}
        for x in countDataframe.columns
    ]).set_index('sample_id')

    # Number of samples per sample type (previously computed twice).
    sampleCounts = collections.Counter(
        x.split('-')[-1] for x in countDataframe.columns)

    # Every pair of sufficiently large sample types, each pair reversed.
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    eligibleTypes = [key for key, value in sampleCounts.items() if value >= 5]
    comparisons = [
        list(pair[::-1]) for pair in itertools.combinations(eligibleTypes, 2)
    ]

    # Loop through comparisons
    for comparison in comparisons:

        # Restrict both tables to the samples of the two types.
        annotationDataframeSubset = annotationDataframe[
            annotationDataframe['sample_type'].isin(comparison)]
        countDataframeSubset = countDataframe[annotationDataframeSubset.index]

        # Run function
        deseqDataframe = r.runDESeq2(
            com.convert_to_r_dataframe(countDataframeSubset),
            com.convert_to_r_dataframe(annotationDataframeSubset),
            '~ sample_type')

        # Convert to dataframe
        deseqDataframe = com.convert_robj(deseqDataframe)

        # Get outfile
        comparisonString = 'v'.join(comparison)
        outfile = '{}{}.txt'.format(outfileRoot, comparisonString)

        # Create outdir. The directory part is empty when ``outfileRoot`` has
        # no path component, and ``os.makedirs('')`` would raise.
        outDir = os.path.dirname(outfile)
        if outDir and not os.path.exists(outDir):
            os.makedirs(outDir)

        # Write
        deseqDataframe.to_csv(outfile, sep='\t', index_label='gene_symbol')
Exemple #22
0
def get_stats(s):
    """Collect summary statistics of the R object *s* into one Series.

    ``base.summary(s)`` is evaluated and three parts are combined under the
    outer keys ``hazard``, ``LR`` and ``concordance``:

    - ``hazard``: the ``feature`` row of the ``conf.int`` table,
    - ``LR``: ``logtest`` labelled ``stat``, ``df``, ``p``,
    - ``concordance``: ``concordance`` labelled ``stat``, ``se``.
    """
    b = base.summary(s)
    # ``.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
    # equivalent for this lookup.
    hazard = convert_robj(b.rx2('conf.int')).loc['feature']
    stat = pd.Series(b.rx2('logtest'), index=['stat', 'df', 'p'])
    concordance = pd.Series(b.rx2('concordance'), index=['stat', 'se'])
    ret = pd.concat([hazard, stat, concordance], keys=['hazard', 'LR', 'concordance'])
    return ret
    def predict(self, xtest):
        """Predict a class for every row of the test set.

        Parameters
        ----------
        xtest : pd.DataFrame
            features for test set

        Returns
        -------
        pd.Series
            The ``pred_class`` column, reindexed to ``xtest.index`` and
            shifted down by one.
        """
        use_pandas2ri = new_pandas_flag

        # Hand the features to R with whichever converter is in use.
        if use_pandas2ri:
            r_features = pandas2ri.py2ri(xtest)
        else:
            r_features = com.convert_to_r_dataframe(xtest)

        r_prediction = self.rf_pred(self.rf, r_features)

        if use_pandas2ri:
            # Element 1 holds the row labels, element 0 the predicted classes.
            r_labels = r_prediction[1]
            r_classes = r_prediction[0]
            genes = pandas2ri.ri2py(r_labels)
            pred_class = pandas2ri.ri2py(r_classes)
        else:
            converted = com.convert_robj(r_prediction)
            genes, pred_class = zip(*converted.items())

        result = pd.DataFrame({'pred_class': pred_class}, index=genes)
        result = result.reindex(xtest.index)
        result -= 1  # for some reason the class numbers start at 1
        return result['pred_class']
Exemple #24
0
    def ccaPermuteOutcomesVsControls(self, nPerms = 25, penaltyXs = None , penaltyZs = None):
        """Run ``self._ccaPermute`` with controls as X and outcomes as Z.

        ``nPerms`` is passed on as ``nperms``. A falsy ``penaltyXs`` /
        ``penaltyZs`` is replaced by ``np.arange(.1, .91, .05)`` wrapped in an
        R float vector.
        """
        (groups, allOutcomes, controls) = self.outcomeGetter.getGroupsAndOutcomes()
        # groups: set(group_ids)
        # allOutcomes: {outcome: {group_id: value}}
        # controls: {control: {group_id: value}}

        def default_penalties():
            return ro.vectors.FloatVector(np.arange(.1,.91,.05))

        # R doesn't handle '$'s in column names
        Xdict = {name.replace('$','.'): values for name, values in controls.items()}
        Zdict = {name.replace('$','.'): values for name, values in allOutcomes.items()}

        x_frame = pd.DataFrame(data=Xdict)
        z_frame = pd.DataFrame(data=Zdict)
        X, Z, Xfreqs, Zfreqs = self.prepMatricesTogether(x_frame, z_frame)

        # A NameError here means the converter module was never imported.
        try:
            X = com.convert_to_r_dataframe(X)
            Z = com.convert_to_r_dataframe(Z)
            group_count = com.convert_robj(ro.r["nrow"](X)[0])
        except NameError:
            warn("pandas.rpy.common cannot be imported")
            sys.exit(1)

        kwParams = {
            "nperms": nPerms,
            "penaltyxs": penaltyXs if penaltyXs else default_penalties(),
            "penaltyzs": penaltyZs if penaltyZs else default_penalties(),
        }

        self._ccaPermute(X, Z, **kwParams)
Exemple #25
0
    def test_convert_list(self):
        """``convert_robj`` on a named R list yields name -> [value]."""
        expected = {name: [number] for name, number in (('a', 1), ('b', 2), ('c', 3))}
        actual = com.convert_robj(r('list(a=1, b=2, c=3)'))
        tm.assert_dict_equal(actual, expected)
 def forecast(self, resampled_df, data_freq=52, number_of_predctions=5):
     """Fit a Holt-Winters model in R and forecast the next values.

     Parameters
     ----------
     resampled_df : pd.DataFrame
         Series data; the names of its first and last rows are parsed as
         dates.
     data_freq : int
         Only 52 and 12 are handled. For 52 the forecast is re-indexed with
         dates stepping 7 days from the last row.
     number_of_predctions : int
         Forecast horizon, passed to R as ``h``.

     Returns
     -------
     tuple
         ``(forecast values as a pandas column, raw R forecast object)``.

     Raises
     ------
     ValueError
         If ``data_freq`` is neither 52 nor 12. This used to surface as a
         NameError after all the R work had been done.
     """
     if data_freq not in (52, 12):
         raise ValueError("data_freq must be 52 or 12, got %r" % (data_freq,))

     # start and end date of the series (positional access; ``.ix`` was
     # removed in pandas 1.0)
     start_date = pd.to_datetime(resampled_df.iloc[0].name).date()
     end_date = pd.to_datetime(resampled_df.iloc[-1].name).date()

     r_series = self.convert_to_r_series(resampled_df, start_date, data_freq)

     # fit the model
     # NOTE(review): a log-transformed series used to be computed here but was
     # never used; the fit has always been on the untransformed series.
     holt_winter_fit = self.stats.HoltWinters(r_series)

     # forecast
     holt_winter_forecast = self.forecast_lib.forecast_HoltWinters(
         holt_winter_fit, h=number_of_predctions)

     # prepare and convert results to pandas dataframe
     reshaped_melted_results = self.reshape.melt(holt_winter_forecast)
     if data_freq == 52:
         last_day = self.base.as_Date(end_date.strftime('%Y-%m-%d')).ro
         day_offsets = self.base.seq(1, number_of_predctions).ro * 7
         forecast_duration = last_day + day_offsets
         myxts = self.xts.xts(reshaped_melted_results, forecast_duration)
         results_field = 'value.value.Point.Forecast'
     else:  # data_freq == 12
         myxts = holt_winter_forecast
         results_field = 'value.Point.Forecast'

     results_pd_df = com.convert_robj(self.r.melt(myxts))
     results_pd_ts = results_pd_df[results_field]

     return (results_pd_ts, holt_winter_forecast)
Exemple #27
0
    def _cca(self, X, Z, **params):
        """Run PMA's ``CCA`` on X and Z and return the result as a dict.

        Pandas DataFrames are converted to R data frames first; ``params``
        override the defaults (``typex``, ``typez``, ``trace``, ``K``). The
        returned dict holds the entries of the R result plus ``nGroups``, the
        number of rows of X.
        """
        pma = importr("PMA")

        # Defaults:
        kwParams = dict(typex="standard",
                        typez="standard",
                        trace=False,
                        K=self.numComponents)
        kwParams.update(params)

        pandas_frame = pd.core.frame.DataFrame
        if isinstance(X, pandas_frame):
            X = com.convert_to_r_dataframe(X)
        if isinstance(Z, pandas_frame):
            Z = com.convert_to_r_dataframe(Z)

        both_r_frames = all(isinstance(m, ro.vectors.DataFrame) for m in (X, Z))
        assert both_r_frames, "X, Z need to be either Pandas DataFrames or R dataframes!"

        max_components = min(len(X.names), len(Z.names))
        assert self.numComponents <= max_components, "Number of components must be smaller than the minimum of columns in each of your matrices"

        nGroups = com.convert_robj(ro.r["nrow"](X)[0])

        print("\tCCA parameters:", kwParams)
        result = dict(pma.CCA(X, Z, **kwParams).items())
        result['nGroups'] = nGroups
        return result
Exemple #28
0
    def gt_basic(es, gene_sets, pheno_class_column,
                 model="logistic",
                 permutations=100):
        """
            Run the global test on an expression set and return its result table.

            @param es: Expression set with defined user class in pheno
            @type es: ExpressionSet

            @type gene_sets: environment.structures.GeneSets

            @param pheno_class_column: Column name of target classes in phenotype table
            @type pheno_class_column: string or None

            @return: the ``result`` slot of the test object, converted with
                ``com.convert_robj``
        """
        GlobalTest.gt_init()

        r_assay = com.convert_to_r_matrix(es.get_assay_data_frame())
        r_response = es.get_pheno_column_as_r_obj(pheno_class_column)

        # Keep only gene sets restricted to the genes present in the assay.
        assay_genes = es.get_assay_data_frame().index.tolist()
        usable_sets = filter_gs_by_genes(gene_sets.get_gs(), assay_genes)

        # The assay matrix is transposed with R's ``t`` before testing.
        test_object = GlobalTest.gt(
            r_response,
            R.r['t'](r_assay),
            subsets=usable_sets.to_r_obj(),
            model=model,
            permutations=permutations,
        )

        return com.convert_robj(test_object.do_slot('result'))
Exemple #29
0
    def test_convert_nested_list(self):
        """Nested R lists convert to nested dicts with list-valued leaves."""
        expected = dict(a=dict(foo=[1], bar=[2]))
        actual = com.convert_robj(r('list(a=list(foo=1, bar=2))'))
        tm.assert_dict_equal(actual, expected)
Exemple #30
0
    def ccaPermuteOutcomesVsControls(self, groupFreqThresh = 0, nPerms = 25, penaltyXs = None , penaltyZs = None):
        """Run ``self._ccaPermute`` with controls as X and outcomes as Z.

        ``groupFreqThresh`` is forwarded to the outcome getter and ``nPerms``
        is passed on as ``nperms``. Falsy penalty arguments are replaced by
        ``np.arange(.1, .91, .05)`` wrapped in an R float vector.
        """
        (groups, allOutcomes, controls) = self.outcomeGetter.getGroupsAndOutcomes(groupFreqThresh)
        # groups: set(group_ids)
        # allOutcomes: {outcome: {group_id: value}}
        # controls: {control: {group_id: value}}

        def default_penalties():
            return ro.vectors.FloatVector(np.arange(.1,.91,.05))

        # X contains feature group_norms, Z contains outcome values
        # R doesn't handle '$'s in column names
        Xdict = {name.replace('$','.'): values for name, values in controls.iteritems()}
        Zdict = {name.replace('$','.'): values for name, values in allOutcomes.iteritems()}

        x_frame = pd.DataFrame(data=Xdict)
        z_frame = pd.DataFrame(data=Zdict)
        X, Z, Xfreqs, Zfreqs = self.prepMatricesTogether(x_frame, z_frame)
        X = com.convert_to_r_dataframe(X)
        Z = com.convert_to_r_dataframe(Z)

        group_count = com.convert_robj(ro.r["nrow"](X)[0])

        kwParams = {
            "nperms": nPerms,
            "penaltyxs": penaltyXs if penaltyXs else default_penalties(),
            "penaltyzs": penaltyZs if penaltyZs else default_penalties(),
        }

        self._ccaPermute(X, Z, **kwParams)
Exemple #31
0
    def fit(self, xtrain, ytrain):
        """The fit method trains R's random forest classifier.

        NOTE: the method name ("fit") and method signature were chosen
        to be consistent with scikit learn's fit method.

        The per-class sample sizes handed to ``set_sample_size`` are the
        label counts of ``ytrain``, ordered other, oncogene, tsg (only the
        classes that are being predicted are included).

        NOTE: both arguments are modified in place -- ``ytrain`` receives
        the index of ``xtrain`` and ``xtrain`` gains a 'true_class' column.

        Parameters
        ----------
        xtrain : pd.DataFrame
            features for training set
        ytrain : pd.DataFrame
            true class labels (as integers) for training set

        Raises
        ------
        ValueError
            if neither oncogene nor tsg prediction is enabled, since no
            sample sizes can be derived in that case
        """
        if not (self.is_onco_pred or self.is_tsg_pred):
            # previously this fell through and failed with an unbound local
            raise ValueError('At least one of is_onco_pred / is_tsg_pred '
                             'must be set to fit the classifier')

        label_counts = ytrain.value_counts()
        class_nums = [self.other_num]
        if self.is_onco_pred:
            class_nums.append(self.onco_num)
        if self.is_tsg_pred:
            class_nums.append(self.tsg_num)
        sampsize = [label_counts[num] for num in class_nums]

        self.set_sample_size(sampsize)
        ytrain.index = xtrain.index  # ensure indexes match
        xtrain['true_class'] = ytrain
        r_xtrain = com.convert_to_r_dataframe(xtrain)
        #r_xtrain = pandas2ri.py2ri(xtrain)
        self.rf = self.rf_fit(r_xtrain, self.ntrees, self.sample_size)
        r_imp = self.rf_imp(self.rf)  # importance dataframe in R
        self.feature_importances_ = com.convert_robj(r_imp)
    def predict(self, xtest):
        """Return the predicted class for every row of the test features.

        Parameters
        ----------
        xtest : pd.DataFrame
            features for test set

        Returns
        -------
        the 'pred_class' column, reindexed to ``xtest.index`` and shifted
        down by one
        """
        # Pick the pandas -> R converter matching the installed tooling
        to_r = pandas2ri.py2ri if new_pandas_flag else com.convert_to_r_dataframe
        r_pred = self.rf_pred(self.rf, to_r(xtest))

        if new_pandas_flag:
            # element 1 carries the gene names, element 0 the classes
            genes = pandas2ri.ri2py(r_pred[1])
            pred_class = pandas2ri.ri2py(r_pred[0])
        else:
            gene_to_class = com.convert_robj(r_pred)
            genes, pred_class = zip(*gene_to_class.items())

        frame = pd.DataFrame({'pred_class': pred_class}, index=genes)
        frame = frame.reindex(xtest.index)
        frame -= 1  # for some reason the class numbers start at 1
        return frame['pred_class']
Exemple #33
0
    def test_convert_matrix(self):
        """The converted test matrix keeps its row and column labels."""
        frame = com.convert_robj(self._test_matrix())

        checks = (
            (frame.index, ["a", "b", "c"]),
            (frame.columns, ["one", "two", "three"]),
        )
        for labels, wanted in checks:
            assert np.array_equal(labels, wanted)
Exemple #34
0
    def test_convert_nested_list(self):
        """Check that a nested named R list converts to a nested dict."""
        r_nested = r("list(a=list(foo=1, bar=2))")
        want = {"a": {"foo": [1], "bar": [2]}}

        got = com.convert_robj(r_nested)

        tm.assert_dict_equal(got, want)
Exemple #35
0
    def test_convert_list(self):
        """Check that a flat named R list converts to a dict of lists."""
        r_list = r("list(a=1, b=2, c=3)")
        want = {"a": [1], "b": [2], "c": [3]}

        got = com.convert_robj(r_list)

        tm.assert_dict_equal(got, want)
Exemple #36
0
    def _cca(self, X, Z, **params):
        """Given two Pandas dataframes and a set of parameters, performs CCA
        returns CCA dict (converted from R CCA named list object)
        """
        
        pma = importr("PMA")
        
        # Defaults:
        kwParams = {"typex": "standard",
                    "typez": "standard",
                    "trace": False,
                    "K": self.numComponents,
        }
        kwParams.update(params)
        
        if isinstance(X, pd.core.frame.DataFrame):
            X = com.convert_to_r_dataframe(X)
        if isinstance(Z, pd.core.frame.DataFrame):
            Z = com.convert_to_r_dataframe(Z)

        assert isinstance(X, ro.vectors.DataFrame) and isinstance(Z, ro.vectors.DataFrame), "X, Z need to be either Pandas DataFrames or R dataframes!"

        assert self.numComponents <= min(len(X.names),len(Z.names)), "Number of components must be smaller than the minimum of columns in each of your matrices"

        nGroups = com.convert_robj(ro.r["nrow"](X)[0])
        
        print "\tCCA parameters:", kwParams
        cca = pma.CCA(X, Z, **kwParams)
        cca = {k:v for k, v in cca.items()}
        cca['nGroups'] = nGroups
        return cca
 def case_classifyCascade(self):
     """ A individual case classification function"""
     ########### To R for classification
     os.chdir("Z:\Cristina\MassNonmass\codeProject\codeBase\extractFeatures\casesDatabase")        
     cF = pd.read_csv('casesFrames_toclasify.csv')
     
     cF['finding.mri_mass_yn'] = cF['finding.mri_mass_yn'].astype('int32')
     cF['finding.mri_nonmass_yn'] = cF['finding.mri_nonmass_yn'].astype('int32')
     cF['finding.mri_foci_yn'] = cF['finding.mri_foci_yn'].astype('int32')
     cF['finding.mri_foci_yn'] = cF['finding.mri_foci_yn'].astype('int32')
     cF['is_insitu'] = cF['is_insitu'].astype('int32')
     cF['is_invasive'] = cF['is_invasive'].astype('int32')
             
     self.rpycasesFrame = com.convert_to_r_dataframe(cF)
     base = importr('base')
     base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/finalClassifier/finalClassifier_classifyCascade.R')
     
     RFcascade = globalenv['finalClassifier_classifyCascade'](self.rpycasesFrame)
     
     self.RFcascade_probs = com.convert_robj(RFcascade)
     print "\n========================"
     print self.RFcascade_probs
     
     # proccess possible outcome
     [veredict, caseoutcome] = self.parse_classes(self.RFcascade_probs)
     print "\n========================\nCascade classification result:"
     print veredict
     print caseoutcome
     
     return
Exemple #38
0
    def test_convert_matrix(self):
        """The converted test matrix keeps its row and column labels."""
        frame = com.convert_robj(self._test_matrix())

        checks = (
            (frame.index, ['a', 'b', 'c']),
            (frame.columns, ['one', 'two', 'three']),
        )
        for labels, wanted in checks:
            assert np.array_equal(labels, wanted)
Exemple #39
0
    def __init__(self, data, p=1, type='both'):
        """Keep the R data and settings, convert the data, then run estimate().

        ``data`` is stored as ``rdata`` and its converted form as ``pydata``.
        ``p`` and ``type`` are stored unchanged (presumably model order and
        model variant -- confirm against ``estimate``).
        """
        self.rdata, self.p, self.type = data, p, type

        self.pydata = rpy.convert_robj(data)
        # placeholder until estimate() has run
        self._estimate = None
        self.estimate()
Exemple #40
0
def get_stats(s):
    """Collect hazard, likelihood-ratio and concordance statistics of a fit.

    Parameters
    ----------
    s : R object
        fitted model whose ``base.summary`` exposes 'conf.int', 'logtest'
        and 'concordance' (presumably a coxph fit -- confirm with callers)

    Returns
    -------
    pd.Series
        concatenation of the 'feature' row of the confidence-interval
        table, the log test (stat, df, p) and the concordance (stat, se),
        keyed by 'hazard', 'LR' and 'concordance' on the outer level
    """
    b = base.summary(s)
    # .loc is the label-based replacement for the deprecated .ix indexer
    hazard = convert_robj(b.rx2('conf.int')).loc['feature']
    stat = pd.Series(b.rx2('logtest'), index=['stat', 'df', 'p'])
    concordance = pd.Series(b.rx2('concordance'), index=['stat', 'se'])
    ret = pd.concat([hazard, stat, concordance],
                    keys=['hazard', 'LR', 'concordance'])
    return ret
Exemple #41
0
    def test_convert_frame(self):
        """R's built-in ``faithful`` dataset keeps its columns and index."""
        faithful = com.convert_robj(r["faithful"])

        wanted_columns = ["eruptions", "waiting"]
        wanted_index = np.arange(1, 273)

        assert np.array_equal(faithful.columns, wanted_columns)
        assert np.array_equal(faithful.index, wanted_index)
Exemple #42
0
    def test_convert_frame(self):
        """R's built-in ``faithful`` dataset keeps its columns and index."""
        faithful = com.convert_robj(r['faithful'])

        wanted_columns = ['eruptions', 'waiting']
        wanted_index = np.arange(1, 273)

        assert np.array_equal(faithful.columns, wanted_columns)
        assert np.array_equal(faithful.index, wanted_index)
Exemple #43
0
    def test_convert_frame(self):
        """R's built-in ``faithful`` dataset keeps its columns and index."""
        faithful = com.convert_robj(r['faithful'])

        wanted_columns = ['eruptions', 'waiting']
        wanted_index = np.arange(1, 273)

        assert np.array_equal(faithful.columns, wanted_columns)
        assert np.array_equal(faithful.index, wanted_index)
Exemple #44
0
    def __init__(self, data, p=1, type='both'):
        """Keep the R data and settings, convert the data, then run estimate().

        ``data`` is stored as ``rdata`` and its converted form as ``pydata``.
        ``p`` and ``type`` are stored unchanged (presumably model order and
        model variant -- confirm against ``estimate``).
        """
        self.rdata, self.p, self.type = data, p, type

        self.pydata = rpy.convert_robj(data)
        # placeholder until estimate() has run
        self._estimate = None
        self.estimate()
Exemple #45
0
def runCharacteristicDirection(infiles, outfile):
    """Run characteristic direction for every timepoint against the day-0 controls.

    Parameters
    ----------
    infiles : pair
        ``(vstFile, annotationFile)``: a tab-separated expression table
        indexed by 'gene_symbol' and a tab-separated annotation table
        indexed by 'sample_name' that carries a 'day' column
    outfile : str
        path of the tab-separated table written at the end; rows are the
        'index' values of the R results (labelled 'gene_symbol'), columns
        are timepoints and cells hold the 'CD' value

    Raises
    ------
    KeyError
        if the annotation has no samples for day 0, 4 or 5
    """

    # Split infiles
    vstFile, annotationFile = infiles

    # Read expression data
    vstDataframe = pd.read_table(vstFile, index_col='gene_symbol')

    # Read annotation data
    annotationDataframe = pd.read_table(annotationFile,
                                        index_col='sample_name')

    # Get timepoint samples
    timepointSampleDict = {
        'day' + str(day):
        annotationDataframe.index[annotationDataframe['day'] == day].tolist()
        for day in set(annotationDataframe['day'])
    }

    # Group 4 and 5 days
    timepointSampleDict[
        'day4-5'] = timepointSampleDict['day4'] + timepointSampleDict['day5']
    del timepointSampleDict['day4']
    del timepointSampleDict['day5']

    # Get controls
    controlColumns = timepointSampleDict.pop('day0')

    # The expression matrix is identical for every timepoint: convert it once
    rVstDataframe = com.convert_to_r_dataframe(vstDataframe)

    # Collect the per-timepoint results and concatenate once after the loop
    timepointDataframes = []

    # Loop through timepoints
    for timepoint, experimentColumns in timepointSampleDict.items():

        # Run characteristic direction
        cdResults = r.runCharacteristicDirection(
            rVstDataframe, experimentColumns, controlColumns, 0.1)

        # Convert to dataframe
        cdDataframe = com.convert_robj(cdResults).reset_index()

        # Add timepoint column
        cdDataframe['timepoint'] = timepoint

        timepointDataframes.append(cdDataframe)

    # pd.concat rejects an empty list, so keep the empty-frame fallback
    if timepointDataframes:
        resultDataframe = pd.concat(timepointDataframes)
    else:
        resultDataframe = pd.DataFrame()

    # Pivot
    resultDataframeCast = resultDataframe.pivot(index='index',
                                                columns='timepoint',
                                                values='CD')

    # Save
    resultDataframeCast.to_csv(outfile, sep='\t', index_label='gene_symbol')
def imputation_loyer(year):
    """Impute the rent variable 'loym' onto the "menagem" table by statistical matching.

    Builds comparable ERF (recipient) and housing-survey (donor) frames for
    ``year``, matches them with StatMatch's ``NND_hotdeck`` (Gower distance,
    donation classes 'magtr' and 'tu99_recoded'), fuses the donor variable
    'lmlm' onto the recipients, renames it to 'loym' and left-merges it on
    'ident' into the temporary "menagem" table, which is saved back.
    """

    erf = create_comparable_erf_data_frame(year)
    erf = erf[['logt', 'hnph2', 'iaat', 'mdiplo', 'mtybd', 'tu99_recoded', 'magtr', 'mcs8', 'deci', 'wprm', 'ident']]
    erf = erf.dropna(how = 'any')  # TODO: run a check before dropping the rows containing NA

    Logt = create_comparable_logement_data_frame(year)

    Logt = Logt.dropna(how = 'any')

    allvars = ['logt', 'hnph2', 'iaat', 'mdiplo', 'mtybd', 'tu99_recoded', 'magtr', 'mcs8', 'deci']
    classes = ['magtr', 'tu99_recoded']
    matchvars = list(set(allvars) - set(classes))

    for variable in allvars:
        count_NA(variable, Logt)
        count_NA(variable, erf)

    erf['mcs8'] = erf['mcs8'].astype(int)

    rpy2.robjects.pandas2ri.activate()  # Lets rpy2 convert the dataframes   pandas2ri doesn't exist anymore in rpy2
#    com.convert_to_r_dataframe() TODO: probably to be removed
    try:
        sm = importr("StatMatch")  # Launch R you need to have StatMatch installed in R
    except Exception:
        # Fall back to the explicitly configured library location; a bare
        # except here would also have swallowed KeyboardInterrupt/SystemExit
        sm = importr("StatMatch", lib_loc = STATMATCH_LIB_LOCATION)
    out_nnd = sm.NND_hotdeck(data_rec = erf,
                             data_don = Logt,
                             match_vars = vectors.StrVector(matchvars),
                             don_class = vectors.StrVector(classes),
                             dist_fun = "Gower",
                             )
    fill_erf_nnd = sm.create_fused(data_rec = erf,
                                   data_don = Logt,
                                   mtc_ids = out_nnd[0],
                                   z_vars = vectors.StrVector(["lmlm"]),
                                   )
    # Release the intermediates before converting the fused frame
    del allvars, matchvars, classes, out_nnd
    gc.collect()

    fill_erf_nnd = com.convert_robj(fill_erf_nnd)
    fill_erf_nnd = DataFrame(fill_erf_nnd)
    fill_erf_nnd.rename(columns={'lmlm': 'loym'}, inplace = True)

    loy_imput = fill_erf_nnd[['ident', 'loym']]

    erfmenm = load_temp(name = "menagem", year = year)

    # Drop any earlier imputation (and merge leftovers) before merging anew
    for var in ["loym", "loym_x", "loym_y", "loym_z"]:
        if var in erfmenm:
            del erfmenm[var]
            log.info("{} have been deleted".format(var))

    erfmenm = erfmenm.merge(loy_imput, on='ident', how='left')
    assert 'loym' in erfmenm.columns, u"La variable loym n'est pas présente dans erfmenm"
    save_temp(erfmenm, name = "menagem", year=year)
Exemple #47
0
def convert_xts_to_df(o):
    """
        Convert an xts object to a DataFrame.

        The 'index' slot of ``o`` is read as second-resolution datetimes
        and becomes the index of the converted frame.
    """
    timestamps = np.array(o.do_slot('index'), dtype=np.dtype("M8[s]"))
    frame = rcom.convert_robj(robjects.default_ri2py(o))
    frame.index = timestamps
    return frame
Exemple #48
0
def sav_to_pandas_rpy2(inputfile):
    """
    Read an SPSS file with R's ``foreign::read.spss`` and convert the result.

    :param inputfile: string, path of the SPSS file

    :return: the R data frame converted by ``convert_robj``
    """
    import pandas.rpy.common as com

    # Pass the path as an argument instead of pasting it into R source
    # text, so backslashes or quotes in the path cannot break the R code.
    read_spss = com.robj.r('foreign::read.spss')
    w = read_spss(inputfile, **{'to.data.frame': True})
    return com.convert_robj(w)
 def load_cv(self, path):
     """Load a saved R workspace and pick up its trained models and CV folds.

     R's working directory is first set to the current Python working
     directory, then ``path`` is loaded. ``trained.models`` is stored as
     ``self.rf_cv``; ``cvFoldDf`` is stored as ``self.cv_folds``, left as
     an R object when ``new_pandas_flag`` is set and converted otherwise.
     """
     ro.r('setwd("{0}")'.format(os.getcwd()))
     ro.r('load("{0}")'.format(path))
     self.rf_cv = ro.r["trained.models"]
     r_folds = ro.r["cvFoldDf"]
     self.cv_folds = r_folds if new_pandas_flag else com.convert_robj(r_folds)
Exemple #50
0
def convert_xts_to_df(o):
    """
        Convert an xts object to a DataFrame.

        The 'index' slot of ``o`` is read as second-resolution datetimes
        and becomes the index of the converted frame.
    """
    timestamps = np.array(o.do_slot('index'), dtype=np.dtype("M8[s]"))
    frame = rcom.convert_robj(robjects.default_ri2py(o))
    frame.index = timestamps
    return frame
Exemple #51
0
def sav_to_pandas_rpy2(input_file):
    """
    SPSS .sav files to Pandas DataFrame through Rpy2

    The file is read with R's ``foreign::read.spss`` and the resulting R
    data frame is converted by ``convert_robj``.

    :param input_file: string, path of the SPSS file

    :return: the converted R data frame
    """
    import pandas.rpy.common as com

    # Pass the path as an argument instead of pasting it into R source
    # text, so backslashes or quotes in the path cannot break the R code.
    read_spss = com.robj.r('foreign::read.spss')
    w = read_spss(input_file, **{'to.data.frame': True})
    return com.convert_robj(w)
Exemple #52
0
def draw_survival_curves_mpl(fit, ax=None, title=None, colors=None, ms=80, alpha=1):
    """
    Draw one survival step curve per group of an R survfit, with censoring marks.

    fit    : R survfit object; element 2 of its 'call' must convert to a
             frame with 'feature', 'event' and 'days' columns
    ax     : matplotlib axes to draw on; a new 4x3 figure is created if None
    title  : optional axes title
    colors : optional colors, looked up by group label first and by group
             position as a fallback
    ms     : marker size of the censoring ticks
    alpha  : opacity of the curves
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(4, 3))
    s = base.summary(fit)
    # Keep only the summary vectors that have one entry per time point
    tab = pd.DataFrame({v: s.rx2(v) for v in s.names
                                    if len(s.rx2(v)) == len(s.rx2('time'))},
                       index=s.rx2('time'))
    call = com.convert_robj(fit.rx2('call')[2])

    groups = robjects.r.sort(robjects.r.c(*call.feature.unique()))

    if 'strata' not in tab:
        # Single curve: treat everything as one stratum
        groups = [0]
        tab['strata'] = 1
    elif len(tab.strata.unique()) != len(groups):
        # Some groups have no strata rows: put the groups with events first
        gg = list(call[call.event > 0].feature.unique())
        gg = [g for g in groups if g in gg]
        bg = [g for g in groups if g not in gg]
        groups = gg + bg

    for i, group in enumerate(groups):
        censoring = call[(call.event == 0) & (call.feature == group)].days
        surv = tab[tab.strata == (i + 1)].surv
        # Start every curve at survival 1 at time 0
        surv = surv.copy().set_value(0., 1.)
        surv = surv.sort_index()
        if surv.index[-1] < censoring.max():
            # Extend the curve flat up to the last censoring time
            surv = surv.set_value(censoring.max(), surv.iget(-1)).sort_index()

        censoring_pos = get_markers(censoring, surv)
        ax.step(surv.index, surv, lw=3, where='post', alpha=alpha, label=group)
        if colors is not None:
            try:
                # fix for R-Python str-to-int conversion
                color = colors[group]
            except Exception:
                # Label lookup failed: fall back to the positional color
                color = colors[i]
            ax.lines[-1].set_color(color)
        if len(censoring_pos) > 0:
            ax.scatter(*zip(*censoring_pos), marker='|', s=ms,
                       color=ax.lines[-1].get_color())

    ax.set_ylim(0, 1.05)
    # ax.set_xlim(0, max(surv.index)*1.05)
    ax.set_xlim(0, max(call.days) * 1.05)
    ax.legend(loc='best')
    ax.set_ylabel('Survival')
    ax.set_xlabel('Years')
    if title:
        ax.set_title(title)
Exemple #53
0
def unpack_r_results_list(res_list):
    """Split the R results list into (net_benefit, interventions_avoided).

    Each entry is pulled out of the R list, converted, and unpacked from
    the dict that the conversion wraps it in, so the tuple can be compared
    with the results of the Python analysis.

    Parameters
    ----------
    res_list : rpy2.robject
       a list of results from an R analysis (returned by the R dca function)

    Returns
    -------
    tuple(pd.DataFrame, pd.DataFrame)
        (net_benefit, interventions_avoided) -- same result as Python analysis
    """
    def _entry(key):
        # rx() keeps the list wrapper, so the converted value is a dict
        # holding the dataFrame under the same key
        return pdcom.convert_robj(res_list.rx(key))[key]

    return _entry('net.benefit'), _entry('interventions.avoided')
Exemple #54
0
def runVoom(infile, outfile):
    """Run the R ``runVoom`` function on a raw count table and save the result.

    ``infile`` is a tab-separated table indexed by 'gene_symbol'; the
    converted result is written to ``outfile`` as a tab-separated table
    with the index labelled 'gene_symbol'.
    """
    rawCounts = pd.read_table(infile, index_col='gene_symbol')

    # pandas -> R, run, R -> pandas
    rCounts = com.convert_to_r_dataframe(rawCounts)
    voomResult = com.convert_robj(r.runVoom(rCounts))

    voomResult.to_csv(outfile, sep='\t', index_label='gene_symbol')
Exemple #55
0
def get_surv_fit(surv,
                 feature=None,
                 covariates=None,
                 interactions=None,
                 formula=None,
                 time_cutoff=5):
    """Fit survival curves in R and tabulate the per-group statistics.

    Parameters
    ----------
    surv, feature, covariates
        forwarded to ``process_covariates`` to build the R data and factors
    interactions
        forwarded to ``get_formula`` when no explicit formula is given
    formula : str or None
        R formula to fit; derived from the factors when None
    time_cutoff : number
        time at which the survival estimate is reported

    Returns
    -------
    pd.DataFrame
        one row per group with two-level columns: patient and event counts,
        median survival with its bounds, and the survival estimate with
        its bounds at ``time_cutoff``
    """
    df, factors = process_covariates(surv, feature, covariates)
    if formula is None:
        formula = get_formula(factors, interactions)
    fmla = robjects.Formula(formula)

    s = survival.survfit(fmla, df)
    summary = base.summary(s, times=robjects.r.c(time_cutoff))
    table = summary.rx2('table')
    res = convert_robj(table)

    if isinstance(res, list):
        # Converted to a plain list rather than a frame: wrap the named
        # values into a single 'all' row
        row = pd.Series(table, table.names)
        res = pd.DataFrame({'feature=all': row}).T

    # Row labels look like 'feature=<group>': keep only the group part
    res = res.rename(index=lambda idx: idx.split('=')[1])
    res = res[['records', 'events', 'median', '0.95LCL', '0.95UCL']]
    res.columns = pd.MultiIndex.from_tuples([('Stats', '# Patients'),
                                             ('Stats', '# Events'),
                                             ('Median Survival', 'Median'),
                                             ('Median Survival', 'Lower'),
                                             ('Median Survival', 'Upper')])
    survival_label = str(time_cutoff) + 'y Survival'
    if feature is None:
        for f in ['surv', 'lower', 'upper']:
            res[(survival_label, f.capitalize())] = summary.rx2(f)
    else:
        # Build a real list: a lazy map object is not a valid index
        idx = [label.replace('feature=', '')
               for label in summary.rx2('strata').iter_labels()]

        strata_df = pd.DataFrame(
            {
                d: list(summary.rx2(d))
                for d in ['strata', 'surv', 'lower', 'upper']
            },
            index=idx)
        for f in ['surv', 'lower', 'upper']:
            res[(survival_label, f.capitalize())] = strata_df[f]

    try:
        # Use integer group labels when every label is numeric
        res.index = [int(label) for label in res.index]
    except (ValueError, TypeError):
        # Non-numeric labels: keep the index as it is
        pass
    return res
Exemple #56
0
    def ccaPermute(self,
                   nPerms=25,
                   penaltyXs=None,
                   penaltyZs=None,
                   controlsWithFeats=False):
        """Run the CCA permutation step with feature group norms as X and outcomes as Z.

        The controls are appended to X when controlsWithFeats is set and to Z
        otherwise. nPerms is passed on as "nperms"; penaltyXs / penaltyZs,
        when falsy, are replaced by an R FloatVector built from
        np.arange(.1, .91, .05). Exits the process if the pandas/R bridge
        names are not available.
        """
        # Getter returns: set(group_ids), {outcome: {group_id: value}},
        # {control: {group_id: value}}
        groupIds, outcomesByName, controlsByName = self.outcomeGetter.getGroupsAndOutcomes()

        # normsByFeat: {feat: {group_id: group_norm}}; the second element is
        # the list of possible feature names (not needed here)
        normsByFeat, _featureNames = self.featureGetter.getGroupNormsWithZerosFeatsFirst(groupIds)

        if controlsWithFeats:
            receiver, side = normsByFeat, "X"
        else:
            receiver, side = outcomesByName, "Z"
        print("Appending controls to " + side)
        receiver.update(controlsByName)

        # TO DO: get topic frequencies?

        # X contains feature group_norms, Z contains outcome values
        xFrame, zFrame, _xFreqs, _zFreqs = self.prepMatrices(
            pd.DataFrame(data=normsByFeat), pd.DataFrame(data=outcomesByName))

        try:
            rX = com.convert_to_r_dataframe(xFrame)
            rZ = com.convert_to_r_dataframe(zFrame)
            # Row count of X as reported by R; not used further here
            _nGroups = com.convert_robj(ro.r["nrow"](rX)[0])
        except NameError:
            warn("pandas.rpy.common cannot be imported")
            sys.exit(1)

        if not penaltyXs:
            penaltyXs = ro.vectors.FloatVector(np.arange(.1, .91, .05))
        if not penaltyZs:
            penaltyZs = ro.vectors.FloatVector(np.arange(.1, .91, .05))

        self._ccaPermute(rX, rZ,
                         nperms=nPerms,
                         penaltyxs=penaltyXs,
                         penaltyzs=penaltyZs)
    def predict_proba(self, xtest):
        """Return the class probabilities predicted for the test features.

        Parameters
        ----------
        xtest : pd.DataFrame
            features for test set

        Returns
        -------
        the ``.values`` of the converted R prediction
        """
        r_features = com.convert_to_r_dataframe(xtest)
        r_probs = self.rf_pred_prob(self.rf, r_features)
        return com.convert_robj(r_probs).values