예제 #1
0
def parse_limma_result(r_fit):
    """Convert the slots of an R fit object into pandas structures.

    The returned dict contains:

    * one entry per matrix-like slot, converted with ``to_dataframe``;
    * ``'var.prior'``, a Series indexed by the coefficient columns;
    * ``'fit'``, a DataFrame indexed like the coefficients that gathers the
      per-row statistics together with the dataset-wide values, the latter
      repeated on every row.
    """
    matrix_slots = (
        'coefficients', 'cov.coefficients', 'stdev.unscaled', 't',
        'p.value', 'lods',
    )
    per_row_slots = (
        'df.prior', 'df.residual', 'sigma', 'Amean', 'df.total', 'F',
        'F.p.value', 's2.post',
    )
    dataset_wide_slots = ('rank', 'method', 's2.prior', 'proportion')

    # Matrix-like slots go straight to data frames
    ans = {slot: to_dataframe(r_dollar(r_fit, slot)) for slot in matrix_slots}

    coefficients = ans['coefficients']
    row_index = coefficients.index

    ans['var.prior'] = pd.Series(
        numpy2ri.rpy2py(r_dollar(r_fit, 'var.prior')),
        index=coefficients.columns)

    fit_columns = {}

    # One value per row of the coefficient table
    for slot in per_row_slots:
        fit_columns[slot] = pd.Series(
            numpy2ri.rpy2py(r_dollar(r_fit, slot)), index=row_index)

    # A single value for the whole dataset, broadcast over all rows
    for slot in dataset_wide_slots:
        values = numpy2ri.rpy2py(r_dollar(r_fit, slot))
        assert len(values) == 1
        fit_columns[slot] = pd.Series(values[0], index=row_index)

    ans['fit'] = pd.DataFrame(fit_columns)
    return ans
예제 #2
0
def _rpy2py(X):
    """Convert an R object to Python, preferring the numpy converter.

    ``numpy2ri.rpy2py`` is tried first; when it raises
    ``NotImplementedError`` the conversion is retried with
    ``rpy2.robjects.conversion.rpy2py``.
    """
    from rpy2.robjects import numpy2ri
    import rpy2.robjects as ro

    try:
        converted = numpy2ri.rpy2py(X)
    except NotImplementedError:
        converted = ro.conversion.rpy2py(X)
    return converted
예제 #3
0
파일: r2py.py 프로젝트: ivirshup/anndata2ri
def rpy2py_single_cell_experiment(obj: SexpS4) -> AnnData:
    """Build an ``AnnData`` from an R SingleCellExperiment object.

    The first assay becomes the expression matrix and the remaining assays
    become layers (each assay is transposed after conversion). Reduced
    dimensions are stored under names translated by
    ``conv_name.sce2scanpy``; column data, row data and metadata supply the
    remaining ``AnnData`` arguments.
    """
    with localconverter(default_converter):
        s4v = importr("S4Vectors")
        se = importr("SummarizedExperiment")
        sce = importr("SingleCellExperiment")

        assay_names = se.assayNames(obj)
        if isinstance(assay_names, NULLType):
            # No assays at all: no expression matrix and no layers
            exprs, layers = None, {}
        else:
            assay_names = [str(name) for name in se.assayNames(obj)]
            # The assays can be stored in an env or elsewise, so they are
            # fetched through the accessor instead of obj.slots['assays']
            assays = []
            for name in assay_names:
                assays.append(numpy2ri.rpy2py(se.assay(obj, name)).T)
            exprs = assays[0]
            layers = dict(zip(assay_names[1:], assays[1:]))
            assert len(exprs.shape) == 2, exprs.shape

        rdim_names = sce.reducedDimNames(obj)
        if isinstance(rdim_names, NULLType):
            obsm = None
        else:
            rdim_names = [str(name) for name in rdim_names]
            # Convert every reduced dimension first, then translate the names
            reduced_dims = []
            for name in rdim_names:
                reduced_dims.append(numpy2ri.rpy2py(sce.reducedDim(obj, name)))
            obsm = {}
            for name, dim in zip(rdim_names, reduced_dims):
                obsm[conv_name.sce2scanpy(name)] = dim

        col_data = se.colData(obj)
        row_data = se.rowData(obj)
        metadata = s4v.metadata(obj)

    obs = rpy2py_data_frame(col_data)
    var = rpy2py_data_frame(row_data)
    # The whole shebang: configured converter, numpy, pandas and ours
    with localconverter(full_converter()):
        uns = dict(metadata.items())

    return AnnData(exprs, obs, var, uns, obsm, layers=layers)
예제 #4
0
def rpy2py_intvector(obj):
    """Convert an R integer vector to its Python counterpart.

    Vectors whose R class includes ``factor`` become a
    ``pandas.Categorical`` built from the vector's ``levels`` slot (ordered
    when the class also includes ``ordered``). Any other integer vector is
    delegated to ``numpy2ri.rpy2py``.
    """
    # special case for factors
    if 'factor' in obj.rclass:
        r_codes = numpy.asarray(obj)
        # R codes are 1-based while pandas expects 0-based codes with -1
        # marking a missing value. Non-positive codes (R's NA marker) are
        # clamped to 0 *before* shifting, so they end up as -1 instead of
        # an out-of-range (or overflowed) code that from_codes rejects.
        codes = numpy.where(r_codes > 0, r_codes, 0) - 1
        res = pandas.Categorical.from_codes(
            codes,
            categories=list(obj.do_slot('levels')),
            ordered='ordered' in obj.rclass)
    else:
        res = numpy2ri.rpy2py(obj)
    return res
예제 #5
0
파일: pandas2ri.py 프로젝트: rs2/rpy2
def rpy2py_intvector(obj):
    """Convert an R integer vector to its Python counterpart.

    Factors become a ``pandas.Categorical``; every other integer vector is
    handed to ``numpy2ri.rpy2py``.
    """
    if 'factor' not in obj.rclass:
        return numpy2ri.rpy2py(obj)

    # Factors: shift the positive codes down by one and map everything
    # else to -1, the code pandas uses for a missing value.
    zero_based = [
        -1 if r_code <= 0 else r_code - 1
        for r_code in numpy.array(obj)
    ]
    level_names = list(obj.do_slot('levels'))
    is_ordered = 'ordered' in obj.rclass
    return pandas.Categorical.from_codes(zero_based,
                                         categories=level_names,
                                         ordered=is_ordered)
예제 #6
0
def test_cpm_normalization():
    """``cpm_normalize`` must agree with edgeR's ``cpm`` within 1e-2."""
    counts = np.array([
        [5, 4, 3],
        [2, 1, 4],
        [3, 4, 6],
        [4, 2, 8],
    ])

    # rpy2 is imported lazily, inside the test itself
    from rpy2.robjects import numpy2ri
    from rpy2.robjects.packages import importr

    edger = importr('edgeR')
    r_result = edger.cpm(numpy2ri.py2rpy(counts))
    reference = numpy2ri.rpy2py(r_result)

    assert np.allclose(cpm_normalize(counts), reference, atol=1e-2)
예제 #7
0
def rpy2py_floatvector(obj):
    """Convert an R numeric vector to its Python counterpart.

    Vectors whose R class includes ``POSIXct`` are turned into
    timezone-aware pandas datetimes; every other numeric vector is
    delegated to ``numpy2ri.rpy2py``.

    A ``POSIXct`` vector without a ``tzone`` attribute triggers a warning
    and is interpreted in the local timezone.
    """
    # special case for POSIXct date objects
    if 'POSIXct' in obj.rclass:
        # Function-scope import: only this branch needs it.
        import warnings

        try:
            tzone_name = obj.do_slot('tzone')[0]
        except LookupError:
            warnings.warn('R object inheriting from "POSIXct" but without '
                          'attribute "tzone".')
            tzone_name = ''
        if tzone_name == '':
            # R is implicitly using the local timezone, while Python time
            # libraries will assume UTC.
            tzone = get_timezone()
        else:
            tzone = pytz.timezone(tzone_name)
        # The values are epoch timestamps. Converting them directly into
        # `tzone` keeps the instant intact; building a naive local datetime
        # first and then labelling it with `tzone` shifts the instant
        # whenever `tzone` differs from the machine's own timezone.
        localized = (datetime.fromtimestamp(x, tz=tzone) for x in obj)
        res = pandas.to_datetime(tuple(localized))
    else:
        res = numpy2ri.rpy2py(obj)
    return res
예제 #8
0
파일: pandas2ri.py 프로젝트: rs2/rpy2
def rpy2py_floatvector(obj):
    """Convert an R numeric vector to its Python counterpart.

    Vectors whose R class includes ``POSIXct`` are turned into
    timezone-aware pandas datetimes; every other numeric vector is
    delegated to ``numpy2ri.rpy2py``.

    A ``POSIXct`` vector without a ``tzone`` attribute triggers a warning
    and is interpreted in the local timezone.
    """
    # special case for POSIXct date objects
    if 'POSIXct' in obj.rclass:
        try:
            tzone_name = obj.do_slot('tzone')[0]
        except LookupError:
            warnings.warn('R object inheriting from "POSIXct" but without '
                          'attribute "tzone".')
            tzone_name = ''
        if tzone_name == '':
            # R is implicitly using the local timezone, while Python
            # time libraries will assume UTC.
            tzone = get_timezone()
        else:
            tzone = pytz.timezone(tzone_name)
        # The values are epoch timestamps. Converting them directly into
        # `tzone` keeps the instant intact; building a naive local datetime
        # first and then labelling it with `tzone` shifts the instant
        # whenever `tzone` differs from the machine's own timezone.
        localized = (datetime.fromtimestamp(x, tz=tzone) for x in obj)
        res = pandas.to_datetime(tuple(localized))
    else:
        res = numpy2ri.rpy2py(obj)
    return res
예제 #9
0
파일: pandas2ri.py 프로젝트: rs2/rpy2
def ri2py_vector(obj):
    """Convert an R vector by delegating to ``numpy2ri.rpy2py``."""
    return numpy2ri.rpy2py(obj)
예제 #10
0
 def test_atomic_vector_to_numpy(self):
     """An R integer vector converts to a ``numpy.ndarray``."""
     r_ints = robjects.vectors.IntVector((1, 2, 3))
     converted = rpyn.rpy2py(r_ints)
     assert isinstance(converted, numpy.ndarray)
     # The source vector is still readable after the conversion
     assert r_ints[0] == 1
예제 #11
0
def rpy2py_floatvector(obj):
    """Convert an R numeric vector to its Python counterpart.

    Objects recognised by ``POSIXct.isrinstance`` are wrapped in
    ``POSIXct`` and sent through ``rpy2py``; anything else is delegated to
    ``numpy2ri.rpy2py``.
    """
    if not POSIXct.isrinstance(obj):
        return numpy2ri.rpy2py(obj)
    as_posixct = POSIXct(obj)
    return rpy2py(as_posixct)
예제 #12
0
def rpy2py_floatvector(obj):
    """Convert an R numeric vector by delegating to ``numpy2ri.rpy2py``."""
    converted = numpy2ri.rpy2py(obj)
    return converted
예제 #13
0
    def on_click(self, event):
        """
        Event handler for clicks on the left/right panels.

        Marks the clicked location on both panels, selects the time
        series nearest to it and redraws the seasonal decomposition
        (observed, trend, seasonal, residuals), the climatology and the
        change points found on the trend.

        :param event: click event; its ``inaxes``, ``xdata`` and
                      ``ydata`` attributes are read
        """
        # Event does not apply for time series plot:
        # only clicks inside the left or right panel are handled
        if event.inaxes not in [self.left_p, self.right_p]:
            return

        # Clear subplots
        self.observed.clear()
        self.trend.clear()
        self.seasonal.clear()
        self.resid.clear()
        self.climatology.clear()

        # Delete last reference point.
        # Artist.remove() is used instead of `del axes.lines[0]` because
        # recent Matplotlib releases expose `Axes.lines` as an immutable
        # sequence, on which item deletion raises.
        if len(self.left_p.lines) > 0:
            self.left_p.lines[0].remove()
            self.right_p.lines[0].remove()

        # Draw a point as a reference
        self.left_p.plot(event.xdata, event.ydata,
                marker='o', color='red', markersize=7, alpha=0.7)
        self.right_p.plot(event.xdata, event.ydata,
                marker='o', color='red', markersize=7, alpha=0.7)

        # Non-masked data
        left_plot_sd = self.left_ds.sel(longitude=event.xdata,
                                        latitude=event.ydata,
                                        method='nearest')
        # Single year dataset
        single_year_ds = self.single_year_ds.sel(longitude=event.xdata,
                                                 latitude=event.ydata,
                                                 method='nearest')

        # Materialise the selections when they are chunked
        if left_plot_sd.chunks is not None:
            left_plot_sd = left_plot_sd.compute()
            single_year_ds = single_year_ds.compute()

        # Seasonal decompose
        # NOTE(review): newer statsmodels releases name the `freq`
        # argument `period` -- confirm against the pinned version.
        ts_df = left_plot_sd.to_dataframe()
        self.seasonal_decompose = seasonal_decompose(
                ts_df[self.data_vars.value],
                model=self.model.value,
                freq=self.single_year_ds.shape[0],
                extrapolate_trend='freq')

        # Plot seasonal decompose
        self.observed.plot(self.seasonal_decompose.observed.index,
                self.seasonal_decompose.observed.values,
                label='Observed')

        # MK test
        _mk_test = mk_test(self.seasonal_decompose.trend)
        self.trend.plot(self.seasonal_decompose.trend.index,
                self.seasonal_decompose.trend.values,
                label=f'Trend {_mk_test}')

        # Set the same y limits from observed data
        self.trend.set_ylim(self.observed.get_ylim())

        self.seasonal.plot(self.seasonal_decompose.seasonal.index,
                self.seasonal_decompose.seasonal.values,
                label='Seasonality')
        self.resid.plot(self.seasonal_decompose.resid.index,
                self.seasonal_decompose.resid.values,
                label='Residuals')

        # Climatology
        # x/y are passed by keyword: recent seaborn releases no longer
        # accept them positionally.
        sbn.boxplot(x=ts_df.index.dayofyear,
                y=ts_df[self.data_vars.value], ax=self.climatology)
        # Plot year to analyse
        single_year_df = single_year_ds.to_dataframe()
        sbn.stripplot(x=single_year_df.index.dayofyear,
                y=single_year_df[self.data_vars.value],
                color='red', marker='o', size=7, alpha=0.7,
                ax=self.climatology)

        self.climatology.tick_params(axis='x', rotation=70)

        # Change point (computed on the trend component)
        r_vector = FloatVector(self.seasonal_decompose.trend.values)
        #changepoint_r = self.cpt.cpt_mean(r_vector)
        #changepoints_r = self.cpt.cpt_var(r_vector, method='PELT',
        #        penalty='Manual', pen_value='2*log(n)')
        changepoints_r = self.cpt.cpt_meanvar(r_vector,
                test_stat='Normal', method='BinSeg', penalty="SIC")
        changepoints = numpy2ri.rpy2py(self.cpt.cpts(changepoints_r))

        if changepoints.shape[0] > 0:
            # Plot vertical line where the changepoint was found
            for i, i_cpt in enumerate(changepoints):
                i_cpt = int(i_cpt) + 1
                cpt_index = self.seasonal_decompose.trend.index[i_cpt]
                if i == 0 :
                    # Only the first line carries the legend label
                    self.trend.axvline(cpt_index, color='black',
                            lw='1.0', label='Change point')
                else:
                    self.trend.axvline(cpt_index, color='black', lw='1.0')

        # Legend
        self.observed.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.observed.set_title('Time series decomposition')
        self.trend.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.seasonal.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.resid.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        #self.climatology.legend([self.years.value], loc='best',
        self.climatology.legend(loc='best',
                fontsize='small', fancybox=True, framealpha=0.5)
        self.climatology.set_title('Climatology')

        # Grid
        self.observed.grid(axis='both', alpha=.3)
        self.trend.grid(axis='both', alpha=.3)
        self.seasonal.grid(axis='both', alpha=.3)
        self.resid.grid(axis='both', alpha=.3)
        self.climatology.grid(axis='both', alpha=.3)

        # Redraw plot
        plt.draw()
예제 #14
0
def glove_main():
    """Train domain-adapted GloVe embeddings and write them to disk.

    Steps:

    1. load the article texts and strip non-ASCII characters;
    2. train a domain-specific GloVe model through the R script
       ``src/data/trainEmbeddings.R``;
    3. load the domain-general Common Crawl GloVe vectors for the words of
       the domain-specific vocabulary;
    4. fit a CCA between both embeddings on their shared words and project
       the remaining domain-specific words with the fitted weights;
    5. save the result to ``da_embeddings.txt`` in ``settings.models_dir``.
    """
    # Load data from nlp parsing
    with open('{}/articles-with-equations.json'.format(settings.data_dir),
              'r',
              encoding='utf-8') as jf:
        src_data = json.load(jf)

    texts = [
        src_data[art]['text'] for art in src_data
        if src_data[art]['text'] is not None
    ]

    # The "unidecode" step simplifies non-ASCII chars which
    # mess up the R GloVe engine.
    texts_df = pd.DataFrame({'text': pd.Series(texts).apply(unidecode)})

    # Source all the functions contained in the 'trainEmbeddings' R file.
    # The path is written out here: a `.format(...)` call placed inside the
    # string would be sent to R verbatim instead of being evaluated.
    r("source('src/data/trainEmbeddings.R')")

    # Call the main GloVe-embedding function from the R script
    trainEmbeddings_R = r("trainEmbeddings")

    # Train domain-specific GloVe embedding model and output as a Numpy
    # matrix. The pandas conversion is switched off again even when the R
    # call fails, so it cannot leak into later rpy2 calls.
    pandas2ri.activate()
    try:
        DS_embeddings_R = trainEmbeddings_R(texts_df)
    finally:
        pandas2ri.deactivate()

    DS_embeddings = numpy2ri.rpy2py(DS_embeddings_R[0])

    # Get domain-specific GloVe vocabulary
    domain_spec_vocab = list(DS_embeddings_R[1])

    # Load in Stanford's 'Common Crawl' domain-general Glove Embedding Model
    # Only pull out the words that are contained in our corpus
    # * This can take a while (~30min) - could use some optimization *
    # NOTE(review): assumes loadGloveModel returns a {word: vector} mapping,
    # as the key-based access below implies -- confirm.
    DG_embeddings = loadGloveModel(
        '{}/glove.42B.300d.txt'.format(settings.data_dir), domain_spec_vocab)

    # Processing to ensure rows match between the domain-general and
    # domain-specific embeddings.
    # Convert the domain-general embedding from dictionary to a word list
    # plus a matrix whose rows follow that list.
    domain_gen_vocab = list(DG_embeddings)
    DG_matrix = np.array([DG_embeddings[word] for word in domain_gen_vocab])
    gen_row = {word: row for row, word in enumerate(domain_gen_vocab)}

    # Find the indices of matching words in one pass over the
    # domain-specific vocabulary (deterministic order, no repeated
    # list.index scans).
    indices_spec = []
    indices_gen = []
    indices_spec_notDG = []
    for row, word in enumerate(domain_spec_vocab):
        if word in gen_row:
            indices_spec.append(row)
            indices_gen.append(gen_row[word])
        else:
            indices_spec_notDG.append(row)

    # Subset both arrays so that row i of each refers to the same word
    # (integer-list indexing already returns new arrays)
    DS_embeddings_subset = DS_embeddings[indices_spec, :]
    DG_embeddings_subset = DG_matrix[indices_gen, :]

    # fit cca model
    cca_res, DA_embeddings = domain_adapted_CCA(DG_embeddings_subset,
                                                DS_embeddings_subset,
                                                NC=100)

    # Project the domain-specific words missing from the domain-general
    # model with the weights learned by the CCA
    DS_embeddings_notinDG = DS_embeddings[indices_spec_notDG, :]
    DS_embeddings_notinDG_norm = zscore(DS_embeddings_notinDG)

    DA_notinDG_embeddings = cca_res.y_weights_.T @ DS_embeddings_notinDG_norm.T
    DA_embeddings_final = np.append(DA_embeddings,
                                    DA_notinDG_embeddings.T,
                                    axis=0)

    # write data to disk; numpy's default float format is used because an
    # integer format ('%d') would truncate every embedding value
    np.savetxt('{}/da_embeddings.txt'.format(settings.models_dir),
               DA_embeddings_final)
예제 #15
0
    def on_click(self, event):
        """
        Event handler for clicks on the left/right panels.

        Marks the clicked location on both panels, selects the time
        series nearest to it and redraws the observed series (with peaks
        and valleys), a moving-average trend, the seasonal and residual
        components, the climatology and the change points found on the
        trend.

        :param event: click event; its ``inaxes``, ``xdata`` and
                      ``ydata`` attributes are read
        """
        # Event does not apply for time series plot
        if event.inaxes not in [self.left_p, self.right_p]:
            return

        # Clear subplots
        self.observed.clear()
        self.trend_p.clear()
        self.seasonal_p.clear()
        self.resid_p.clear()
        self.climatology.clear()

        # Delete last reference point.
        # Artist.remove() is used instead of `del axes.lines[0]` because
        # recent Matplotlib releases expose `Axes.lines` as an immutable
        # sequence, on which item deletion raises.
        if len(self.left_p.lines) > 0:
            self.left_p.lines[0].remove()
            self.right_p.lines[0].remove()

        # Draw a point as a reference
        self.left_p.plot(event.xdata, event.ydata,
                marker='o', color='red', markersize=7, alpha=0.7)
        self.right_p.plot(event.xdata, event.ydata,
                marker='o', color='red', markersize=7, alpha=0.7)

        # Non-masked data
        left_plot_sd = self.left_ds.sel(longitude=event.xdata,
                                        latitude=event.ydata,
                                        method='nearest')
        # Single year dataset
        single_year_ds = self.single_year_ds.sel(longitude=event.xdata,
                                                 latitude=event.ydata,
                                                 method='nearest')

        # Materialise the selections when they are chunked
        if left_plot_sd.chunks is not None:
            left_plot_sd = left_plot_sd.compute()
            single_year_ds = single_year_ds.compute()

        ts_df = left_plot_sd.to_dataframe()

        # Mann-Kendall test
        _mk_test = mk_test(left_plot_sd.data, _round=3)

        # Observations + peaks and valleys
        self.observed.plot(left_plot_sd.time, left_plot_sd.data,
                label=f'Observed {_mk_test}')

        # Minimum separation between peaks: a quarter of the number of
        # time steps in the single-year dataset
        distance = int(np.ceil(len(self.single_year_ds.time) / 4))
        peaks, _ = find_peaks(left_plot_sd.data, distance=distance)
        self.observed.plot(left_plot_sd.time[peaks],
                left_plot_sd[peaks],
                label=f'Peaks [{peaks.shape[0]}]',
                marker='x', color='C1', alpha=0.3)

        # Valleys are the peaks of the negated series
        valleys, _ = find_peaks(left_plot_sd.data*(-1), distance=distance)
        self.observed.plot(left_plot_sd.time[valleys],
                left_plot_sd[valleys],
                label=f'Valleys, [{valleys.shape[0]}]',
                marker='x', color='C2', alpha=0.3)

        # Seasonal decompose
        period = int(self.bandwidth.currentText())
        nobs = len(left_plot_sd)

        # TODO interpolate and extrapolate trend
        trend = left_plot_sd.rolling(time=period, min_periods=1,
                center=True).mean()
        self.trend_p.plot(left_plot_sd.time.data, trend.data,
                label=f'Trend (window = {period})')

        period_averages = left_plot_sd.groupby("time.dayofyear").mean()

        # Model names starting with 'a' take the additive branch; every
        # other model takes the multiplicative branch
        if self.model.currentText()[0] == 'a':
            period_averages -= period_averages.mean(axis=0)
            seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]
            resid = (left_plot_sd - trend) - seasonal
        else:
            period_averages /= period_averages.mean(axis=0)
            seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]
            resid = left_plot_sd / seasonal / trend

        self.seasonal_p.plot(left_plot_sd.time.data, seasonal,
                label='Seasonality')
        self.resid_p.plot(left_plot_sd.time.data, resid.data,
                label='Residuals')

        # Set the same y limits from observed data
        self.trend_p.set_ylim(self.observed.get_ylim())

        # Climatology
        # x/y are passed by keyword: recent seaborn releases no longer
        # accept them positionally.
        sbn.boxplot(x=ts_df.index.dayofyear,
                y=ts_df[self.data_vars.currentText()], ax=self.climatology)
        # Plot year to analyse
        single_year_df = single_year_ds.to_dataframe()
        sbn.stripplot(x=single_year_df.index.dayofyear,
                y=single_year_df[self.data_vars.currentText()],
                color='red', marker='o', size=7, alpha=0.7,
                ax=self.climatology)

        self.climatology.tick_params(axis='x', rotation=70)

        # Change point (computed on the moving-average trend)
        r_vector = FloatVector(trend.data)
        #changepoint_r = self.cpt.cpt_mean(r_vector)
        #changepoints_r = self.cpt.cpt_var(r_vector, method='PELT',
        #        penalty='Manual', pen_value='2*log(n)')
        changepoints_r = self.cpt.cpt_meanvar(r_vector,
                test_stat='Normal', method='BinSeg', penalty="SIC")
        changepoints = numpy2ri.rpy2py(self.cpt.cpts(changepoints_r))

        if changepoints.shape[0] > 0:
            # Time axis of the series the change points were computed on.
            # This method never computes `self.seasonal_decompose`, so its
            # index must not be used to locate the change points.
            time_axis = left_plot_sd.time.data
            # Plot vertical line where the changepoint was found
            for i, i_cpt in enumerate(changepoints):
                i_cpt = int(i_cpt) + 1
                cpt_index = time_axis[i_cpt]
                if i == 0 :
                    # Only the first line carries the legend label
                    self.trend_p.axvline(cpt_index, color='black',
                            lw='1.0', label='Change point')
                else:
                    self.trend_p.axvline(cpt_index, color='black', lw='1.0')

        # Legend
        self.observed.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.observed.set_title('Time series decomposition')
        self.trend_p.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.seasonal_p.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)
        self.resid_p.legend(loc='best', fontsize='small',
                fancybox=True, framealpha=0.5)

        #self.climatology.legend([self.years.value], loc='best',
        self.climatology.legend(loc='best',
                fontsize='small', fancybox=True, framealpha=0.5)

        self.climatology.set_title('Climatology')

        # Grid
        self.observed.grid(axis='both', alpha=.3)
        self.trend_p.grid(axis='both', alpha=.3)
        self.seasonal_p.grid(axis='both', alpha=.3)
        self.resid_p.grid(axis='both', alpha=.3)
        self.climatology.grid(axis='both', alpha=.3)

        # Redraw plot
        plt.draw()
예제 #16
0
    def on_pbCPD_click(self):
        """
        Compute change points on the moving-average trend of every pixel

        The trend is a centred rolling mean whose window is read from the
        bandwidth selector. Detected change points are flagged in an
        int16 array shaped like the trend, which is saved next to the
        input file with the suffix ``_change_points.tif``.

        The wait cursor and the progress bar are reset even when the
        computation or the save step raises.
        """
        # Wait cursor
        QtWidgets.QApplication.setOverrideCursor(Qt.WaitCursor)

        try:
            msg = "Identifying change points..."
            self.progressBar.setEnabled(True)
            self.progressBar.setFormat(msg)
            self.progressBar.setValue(1)

            # Compute first the trend
            period = int(self.bandwidth.currentText())

            # Get data type
            dtype = self.left_ds.dtype

            # Get trend based on a moving window
            trend = self.left_ds.rolling(time=period, min_periods=1,
                    center=True).mean().astype(dtype)
            trend = trend.compute()
            trend.attrs = self.left_ds.attrs

            # Output data
            output = xr.zeros_like(trend).astype(np.int16).load()
            output.attrs = self.left_ds.attrs

            _, rows, cols = trend.shape

            # CPD methods (the same for every pixel)
            _method = 'BinSeg'
            _penalty = 'SIC'

            for x in range(cols):
                # Progress is reported once per column
                self.progressBar.setValue(int((x / cols) * 100))
                for y in range(rows):
                    _data = trend[:,y,x]
                    r_vector = FloatVector(_data)

                    #changepoint_r = self.cpt.cpt_mean(r_vector)
                    #changepoints_r = self.cpt.cpt_var(r_vector,
                    #        method='PELT', penalty='Manual',
                    #        pen_value='2*log(n)')

                    changepoints_r = self.cpt.cpt_meanvar(r_vector,
                            test_stat='Normal', method=_method,
                            penalty=_penalty)

                    changepoints = numpy2ri.rpy2py(
                            self.cpt.cpts(changepoints_r)).astype(int)

                    if changepoints.shape[0] > 0:
                        # Flag the time steps located one position after
                        # each reported change point
                        output[changepoints+1, y, x] = True

            fname = (f'{os.path.splitext(self.fname)[0]}'
                         f'_change_points.tif')

            msg = "Saving change points..."
            self.progressBar.setFormat(msg)
            self.progressBar.setValue(1)

            save_dask_array(fname=fname, data=output,
                    data_var=self.data_vars.currentText(), method=None,
                    n_workers=4, progressBar=self.progressBar)
        finally:
            self.progressBar.setValue(0)
            self.progressBar.setEnabled(False)

            # Standard cursor
            QtWidgets.QApplication.restoreOverrideCursor()
예제 #17
0
파일: pandas2ri.py 프로젝트: rs2/rpy2
def rpy2py_listvector(obj):
    """Convert an R list vector to its Python counterpart.

    Objects whose R class includes ``data.frame`` are wrapped in
    ``DataFrame`` and sent through ``rpy2py``; any other list is delegated
    to ``numpy2ri.rpy2py``.
    """
    if 'data.frame' not in obj.rclass:
        return numpy2ri.rpy2py(obj)
    return rpy2py(DataFrame(obj))
예제 #18
0
def to_series(r_vector, name=None):
    """Turn a named R vector into a pandas Series.

    The vector's ``names`` become the index and its converted values the
    data; ``name`` is passed through as the Series name.
    """
    labels = numpy2ri.rpy2py(r_vector.names)
    data = numpy2ri.rpy2py(r_vector)
    return pd.Series(data, index=labels, name=name)