Beispiel #1
0
def r_convert_pandas_dataframe(df: pd.DataFrame) -> RDataFrame:
    """
    Pandas dataframe to R dataframe conversion.

    Every column is converted according to its pandas dtype:

    * ``int64``   -> ``RIntVector``
    * ``float64`` -> ``RFloatVector``
    * ``object``  -> ``RFactorVector``

    Column labels are stringified to serve as R column names.

    See:
        http://chris.friedline.net/2015-12-15-rutgers/lessons/python2/03-data-types-and-format.html

    TODO/FIXME: Error when a dataframe column has NA elements.

    Raises:
        NotImplementedError: if a column holds datetime or timedelta values.
        TypeError: if a column has any other unsupported dtype.
    """
    elements = {}

    # Iterate over (label, Series) pairs so that the column is fetched with
    # its original label; only the key handed to R is stringified. Looking
    # the column up by ``str(label)`` fails for non-string labels.
    for header, column in df.items():
        column_name = str(header)
        column_type = str(column.dtype)

        if column_type == 'int64':
            elements[column_name] = RIntVector(column)
        elif column_type == 'float64':
            elements[column_name] = RFloatVector(column)
        elif column_type == 'object':
            elements[column_name] = RFactorVector(column)
        elif column_type.startswith(('datetime64', 'timedelta')):
            # pandas spells these dtypes with a unit (and optionally a
            # timezone), e.g. 'datetime64[ns]', 'datetime64[ns, UTC]' or
            # 'timedelta64[ns]', so match on the prefix rather than on an
            # exact string.
            raise NotImplementedError(
                'Date values are not currently implemented')
        else:
            msg = ' '.join(
                ['Given column_type is not recognized', column_type])
            raise TypeError(msg)

    return RDataFrame(elements)
Beispiel #2
0
    def _extract_mapping(self, cimpl_obj, cis_sites):
        """Build an insertion-to-CIS mapping from cimpl's CIS matrix.

        The CIS sites are converted to an R data frame, passed together with
        ``cimpl_obj`` to ``self._cimpl.getCISMatrix``, and the resulting
        matrix is reshaped into a dictionary.

        Args:
            cimpl_obj: Object forwarded unchanged to ``getCISMatrix``.
            cis_sites: CIS sites accepted by ``CisSite.to_frame``; the
                resulting frame must provide the ``id``, ``chromosome``,
                ``scale``, ``start`` and ``end`` columns.

        Returns:
            dict: Maps each value of the matrix's ``id`` column (used as the
            insertion id) to the set of CIS ids found in its ``X*`` columns.
        """
        # Convert CIS sites to frame format.
        cis_frame = CisSite.to_frame(cis_sites)

        # Convert to R representation for cimpl.
        chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

        r_base = importr('base')
        cis_frame_r = RDataFrame({
            'id':
            r_base.I(StrVector(cis_frame['id'])),
            'chromosome':
            r_base.I(StrVector(chr_with_prefix)),
            'scale':
            StrVector(cis_frame['scale']),
            'start':
            IntVector(cis_frame['start']),
            'end':
            IntVector(cis_frame['end'])
        })
        cis_frame_r.rownames = StrVector(cis_frame['id'])

        # Retrieve cis matrix from cimpl.
        cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
        cis_matrix = dataframe_to_pandas(cis_matrix_r)

        # Extract scale information from cis matrix.
        scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
        cis_matrix_scales = cis_matrix[['id'] + scale_cols]

        # Melt matrix into long format.
        mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
        mapping = mapping[['id', 'value']]
        mapping = mapping.rename(columns={
            'id': 'insertion_id',
            'value': 'cis_id'
        })

        # Drop empty rows first, as these entries are empty cells in the
        # matrix, then split the cis_id column into individual entries (for
        # entries with multiple ids).
        # ``.loc`` replaces the ``.ix`` indexer, which was removed from
        # pandas; for a boolean mask both select the same rows.
        mapping = mapping.loc[mapping['cis_id'] != '']
        mapping = expand_column(mapping, col='cis_id', delimiter='|')

        mapping_dict = {
            ins_id: set(grp['cis_id'])
            for ins_id, grp in mapping.groupby('insertion_id')
        }

        return mapping_dict
    def _python_params_to_r_objects(self, r_package=None):
        """Converts python objects to the appropriate R objects.

        Args:
            r_package: Object exposing ``toString``, ``as_double`` and
                ``as_logical`` (presumably the imported R ``base``
                namespace -- confirm against the caller). Despite the
                ``None`` default it must be supplied, because every
                conversion below goes through it.

        Returns:
            tuple: ``(r_df_, r_model_params_)`` where ``r_df_`` is
            ``self.df`` wrapped in an ``RDataFrame`` and ``r_model_params_``
            is a dict of the model parameters converted to R values.
            Optional parameters that are ``None`` are passed as R ``NULL``.
        """

        # get a ref to base R namespace
        r_base = r_package

        # convert the pandas.DataFrame to an R dataframe
        r_df_ = RDataFrame(self.df)

        # convert the model params to strings in R
        r_model_params_ = {
            "path_feature":
            r_base.toString(self.path_feature),
            "conversion_feature":
            r_base.toString(self.conversion_feature),
            "conversion_value_feature":
            r_base.toString(self.conversion_value_feature)
            if self.conversion_value_feature is not None else NULL,
            "null_path_feature":
            r_base.toString(self.null_path_feature)
            if self.null_path_feature is not None else NULL,
            "separator":
            r_base.toString(self.separator),
            "order":
            r_base.as_double(self.order),
            "n_simulations":
            r_base.as_double(self.n_simulations)
            if self.n_simulations is not None else NULL,
            "max_step":
            r_base.as_double(self.max_step)
            if self.max_step is not None else NULL,
            "return_transition_probs":
            r_base.as_logical(self.return_transition_probs)
            if self.return_transition_probs is not None else NULL,
            # Identity check, consistent with the other optional
            # parameters; ``!= None`` can be hijacked by types that
            # overload comparison operators.
            "random_state":
            r_base.as_double(self.random_state)
            if self.random_state is not None else NULL,
        }

        return r_df_, r_model_params_
Beispiel #4
0
def py2ro_pandasdataframe(obj):
    """Convert ``obj`` into an ``RDataFrame``.

    ``obj`` is first run through ``conversion.py2ri``. The result is then
    re-wrapped as a plain ``rinterface.SexpVector`` before being handed to
    the ``RDataFrame`` constructor.
    """
    # The downcast to an R list is deliberate: it sends the value through a
    # different code path in the DataFrame constructor, avoiding `str(k)`.
    return RDataFrame(rinterface.SexpVector(conversion.py2ri(obj)))