def r_convert_pandas_dataframe(df: pd.DataFrame) -> RDataFrame:
    """Convert a pandas dataframe to an R dataframe.

    Each column is wrapped in an R vector chosen from its pandas dtype:
    ``int64`` -> ``RIntVector``, ``float64`` -> ``RFloatVector`` and
    ``object`` -> ``RFactorVector``.

    See:
    http://chris.friedline.net/2015-12-15-rutgers/lessons/python2/03-data-types-and-format.html

    TODO/FIXME: Error when a dataframe column has NA elements.

    Args:
        df: Dataframe to convert.

    Returns:
        An ``RDataFrame`` built from the converted columns, keyed by the
        stringified column header.

    Raises:
        NotImplementedError: If a column holds datetime or timedelta values.
        TypeError: If a column has any other unsupported dtype.
    """
    elements = {}
    for header, dtype in zip(df.columns, df.dtypes):
        column_name = str(header)
        column_type = str(dtype)
        # Look the column up by its original label: a non-string header
        # (e.g. an int) cannot be found through its stringified form.
        column = df[header]

        if column_type == 'int64':
            elements[column_name] = RIntVector(column)
        elif column_type == 'float64':
            elements[column_name] = RFloatVector(column)
        elif column_type == 'object':
            elements[column_name] = RFactorVector(column)
        elif column_type.startswith(('datetime64', 'timedelta')):
            # pandas spells these dtypes with a unit (and optionally a
            # timezone), e.g. 'datetime64[ns]' or 'timedelta64[ns]', so an
            # exact comparison against the bare name never matches.
            raise NotImplementedError(
                'Date values are not currently implemented')
        else:
            msg = 'Given column_type is not recognized {}'.format(column_type)
            raise TypeError(msg)

    return RDataFrame(elements)
def _extract_mapping(self, cimpl_obj, cis_sites):
    """Build a mapping from insertion id to the CIS ids it is assigned to.

    The CIS sites are converted to an R data frame, handed to cimpl's
    ``getCISMatrix`` together with ``cimpl_obj``, and the resulting matrix
    is reshaped into a dictionary.

    Args:
        cimpl_obj: Object passed unchanged to ``getCISMatrix`` as its
            first argument.
        cis_sites: CIS sites, converted to a frame via
            ``CisSite.to_frame``.

    Returns:
        dict: Maps each insertion id to the ``set`` of CIS ids found for
        it in the matrix. Insertions with no non-empty cell are absent.
    """
    # Convert CIS sites to frame format.
    cis_frame = CisSite.to_frame(cis_sites)

    # Convert to R representation for cimpl.
    chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

    r_base = importr('base')
    cis_frame_r = RDataFrame({
        'id': r_base.I(StrVector(cis_frame['id'])),
        'chromosome': r_base.I(StrVector(chr_with_prefix)),
        'scale': StrVector(cis_frame['scale']),
        'start': IntVector(cis_frame['start']),
        'end': IntVector(cis_frame['end'])
    })
    cis_frame_r.rownames = StrVector(cis_frame['id'])

    # Retrieve cis matrix from cimpl.
    cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
    cis_matrix = dataframe_to_pandas(cis_matrix_r)

    # Extract scale information from cis matrix: keep the 'id' column plus
    # every column whose name starts with 'X'.
    scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
    cis_matrix_scales = cis_matrix[['id'] + scale_cols]

    # Melt matrix into long format.
    mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
    mapping = mapping[['id', 'value']]
    mapping = mapping.rename(columns={
        'id': 'insertion_id',
        'value': 'cis_id'
    })

    # Drop empty rows first (these are empty cells in the matrix), then
    # split the cis_id column into individual entries for cells that hold
    # multiple '|'-delimited ids.
    # NOTE: boolean-mask selection uses .loc; DataFrame.ix no longer exists
    # in current pandas releases.
    mapping = mapping.loc[mapping['cis_id'] != '']
    mapping = expand_column(mapping, col='cis_id', delimiter='|')

    mapping_dict = {
        ins_id: set(grp['cis_id'])
        for ins_id, grp in mapping.groupby('insertion_id')
    }

    return mapping_dict
def _python_params_to_r_objects(self, r_package=None):
    """Converts python objects to the appropriate R objects.

    Args:
        r_package: Namespace used as the base R namespace; it must provide
            ``toString``, ``as_double`` and ``as_logical``. Although the
            parameter defaults to ``None``, the conversions below are
            attribute lookups on it, so a real namespace has to be passed.

    Returns:
        tuple: ``(r_df_, r_model_params_)`` where ``r_df_`` is
        ``RDataFrame(self.df)`` and ``r_model_params_`` is a dict of the
        converted model parameters. Optional parameters that are ``None``
        on the Python side are mapped to ``NULL``.
    """
    # get a ref to base R namespace
    Rbase = r_package

    # convert the pandas.DataFrame to an R dataframe
    r_df_ = RDataFrame(self.df)

    # convert the model params to strings in R
    r_model_params_ = {
        "path_feature": Rbase.toString(self.path_feature),
        "conversion_feature": Rbase.toString(self.conversion_feature),
        "conversion_value_feature": (
            Rbase.toString(self.conversion_value_feature)
            if self.conversion_value_feature is not None else NULL),
        "null_path_feature": (
            Rbase.toString(self.null_path_feature)
            if self.null_path_feature is not None else NULL),
        "separator": Rbase.toString(self.separator),
        "order": Rbase.as_double(self.order),
        "n_simulations": (
            Rbase.as_double(self.n_simulations)
            if self.n_simulations is not None else NULL),
        "max_step": (
            Rbase.as_double(self.max_step)
            if self.max_step is not None else NULL),
        "return_transition_probs": (
            Rbase.as_logical(self.return_transition_probs)
            if self.return_transition_probs is not None else NULL),
        # Identity check, like every other optional parameter above: an
        # equality test against None misbehaves for values that overload
        # comparison operators.
        "random_state": (
            Rbase.as_double(self.random_state)
            if self.random_state is not None else NULL),
    }

    return r_df_, r_model_params_
def py2ro_pandasdataframe(obj):
    """Convert *obj* into an ``RDataFrame`` via the low-level converter.

    *obj* is first run through ``conversion.py2ri`` and the result is then
    re-wrapped as a plain ``rinterface.SexpVector`` before being given to
    the ``RDataFrame`` constructor.
    """
    # Cast down to an R list: this goes through a different code path in
    # the DataFrame constructor, avoiding `str(k)`.
    as_r_list = rinterface.SexpVector(conversion.py2ri(obj))
    return RDataFrame(as_r_list)