예제 #1
0
    def recommend_ads(self, job_ads):
        """Classify job ads with the instance model and return recommendations.

        Arguments
        ----------
        job_ads : list[:class:`JobAd`]
            Ads to classify. Each instance should have id, site, searchterm,
            title and description defined.

        Returns
        ----------
        results : list[:class:`JobAd`]
            One instance per id of the converted data frame, each carrying
            id and recommendation.
        """
        # Build the R data frame and keep the ids aside before the frame is
        # transformed by the preparation steps below.
        frame = self._create_R_dataframe(job_ads, self._class_columns)
        ad_ids = frame.rx2('id')

        # Cleaning and feature preparation are implemented on the R side.
        r_funcs = self._R_functions
        frame = r_funcs.cleanJobAds(frame,
                                    StrVector(self._search_terms),
                                    StrVector(self._sites))
        frame = r_funcs.createJoinDTM(frame, self._language.lower())
        frame = r_funcs.prepNewAds(self._RFmodel, frame)

        # Classify the prepared ads.
        predictions = r_funcs.RFpred(self._RFmodel, frame)

        # Pair every id with its prediction; the prediction is shifted down
        # by one before being stored as the recommendation.
        n_ads = robjects.r['length'](ad_ids)[0]
        results = []
        for idx in range(n_ads):
            results.append(
                JobAd.create({"id": ad_ids[idx],
                              "recommendation": int(predictions[idx]) - 1}))
        return results
예제 #2
0
def df2mtr(df):
    '''
    Convert the numeric columns of a pandas dataframe to an r matrix.

    Non-numeric columns are dropped before the conversion. The row and
    column labels of the remaining frame become the matrix dimnames.

    Args:
        df: pandas dataframe of shape (# samples, # features)

    Returns:
        mtr: r matrix of shape (# samples, # numeric features)

    Raises:
        AssertionError: if ``df`` is not a pandas DataFrame.
    '''
    # check arguments
    assert isinstance(df,
                      pd.DataFrame), 'Argument df need to be a pd.Dataframe.'

    # select only numeric columns
    df = df.select_dtypes('number')

    # Labels are stringified explicitly so that non-string labels (such as
    # the integers of a default index) can be stored as R dimnames.
    row_names = StrVector(tuple(str(label) for label in df.index))
    col_names = StrVector(tuple(str(label) for label in df.columns))
    dimnames = ListVector(
        rlc.OrdDict([('index', row_names), ('columns', col_names)]))

    # The values are flattened row after row, hence byrow=True below.
    values = FloatVector(df.values.flatten())

    return robjects.r.matrix(values,
                             nrow=len(df.index),
                             ncol=len(df.columns),
                             dimnames=dimnames,
                             byrow=True)
예제 #3
0
파일: pandas2ri.py 프로젝트: rs2/rpy2
def py2rpy_pandasseries(obj):
    """Convert a pandas Series-like object into an R vector.

    The conversion used depends on ``obj.dtype``:

    * dtype name ``'O'``: a warning is emitted and the values become an R
      vector of strings.
    * ``'category'``: converted through ``py2rpy_categoryseries`` and
      wrapped in a ``FactorVector``.
    * any datetime64 dtype: rebuilt in R from year/month/day/hour/minute/
      second components with ``ISOdatetime`` and wrapped as ``POSIXct``.
    * ``dt_O_type``: all values that are not ``None`` must share one Python
      type, which selects the R vector class.
    * anything else: the numpy array converter is used.

    Finally the index is attached as R ``names`` (1-d input) or
    ``dimnames`` (otherwise).

    Raises:
        ValueError: in the ``dt_O_type`` branch, when the values that are
            not ``None`` have more than one Python type.
    """
    # NOTE(review): numpy appears to report the object dtype name as
    # 'object' rather than 'O'; if so this first branch never matches and
    # object series reach the dt_O_type branch below - confirm.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        # An empty timezone name is passed to R when the series has no tz.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        d = [
            IntVector([x.year for x in obj]),
            IntVector([x.month for x in obj]),
            IntVector([x.day for x in obj]),
            IntVector([x.hour for x in obj]),
            IntVector([x.minute for x in obj]),
            # Seconds are fractional so that microseconds are kept.
            FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj])
        ]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif (obj.dtype == dt_O_type):
        # Find the single Python type shared by all values that are not
        # None; it stays None when every value is None.
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                homogeneous_type = type(x)
                continue
            if type(x) is not homogeneous_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        # Dispatch on the shared type. A type missing from this table
        # makes the lookup itself fail with a KeyError.
        res = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        }[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy

        res = func(obj)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
예제 #4
0
파일: cimpl.py 프로젝트: rajithbt/pyim
    def _extract_mapping(self, cimpl_obj, cis_sites):
        """Map insertion ids to the ids of the CIS sites they belong to.

        The CIS sites are converted to an R data frame, handed to cimpl's
        ``getCISMatrix`` together with ``cimpl_obj``, and the returned
        matrix is reshaped into a dictionary.

        Args:
            cimpl_obj: R object passed through to ``getCISMatrix``.
            cis_sites: CIS sites accepted by ``CisSite.to_frame``.

        Returns:
            dict: insertion id -> set of CIS ids found for that insertion.
        """
        # Convert CIS sites to frame format.
        cis_frame = CisSite.to_frame(cis_sites)

        # Convert to R representation for cimpl.
        chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

        r_base = importr('base')
        cis_frame_r = RDataFrame({
            'id':
            r_base.I(StrVector(cis_frame['id'])),
            'chromosome':
            r_base.I(StrVector(chr_with_prefix)),
            'scale':
            StrVector(cis_frame['scale']),
            'start':
            IntVector(cis_frame['start']),
            'end':
            IntVector(cis_frame['end'])
        })
        cis_frame_r.rownames = StrVector(cis_frame['id'])

        # Retrieve cis matrix from cimpl.
        cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
        cis_matrix = dataframe_to_pandas(cis_matrix_r)

        # Extract scale information from cis matrix (columns named 'X...').
        scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
        cis_matrix_scales = cis_matrix[['id'] + scale_cols]

        # Melt matrix into long format.
        mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
        mapping = mapping[['id', 'value']]
        mapping = mapping.rename(columns={
            'id': 'insertion_id',
            'value': 'cis_id'
        })

        # Drop empty rows first, as these entries are empty cells in the
        # matrix, then split the cis_id column into individual entries (for
        # entries with multiple ids).
        # `.loc` replaces the `.ix` indexer, which pandas 1.0 removed; for
        # a boolean mask both select the same rows.
        mapping = mapping.loc[mapping['cis_id'] != '']
        mapping = expand_column(mapping, col='cis_id', delimiter='|')

        mapping_dict = {
            ins_id: set(grp['cis_id'])
            for ins_id, grp in mapping.groupby('insertion_id')
        }

        return mapping_dict
예제 #5
0
 def _comparisons_dataframe(self):
     """Assemble the pairwise comparisons as an R data frame.

     The frame has four columns, in this order: ``Label.1`` and ``Label.2``
     (factors built from ``self.comparison_items`` with ``self.items`` as
     levels) followed by ``win1`` and ``win2`` (float vectors built from
     ``self.comparison_wins``).
     """
     columns = []
     for position, label in enumerate(('Label.1', 'Label.2')):
         factor = FactorVector(self.comparison_items[position],
                               levels=StrVector(self.items))
         columns.append((label, factor))
     for position, label in enumerate(('win1', 'win2')):
         columns.append((label, FloatVector(self.comparison_wins[position])))
     # OrdDict keeps the column order given above.
     return DataFrame(OrdDict(columns))
예제 #6
0
 def get_dosage_by_chunk(self, snpid_list):
     '''
     Load the requested variants and return their expected dosages.

     Genotype probabilities are read through rbgen and turned into the
     expected dosage of the second allele, prob[:, :, 1] + 2 * prob[:, :, 2].
     Missing entries (NaN) are replaced by the mean of the same variant
     over the non-missing samples.

     Returns a 4-tuple (dosage array, allele0 list, allele1 list, rsid
     list), with variants de-duplicated by rsid and then reordered with
     ``self.match_y_to_x`` against ``snpid_list``.
     '''
     bgen_chunk = self.rbgen.bgen_load(self.bgen_path,
                                       index_filename=self.bgi_path,
                                       rsids=StrVector(snpid_list),
                                       max_entries_per_sample=4)
     with localconverter(ro.default_converter + pandas2ri.converter):
         variants = ro.conversion.rpy2py(bgen_chunk[0])
         probs = ro.conversion.rpy2py(bgen_chunk[4])

     # probs: nvariant x nsample x num_of_allele_combination
     with np.errstate(invalid='ignore'):
         dosage = probs[:, :, 1] + 2 * probs[:, :, 2]

     # Mean-impute the missing entries variant by variant.
     nan_mask = np.isnan(dosage)
     if nan_mask.sum() > 0:
         variant_idx, sample_idx = np.where(nan_mask)
         variant_means = np.nanmean(dosage, axis=1)
         dosage[variant_idx, sample_idx] = variant_means[variant_idx]

     # Keep the first occurrence of every rsid (multiallelic variants can
     # make an rsid appear more than once).
     rsid_values = variants.rsid.tolist()
     _, first_seen = np.unique(rsid_values, return_index=True)
     dosage = dosage[first_seen, :]
     variants = variants.iloc[first_seen, :]

     # Reorder so that the output follows the input query.
     order = self.match_y_to_x(variants.rsid.values, np.array(snpid_list))
     variants = variants.iloc[order, :]
     return (dosage[order, :], variants.allele0.tolist(),
             variants.allele1.tolist(), variants.rsid.tolist())
예제 #7
0
def create_clustering_file(outdir, outfile):
    """Write read-abundance tables for a BLAST results file using R.

    The R helper reads ``outfile`` as a headerless TSV, counts the rows per
    (Sample, qseqid) pair, and writes two TSV files into ``outdir``: the
    counts spread to one column per Sample, and the plain counts.

    Args:
        outdir: directory receiving the two abundance tables.
        outfile: path of the TSV results file to read.
    """
    rpackages.importr('base')
    # A one-element tuple needs the trailing comma; without it the name is
    # a plain string and the check below would run once per character.
    package_names = ('tidyverse',)
    utils = rpackages.importr('utils')
    utils.chooseCRANmirror(ind=1)
    packnames_to_install = [
        x for x in package_names if not rpackages.isinstalled(x)
    ]
    if packnames_to_install:
        utils.install_packages(StrVector(packnames_to_install))
    rpackages.importr('tidyverse')
    robjects.r['options'](warn=-1)
    # NOTE(review): the spread (one column per Sample) table is written to
    # the file without "-wide" in its name and the plain counts to the
    # "-wide" file; the column name " evalue" also has a leading space.
    # Both are kept as they were - confirm against downstream consumers.
    create_file = robjects.r('''
		function(results_file,outdir) {
			all_domains_blast_df <- read_tsv(results_file, col_names = F)
			names(all_domains_blast_df) <- c("sseqid", "slen","sstart", "send", "qseqid", "qlen", "qstart", "qend", "qcovs", "pident"," evalue", "Sample", "cohort")
			all_domains_blast_df_count <- all_domains_blast_df %>% group_by(Sample, qseqid) %>% count() %>% ungroup()
			all_domains_blast_df_count_table <- all_domains_blast_df_count %>% spread(., Sample, n, fill =0 )
			abundFile = file.path(outdir, "unique-biosynthetic-reads-abundance-table.txt")
			abundWideFile = file.path(outdir, "unique-biosynthetic-reads-abundance-table-wide.txt")
			write_tsv(all_domains_blast_df_count_table, abundFile, col_names = T)
			write_tsv(all_domains_blast_df_count, abundWideFile, col_names = T)
		}
		''')

    create_file(outfile, outdir)
예제 #8
0
파일: pflacco.py 프로젝트: Reiyan/pflacco
def _translate_control(control):
    """
    Transforms a python dict to a valid R object
    Args:
      control: python dict. Values that are lists are converted to the R
        vector type matching their elements; any other value is passed on
        unchanged.

    Returns: R object of type ListVector

    Note: a list whose elements fit none of the supported element types
    (bool, int, float, str), for example a mix of int and float, is left
    out of the result, as before.
    """
    ctrl = {}
    for key, value in control.items():
        if not isinstance(value, list):
            ctrl[key] = value
            continue

        # bool has to be tested before int: bool is a subclass of int, so
        # testing int first sends every list of bools to IntVector. The
        # emptiness guard keeps empty lists on the IntVector path.
        if value and all(isinstance(n, bool) for n in value):
            entry = BoolVector(value)
        elif all(isinstance(n, int) for n in value):
            entry = IntVector(value)
        elif all(isinstance(n, float) for n in value):
            entry = FloatVector(value)
        elif all(isinstance(n, str) for n in value):
            entry = StrVector(value)
        else:
            entry = None

        if entry is not None:
            ctrl[key] = entry
    return ListVector(ctrl)
def set_minus_with_cluster(result_R, cluster_file_name, clade_set, color_set,
                           color_num, order_levels, file_name_suffix):
    '''
    Run the set-minus/cluster R script and draw its four curve plots.

    The arguments are published as R global variables, the R source held in
    R_code_set_minus_with_cluster is evaluated, and the R function
    draw_curve is then called once per output file (svg and png for both
    the "mix" and the "break_clearly" variant).

    input 1: result_R, exposed to R as "result_R"; per the original notes
             it holds set_minus_set_minus.xlsx and receives the plots
    input 2: cluster_file_name, exposed to R as "cluster_file_name"
    input 3: clade_set
    input 4: color_set
    input 5: color_num; inputs 3-5 are passed to create_color_clade, whose
             result is exposed as "color_clade"; color_num is also part of
             every output file name
    input 6: order_levels, exposed to R as a string vector; per the
             original notes it determines which position each number
             corresponds to
    input 7: file_name_suffix, appended to the "break_clearly" file names
    '''
    r_globals = robjects.globalenv
    r_globals["result_R"] = result_R
    r_globals["color_clade"] = create_color_clade(clade_set, color_set,
                                                  color_num)
    r_globals["cluster_file_name"] = cluster_file_name
    r_globals["order_levels"] = StrVector(order_levels)
    robjects.r(R_code_set_minus_with_cluster)

    def output_name(sorted_variant, extension):
        # File names are built lazily, right before each draw call.
        if sorted_variant:
            return ("set_minus_clade_break_clearly_" + str(color_num) + "_" +
                    file_name_suffix + extension)
        return "set_minus_clade_mix_" + str(color_num) + "_binary" + extension

    # Call order: mix svg, mix png, sorted svg, sorted png.
    for sorted_variant in (False, True):
        r_variable = 'in_fl_sorted' if sorted_variant else 'in_fl'
        for extension in (".svg", ".png"):
            robjects.r['draw_curve'](robjects.r[r_variable],
                                     output_name(sorted_variant, extension))
예제 #10
0
    def items(self, n_rows_cached=100, include_rsid=None):
        """
        Retrieve generator of variants, one by one. Although variants are returned in the order as they are stored in
        the BGEN file, when there are variants with the same positions their order is not guaranteed.
        :param n_rows_cached: number of index rows fetched, and therefore variants loaded through rbgen, per batch.
        :param include_rsid: optional iterable of rsids; when given, only these variants are read and each batch is
            loaded by rsid instead of by position range.
        :return: generator yielding one row per variant, with 'chromosome' renamed to 'chr' (cast to int) and a
            'dosages' entry computed as the dot product of the genotype probabilities with [0, 1, 2].
        """
        from contextlib import closing

        # retrieve positions
        if include_rsid is not None:
            # Single quotes are doubled so that an rsid containing a quote
            # cannot terminate the SQL string literal it is embedded in.
            quoted_rsids = ', '.join(
                "'{}'".format(str(x).replace("'", "''"))
                for x in include_rsid)
            stm = 'select distinct rsid, position from Variant where rsid in ({}) order by file_start_position asc'.format(
                quoted_rsids)
        else:
            stm = 'select distinct rsid, position from Variant order by file_start_position asc'

        # A sqlite3 connection used as a context manager only manages the
        # transaction; closing() makes sure the connection itself is
        # released when the generator finishes or is closed.
        with closing(sqlite3.connect(self.bgi_path)) as conn:
            cur = conn.cursor()
            cur.execute(stm)

            iteration = 0

            while True:
                if iteration > 0:
                    # Drop the previous batch of R data before loading the
                    # next one, to limit the memory held at any time.
                    cached_data_struct = cached_data.__sexp__
                    del cached_data
                    del cached_data_struct
                    gc.collect()

                positions = cur.fetchmany(size=n_rows_cached)
                if not positions:
                    break

                rsids = [x[0] for x in positions]
                positions = [x[1] for x in positions]

                if include_rsid is None:
                    # Load everything between the first and the last
                    # position of this batch.
                    ranges = pd.DataFrame({
                        'chromosome': [self.chr_number],
                        'start': [positions[0]],
                        'end': [positions[-1]],
                    })

                    cached_data = self.rbgen.bgen_load(self.bgen_path, ranges)

                else:
                    cached_data = self.rbgen.bgen_load(self.bgen_path,
                                                       rsids=StrVector(rsids))

                # NOTE(review): ri2py looks like the rpy2 2.x spelling of
                # this conversion - confirm the rpy2 version targeted here.
                all_variants = pandas2ri.ri2py(cached_data[0])
                all_probs = pandas2ri.ri2py(cached_data[4])

                iteration += 1

                for row_idx, (_, row) in enumerate(all_variants.iterrows()):
                    dosage_row = row.rename({'chromosome': 'chr'})
                    dosage_row['chr'] = int(dosage_row.chr)
                    dosage_row['dosages'] = np.dot(all_probs[row_idx, :, :],
                                                   [0, 1, 2])

                    yield dosage_row
예제 #11
0
    def _create_R_dataframe(self, job_ads, include_columns):
        """Build an R dataframe out of job ads.

        Arguments
        ----------
        job_ads : list[:class:`JobAd`]
            Ads to convert; every requested column is read from each ad
            with ``ad[column]``.
        include_columns : list[str]
            Names of the columns to put in the dataframe.

        Returns
        ----------
        dataf : :class:`robjects.DataFrame`
            Dataframe with one column per requested name. The "relevant"
            column is an integer vector; every other column is a string
            vector wrapped with R's ``I()``.

        Raises
        ----------
        Exception
            If ``job_ads`` is empty.
        """
        if not len(job_ads):
            raise Exception("No job ads to convert to R dataframe.")

        # Reshape to {column: [one value per ad]}. Every value goes through
        # _remove_diacritics first.
        columns = {}
        for name in include_columns:
            cleaned = [self._remove_diacritics(ad[name]) for ad in job_ads]
            if name == "relevant":
                columns[name] = IntVector(cleaned)
            else:
                columns[name] = self._base.I(StrVector(cleaned))

        return robjects.DataFrame(columns)
def importFromDashUtils(funcName):
    """Return the R function ``funcName`` defined in ../dashboard_utils.R.

    The parent of the current working directory has to be named
    "dashboard". The R packages listed below are installed when missing,
    the R script is sourced, and the function is looked up in R's global
    environment.

    Raises:
        Exception: if the parent of the working directory is not
            "dashboard".
    """
    # The script is located relative to the working directory, so refuse to
    # run from anywhere but a direct child of dashboard/.
    parent_name = Path(os.getcwd()).parent.parts[-1]
    if parent_name != "dashboard":
        raise Exception(
            "Error: the direct parent of 'study_management/' should be 'dashboard/'."
        )

    # R's utility package, with the first CRAN mirror of the list selected.
    r_utils = rpackages.importr('utils')
    r_utils.chooseCRANmirror(ind=1)

    # R package names, copied from dashboard/dashboard_utils.R
    required = ("RSQLite", "plyr", "dplyr", "ggplot2", "zoo", "sqldf",
                "rjson", "reticulate")
    missing = []
    for package in required:
        if not rpackages.isinstalled(package):
            missing.append(package)
    if len(missing) > 0:
        r_utils.install_packages(StrVector(missing))

    # Source the R script, then hand back the function it defined.
    robjects.r['source']('../dashboard_utils.R')
    return robjects.globalenv[funcName]
예제 #13
0
def get_exons(mart):
    """Queries a Mart object to find all exons of its dataset attribute.

    Forms a specific getBM query that is sent to the BioMart API to
    retrieve information about the exons (and their exonic coordinates)
    of a specific Dataset. The output is then transformed via the GRanges
    Bioconductor package and seqnames converted to UCSC standard.

    Args:
        mart: an rpy2-converted biomaRt Mart object.

    Returns:
        An rpy2 DataFrame containing a table of relevant exon information.
        DataFrame column headers are:
        ["seqnames", "start", "end", "width", "strand"]
    """
    exons = R.getBM(attributes = StrVector(("chromosome_name",
                "exon_chrom_start", "exon_chrom_end", "strand")),
                mart=mart)

    def strand_symbol(code):
        # 1 -> '+', -1 -> '-'; anything else (including values that cannot
        # be read as an integer) becomes '*'.
        try:
            return {1: '+', -1: '-'}.get(int(code), '*')
        except (TypeError, ValueError):
            return '*'

    # The strand is translated exon by exon. Comparing the whole strand
    # column with a single string produced one scalar value that was then
    # applied to every range.
    strands = StrVector([strand_symbol(code)
                         for code in exons.rx2('strand')])

    exons_ranges = R.GRanges(
        seqnames=exons.rx2('chromosome_name'),
        ranges=R.IRanges(start=exons.rx2('exon_chrom_start'),
                         end=exons.rx2('exon_chrom_end')),
        strand=strands)

    # `seqlevelsStyle<-` is a replacement function, so it has to be looked
    # up by its backquoted name before it can be called from Python.
    # https://stackoverflow.com/questions/38806898/
    set_method = R("`seqlevelsStyle<-`")
    exons_ranges = set_method(exons_ranges, "UCSC")

    as_data_frame = R("function(x) as.data.frame(x)")
    exons_ranges_df = as_data_frame(exons_ranges)

    return exons_ranges_df
예제 #14
0
def get_genes(mart):
    """Fetch the genes of a Mart's dataset as a pandas DataFrame.

    A getBM query for the gene name, chromosome, start and end position is
    sent through ``mart``; the result is converted to pandas and the
    ``external_gene_name`` column is renamed to ``gene_name``.

    Args:
        mart: an rpy2-converted biomaRt Mart object.

    Returns:
        A pandas DataFrame with the column headers
        ["gene_name", "chromosome_name", "start_position", "end_position"]
    """
    wanted_attributes = ("external_gene_name", "chromosome_name",
                         "start_position", "end_position")
    r_genes = R.getBM(attributes=StrVector(wanted_attributes), mart=mart)

    genes_df = pandas2ri.ri2py(r_genes)
    column_names = {'external_gene_name': 'gene_name'}
    genes_df.rename(columns=column_names, inplace=True)
    return genes_df
예제 #15
0
    def load_geo(self):
        """Load the raw CEL files of ``self.ID`` into ``self.raw_data``.

        When the ``<ID>_RAW`` directory does not exist under
        ``self.input_save`` yet, the ``<ID>_RAW.tar`` archive is downloaded
        with ``getGEOSuppFiles`` (unless already present) and extracted into
        that directory. The CEL files found there are then read with
        ``oligo.read_celfiles``.
        """
        raw_name = '{}_RAW'.format(self.ID)
        tar_path = os.path.join(self.input_save, raw_name)
        if not os.path.isdir(tar_path):
            archive_path = os.path.join(self.input_save, raw_name + '.tar')
            if not os.path.isfile(archive_path):
                geoquery.getGEOSuppFiles(self.ID,
                                         baseDir=self.input_save,
                                         makeDirectory=False)
            # The context manager closes the archive once it is extracted.
            # NOTE(review): extractall() writes whatever member paths the
            # archive contains - confirm the archive source is trusted.
            with tarfile.open(name=archive_path, mode="r:*") as archive:
                archive.extractall(path=tar_path)

        affy_files = oligo_classes.list_celfiles(tar_path, listGzipped=True)
        cel_files = [
            str(tar_path) + "/" + str(affy_file) for affy_file in affy_files
        ]
        sv = StrVector(cel_files)
        print(type(sv[0]))
        data = oligo.read_celfiles(filenames=sv)

        self.raw_data = data
예제 #16
0
def GetRFilePath(folderName, fileName):
    """
    'GetRFilePath' is used to generate a R file path in python CGI

    ``folderName`` and ``fileName`` are concatenated and returned as an R
    character vector holding that single path.
    """
    # Passing the concatenated str straight to StrVector creates one vector
    # element per character, which then needed an R paste() call to be
    # glued back together. Joining on the Python side gives the same
    # single-element vector directly.
    return StrVector(["".join(folderName + fileName)])
예제 #17
0
    def expand_namespace(self, pkg):
        """Add the names exported by R package ``pkg`` to EXTRA_KEYWORDS.

        The package is imported, after an installation attempt when the
        import fails; "randomForestCI" and "causalForest" get one more
        attempt through install_github. Failures are tolerated: the names
        of R's utils package are added in every case.
        """
        # R packages needed to install and inspect other packages.
        r_base = importr('base')
        r_utils = importr('utils')
        importr('devtools')
        r_utils.chooseCRANmirror(ind=1)

        # Load the package, installing it first where necessary.
        try:
            importr(pkg, on_conflict="warn")
        except Exception:
            r_utils.install_packages(StrVector([pkg]))
            try:
                importr(pkg)
            except Exception:
                if pkg in ["randomForestCI", "causalForest"]:
                    try:
                        robjects.r('install_github("swager/{}")'.format(pkg))
                        importr(pkg)
                    except Exception:
                        pass  # Give up

        # Names exported by the package (none when it could not be listed).
        try:
            package_names = list(r_base.ls("package:{}".format(pkg)))
        except Exception:
            package_names = []
            print("Could not get names for R package: {}.".format(pkg))
        utils_names = list(r_base.ls("package:utils"))

        # Merge everything into the keyword set.
        merged = set(self.EXTRA_KEYWORDS)
        merged.update(package_names)
        merged.update(utils_names)
        self.EXTRA_KEYWORDS = merged
예제 #18
0
파일: pandas2ri.py 프로젝트: Pumawat/Explo
def py2ri_pandasseries(obj):
    """Convert a pandas Series-like object into an R vector.

    Categorical data becomes a factor, datetime64[ns] data becomes POSIXct
    (rebuilt from its integer date and time components), and everything
    else goes through the numpy array converter. The index is attached as
    R ``names`` for 1-d input and as ``dimnames`` otherwise.
    """
    if obj.dtype.name == 'category':
        res = FactorVector(py2ri_categoryseries(obj))
    elif obj.dtype == dt_datetime64ns_type:
        # time series: one integer vector per date/time component
        components = [
            IntVector([getattr(stamp, unit) for stamp in obj])
            for unit in ('year', 'month', 'day', 'hour', 'minute', 'second')
        ]
        #FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*components))
    else:
        # converted as a numpy array, as currently performed by numpy2ri
        to_r = numpy2ri.converter.py2ri.registry[numpy.ndarray]
        res = to_r(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        index_labels = StrVector(tuple(str(label) for label in obj.index))
        res.do_slot_assign('names', index_labels)
    else:
        r_index = SexpVector(conversion.py2ri(obj.index))
        res.do_slot_assign('dimnames', r_index)
    return res
예제 #19
0
    def install_bart(self):
        """Start the rJava JVM, install bartMachine if needed and import it.

        The JVM is initialised with a 16 GB maximum heap and the heap size
        reported by the JVM is printed to stderr. ``bartMachine`` is
        installed when it is not installed yet.

        Returns:
            The imported ``bartMachine`` R package.
        """
        import rpy2.robjects.packages as rpackages
        from rpy2.robjects.packages import importr
        from rpy2.robjects.vectors import StrVector
        import rpy2.robjects as robjects

        # NOTE(review): presumably meant to set R's "download.file.method"
        # option; confirm the keyword is translated to the dotted name.
        robjects.r.options(download_file_method='curl')

        # install.packages("rJava")
        # '.env' is given an explicit Python name for the import.
        rj = importr("rJava", robject_translations={'.env': 'rj_env'})
        rj._jinit(parameters="-Xmx16g", force_init=True)
        print(
            "rJava heap size is",
            np.array(rj._jcall(rj._jnew("java/lang/Runtime"), "J",
                               "maxMemory"))[0] / 1e9,
            "GB.",
            file=sys.stderr)

        package_names = ["bartMachine"]
        utils = rpackages.importr('utils')
        # This call was duplicated; one call with the same argument is
        # enough.
        utils.chooseCRANmirror(ind=0)

        names_to_install = [
            x for x in package_names if not rpackages.isinstalled(x)
        ]
        if names_to_install:
            utils.install_packages(StrVector(names_to_install))

        return importr("bartMachine")
예제 #20
0
 def query(self, method: str, variables: List[str]) -> List:
     """Query the network for ``variables`` with gRain.

     ``method`` is passed to ``querygrain`` as its ``type`` argument and the
     result is requested as a data.frame, which is then handed to
     ``self._format_query_output`` together with ``method`` and
     ``variables``.
     """
     from rpy2.robjects.vectors import StrVector

     grain_network = self._network.as_grain()
     node_names = StrVector(variables)
     raw_result = gRain.querygrain(grain_network,
                                   nodes=node_names,
                                   type=method,
                                   result='data.frame')
     return self._format_query_output(method, variables, raw_result)
예제 #21
0
def create_reformat_data(input_df, outdir):
    """Keep the best HMM score per read and write the result with R.

    The R helper splits ``readID`` at the first "_" and keeps the part
    before it as the read, takes the maximum ``HMMScore`` per (read, Sample,
    sampleType, protType), writes the table to
    ``spHMM-filtered-results.txt`` in ``outdir`` and returns it.

    Args:
        input_df: pandas DataFrame; the R code uses its readID, Sample,
            sampleType, protType and HMMScore columns.
        outdir: directory receiving the output file.

    Returns:
        The aggregated table as returned by the R function.
    """
    rpackages.importr('base')
    utils = rpackages.importr('utils')
    # A one-element tuple needs the trailing comma; without it the name is
    # a plain string and the check below would run once per character.
    package_names = ('tidyverse',)
    packnames_to_install = [
        x for x in package_names if not rpackages.isinstalled(x)
    ]
    if packnames_to_install:
        utils.install_packages(StrVector(packnames_to_install))
    rpackages.importr('tidyverse')

    robjects.r['options'](warn=-1)
    reformat_df = robjects.r('''
		function(hmmdf,outDir) {
			hmmdfRecoded <- separate(hmmdf, readID, into = c("read","F_R_read_frame"), sep = "_", extra = "merge") %>%
			select(-c(F_R_read_frame))
			hmmdfRecodedDFUnique<-aggregate(HMMScore ~ read + Sample + sampleType + protType , hmmdfRecoded, max)
			colnames(hmmdfRecodedDFUnique)<-c("readID","Sample", "sampleType", "protType","HMMScore")
			write_tsv(hmmdfRecodedDFUnique, file.path(outDir, "spHMM-filtered-results.txt"), col_names = T)
			return(hmmdfRecodedDFUnique)
		}
		''')
    # convert pandas df to R datafame
    with localconverter(ro.default_converter + pandas2ri.converter):
        input_r_df = ro.conversion.py2rpy(input_df)

    data_filter = reformat_df(input_r_df, outdir)
    return data_filter
예제 #22
0
def import_R_library(rpacknames):
    """Install the R packages of ``rpacknames`` that are not installed yet.

    Despite its name this function only installs; it does not import the
    packages.

    Args:
        rpacknames: iterable of R package names, or a single package name
            given as a string.
    """
    from rpy2.robjects.vectors import StrVector

    # A bare string would otherwise be iterated character by character.
    if isinstance(rpacknames, str):
        rpacknames = [rpacknames]

    utils = rpackages.importr('utils')
    utils.chooseCRANmirror(ind=1)
    names_to_install = [x for x in rpacknames if not rpackages.isinstalled(x)]
    if names_to_install:
        utils.install_packages(StrVector(names_to_install))
예제 #23
0
def py2ri_pandasseries(obj):
    """Convert a pandas Series-like object into an R vector.

    Data of dtype '<M8[ns]' becomes POSIXct, rebuilt from its integer date
    and time components; anything else has its values converted by
    numpy2ri. The index is attached as R ``names`` for 1-d input and as
    ``dimnames`` otherwise.
    """
    is_time_series = obj.dtype == '<M8[ns]'
    if is_time_series:
        # one integer vector per date/time component
        components = [
            IntVector([getattr(stamp, unit) for stamp in obj])
            for unit in ('year', 'month', 'day', 'hour', 'minute', 'second')
        ]
        #FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*components))
    else:
        # converted as a numpy array
        res = numpy2ri.numpy2ri(obj.values)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        index_labels = StrVector(tuple(str(label) for label in obj.index))
        res.do_slot_assign('names', index_labels)
    else:
        r_index = SexpVector(conversion.py2ri(obj.index))
        res.do_slot_assign('dimnames', r_index)
    return res
예제 #24
0
def get_results_from_r(
        processed_data: pd.DataFrame,
        experiment_config: ExperimentConfig) -> ExperimentOutput:
    """
    Get results from the CausalImpact implementation using R

    The data is indexed by ``experiment_config.time_var``, converted to R
    and passed to ``CausalImpact`` with the training period as pre-period
    and the evaluation period as post-period. The model summary is
    transposed and its rows are renamed to snake_case.

    Calling this function activates the numpy2ri and pandas2ri conversions
    globally.

    :param processed_data: input data; it is not modified.
    :param experiment_config: supplies time_var, training_period,
        evaluation_period and experiment_name.
    :type experiment_config.training_period: List[int]
    :type experiment_config.evaluation_period: List[int]
    :return: ExperimentOutput holding the results summary, the fitted R
        model and the experiment name.
    """

    # Work on an indexed copy: setting the index in place would remove the
    # time column from the caller's frame, so a second call with the same
    # frame would fail.
    indexed_data = processed_data.set_index(experiment_config.time_var)
    pre_period = robjects.FloatVector(experiment_config.training_period)
    post_period = robjects.FloatVector(experiment_config.evaluation_period)

    # Load R libraries
    utils = rpackages.importr("utils")
    utils.chooseCRANmirror(ind=1)
    lib_names = ("CausalImpact", "bsts")
    libs_to_install = [x for x in lib_names if not rpackages.isinstalled(x)]

    if libs_to_install:
        utils.install_packages(StrVector(libs_to_install))

    robjects.numpy2ri.activate()
    pandas2ri.activate()

    rdf = robjects.conversion.py2rpy(indexed_data)
    causalimpact = importr("CausalImpact")
    ci = causalimpact.CausalImpact(rdf, pre_period, post_period)

    # `$summary` is extracted on the R side.
    summary = robjects.r("function(x) x$summary")

    results = summary(ci).T
    results.rename(
        index={
            "Actual": "actual",
            "Pred": "predicted",
            "Pred.lower": "predicted_lower",
            "Pred.upper": "predicted_upper",
            "Pred.sd": "predicted_sd",
            "AbsEffect": "abs_effect",
            "AbsEffect.lower": "abs_effect_lower",
            "AbsEffect.upper": "abs_effect_upper",
            "AbsEffect.sd": "abs_effect_sd",
            "RelEffect": "rel_effect",
            "RelEffect.lower": "rel_effect_lower",
            "RelEffect.upper": "rel_effect_upper",
            "RelEffect.sd": "rel_effect_sd",
        },
        inplace=True,
    )

    results.columns = ["average", "cumulative"]

    experiment_output = ExperimentOutput(
        **{
            "results_summary": results,
            "trained_model": ci,
            "experiment_name": experiment_config.experiment_name,
        })
    return experiment_output
예제 #25
0
파일: pvclust.py 프로젝트: jksr/ALLCools
def install_r_package(name):
    """Install the R package ``name`` unless it is already installed."""
    from rpy2.robjects.vectors import StrVector
    from rpy2.robjects.packages import importr, isinstalled

    if isinstalled(name):
        return

    r_utils = importr("utils")
    r_utils.chooseCRANmirror(ind=1)
    r_utils.install_packages(StrVector([name]))
예제 #26
0
파일: pandas2ri.py 프로젝트: theflow/rpy2
def py2ri_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R DataFrame, column by column.

    Columns whose dtype kind is 'O' become string vectors; every other
    column goes through ``conversion.py2ri``. The column order is kept.
    """
    od = OrderedDict()
    # `items()` yields the same (name, column) pairs as `iteritems()`,
    # which pandas 2.0 removed.
    for name, values in obj.items():
        if values.dtype.kind == 'O':
            od[name] = StrVector(values)
        else:
            od[name] = conversion.py2ri(values)
    return DataFrame(od)
예제 #27
0
    def to_rpy2(self):
        """Return the gene sets as an R list of string vectors.

        Each entry is named after a gene set and holds that set's genes.
        """
        from rpy2.robjects.vectors import ListVector, StrVector

        genes_by_set = {}
        for set_name, gene_set in self.gene_sets_by_name.items():
            genes_by_set[set_name] = StrVector(list(gene_set.genes))
        return ListVector(genes_by_set)
예제 #28
0
def r_ttest(x, y, alternative='two.sided', equal_variance=False, paired=False):
    """Run R's t.test on ``x`` and ``y``.

    Args:
        x, y: numeric sequences, converted to R float vectors.
        alternative: passed to R as ``alternative``.
        equal_variance: passed to R as ``var.equal``.
        paired: passed to R as ``paired``.

    Returns:
        dict with the p-value under 'p' and the t statistic under 't'.
    """
    # 'var.equal' is not a valid Python keyword, so the R arguments are
    # collected in a dict and unpacked into the call.
    r_arguments = {
        'alternative': StrVector((alternative, )),
        'var.equal': equal_variance,
        'paired': paired,
    }
    r_result = stats.t_test(robjects.FloatVector(x),
                            robjects.FloatVector(y),
                            **r_arguments)
    converted = pify(r_result)
    return {'p': converted['p.value'], 't': converted['statistic']['t']}
예제 #29
0
def r_importer(modules, install_only=None, log=False):
    """
    Import and install R packages. If the desired packages are not installed it will
    automatically install them. Note that this function will act as a one time
    delay in running time, if modules need to be installed. Import R packages
    manually as e.g. <stargazer =  rpy2.robjects.packages.importr('stargazer')>.
    So, the same name used for installing, should be used to import the functions.
    Important to note, this function relies on the following modules from rpy2:
    "rpy2.robjects.packages" and "rpy2.robjects.vectors".

    Args:
        modules: the desired packages, as a single string or a list, tuple
        or set of strings. E.g. modules = ['stargazer', 'tidyverse'].

        install_only: default=None. list or string of packages to be installed
        only (they are not imported). Note, combinations are possible.

        log: default=False. Prints a log message if true, of the packages that are
        (succesfully) imported.

    Returns:
        None
    """
    def as_name_list(value):
        # Lists are used as they are, other collections are converted, and
        # any single value is wrapped into a one-element list.
        if isinstance(value, list):
            return value
        if isinstance(value, (tuple, set, frozenset)):
            return list(value)
        return [value]

    modules = as_name_list(modules)
    # None stands for "no install-only packages"; a mutable default
    # argument would be shared between calls.
    install_only = [] if install_only is None else as_name_list(install_only)

    # import R's utility package:
    utils = rpackages.importr('utils')

    # Selectively install what needs to be install. Use CRAN cloud server:
    names_to_install = [x for x in modules if not rpackages.isinstalled(x)]
    if names_to_install:
        print('Installing:', names_to_install)
        with walpy.suppress():
            utils.install_packages(StrVector(names_to_install),
                                   repos='https://cloud.r-project.org/')
        print('Successfully installed:', names_to_install)

    # Make modules non-overlapping with install_only. Duplicates are
    # dropped and the order given by the caller is kept, so that packages
    # are imported in a deterministic order.
    excluded = set(install_only)
    to_import = [name for name in dict.fromkeys(modules)
                 if name not in excluded]

    # Import modules to be automatically imported
    for module in to_import:
        rpackages.importr(module)

    # Print log message if true:
    if log:
        print('Successfully imported:', to_import)
예제 #30
0
파일: pandas2ri.py 프로젝트: Pumawat/Explo
def py2ri_pandasindex(obj):
    """Convert a pandas index into an R vector.

    An index whose dtype kind is 'O' becomes a string vector; any other
    index is converted by numpy2ri.
    """
    if obj.dtype.kind != 'O':
        # pandas2ri should definitely not have to know which paths remain
        # to be converted by numpy2ri.
        # Answer: pandas2ri builds on the conversion rules defined by
        # numpy2ri - deferring to numpy2ri allows that code to be reused.
        return numpy2ri.numpy2ri(obj)
    return StrVector(obj)