Example #1
    def opt_federov(self, design_formula, trials, data, max_iterations=1000000, nullify=0):
        info("Starting \"optFederov\" run")
        info("Using Search Space:")
        info(str(self.utils.str(data)))

        formulas = {}

        for parameter in self.parameter_ranges.keys():
            # Center and scale onto coded units: pe = (p - h) / h,
            # where h is half of the parameter's level range.
            half_range = (self.parameter_ranges[parameter][1] - 1.0) / 2.0
            formulas["{0}e".format(parameter)] = Formula(
                "{0}e ~ ({0} - {1}) / {1}".format(parameter, half_range))

        info("Encoding formulas: " + str(self.utils.str(ListVector(formulas))))
        info("Data Dimensions: " + str(self.base.dim(data)))

        coded_data = self.rsm.coded_data(data, formulas = ListVector(formulas))

        info("Coded data: " + str(self.utils.str(coded_data)))

        output = self.algdesign.optFederov(frml         = Formula(design_formula),
                                           data         = coded_data,
                                           nTrials      = trials,
                                           nullify      = nullify,
                                           nRepeats     = 10,
                                           maxIteration = max_iterations)

        return output
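A worked sketch of the coding scheme above, assuming a hypothetical parameter whose range entry is [0, 5]: the half-range is h = (5 - 1.0) / 2.0 == 2.0, so the generated formula "pe ~ (p - 2.0) / 2.0" maps the raw levels 0..4 onto the coded interval [-1, 1]:

h = 2.0
coded = [(x - h) / h for x in range(5)]
assert coded == [-1.0, -0.5, 0.0, 0.5, 1.0]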
Example #2
 def from_r(cls, lv: ro.ListVector):
     cls._check_expected_model(lv, "FIXED")
     return cls(
         name=cls._get_name(lv),
         p=get_int(lv.rx2("p")),
         b=get_float_array(lv.rx2("b")),
         varB=get_float(lv.rx2("varB")),
         SD_b=get_float_array(lv.rx2("SD.b")),
     )
Example #3
def export_smpl_split_to_r(smpls):
    n_smpls = len(smpls)
    all_train = ListVector.from_length(n_smpls)
    all_test = ListVector.from_length(n_smpls)

    for idx, (train, test) in enumerate(smpls):
        all_train[idx] = IntVector(train + 1)
        all_test[idx] = IntVector(test + 1)

    return all_train, all_test
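A minimal usage sketch with a hypothetical split; the + 1 above shifts Python's 0-based indices to R's 1-based convention:

import numpy as np

smpls = [(np.array([0, 1, 2]), np.array([3, 4]))]
train_r, test_r = export_smpl_split_to_r(smpls)
print(list(train_r[0]))  # [1, 2, 3]
print(list(test_r[0]))   # [4, 5]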
Example #4
    def decode_data(self, data):
        formulas = {}

        for parameter in self.parameter_ranges.keys():
            formulas["{0}".format(parameter)] = Formula(
                "{0} ~ round(({0}e * {1}) + {1})".format(
                    parameter,
                    (self.parameter_ranges[parameter][1] - 1.0) / 2.0))

        info("Encoding formulas: " +
             str(self.base.summary_default(ListVector(formulas))))
        info("Data Dimensions: " + str(self.base.dim(data)))

        return self.rsm.coded_data(data, formulas=ListVector(formulas))
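A quick check, with the same hypothetical half-range h = 2.0, that this decoding inverts the coding applied in opt_federov:

h = 2.0
for x in range(5):
    assert round(((x - h) / h) * h + h) == x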
Example #5
def create_initial_sample(n_obs,
                          dim,
                          type='lhs',
                          lower_bound=None,
                          upper_bound=None):
    """
    Convenient helper function, which creates an initial sample - either based on random (uniform) sampling or using latin hypercube sampling.

    Args:
      n_obs: number of observations
      dim: number of dimensions
      type: type of sampling strategy (Default value = 'lhs')
      lower_bound: The lower bounds of the initial sample as a list of size dim (Default value = 0)
      upper_bound: The upper bounds of the initial sample as a list of size dim (Default value = 1)

    Returns: numpy array of shape (n_obs x dim)

    """
    if lower_bound is None:
        lower_bound = [0] * dim
    if upper_bound is None:
        upper_bound = [1] * dim

    pcontrol = {
        'init_sample.type': type,
        'init_sample.lower': IntVector(lower_bound),
        'init_sample.upper': IntVector(upper_bound)
    }

    return np.array(
        flacco.createInitialSample(n_obs, dim, ListVector(pcontrol)))
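A minimal usage sketch with hypothetical bounds; it assumes the flacco R package is installed and reachable through rpy2:

sample = create_initial_sample(50, 2, type='lhs',
                               lower_bound=[0, 0], upper_bound=[10, 5])
print(sample.shape)  # (50, 2)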
Example #6
def _translate_control(control):
    """
    Transforms a python dict to a valid R object
    Args:
      control: python dict

    Returns: R object of type ListVector

    """
    ctrl = {}
    for key, lst in control.items():
        if isinstance(lst, list):
            # Check bool before int: bool is a subclass of int in Python,
            # so the int branch would otherwise capture boolean lists.
            if all(isinstance(n, bool) for n in lst):
                entry = BoolVector(lst)
            elif all(isinstance(n, int) for n in lst):
                entry = IntVector(lst)
            elif all(isinstance(n, float) for n in lst):
                entry = FloatVector(lst)
            elif all(isinstance(n, str) for n in lst):
                entry = StrVector(lst)
            else:
                entry = None
            if entry is not None:
                ctrl[key] = entry
        else:
            ctrl[key] = lst
    return ListVector(ctrl)
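A minimal usage sketch: homogeneous lists become typed R vectors, scalars pass through unchanged, and a mixed-type list is silently dropped:

ctrl = _translate_control({
    'ints': [1, 2, 3],       # -> IntVector
    'flags': [True, False],  # -> BoolVector
    'names': ['a', 'b'],     # -> StrVector
    'scalar': 42,            # passed through unchanged
    'mixed': [1, 'a'],       # dropped (no matching vector type)
})
print(list(ctrl.names))  # ['ints', 'flags', 'names', 'scalar']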
Example #7
 def from_r(cls, lv: ro.ListVector):
     cls._check_expected_model(lv, "BL")
     return cls(
         name=cls._get_name(lv),
         minAbsBeta=get_float(lv.rx2("minAbsBeta")),
         p=get_int(lv.rx2("p")),
         MSx=get_float(lv.rx2("MSx")),
         R2=get_float(lv.rx2("R2")),
         lambda_=get_float(lv.rx2("lambda")),
         type=get_str(lv.rx2("type")),
         shape=get_float(lv.rx2("shape")),
         rate=get_float(lv.rx2("rate")),
         b=get_float_array(lv.rx2("b")),
         tau2=get_float_array(lv.rx2("tau2")),
         SD_b=get_float_array(lv.rx2("SD.b")),
     )
Example #8
def create_roast_scorer(gene_sets='c2.cp.kegg',
                        id_type='entrez',
                        grouping='by_substance',
                        q_value_cutoff=0.1,
                        na_action='fill_0',
                        cache=True,
                        cache_signatures=False):
    """Only cache signatures when doing permutations, otherwise it will only slow it down"""

    importr('limma')
    importr('Biobase')

    gene_sets_r = ListVector({
        gene_set.name: StrVector(list(gene_set.genes))
        for gene_set in db.load(gene_sets=gene_sets, id_type=id_type).gene_sets
    })

    def set_gene_set_collection():
        globalenv[gene_sets] = gene_sets_r

    def roast_score(disease: ExpressionWithControls,
                    compound: ExpressionWithControls):

        if len(compound.cases.columns) < 2 or len(
                compound.controls.columns) < 2:
            print(
                f'Skipping {compound}: not enough degrees of freedom '
                '(no way to compute in-group variance)'
            )
            return None

        if cache:
            multiprocess_cache_manager.respawn_cache_if_needed()

        try:
            disease_gene_sets = roast(disease,
                                      gene_sets=gene_sets,
                                      use_cache=cache)
            disease_gene_sets.drop(disease_gene_sets[
                disease_gene_sets['fdr_q-val'] > q_value_cutoff].index,
                                   inplace=True)

            signature_gene_sets = roast(compound,
                                        gene_sets=gene_sets,
                                        use_cache=cache and cache_signatures)

            joined = combine_gsea_results(disease_gene_sets,
                                          signature_gene_sets, na_action)

            # occasionally ask R to garbage-collect to keep memory in check
            if randint(0, 100) == 1:
                r('gc()')

            return joined.score.mean()
        except RRuntimeError as e:
            print(e)
            return None

    return scoring_function(roast_score,
                            input=ExpressionWithControls,
                            grouping=grouping,
                            before_batch=set_gene_set_collection)
Example #9
def StrListVector(strList):
    """Convert input to a StrVector, or a ListVector recursively"""
    try:
        assert len(strList) > 0  # rejects NULL, None, '', non-str scalars, etc.
    except Exception:
        return NULL
    if isinstance(strList, ListVector):  # already a ListVector
        return ListVector(strList)
    elif isinstance(strList, StrVector):  # already a StrVector
        return StrVector(strList)
    elif isinstance(strList, str):  # str scalar, so apply StrVector
        return StrVector([strList])
    elif any(types.is_list_like(s) for s in strList):  # not the deepest list
        return ListVector([(None, StrListVector(s)) for s in strList])
    else:
        return StrVector(list(strList))  # is deepest list(-like) of str types
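A minimal usage sketch covering the scalar, flat-list and empty cases:

print(StrListVector('a'))         # StrVector: "a"
print(StrListVector(['a', 'b']))  # StrVector: "a" "b"
print(StrListVector(None))        # R NULL: len(None) raises and is caught above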
Example #10
 def from_r(cls, lv: ro.ListVector):
     cls._check_expected_model(lv, "BRR")
     return cls(
         name=cls._get_name(lv),
         p=get_int(lv.rx2("p")),
         df0=get_float(lv.rx2("df0")),
         R2=get_float(lv.rx2("R2")),
         MSx=get_float(lv.rx2("MSx")),
         S0=get_float(lv.rx2("S0")),
         b=get_float_array(lv.rx2("b")),
         varB=get_float(lv.rx2("varB")),
         SD_b=get_float_array(lv.rx2("SD.b")),
         SD_varB=get_float(lv.rx2("SD.varB")),
     )
Example #11
    def test_import_intercell_network(self):
        from rpy2.robjects import ListVector

        interactions_params = {"resources": "CellPhoneDB"}
        transmitter_params = {"categories": "ligand"}
        receiver_params = {"categories": "receptor"}

        expected = self.omnipathr.import_intercell_network(
            interactions_param=ListVector(list(interactions_params.items())),
            transmitter_param=ListVector(list(transmitter_params.items())),
            receiver_param=ListVector(list(receiver_params.items())),
        )
        actual = op.interactions.import_intercell_network(
            interactions_params=interactions_params,
            transmitter_params=transmitter_params,
            receiver_params=receiver_params,
        )

        _assert_dataframes_equal(expected, actual)
Example #12
def dict_to_named_list(dct):
    if isinstance(dct, (dict, Parameter, pd.core.series.Series)):
        dct = {key: val for key, val in dct.items()}
        # convert numbers to builtin types before conversion (see rpy2 #548)
        for key, val in dct.items():
            if isinstance(val, numbers.Integral):
                dct[key] = int(val)
            elif isinstance(val, numbers.Number):
                dct[key] = float(val)
        r_list = ListVector(dct)
        return r_list
    return dct
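A minimal sketch, assuming the module context above, of why the builtin-type coercion matters (rpy2 issue #548: numpy scalars are not handled by ListVector directly):

import numpy as np

# np.int64 registers as numbers.Integral and np.float64 as numbers.Number,
# so both are cast to builtin int/float before the ListVector conversion.
r_list = dict_to_named_list({'n': np.int64(10), 'rate': np.float64(0.5)})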
Example #13
 def get_R_theta(pi, c, Gamma, A, b, Sigma):
     """Return an R-compatible list built from numpy arrays"""
     numpy2ri.activate()
     in_theta = ListVector(dict(
         pi=pi,
         c=c.T,
         Gamma=Gamma.transpose((1, 2, 0)),
         A=A.transpose((1, 2, 0)),
         b=b.T,
         Sigma=Sigma.transpose((1, 2, 0))
     ))
     numpy2ri.deactivate()
     return in_theta
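A sketch of the layout convention behind the transposes, with hypothetical shapes: numpy stores K DxD matrices as (K, D, D), while the R side expects the component index last, i.e. dim = c(D, D, K):

import numpy as np

Sigma = np.zeros((3, 2, 2))  # K=3 mixture components, D=2
assert Sigma.transpose((1, 2, 0)).shape == (2, 2, 3)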
Example #14
def _convert_python_to_R(data: typing.Union[dict, pd.DataFrame]):
    """
    Converts a python object to an R object brms can handle:
    * python dict      ->   R list
    * python dataframe ->   R dataframe
    """
    with localconverter(default_converter + pandas2ri.converter + numpy2ri.converter) as cv:
        if isinstance(data, pd.DataFrame):
            return DataFrame(data)
        elif isinstance(data, dict):
            return ListVector(data)
        else:
            raise ValueError("Data should be either a pandas dataframe or a dictionary")
Example #15
def dict_to_named_list(dct):
    if isinstance(dct, (dict, Parameter, pd.core.series.Series)):
        dct = {key: val for key, val in dct.items()}
        # convert numbers, numpy arrays and pandas dataframes to builtin
        # types before conversion (see rpy2 #548)
        with conversion.localconverter(default_converter +
                                       pandas2ri.converter +
                                       numpy2ri.converter):
            for key, val in dct.items():
                dct[key] = conversion.py2rpy(val)
        r_list = ListVector(dct)
        return r_list
    return dct
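Compared with the earlier variant, this localconverter-based version also converts numpy arrays (and pandas objects) inside the dict; a minimal sketch:

import numpy as np

r_list = dict_to_named_list({'weights': np.array([0.1, 0.9])})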
Example #16
    def train(self, omics_dataset: Dict, save: bool = False):
        """Trains the model on multi-omics data.

        Parameters
        ----------
        omics_dataset: dict
            Multi-omics dataset, keyed by datatype
        save: bool, default False
            Whether to save the results
        """
        iClusterPlus = importr("iClusterPlus")
        base = importr("base")

        if not isinstance(omics_dataset, dict):
            raise ValueError("omics_dataset must be a dict")

        X = [omics_dataset[i] for i in omics_dataset]
        self._datatypes = list(omics_dataset.keys())
        n_datasets = len(X)
        n_samples = X[0].shape[0]
        feature_counts = [X[i].shape[1] for i in range(n_datasets)]

        if n_datasets > 4:
            raise ValueError(
                "iClusterPlus supports a maximum of 4 multi-omics datasets")

        self._initialize_run(n_samples, n_datasets, feature_counts)

        # Pad all six slots with R NULL, then fill the first n_datasets
        # (iClusterPlus itself accepts at most four datasets, dt1..dt4).
        data = ListVector.from_length(6)
        for idx in range(6):
            data[idx] = NULL
        for idx, omics_data in enumerate(X):
            data[idx] = omics_data

        fitted_model = iClusterPlus.iClusterPlus(data[0],
                                                 data[1],
                                                 data[2],
                                                 data[3],
                                                 self._types,
                                                 self._K,
                                                 self._alpha,
                                                 self._lambda_reg,
                                                 maxiter=self._max_iter,
                                                 eps=self._epsilon)

        if save:
            self.save(results=fitted_model)

        return fitted_model
Example #17
def run_mimp(mutation_source: str,
             site_type_name: str,
             model: str = None,
             enzyme_type='kinase') -> DataFrame:
    """Run MIMP for given source of mutations and given site type.

    Args:
        mutation_source: name of mutation source
        site_type_name: name of site type
        model: name of the model or path to custom .mimp file,
            if not specified, an automatically generated,
            custom, site-based model will be used.
        enzyme_type: whether the enzyme that modifies the site is a kinase.
            If not, a "catch-all" strategy is used: MIMP is trained as if
            there were just one site-specific enzyme, because we do not
            (yet!) have enzyme-site specificity data for enzymes other
            than kinases.
    """
    site_type = SiteType.query.filter_by(name=site_type_name).one()

    if not model:
        model = get_or_create_model_path(site_type, enzyme_type)

    mimp = load_mimp()

    sequences, disorder, mutations, sites = prepare_active_driver_data(
        mutation_source, site_type_name)

    mutations = mutations.assign(mutation=Series(
        m.wt_residue + str(m.position) + m.mut_residue
        for m in mutations.itertuples(index=False)).values)

    sites.position = to_numeric(sites.position)

    sequences = ListVector(sequences)

    modified_residues = site_type.find_modified_residues()

    mimp_result = mimp.site_mimp(mutations[['gene', 'mutation']],
                                 sequences,
                                 site_type=site_type_name,
                                 sites=sites[['gene', 'position']],
                                 residues_groups=residues_groups(
                                     site_type, modified_residues),
                                 **{'model.data': model})
    if mimp_result is NULL:
        return DataFrame()
    return pandas2ri.ri2py(mimp_result)
Example #18
    def ro(self):
        """Expose a view as RObject, to be manipulated in R environment"""
        # Convert to R vector of correct data type
        if isinstance(self.iloc, dict):
            out = ListVector([(None, PyR(v).ro) for v in self.iloc])
        elif types.is_float_dtype(self.iloc):
            out = FloatVector(self.iloc.reshape(-1, order='F'))
        elif types.is_integer_dtype(self.iloc):
            out = IntVector(self.iloc.reshape(-1, order='F'))
        else:
            out = StrVector(self.iloc.reshape(-1, order='F'))
        if len(self.dim) > 1:  # reshape to R Array if has non-trivial dim
            out = ro.r.array(out, dim=IntVector(self.dim))

        # Collect R object name attributes
        if hasattr(self, 'rownames'):
            out.rownames = StrVector(self.rownames)
        if hasattr(self, 'colnames'):
            out.colnames = StrVector(self.colnames)
        if hasattr(self, 'names'):
            out.names = ListVector(self.names) if isinstance(
                self.names, ListVector) else StrVector(self.names)
        return out
Example #19
    def from_r(self, lv: ro.ListVector) -> "BGLRResult":

        switch: "Dict[str, Type[BGLRResult]]" = {
            "FIXED": FixedResult,
            "BRR": BRRResult,
            "BL": BLResult,
            "BayesA": BayesAResult,
            "BayesB": BayesBResult,
            "BayesC": BayesCResult,
            "RKHS": RKHSResult,
        }

        model = get_str(lv.rx2("model"))
        if model not in switch:
            raise ValueError(
                f"Model {model} does not correspond to one of the "
                "known BGLR models.")

        return switch[model].from_r(lv)
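A minimal sketch of the rx2 lookup this dispatcher relies on; rx2 extracts a named element from an R list, like lv[["model"]] in R:

from rpy2.robjects import ListVector, StrVector

lv = ListVector({"model": StrVector(["BRR"])})
print(lv.rx2("model")[0])  # 'BRR'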
Example #20
def collection_to_R(collection, trim_to, min=5, max=500, name=None):
    if not name:
        name = collection.name
    gene_ids = trim_to
    filtered = {
        gene_set.name: StrVector(list(gene_set.genes))
        for gene_set in (
            collection
            # limma::cameraPR goes crazy without this;
            # limma::mroast seems to work fine (and be more aware of the limited statistical support)
            .subset(gene_ids)
            .gene_sets
            if gene_ids else collection.gene_sets
        )
        if max > len(gene_set.genes) > min
    }
    gene_sets_r = ListVector(filtered)

    globalenv[name] = gene_sets_r

    return filtered
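A note on the size filter above: Python's chained comparison max > len(gene_set.genes) > min keeps only gene sets whose size lies strictly between the bounds:

assert (500 > 5 > 5) is False  # a 5-gene set is excluded when min=5
assert (500 > 6 > 5) is True   # a 6-gene set is kept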
Example #21
    def __getitem__(self, args):
        """Returns copy of subset of data object from slice or index args"""
        try:
            if isinstance(self.iloc, dict):  # return item of dict
                if isinstance(args, int):
                    try:
                        args = list(self.names).index(args)
                    except Exception:
                        args = list(self.iloc.keys()).index(args)
                return self.iloc[args]

            # replace any str labels in args with its index in self.names
            if isinstance(args, tuple) and self.names is not None:
                args = tuple(self.index(a, i) for i, a in enumerate(args))

            # extract corresponding subset of names
            if self.names:
                names_ = deepcopy(self.names)
                names = ListVector(names_)
                for i in range(len(self.names)):
                    if isinstance(names_[i], StrVector):
                        s = np.array(names_[i])[args[i]]
                        names[i] = StrVector([s] if isinstance(s, str) else s)
            else:
                names = NULL

            # finally extract by looping over each dim; enables R-like indexing
            out = deepcopy(self.iloc)
            for i, arg in enumerate(args):
                a = [slice(None)] * len(args)
                a[i] = arg
                dims = len(out.shape)
                out = out[tuple(a)]
                if self.verbose:
                    print(i, out.shape, dims, tuple(a))
                if len(out.shape) < dims:  # if this dimension is flattened out
                    names = names[:i] + names[(i + 1):]
            return PyR(out, names=names)
        except Exception as e:
            raise Exception(f"getitem: {args}") from e
Example #22
    def train(self, omics_dataset: Dict, save: bool = False, **kwargs):
        """Trains the model on multi-omics data.

    Parameters
    ----------
    omics_dataset: dict
        Multi-omics dataset, keyed by datatype
    save: bool, default False
        Whether to save the results
    """
        iClusterPlus = importr("iClusterPlus")
        base = importr("base")

        if not isinstance(omics_dataset, dict):
            raise ValueError("omics_dataset must be a dict")

        X = [omics_dataset[i] for i in omics_dataset]
        self._datatypes = list(omics_dataset.keys())
        n_datasets = len(X)
        n_samples = X[0].shape[0]
        feature_counts = [X[i].shape[1] for i in range(n_datasets)]
        self._initialize_run(n_samples, n_datasets, feature_counts)

        data = ListVector.from_length(n_datasets)
        for idx, omics_data in enumerate(X):
            data[idx] = omics_data

        fitted_model = iClusterPlus.iCluster(data,
                                             self._num_subtypes,
                                             self._lambda_reg,
                                             scalar=False,
                                             max_iter=self.max_iter,
                                             epsilon=self.epsilon)

        if save:
            self.save(results=fitted_model)

        return fitted_model
Example #23
    def gllim(self, sigma_type, gamma_type, T=None, Y=None,
              Lw=0, in_theta=r('NULL')):
        constraints = {"full": "", "iso": "i"}
        c_S = constraints[sigma_type]
        c_G = constraints[gamma_type]
        dic_cst = {"Sigma":c_S}
        if c_G:
            dic_cst["Gammat"] = c_G

        if in_theta:
            in_r = r('NULL')
            # debug: shape of the supplied initial 'c' parameter
            print(np.array(in_theta.rx('c')[0]).shape)
        else:
            in_r = self.r_init

        if T is None:
            T = self.responses
        else:
            T = numpy2ri.numpy2ri(T.T)

        if Y is None:
            Y = self.covariates
        else:
            Y = numpy2ri.numpy2ri(Y.T)

        mod = self.xLLiM.gllim(T, Y, self.K,
                               in_r=in_r, maxiter=self.maxiter, Lw=Lw,
                               cstr=ListVector(dic_cst),
                               in_theta=in_theta,
                               verb=1)
        self.model = mod
        return (np.array(mod.rx('pi')[0]),
                np.array(mod.rx('c')[0]).T,
                np.array(mod.rx('Gamma')[0]).transpose((2, 0, 1)),
                np.array(mod.rx('A')[0]).transpose((2, 0, 1)),
                np.array(mod.rx('b')[0]).T,
                np.array(mod.rx('Sigma')[0]).transpose((2, 0, 1)))
Example #24
 def dictToList(obj):
     from rpy2.robjects import ListVector
     return ListVector(obj)
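A minimal usage sketch:

r_list = dictToList({"alpha": 0.05, "labels": "none"})
print(list(r_list.names))  # ['alpha', 'labels']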
Example #25
def call_gsoa(request):
    # data from task tiger
    print("request: {}".format(request))
    local_buffer = []
    try:
        gsoa = importr('GSOA')
        #flex_dashboard =  importr('')
        args = request.copy()
        for field in NECESSARY_FIELDS:
            args.pop(field)
        if len(str(request.get('dataFilePath'))) < 2:
            return "no data"
        outFilePath = "/data/{}_{}.txt".format(
            request.get('email', 'results_txt').replace('.com', '').strip(),
            request.get('dataFilePath').split(".")[0])
        print("email: {}".format(request.get('email', 'results_txt')))
        # redirect everything from R into the python console (local buffer)
        rinterface.set_writeconsole_warnerror(
            lambda line: local_buffer.append(line))
        rinterface.set_writeconsole_regular(
            lambda line: local_buffer.append(line))
        result = gsoa.GSOA_ProcessFiles(
            dataFilePath=request.get('dataFilePath', ''),
            classFilePath=request.get('classFilePath', ''),
            gmtFilePath=request.get('gmtFilePath', ''),
            outFilePath=outFilePath,
            numCores=multiprocessing.cpu_count(),
            numRandomIterations=request.get('numRandomIterations', ''),
            classificationAlgorithm=request.get('classificationAlgorithm', ''),
            numCrossValidationFolds=request.get('numCrossValidationFolds', ''),
            removePercentLowestExpr=request.get('removePercentLowestExpr', ''),
            removePercentLowestVar=request.get('removePercentLowestVar', ''))
        print("Writing RMarkdown")
        outFilePath_html = outFilePath.replace('txt', 'html')
        rmarkdown.render(
            '/app/GSOA_Report.Rmd',
            output_file=outFilePath_html,
            params=ListVector({
                'data1': outFilePath,
                'alg': request.get('classificationAlgorithm', 'svm'),
                'class': request.get('classFilePath', ''),
                'crossval': request.get('numCrossValidationFolds', ''),
                'data_files': request.get('dataFilePath', ''),
                'genesets': request.get('gmtFilePath', ''),
                #'hallmarks':
                'iterations': request.get('numRandomIterations', ''),
                'lowexpress': request.get('removePercentLowestExpr', ''),
                #'results_hallmark':
                'var': request.get('removePercentLowestVar', '')
            }))
        email_report(request.get('email'), outFilePath)
    except Exception as e:
        email_error(request.get('email'), e, local_buffer)
    finally:
        rinterface.set_writeconsole_warnerror(rinterface.consolePrint)
        rinterface.set_writeconsole_regular(rinterface.consolePrint)
Example #26
 def from_r(cls, lv: ro.ListVector):
     cls._check_expected_model(lv, "RKHS")
     K = get_float_array(lv.rx2("K"))
     return cls(
         name=cls._get_name(lv),
         K=K,
         K_inv=np.linalg.pinv(K),
         V=get_float_array(lv.rx2("V")),
         d=get_float_array(lv.rx2("d")),
         tolD=get_float(lv.rx2("tolD")),
         levelsU=get_int(lv.rx2("levelsU")),
         df0=get_float(lv.rx2("df0")),
         R2=get_float(lv.rx2("R2")),
         S0=get_float(lv.rx2("S0")),
         u=get_float_array(lv.rx2("u")),
         varU=get_float(lv.rx2("varU")),
         uStar=get_float_array(lv.rx2("uStar")),
         SD_u=get_float_array(lv.rx2("SD.u")),
         SD_varU=get_float(lv.rx2("SD.varU")),
     )
Example #27
 def from_r(cls, lv: ro.ListVector):
     cls._check_expected_model(lv, "BayesC")
     return cls(
         name=cls._get_name(lv),
         p=get_int(lv.rx2("p")),
         MSx=get_float(lv.rx2("MSx")),
         R2=get_float(lv.rx2("R2")),
         df0=get_float(lv.rx2("df0")),
         probIn=get_float(lv.rx2("probIn")),
         counts=get_float(lv.rx2("counts")),
         countsIn=get_float(lv.rx2("countsIn")),
         countsOut=get_float(lv.rx2("countsOut")),
         S0=get_float(lv.rx2("S0")),
         b=get_float_array(lv.rx2("b")),
         d=get_float_array(lv.rx2("d")),
         varB=get_float_array(lv.rx2("varB")),
         SD_b=get_float_array(lv.rx2("SD.b")),
         SD_varB=get_float_array(lv.rx2("SD.varB")),
         SD_probIn=get_float(lv.rx2("SD.probIn")),
     )
Example #28
    def __init__(self, params):
        self.base = importr("base")
        self.utils = importr("utils")
        self.stats = importr("stats")
        self.algdesign = importr("AlgDesign")
        self.car = importr("car")
        self.rsm = importr("rsm")
        self.dplyr = importr("dplyr")
        self.quantreg = importr("quantreg")
        self.dicekrig = importr("DiceKriging")
        self.diced = importr("DiceDesign")

        #numpy.random.seed(11221)
        #self.base.set_seed(11221)

        self.complete_design_data = None
        self.complete_search_space = None

        self.total_runs = 20
        orio.main.tuner.search.search.Search.__init__(self, params)

        self.name = "GPR"

        self.parameter_ranges = {}

        for i in range(len(self.params["axis_val_ranges"])):
            self.parameter_ranges[self.params["axis_names"][i]] = [
                0, len(self.params["axis_val_ranges"][i])
            ]

        info("Parameters: " + str(self.parameter_ranges))

        self.parameter_values = {}

        for i in range(len(self.params["axis_val_ranges"])):
            self.parameter_values[
                self.params["axis_names"][i]] = self.params["axis_val_ranges"][i]

        info("Parameter Real Ranges: " + str(self.axis_val_ranges))
        info("Parameter Range Values: " + str(self.parameter_values))

        self.range_matrix = {}

        for i in range(len(self.axis_names)):
            self.range_matrix[self.axis_names[i]] = IntVector(
                self.axis_val_ranges[i])

        self.range_matrix = ListVector(self.range_matrix)
        info("DataFrame Ranges: " +
             str(self.base.summary_default(self.range_matrix)))

        self.starting_sample = int(round(len(self.params["axis_names"]) + 2))
        self.steps = 22
        self.extra_experiments = int(round(len(self.params["axis_names"]) * 1))
        self.testing_set_size = 300000
        self.failure_multiplier = 100

        self.__readAlgoArgs()

        self.experiment_data = None
        self.best_points_complete = None

        if self.time_limit <= 0 and self.total_runs <= 0:
            err(('%s search requires search time limit or ' +
                 'total number of search runs to be defined') %
                self.__class__.__name__)

        self.run_summary_database = dataset.connect(
            "sqlite:///run_summary.db")
        self.summary = self.run_summary_database["dlmt_run_summary"]

        info("Starting sample: " + str(self.starting_sample))
        info("GPR steps: " + str(self.steps))
        info("Experiments added per step: " + str(self.extra_experiments))
        info("Initial Testing Set Size: " + str(self.testing_set_size))
        info("Constraints: " + str(self.constraint))
Example #29
 def _get_name(lv: ro.ListVector) -> str:
     name = get_str(lv.rx2("Name"))
     return name[len("ETA_"):]
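A tiny sketch of the prefix stripping; "markers" is a hypothetical effect name, and the code assumes the Name field carries an "ETA_" prefix:

assert "ETA_markers"[len("ETA_"):] == "markers"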
Example #30
 def _check_expected_model(lv: ro.ListVector, model: str):
     this_model = get_str(lv.rx2("model"))
     if this_model != model:
         raise ValueError(
             f"Expected to get {model}, but got results for {this_model}.")