def get_pvalue(sorted_scores, stat, n):
    """Approximate the permutation p-value of `stat` with a GPD tail fit."""
    # approximate the GPD tail
    n_exceed = 250
    is_gpd_fitted = False
    while n_exceed >= 10:
        exceedances = sorted_scores[:n_exceed]
        # check if the n_exceed largest permutation values follow a GPD
        # with the Anderson-Darling goodness-of-fit test
        try:
            ad = eva.gpdAd(FloatVector(exceedances))
            ad_pval = ad.rx2('p.value')[0]
        except Exception:
            n_exceed -= 10
            continue
        # H0 = the exceedances come from a GPD
        if ad_pval > 0.05:
            is_gpd_fitted = True
            break
        n_exceed -= 10
    if not is_gpd_fitted:
        # a good GPD fit is never reached - use the ECDF instead
        return None
    # compute the exceedance threshold t
    t = float((sorted_scores[n_exceed] + sorted_scores[n_exceed - 1]) / 2)
    # estimate the shape and scale parameters with maximum likelihood
    gpd_fit = eva.gpdFit(FloatVector(sorted_scores), threshold=t, method='mle')
    scale, shape = gpd_fit.rx2('par.ests')[0], gpd_fit.rx2('par.ests')[1]
    # compute the GPD p-value for the observed statistic
    f_gpd = genpareto.cdf(x=stat - t, c=shape, scale=scale)
    return n_exceed / n * (1 - f_gpd)
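# Usage sketch for get_pvalue (illustrative, synthetic data; not part of the
# original module). It assumes the R 'eva' package is installed, `genpareto`
# comes from scipy.stats, and `sorted_scores` holds the permutation null
# scores sorted in descending order; a return value of None signals the
# ECDF fallback.
import numpy as np
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
from scipy.stats import genpareto

eva = importr('eva')
null_scores = np.sort(np.random.normal(size=1000))[::-1]
pval = get_pvalue(null_scores, stat=3.5, n=1000)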
def sampleSizeRest():
    # Get the parsed contents of the form data
    data = request.json
    k = data["k"].split(',')
    prev = data["prev"]
    N = data["N"]
    unique_id = data["unique_id"]
    fixed_flag = data["fixed_flag"]
    sens = data["sens"].split(',')
    spec = data["spec"].split(',')
    print("Starting Benchmark")
    if fixed_flag == "Specificity":
        jsonrtn = wrapper.saveAllSensGraphs(IntVector(k), FloatVector(sens),
                                            FloatVector(spec), float(prev),
                                            IntVector(N), unique_id)
    else:
        jsonrtn = wrapper.saveAllSpecGraphs(IntVector(k), FloatVector(sens),
                                            FloatVector(spec), float(prev),
                                            IntVector(N), unique_id)
    jsonstring = ''.join(list(jsonrtn))
    print(jsonstring)
    return jsonstring
def adjust_pvalue(data):
    stats = importr('stats')
    pvals = FloatVector(data.pval.tolist())
    data["BH"] = stats.p_adjust(pvals, method='BH')
    data["Bonferroni"] = stats.p_adjust(pvals, method='bonferroni')
    return data
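# Usage sketch for adjust_pvalue (illustrative; the p-values are synthetic).
# It assumes a pandas DataFrame with a 'pval' column, plus importr and
# FloatVector available as in the function above.
import pandas as pd

demo = pd.DataFrame({"pval": [0.001, 0.012, 0.04, 0.3, 0.9]})
demo = adjust_pvalue(demo)  # adds 'BH' and 'Bonferroni' columns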
def xicor(x, y, return_pval=False):
    x, y = remove_missing_values(x, y)
    if return_pval:
        xi, sd, pval = XICOR.xicor(FloatVector(x), FloatVector(y), pvalue=True)
        return xi[0], pval[0]
    xi = XICOR.xicor(FloatVector(x), FloatVector(y), pvalue=False)
    return xi[0]
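# Usage sketch for xicor (illustrative): XICOR is assumed to be the R 'XICOR'
# package loaded via importr, and remove_missing_values is assumed to drop
# pairwise-missing entries; Chatterjee's xi approaches 1 for a monotone
# relationship.
xi, p = xicor([1, 2, 3, 4, 5], [1, 4, 9, 16, 25], return_pval=True)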
def fit(self, x, t, y, refit=False):
    if self.method_name == "lasso":
        print("fit lasso")
        self.model = self.rleaner.rlasso(x, IntVector(t), FloatVector(y))
    else:
        # takes much longer to fit
        print("fit boost")
        self.model = self.rleaner.rboost(x, IntVector(t), FloatVector(y))
def getPv(self, qminrho, MuQ, VarQ, KerQ, lam, VarRemain, Df, tau, rho_list, T):
    from rpy2.robjects.vectors import FloatVector
    RV = self.r_davies(FloatVector(qminrho), MuQ, VarQ, KerQ,
                       FloatVector(lam), VarRemain, Df, FloatVector(tau),
                       FloatVector(rho_list), T)
    RV = [sp.array(RV[i]) for i in range(len(RV))]
    return RV[0].flatten()[0]
def fit(self, model, testIndices):
    """Fit surrogate-model parameters by nonlinear least squares (nlmrt.nlfb)."""

    def errorFit(parameters):
        def fitData(n, testIndices):
            for i in range(n):
                if i not in testIndices:
                    yield i

        # Instantiate the surrogate model
        cModel = modena.libmodena.modena_model_t(
            model=model, parameters=list(parameters))

        return FloatVector(
            list(
                model.error(cModel,
                            idxGenerator=fitData(model.nSamples, testIndices),
                            checkBounds=False)))

    new_parameters = model.parameters
    if not len(new_parameters):
        new_parameters = [None] * len(model.surrogateFunction.parameters)
        for k, v in model.surrogateFunction.parameters.items():
            new_parameters[v.argPos] = (v.min + v.max) / 2

    # make objects usable in R
    R_par = FloatVector(new_parameters)
    R_res = rinterface.rternalize(errorFit)

    max_parameters = [None] * len(new_parameters)
    min_parameters = [None] * len(new_parameters)
    for k, v in model.surrogateFunction.parameters.items():
        min_parameters[v.argPos] = v.min
        max_parameters[v.argPos] = v.max

    # perform the fitting (nonlinear least squares)
    nlfb = nlmrt.nlfb(start=R_par,
                      resfn=R_res,
                      jacfn=rinterface.NULL,
                      trace=rinterface.FALSE,
                      lower=FloatVector(min_parameters),
                      upper=FloatVector(max_parameters),
                      maskidx=rinterface.NULL)

    # optimised coefficients and residual sum of squares
    nlfb_coeffs = nlfb[nlfb.names.index('coefficients')]
    nlfb_ssqres = nlfb[nlfb.names.index('ssquares')]
    new_parameters = list(nlfb_coeffs)
    return new_parameters
def _comparisons_dataframe(self):
    column_comp1 = ('Label.1',
                    FactorVector(self.comparison_items[0],
                                 levels=StrVector(self.items)))
    column_comp2 = ('Label.2',
                    FactorVector(self.comparison_items[1],
                                 levels=StrVector(self.items)))
    column_win1 = ('win1', FloatVector(self.comparison_wins[0]))
    column_win2 = ('win2', FloatVector(self.comparison_wins[1]))
    return DataFrame(
        OrdDict([column_comp1, column_comp2, column_win1, column_win2]))
def fit_generator_for_model(self, model, train_generator, train_steps,
                            val_generator, val_steps, num_epochs):
    from functools import partial
    from rpy2.robjects.vectors import FloatVector

    all_outputs = []
    for _ in range(train_steps):
        generator_output = next(train_generator)
        x, y = generator_output[0], generator_output[1]
        all_outputs.append((self.preprocess(x), x[1], y))
    x, t, y = map(partial(np.concatenate, axis=0), zip(*all_outputs))
    self.model = self.grf.causal_forest(
        x,
        FloatVector([float(yy) for yy in y]),
        FloatVector([float(tt) for tt in t]),
        seed=909)
def _translate_control(control):
    """
    Transforms a Python dict into a valid R object.

    Args:
        control: Python dict

    Returns:
        R object of type ListVector
    """
    ctrl = {}
    for key, lst in control.items():
        if isinstance(lst, list):
            # Check bool before int: bool is a subclass of int in Python,
            # so a list of booleans would otherwise become an IntVector.
            if all(isinstance(n, bool) for n in lst):
                entry = BoolVector(lst)
            elif all(isinstance(n, int) for n in lst):
                entry = IntVector(lst)
            elif all(isinstance(n, float) for n in lst):
                entry = FloatVector(lst)
            elif all(isinstance(n, str) for n in lst):
                entry = StrVector(lst)
            else:
                entry = None
            ctrl[key] = entry if entry is not None else lst
        else:
            ctrl[key] = lst
    return ListVector(ctrl)
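# Usage sketch for _translate_control (illustrative): a mixed control dict is
# converted element-wise; note that the boolean list becomes a BoolVector,
# not an IntVector.
ctrl = _translate_control({
    "maxit": [100, 200],     # -> IntVector
    "trace": [True, False],  # -> BoolVector
    "tol": [1e-6, 1e-8],     # -> FloatVector
    "method": "BFGS",        # non-list values are passed through unchanged
})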
def lmer_feat(mer, Dw):
    mer = r['refit'](mer, FloatVector(Dw))
    df = r['data.frame'](r_coef(r['summary'](mer)))
    rows = list(r['row.names'](df))
    new_tvals = np.rec.fromarrays([[tv] for tv in tuple(df.rx2('t.value'))],
                                  names=','.join(rows))
    return new_tvals
def df2mtr(df):
    '''
    Convert the numeric columns of a pandas DataFrame to an R matrix.
    (The original py2ri conversion in rpy2 cannot handle category dtypes
    with missing values properly, hence this helper.)

    Args:
        df: pandas DataFrame of shape (# samples, # features) with numeric dtypes

    Returns:
        mtr: R matrix of shape (# samples, # features)
    '''
    # check arguments
    assert isinstance(df, pd.DataFrame), 'Argument df needs to be a pd.DataFrame.'

    # select only numeric columns
    df = df.select_dtypes('number')

    # create and return the R matrix
    values = FloatVector(df.values.flatten())
    dimnames = ListVector(
        rlc.OrdDict([('index', StrVector(tuple(df.index))),
                     ('columns', StrVector(tuple(df.columns)))]))

    return robjects.r.matrix(values,
                             nrow=len(df.index),
                             ncol=len(df.columns),
                             dimnames=dimnames,
                             byrow=True)
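# Usage sketch for df2mtr (illustrative): a small numeric DataFrame becomes a
# 2 x 2 R matrix whose dimnames carry the index and column labels.
import pandas as pd

df_demo = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]}, index=["s1", "s2"])
mtr_demo = df2mtr(df_demo)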
def cpt_gamma(x, penalty='MBIC', minseglen=2, shape=100):
    """Changepoint detection with the Gamma distribution as test statistic.

    Positive values are required; non-positive values are set to a very
    large RTT, 1e3.

    Args:
        x (list of numeric type): timeseries to be handled
        penalty (string): possible choices "None", "SIC", "BIC", "MBIC",
            "AIC", "Hannan-Quinn"

    Returns:
        list of int: beginning of each new segment in Python (0-based)
            indexing; the R changepoint detection actually returns the last
            index of each segment, and since R indexing starts from 1, that
            value is naturally the 0-based beginning of the next segment.
    """
    try:
        base = np.min([i for i in x if i > 0])
    except ValueError:
        # no positive numbers in x; set base to 0
        base = 0
    x = [(i - base + 0.1) if i > 0 else 1e3 for i in x]
    return [
        int(i) for i in changepoint.cpts(
            changepoint.cpt_meanvar(FloatVector(x),
                                    test_stat='Gamma',
                                    method='PELT',
                                    penalty=penalty,
                                    minseglen=minseglen,
                                    shape=shape))
    ]
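# Usage sketch for cpt_gamma (illustrative, synthetic RTT series): the R
# 'changepoint' package is assumed to be loaded as `changepoint` via importr.
# The level shift at index 50 should be reported as a segment start.
rtts = [20.0] * 50 + [80.0] * 50
segment_starts = cpt_gamma(rtts, penalty='MBIC')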
def CigNet_prediction(self):
    new_table_data = self.scaler.transform(self.real_table)
    real_table = pd.DataFrame(new_table_data,
                              index=self.real_table.index,
                              columns=self.real_table.columns)
    result = predict_decision(self.predictor, real_table)
    result2 = predict_proba(self.predictor, real_table)
    result = np.concatenate([result, result2], axis=1)
    result_df = pd.DataFrame(result, index=real_table.index)
    result_df.columns = ['distance', 'non-driver_prob', 'driver_prob']
    stats = importr('stats')
    # scan location parameters until fewer than 65% of genes are significant
    for l in [-2, -1.75, -1.5, -1.25, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 1]:
        result_df['p_value'] = 1 - norm.cdf(result_df['distance'], loc=l)
        result_df['q_value'] = stats.p_adjust(
            FloatVector(result_df["p_value"].tolist()), method='BH')
        if (result_df[result_df['q_value'] < 0.05].shape[0] * 1.
                / result_df.shape[0]) < 0.65:
            break
    candidate_list = self.CellNet['from'].unique()
    result_df = result_df[result_df.index.isin(candidate_list)]
    result_df = result_df.sort_values(by='distance', ascending=False)
    result_df['Rank'] = result_df['distance'].rank(axis=0, ascending=False)
    return result_df
def fdr_boot(self):
    """Calculate the False Discovery Rate on the bootstrap ratios.

    Makes use of the fdrtool package in R, which estimates the signal and
    null distributions across your features.
    """
    # get the boot ratios
    brs = self.boot_ratio
    names = brs.dtype.names
    qvals = []
    for n in names:
        # get an R vector of bootstrap ratios
        br = brs[n].flatten()
        good_ind = ~np.isnan(br)
        qv = np.ones_like(br)
        br = FloatVector(br[good_ind])
        # calc the fdr
        results = fdrtool.fdrtool(br, statistic='normal',
                                  plot=False, verbose=False)
        # append the qvals
        qv[good_ind] = np.array(results.rx('qval'))
        qvals.append(qv.reshape(self._feat_shape))
    # convert to recarray and return the qs
    qvals = np.rec.fromarrays(qvals, names=','.join(names))
    return qvals
def call_peaks(genome, unit_length=200, small_length=1000,
               medium_length=5000, large_length=10000):
    peaks_out = []
    qvalue = importr('qvalue')
    for contig in genome:
        total_reads = sum(genome[contig])
        contig_length = len(genome[contig])
        if total_reads == 0:
            continue
        window_counts = window_read_counts(genome[contig], unit_length)
        window_sum = window_counts.sum(axis=1)
        small_bin_counts = calculate_bins(window_counts, small_length,
                                          unit_length)
        medium_bin_counts = calculate_bins(window_counts, medium_length,
                                           unit_length)
        large_bin_counts = calculate_bins(window_counts, large_length,
                                          unit_length)
        local_bin_sums = np.hstack((small_bin_counts.sum(axis=1),
                                    medium_bin_counts.sum(axis=1),
                                    large_bin_counts.sum(axis=1)))
        # local Poisson rates per window, plus the genome-wide background rate
        local_lambdas = (local_bin_sums / np.array(
            [small_length, medium_length, large_length])) * unit_length
        lambda_bg = np.ones(window_sum.shape) * \
            (total_reads / contig_length) * unit_length
        all_lambdas = np.hstack((lambda_bg, local_lambdas))
        max_lambdas = np.amax(all_lambdas, axis=1)
        p_vals = 1 - poisson.cdf(window_sum.astype(int),
                                 mu=max_lambdas.astype(float))
        p_vals = np.transpose(p_vals.astype(np.longdouble))[0]
        q_vals = np.array(qvalue.qvalue(FloatVector(p_vals))[2])
        q_vals = np.hstack(
            (np.transpose(np.matrix(list(range(1, len(q_vals) + 1)))),
             np.transpose(np.matrix(q_vals))))
        qv_df = pd.DataFrame(q_vals)
        qv_df.columns = ['Position', 'qvalue']
        peak_indices = np.array(
            qv_df.query('qvalue < 0.01')['Position'].tolist()).astype(int)
        peaks = indices_to_peaks(peak_indices)
        peaks = correct_peaks(peaks, unit_length)
        for peak in peaks:
            peaks_out.append([contig, peak[0], peak[1]])
    return peaks_out
def build_drf_model(self, x_old, y):
    from rpy2.robjects import Formula, pandas2ri
    from rpy2.robjects.vectors import FloatVector

    x, ts = x_old[:, :-1], x_old[:, -1]
    tmp = np.concatenate(
        [x, np.reshape(ts, (-1, 1)), np.reshape(y, (-1, 1))], axis=-1)
    data_frame = pandas2ri.py2ri(
        Baseline.to_data_frame(
            tmp,
            column_names=np.arange(0, tmp.shape[-1] - 2).tolist() + ["T", "Y"]))
    result = self.gps.hi_est(
        Y="Y",
        treat="T",
        treat_formula=Formula('T ~ ' + '+'.join(data_frame.names[:-2])),
        outcome_formula=Formula('Y ~ T + I(T^2) + gps + T * gps'),
        data=data_frame,
        grid_val=FloatVector([float(tt) for tt in np.linspace(0, 1, 256)]),
        treat_mod="Normal",
        link_function="log")  # link_function is not used with treat_mod="Normal"
    treatment_model, model = result[1], result[2]
    fitted_values = treatment_model.rx2('fitted.values')
    distribution = norm(np.mean(fitted_values), np.std(fitted_values))
    return distribution, model
def FDR_adjust_pvalues(pvalue_list, N=None, method='BH'):
    """
    Adjust a list of p-values for false discovery rate using R's
    stats::p.adjust function.

    N and method are passed to R_stats.p_adjust:
     - N is the number of comparisons (if left unspecified, it defaults to
       len(pvalue_list))
     - method is the name of the adjustment method to use (inherited from R)

    Note that this MUST be done after all the p-values have been collected,
    on the full list of p-values at once: applying it to single p-values,
    even with an adjusted N, will give different results!
    """
    if method not in R_stats.p_adjust_methods:
        raise ValueError("Unknown method %s - method must be one of (%s)!"
                         % (method, ', '.join(R_stats.p_adjust_methods)))
    if N is None:
        return R_stats.p_adjust(FloatVector(pvalue_list), method=method)
    return R_stats.p_adjust(FloatVector(pvalue_list), method=method, n=N)
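# Usage sketch for FDR_adjust_pvalues (illustrative, synthetic p-values;
# R_stats is assumed to be importr('stats') at module level):
adjusted = FDR_adjust_pvalues([0.001, 0.009, 0.04, 0.2, 0.9])
bonferroni = FDR_adjust_pvalues([0.001, 0.009, 0.04], N=10, method='bonferroni')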
def gray_plot(data, min=0, max=1, name=""):
    reshape = importr('reshape')
    gg = ggplot2.ggplot(reshape.melt(data, id_var=['x', 'y']))
    pg = (gg +
          ggplot2.aes_string(x='L1', y='L2') +
          ggplot2.geom_tile(ggplot2.aes_string(fill='value')) +
          ggplot2.scale_fill_gradient(low="black", high="white",
                                      limits=FloatVector((min, max))) +
          ggplot2.coord_equal() +
          ggplot2.scale_x_continuous(name))
    return pg
def Parameter_Stability_plot(sample, alpha):
    """GPD parameter stability plots over a range of thresholds u."""
    # Define the threshold array
    step = np.quantile(sample, .995) / 45
    threshold = np.arange(0, np.quantile(sample, .999), step=step)

    # Transform the sample into an R array
    rdata = FloatVector(sample)

    # Initialize the main arrays
    stdshape = []      # standard deviation of the shape parameter
    shape = []         # shape parameter
    scale = []         # scale parameter
    mod_scale = []     # modified scale parameter
    CI_shape = []      # confidence interval of the shape parameter
    CI_mod_scale = []  # confidence interval of the modified scale
    z = norm.ppf(1 - (alpha / 2))

    # Get the parameters and CIs for both plots
    for u in threshold:
        # fit the distribution using the POT package with the MLE method
        fit = POT.fitgpd(rdata, u, est='mle')
        shape.append(fit[0][1])
        scale.append(fit[0][0])
        stdshape.append(fit[1][1])
        CI_shape.append(fit[1][1] * z)
        # modified scale parameter
        mod_scale.append(fit[0][0] - (fit[0][1] * u))
        # Delta method: variance of the modified scale parameter
        Var_mod_scale = (fit[3][0] - (u * fit[3][2])
                         - u * (fit[3][1] - (fit[3][3] * u)))
        CI_mod_scale.append((Var_mod_scale**0.5) * z)

    # Plot the shape parameter against u values
    plt.figure(2)
    plt.errorbar(threshold, shape, yerr=CI_shape, fmt='o')
    plt.xlabel('u')
    plt.ylabel('Shape Parameter')
    plt.title('Shape Parameter Stability Plot')

    # Plot the modified scale parameter against u values
    plt.figure(3)
    plt.errorbar(threshold, mod_scale, yerr=CI_mod_scale, fmt='o')
    plt.xlabel('u')
    plt.ylabel('Modified Scale Parameter')
    plt.title('Modified Scale Parameter Stability Plot')
    plt.show()
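# Usage sketch for Parameter_Stability_plot (illustrative): POT is assumed to
# be the R 'POT' package loaded via importr, with numpy, scipy.stats.norm,
# matplotlib.pyplot as plt and FloatVector imported at module level. The
# heavy-tailed sample below is synthetic.
import numpy as np

heavy_tailed = np.random.pareto(3.0, size=2000) + 1.0
Parameter_Stability_plot(heavy_tailed, alpha=0.05)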
def fdr(p_value_list):
    p_adjust = stats.p_adjust(FloatVector(p_value_list), method='BH')
    q_adjust = qval.qvalue(p_adjust)
    p_adjust_value = [value for _, value in p_adjust.items()]
    q_adjust_value = None
    for name, value in q_adjust.items():
        if name == "qvalues":
            q_adjust_value = value
    return p_adjust_value, q_adjust_value
def get_content_group_enrichment2(control_frequencies, interest_frequencies,
                                  interest_frequencies_5p,
                                  interest_frequencies_3p, dic_p_val,
                                  set_number, name, reg_dic):
    """
    :param control_frequencies: (dictionary of floats) the amino acid nature
        frequencies of the control sets
    :param interest_frequencies: (dictionary of floats) the frequency of each
        amino acid nature in the user set of exons
    :param dic_p_val: (dictionary of floats) the p-values
    :param set_number: (int) the number of sets to create
    :param name: (string) the name of the first column
    :param reg_dic: (dictionary of lists of strings) dictionary whose keys
        correspond to the groups of interest, each key being associated with
        the list of amino acids aggregated in that group
    :return: (list of lists of strings) the content of the nature sheet!
        Each sublist corresponds to a row in the nature sheet of the
        enrichment_report.xlsx file
    """
    dic_padjust = {}
    content = [[
        name, "frequencies_of_the_interest_set",
        "frequencies_interest_set_5p", "frequencies_interest_set_3p",
        "average_frequencies_of_the_" + str(set_number) + "_sets",
        "IC_90_of_the_" + str(set_number) + "_sets", "p_values_like", "FDR",
        "regulation_(p<=0.05)", "regulation(fdr<=0.05)", "nb_nt_group",
        "prop_nt_group", "ponderate_nt_group"
    ]]
    ic_90 = calculate_ic_90(control_frequencies)
    p_vals = [dic_p_val[nature] for nature in dic_p_val.keys()]
    rstats = importr('stats')
    p_adjust = rstats.p_adjust(FloatVector(p_vals), method="BH")
    for i, nature in enumerate(dic_p_val.keys()):
        info_count, info_prop, count_pond = get_group_nt_info(reg_dic[nature])
        regulation, regulation_fdr = check_regulation(
            interest_frequencies[nature], ic_90[nature], dic_p_val[nature],
            p_adjust[i])
        content.append([
            str(nature),
            str(interest_frequencies[nature]),
            str(interest_frequencies_5p[nature]),
            str(interest_frequencies_3p[nature]),
            str(np.mean(control_frequencies[nature])),
            str(ic_90[nature]),
            str(dic_p_val[nature]),
            str(p_adjust[i]),
            str(regulation),
            str(regulation_fdr),
            str(info_count),
            str(info_prop),
            str(count_pond)
        ])
        dic_padjust[nature] = p_adjust[i]
    return content, dic_padjust
def nbinom_cdf_fromfit(q, fit_dict):
    pnbinom = robj.r('pnbinom')
    if np.isscalar(q):
        return np.array(pnbinom(q=q, size=fit_dict['estimate'][0],
                                mu=fit_dict['estimate'][1]))
    return np.array(pnbinom(q=FloatVector(q), size=fit_dict['estimate'][0],
                            mu=fit_dict['estimate'][1]))
def nbinom_pdf_fromfit(x, fit_dict):
    dnbinom = robj.r('dnbinom')
    if np.isscalar(x):
        return np.array(dnbinom(x=x, size=fit_dict['estimate'][0],
                                mu=fit_dict['estimate'][1]))
    return np.array(dnbinom(x=FloatVector(x), size=fit_dict['estimate'][0],
                            mu=fit_dict['estimate'][1]))
def nbinom_cdf(q, size, mu):
    pnbinom = robj.r('pnbinom')
    if np.isscalar(q):
        return np.array(pnbinom(q=q, size=size, mu=mu))
    return np.array(pnbinom(q=FloatVector(q), size=size, mu=mu))
def nbinom_pdf(x, size, mu):
    dnbinom = robj.r('dnbinom')
    if np.isscalar(x):
        return np.array(dnbinom(x=x, size=size, mu=mu))
    return np.array(dnbinom(x=FloatVector(x), size=size, mu=mu))
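# Usage sketch for the four nbinom helpers above (illustrative): they wrap
# R's pnbinom/dnbinom in the size/mu parameterization, with robj assumed to
# be rpy2.robjects. fit_dict mirrors the 'estimate' slot of an R
# fitdistr-style fit; the values below are made up.
cdf_vals = nbinom_cdf([0, 5, 10], size=2.0, mu=4.0)  # vectorized input
pdf_val = nbinom_pdf(3, size=2.0, mu=4.0)            # scalar input
fit_demo = {'estimate': [2.0, 4.0]}                  # [size, mu]
tail = 1 - nbinom_cdf_fromfit(10, fit_demo)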
def MWU_vs_groups(data, groups, genes, dview, BH=True, log=True):
    """
    Performs a MWU test for differential expression in each cluster compared
    to every other cluster; the maximal p-value is returned.
    ----------
    data: pd.DataFrame of m cells x n genes.
    groups: pd.Series of cluster identity in m cells.
    genes: list of selected genes.
    dview: ipyparallel dview object.
    BH: whether to perform Benjamini-Hochberg correction. Default: True
    log: whether to return -log10 transformed p-values.
    -----------
    returns p-values of genes in [genes] for all clusters in [groups].
    """
    #########################
    def MWU_vs_groups_helper(data, groups, gene):
        output = pd.DataFrame(index=[gene], columns=return_unique(groups))
        for gr1 in return_unique(groups):
            d1 = data.loc[groups[groups == gr1].index]
            pvals = []
            for gr2 in [g for g in return_unique(groups) if g != gr1]:
                d2 = data.loc[groups[groups == gr2].index]
                try:
                    pval_tmp = mwu(d1, d2, alternative='greater')[1]
                except ValueError:
                    pval_tmp = 1.0
                pvals.append(pval_tmp)
            output.loc[gene, gr1] = np.max(pvals)
        return output.astype(float)
    #########################

    l = len(genes)
    output_tmp = dview.map_sync(MWU_vs_groups_helper,
                                [data.loc[g] for g in genes],
                                [groups] * l, genes)
    output = pd.concat(output_tmp, axis=0)
    if BH:
        for col in output.columns:
            output[col] = stats.p_adjust(FloatVector(output[col]), method='BH')
    if log:
        output = -np.log10(output.astype(float))
    return output
def identifyPPIs_chimericAdj(sorted_x_positive1_9, dicIntCount_positive1_9,
                             dicProteinCount_positive1_9, coEff, pCutOff,
                             oddsCutoff):
    factor = sum([x[1] for x in sorted_x_positive1_9]) / len(sorted_x_positive1_9)
    chimTotal = sum([x[1] for x in sorted_x_positive1_9])
    pvalueList = []
    sorted_x_1_select = []
    selectList_1 = []
    posRCList_1 = []
    orList_1 = []
    chiList = []
    for ha in sorted_x_positive1_9:
        gene1, gene2 = ha[0].split(';')
        # 2x2 contingency table for this gene pair
        a = dicIntCount_positive1_9[ha[0]]
        b = dicProteinCount_positive1_9[gene1] / 2 - a
        c = dicProteinCount_positive1_9[gene2] / 2 - a
        d = chimTotal - a - b - c
        b, c = max(0, b), max(0, c)
        # +1 pseudocount to avoid division by zero in the odds ratio
        oddsRatio = (a + 1) * (d + 1) / (b + 1) / (c + 1)
        chi2, p, dof, ex = stats.chi2_contingency([[a + 1, b + 1],
                                                   [c + 1, d + 1]])
        orList_1.append(oddsRatio)
        pvalueList.append(p)
        sorted_x_1_select.append(ha)
        selectList_1.append(ha[0])
        posRCList_1.append(a)
        chiList.append(chi2)
    stats1 = importr('stats')
    pvalueList_adj_1 = stats1.p_adjust(FloatVector(pvalueList), method='BH')
    list1 = []
    rcList1 = []
    pvalueSig_1 = []
    orSig_1 = []
    chiSig = []
    for i, ha in enumerate(selectList_1):
        gene1, gene2 = ha.split(';')
        pAdj = pvalueList_adj_1[i]
        rcc = posRCList_1[i]
        orr = orList_1[i]
        chichi = chiList[i]
        if (pAdj <= pCutOff and rcc > coEff * factor
                and 'MTRNR' not in gene1 and 'MTRNR' not in gene2
                and orr > oddsCutoff):
            list1.append(ha)
            pvalueSig_1.append(pAdj)
            orSig_1.append(orr)
            rcList1.append(rcc)
            chiSig.append(chichi)
    print(len(set(list1)))
    return list1, rcList1, orSig_1, chiSig, pvalueSig_1
def calcOralEquiv(casrn, conc, q=0.5, species='Rat', units_in='uM',
                  units_out='mg', rest_clear=False):
    if isinstance(q, list):
        q = FloatVector(q)
    if isinstance(conc, (list, pd.Series)):
        conc = FloatVector(conc)
    kwargs = {
        'conc': conc,
        'chem.cas': casrn,
        'which.quantile': q,
        'species': species,
        'input.units': units_in,
        'output.units': units_out,
        'restrictive.clearance': rest_clear,
        'suppress.messages': True,
    }
    return httk.calc_mc_oral_equiv(**kwargs)
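# Usage sketch for calcOralEquiv (illustrative): httk is assumed to be the R
# 'httk' package loaded via importr, with pd and FloatVector imported at
# module level; 80-05-7 is the CAS number for bisphenol A, which should be
# covered by httk's built-in chemical data.
oral_equiv = calcOralEquiv('80-05-7', conc=1.0, q=[0.05, 0.5, 0.95])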
def fit_generator_for_model(self, model, train_generator, train_steps,
                            val_generator, val_steps, num_epochs):
    from rpy2.robjects.vectors import FloatVector

    x, y = self.collect_generator(train_generator, train_steps)
    self.model = self.bart.bartMachine(X=Baseline.to_data_frame(x),
                                       y=FloatVector([yy for yy in y]),
                                       mem_cache_for_speed=False,
                                       seed=909,
                                       run_in_sample=False)