def _start_r_interpreter(self):
    '''Find and start an R interpreter.

    On Windows systems use the packaged R env.
    On POSIX systems use the system-installed R env.
    Will raise RNotFoundException.
    '''
    if sys.platform.startswith('win32'):
        # A MS Windows kind of system
        logger.info("Running on a MS Windows system")
        Rwbin = op.join(self.r_origo, 'R-3.3.1', 'bin', 'R.exe')
        Rlib = op.join(self.r_origo, 'R-3.3.1', 'library')
        logger.info("Try Windows R path: {0}".format(Rwbin))
        if op.exists(Rwbin):
            logger.info("R.exe found")
            self.r = pyper.R(RCMD=Rwbin, use_pandas=True)
            self.r('.libPaths("{0}")'.format(Rlib))
        else:
            # Try to run R from the system path.
            # Give a message if R is not found.
            self.r = pyper.R(use_pandas=True)
            # raise RNotFoundException()
    else:
        # Not Windows; assumed to be a POSIX system
        # (OS X/Darwin or a Linux flavor).
        logger.info("Assumed to run on a POSIX system")
        Rxbin = self._find_posix_system_R()
        logger.info("System R found at path: {0}".format(Rxbin))
        self.r = pyper.R(RCMD=Rxbin, use_pandas=True)
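# `_find_posix_system_R` is referenced above but not shown here. A minimal
# sketch of what it might look like, assuming it only searches PATH via
# shutil.which and raises the module's RNotFoundException when nothing turns
# up (the candidate binary names are assumptions):
import shutil

def _find_posix_system_R(self):
    for candidate in ('R', 'R64'):  # hypothetical candidate names
        path = shutil.which(candidate)
        if path is not None:
            return path
    raise RNotFoundException()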
def surv_ci(data, pred_col, duration_col, event_col):
    """Concordance Index.

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column indicating log hazard ratio.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.

    Returns
    -------
    dict
        Dict with details about the CI.

    Examples
    --------
    >>> surv_ci(data, 'Pred', 'T', 'E')
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    r = pr.R(use_pandas=True)
    r("library('survival')")
    r("library('Hmisc')")
    r.assign("t", T)
    r.assign("e", E)
    r.assign("x", X)
    r("src <- rcorr.cens(-x, Surv(t, e))")
    return r.src
def get_dict_correction(set_XC, designed_barcode, metric="seqlev", distance=2):
    if not isinstance(distance, int):
        raise InvalidArgumentError
    if distance not in (0, 1, 2):
        print("distance must be 0, 1, or 2")
        raise InvalidArgumentError
    r = pr.R()
    r("library(DNABarcodes)")
    r.assign("list_XC", list(set_XC))
    r.assign("designed_barcode", designed_barcode)
    if metric == "seqlev":
        r("demultiplexed <- demultiplex(list_XC, designed_barcode, metric='seqlev')")
    elif metric == "hamming":
        r("demultiplexed <- demultiplex(list_XC, designed_barcode, metric='hamming')")
    else:
        print("metric must be 'seqlev' or 'hamming'")
        raise InvalidArgumentError
    df_correction = r.get("demultiplexed")
    df_correction.columns = [x.replace(" ", "") for x in df_correction.columns]
    df_correction_filt = (df_correction[df_correction.distance <= distance]
                          [['read', 'barcode']])
    dict_correct = df_correction_filt.set_index('read').to_dict()['barcode']
    return dict_correct
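# A hedged usage sketch for get_dict_correction: the reads and barcodes below
# are fabricated, and running it requires an R installation with the
# DNABarcodes Bioconductor package available to PypeR.
observed_reads = {"ACGTACGT", "ACGTACGA", "TTGCAATC"}
designed_barcodes = ["ACGTACGT", "TTGCAATG"]

# Map each observed read to its closest designed barcode (<= 2 edits, SeqLev).
corrections = get_dict_correction(observed_reads, designed_barcodes,
                                  metric="seqlev", distance=2)
print(corrections)  # e.g. {'ACGTACGA': 'ACGTACGT', ...}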
def loess_res(ts, bass=0):
    '''Fit a smooth curve using Friedman's Super Smoother and return residuals.

    Parameters
    ----------
    ts : pandas.Series
        Time series of values.
    bass : int
        Smoothing parameter of the curve; values up to 10 give more smoothness.

    Returns
    -------
    list
        Series of residuals from the smooth curve fit.
    '''
    ts = ts.tolist()
    # create an R instance
    r = pr.R(use_pandas=True)
    # pass ts from Python to R as Y, along with the bass parameter
    r.assign("Y", ts)
    r.assign("bass", bass)
    # fit Friedman's super smoother on ts and extract the fitted values
    r("fit = supsmu(x=1:length(Y), y=Y, bass=bass)$y")
    # pass fitted values from R back to Python
    fit = r.get("fit")
    # residuals from the smooth fit
    residuals = ts - fit
    return residuals.tolist()
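# Quick usage sketch for loess_res with synthetic data; assumes R is reachable
# by PypeR (supsmu is part of base R's stats package).
import numpy as np
import pandas as pd

t = np.linspace(0, 4 * np.pi, 200)
ts = pd.Series(np.sin(t) + np.random.normal(scale=0.2, size=t.size))

res = loess_res(ts, bass=5)
print(len(res), res[:3])  # one residual per observation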
def icar(file_path, algo, n_components, wl_range_start, wl_range_end):
    data = pd.read_csv(file_path, index_col=0).T
    data = data.loc[:, wl_range_start:wl_range_end].T
    wl = data.index.values.astype('float32')
    # create an R instance
    r = pyper.R(use_pandas=True)
    # pass the Python objects to R
    r.assign('data', data)
    r.assign('wl', wl)
    r.assign('n_components', n_components)
    # run the R code
    r("library(ica)")
    r("X <- data")
    if algo == 'FastICA':
        r("a <- icafast(X, n_components)")
    elif algo == 'InfoMax':
        r("a <- icaimax(X, n_components)")
    else:
        r("a <- icajade(X, n_components)")
    r("ics <- cbind(wl, a$S)")
    # read the R object back into Python
    ics = r.get("ics")
    ics_df = pd.DataFrame(ics)
    return ics_df
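# Hypothetical call to icar; the file name, wavelength bounds, and CSV layout
# (wavelengths as the index after transposition) are assumptions made for
# illustration, and R needs the `ica` package installed.
ics = icar("spectra.csv", algo="FastICA", n_components=5,
           wl_range_start=400, wl_range_end=800)
print(ics.head())  # first column is the wavelength, the rest are components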
def readModelFile(modelFileName, jobName):
    if os.path.isfile(modelFileName):
        r = pr.R()
        r(IMPORT_R_WRAPPER)
        r.assign('params', json.dumps({'rdsFile': modelFileName}))
        rOutput = r('model <- readFromRDS(params)')
        log.info(rOutput)
        results = r.get('model')
        del r
        rOutput = None
        if results:
            model = json.loads(results)
            maxTimePoint = model['maxTimePoint'][0]
            covariates = model['covariates']
            if len(model['jobName']):
                jobName = model['jobName'][0]
            return {
                'jobName': jobName,
                'maxTimePoint': maxTimePoint,
                'interceptOnly': len(covariates) == 0
            }
        else:
            message = "Couldn't read Time Points from RDS file!"
            log.error(message)
            return message
    else:
        message = "Model file does not exist!"
        log.error(message)
        return message
def __init__(self, training_marginal_dist_matrix: np.matrix, family: str,
             param=None, dim=None):
    if family not in ['gumbel', 'clayton', 'frank', 'normal', 'indep']:
        print('Copula family "' + family + '" is not supported.')
        raise ValueError
    if training_marginal_dist_matrix.size == 0:
        if dim is None:
            raise ValueError
        self._dimension = dim
    else:
        self._dimension = training_marginal_dist_matrix.shape[1]
    self._r_engine = pyper.R()
    self._r_engine.assign("py.training.marginal.dist.matrix",
                          training_marginal_dist_matrix)
    self._r_engine.assign("py.cop.name", family)
    self._r_engine.assign("py.param", param)
    self._r_engine.assign("py.dim", self._dimension)
    self._r_engine('source("copula/copula.R")')
    trained_param = self._r_engine.get("trained.param")
    self.trained_param = trained_param
    if trained_param is None:
        self._r_engine('trained <- indepCopula(dim=%d)' % self._dimension)
        print('indep')
    else:
        print(trained_param)
def arimaTemp(filename):
    processTemp()
    predictions = []
    r = pr.R(RCMD="C:\\Program Files\\R\\R-3.1.2\\bin\\R",
             use_numpy=True, use_pandas=True)
    datetimes = np.arange('2008-07-01 00:00:00', '2008-07-08 00:00:00',
                          dtype='datetime64[h]')
    for j in range(1, 12):
        data = pd.read_csv(
            '../data/outputs/temp_history_processed_station_%s.csv' % j,
            parse_dates=['datetime'])
        subts = data["value"]
        print('Predictions for zone %s' % j)
        results = arima(subts, r)
        results = pd.DataFrame(results, columns=['value'])
        results['datetime'] = datetimes
        results['station_id'] = j
        predictions.append(results)
    concatPredictions = pd.concat(predictions)
    concatPredictions.to_csv(filename, index=False,
                             date_format='%Y-%m-%d %H:%M:%S', mode='a')
def plot_scatter_polya(in_fisher, out_png):
    """Plot a scatter plot of the expression of polyA sites."""
    lhs, rhs = in_fisher.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_fisher.replace('.fisher_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    R_script = r"""
library(lattice)
d <- read.table(file="%(in_fisher)s", header=TRUE, sep="\t")
png('%(out_png)s')
exp1 <- c(d$exp1_upstream_count, d$exp1_downstream_count)
exp2 <- c(d$exp2_upstream_count, d$exp2_downstream_count)
sig_sites <- d$fisher_p_two_sided < .05
exp1_sig = c(d$exp1_upstream_count[sig_sites], d$exp1_downstream_count[sig_sites])
exp2_sig = c(d$exp2_upstream_count[sig_sites], d$exp2_downstream_count[sig_sites])
plot(log2(exp1), log2(exp2), cex=.8, col='lightgray', pch=20,
     xlab="%(xlab)s", ylab="%(ylab)s",
     main="All sites for %(lhs)s vs. %(rhs)s in %(compare_type)s",
     sub=paste("R^2 is ", cor(exp1, exp2)))
dev.off()
""" % dict(plot_label=r'Poly-A for\n%s' % in_fisher,
           xlab="log2(%s)" % lhs, ylab="log2(%s)" % rhs,
           in_fisher=in_fisher, out_png=out_png, lhs=lhs, rhs=rhs,
           compare_type=compare_type)
    # print(R_script)
    r = pyper.R()
    r(R_script)
def plot_differential_polya(in_fisher, out_pattern, out_template):
    """Plot a scatter plot of the log-expression difference of polyA sites."""
    lhs, rhs = in_fisher.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_fisher.replace('.fisher_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    for max_pval in [.05, .01, .001]:
        out_png = out_template % ('pval_%s' % max_pval)
        R_script = r"""
library(lattice)
d <- read.table(file="%(in_fisher)s", header=TRUE, sep="\t")
png('%(out_png)s')
sig_sites <- d$fisher_p_two_sided < %(max_pval)s
exp1_proximal = d$exp1_upstream_count[sig_sites]
exp1_distal = d$exp1_downstream_count[sig_sites]
exp2_proximal = d$exp2_upstream_count[sig_sites]
exp2_distal = d$exp2_downstream_count[sig_sites]
plot(log2(d$exp1_upstream_count/d$exp2_upstream_count),
     log2(d$exp1_downstream_count/d$exp2_downstream_count),
     cex=.8, col='lightgray', pch=20,
     xlab="%(xlab)s", ylab="%(ylab)s",
     main="Significant sites for %(lhs)s vs. %(rhs)s in %(compare_type)s",
     sub=paste("Significant sites:", sum(sig_sites), "/", dim(d)[1]))
points(log2(exp1_proximal/exp2_proximal), log2(exp1_distal/exp2_distal),
       col='red', cex=.8, pch=20)
dev.off()
""" % dict(plot_label=r'Differential Poly-A for\n%s' % in_fisher,
           xlab="log2(%s/%s)-proximal" % (lhs, rhs),
           ylab="log2(%s/%s)-distal" % (lhs, rhs),
           in_fisher=in_fisher, out_png=out_png, lhs=lhs, rhs=rhs,
           compare_type=compare_type, max_pval=max_pval)
        # print(R_script)
        r = pyper.R()
        r(R_script)
def on_get(self, req, resp):
    r = pr.R()
    # https://www.r-tutor.com/elementary-statistics/quantitative-data/frequency-distribution-quantitative-data
    r("duration = faithful$eruptions")
    r("breaks = seq(1.5, 5.5, by=0.5)")
    r("duration.cut = cut(duration, breaks, right=FALSE)")
    r("duration.freq = table(duration.cut)")
    resp.body = r("duration.freq")
def pc_rlib(d_dt, threshold, skel_method, verbose):
    import pandas
    import pyper

    if skel_method == "default":
        method = "original"
    else:
        method = skel_method

    input_data = d_dt
    r = pyper.R(use_pandas=True)
    r("library(pcalg)")
    r("library(graph)")

    df = pandas.DataFrame(input_data)
    r.assign("input.df", df)
    r.assign("method", method)
    r("evts = as.matrix(input.df)")
    r.assign("event.num", len(input_data))
    r.assign("threshold", threshold)
    r.assign("verbose.flag", verbose)
    print(r("""
        pc.result <- pc(suffStat = list(dm = evts, adaptDF = FALSE),
                        indepTest = binCItest, alpha = threshold,
                        skel.method = method,
                        labels = as.character(seq(event.num)-1),
                        verbose = verbose.flag)
    """))
    r("node.num <- length(nodes(pc.result@graph))")

    g = nx.DiGraph()
    for i in range(r.get("node.num")):
        r.assign("i", i)
        edges = r.get("pc.result@graph@edgeL[[as.character(i)]]$edges")
        if edges is None:
            pass
        elif type(edges) == int:
            g.add_edge(i, edges - 1)
        elif type(edges) == np.ndarray:
            for edge in edges:
                g.add_edge(i, edge - 1)
        else:
            raise ValueError("edges is unknown type {0}".format(type(edges)))
    return g
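# A hedged usage sketch for pc_rlib: the binary event matrix is fabricated,
# and the R session needs the pcalg and graph packages installed.
import numpy as np

rng = np.random.RandomState(0)
# Three event types observed over 100 time bins (0/1 occurrences).
d_dt = {i: rng.randint(0, 2, 100).tolist() for i in range(3)}

g = pc_rlib(d_dt, threshold=0.01, skel_method="default", verbose=False)
print(g.edges())  # directed edges of the estimated causal DAG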
def ping():
    try:
        r = pr.R()
        return r['"true"']
    except Exception as e:
        log.exception("Exception occurred")
        return buildFailure({
            "status": False,
            "statusMessage": "Call R failed!"
        })
def RdataToHDF5(fileName, variableName, path=None):
    r = pyper.R()
    if path is not None:
        r["setwd('%s')" % path]
    r['load("%s")' % fileName]
    r['library(rhdf5)']
    try:
        r['h5createFile("%s.h5")' % fileName]
    except pyper.RError:
        # typically this is because the file already exists
        # TODO: determine if something else went wrong
        pass
    r['h5write(%s, "%s.h5", "%s")' % (variableName, fileName, variableName)]
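# Usage sketch for RdataToHDF5; the file and variable names are placeholders,
# and the R session needs the Bioconductor rhdf5 package installed.
RdataToHDF5("results.Rdata", "mydata", path="/tmp")
# -> writes /tmp/results.Rdata.h5 containing the R object `mydata`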
def make_basic_colormap(todir: str, width: int, height: int,
                        encoding="CP932") -> None:
    '''Create a colormap of administrative districts from a shapefile.

    Each administrative district is drawn in a different color. Note,
    however, that only the color-to-district correspondence exists; no
    mapping between districts and their names or codes is produced.

    Parameters
    ----------
    todir : str
        Directory containing the shapefile.
    width : int
        Width of the output SVG file.
    height : int
        Height of the output SVG file.
    encoding : str
        Character encoding used in the input shapefile.
    '''
    # create an R object
    r = pyper.R()
    r("library(sf)")
    r("library(ggplot2)")
    # input shapefile
    shapefile = (glob.glob(os.path.join(todir, "*.shp")))[0]
    r.assign('shapefile', shapefile)
    # make a temporary directory
    os.makedirs('./tmp', exist_ok=True)
    # set the output SVG file name
    svgfile = './tmp/tmp.svg'
    r.assign('svgfile', svgfile)
    # output the OsakaMap;
    # each area is colored according to its municipality code
    r.assign('param1', width)
    r.assign('param2', height)
    encoding_to_r = "ENCODING=" + encoding  # e.g. "ENCODING=CP932"
    r.assign('option', encoding_to_r)
    r('shp <- sf::st_read(shapefile, options=option)')
    r('svg(svgfile, width=param1, height=param2)')
    r('ggplot()+geom_sf(data=shp,aes(fill=N03_007))')
    r('dev.off()')
def __pipeData(self):
    '''Pipe data to the R environment and declare R variables.

    NOTE: R must be installed in this script's environment.
    '''
    try:
        self.r = pyper.R(use_numpy=True)  # for data input as a numpy array
        # choose the Canadian host as the R mirror (for package downloads)
        self.r("chooseCRANmirror(ind=10)")
        self.r.assign('data', self.data)
    except Exception:
        print('There was an error piping data to R environment. '
              'Please ensure that R is properly installed and all '
              'PATH variables are correct.')
def plot_balls(balls, depth):
    global save_dir
    r = pyper.R()
    r_code = '''
    par(pty="s")
    png("{0}/depth_{1}.png")
    data <- read.csv('{0}/depth_{1}.csv', header = FALSE)
    p = data$V1
    q = data$V2
    col1 <- densCols(p, q, colramp = colorRampPalette(c("white", "orange", "red")))
    plot(0, 0, type = "n", xlim = c(0, 100), ylim = c(0, 100), xlab = "x", ylab = "y")
    points(p, q, col = col1, pch = 3)
    dev.off()
    '''.format(save_dir, depth)
    r(r_code)
def draw_expression_correlation(in_data, out_png):
    """Correlation test to see if expression values correlate with peak
    quality (the score column from the peak file).
    """
    R_script = r"""
png('%(out_png)s')
d <- read.table(file="%(in_data)s", header=TRUE, sep="\t")
library(lattice)
r <- cor.test(d$expression_val, d$peak_score)
plot(d$expression_val, d$peak_score, xlab="expression value", ylab="peak score")
title(paste("R^2 = ", r$estimate, ", p-value = ", r$p.value))
dev.off()
""" % dict(in_data=in_data, out_png=out_png)
    # print(R_script)
    r = pyper.R()
    r(R_script)
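# The plotting helpers in this module share one pattern: build an R script as
# a Python template string, fill it with %-formatting, and hand the result to
# a throwaway pyper.R() session. A stripped-down sketch of that pattern (the
# function name, file names, and plotted columns are placeholders, not taken
# from any function above):
import pyper

def run_r_template(in_tsv, out_png):
    script = r"""
d <- read.table(file="%(in_tsv)s", header=TRUE, sep="\t")
png('%(out_png)s')
plot(d[[1]], d[[2]])
dev.off()
""" % dict(in_tsv=in_tsv, out_png=out_png)
    r = pyper.R()
    r(script)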
def youden_onecut(data, pred_col, duration_col, event_col, pt=None):
    """Cutoff that maximizes the Youden index.

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column to reference for dividing groups.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.
    pt : int, default None
        Predicted time.

    Returns
    -------
    float
        Value indicating the cutoff for pred_col of data.

    Examples
    --------
    >>> youden_onecut(data, 'X', 'T', 'E')
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    if pt is None:
        pt = T.max()
    r = pr.R(use_pandas=True)
    r.assign("t", T)
    r.assign("e", E)
    r.assign("mkr", np.reshape(X, E.shape))
    r.assign("pt", pt)
    r.assign("mtd", "KM")
    r.assign("nobs", X.shape[0])
    r("library(survivalROC)")
    r("src <- survivalROC(Stime = t, status = e, marker = mkr, predict.time = pt, span = 0.25*nobs^(-0.20))")
    r("Youden <- src$TP-src$FP")
    r("cutoff <- src$cut.values[which(Youden == max(Youden), arr.ind = T)]")
    return r.cutoff
def exec_r(self):
    import pyper
    r = pyper.R(use_pandas=True)
    r("suppressWarnings(require(tseries, warn.conflicts = FALSE, quietly = TRUE))")
    for i in range(self._term, self._return_df.shape[0]):
        self._return_df.iloc[i - self._term:i].to_csv('run.csv')
        r("df <- read.csv('run.csv', header = T)")
        r("d <- diff(df$Last)")
        r("x <- factor(sign(d[-which(d %in% 0)]))")
        r("run_result <- runs.test(x)")
        r("p_value <- run_result$p.value")
        print(r.get('p_value'))
def fit_predict(self, X_train, y_train, X_test, y_test):
    X = np.vstack((X_train, X_test))
    df = DataFrame(X)
    r = pr.R(use_pandas=True)
    r.assign("X", df)
    # r('print(X)')  # if I remove this line pyper gets stuck...
    install_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(
            inspect.currentframe())))  # script directory
    r('source("' + install_dir + '/RF.R")')
    r('set.seed(0)')
    r('no.forests=' + str(int(self.nforests)))
    r('no.trees=' + str(int(self.ntree)))
    r('rfdist <- RFdist(X, mtry1=3, no.trees, no.forests, '
      'addcl1=T, addcl2=F, imp=T, oob.prox1=T)')
    r('labelRF=outlier(rfdist$cl1)')
    return -np.array(r.get('labelRF'))[X_train.shape[0]:]
def surv_roc(data, pred_col, duration_col, event_col, pt=None):
    """Get survival ROC at a predicted time.

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column to reference for dividing groups.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.
    pt : int
        Predicted time.

    Returns
    -------
    dict
        Dict including "FP", "TP" and "AUC" of the ROC.

    Examples
    --------
    >>> surv_roc(data, 'X', 'T', 'E', pt=5)
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    if pt is None:
        pt = T.max()
    r = pr.R(use_pandas=True)
    r.assign("t", T)
    r.assign("e", E)
    r.assign("mkr", np.reshape(X, E.shape))
    r.assign("pt", pt)
    r.assign("mtd", "KM")
    r.assign("nobs", X.shape[0])
    # different predict.time may plot 1, 5, or 10 year ROC
    r("src<-survivalROC::survivalROC(Stime = t, status = e, marker = mkr, predict.time = pt, span = 0.25*nobs^(-0.20))")
    # r.src['AUC'], r.src['FP'], r.src['TP']
    return r.src
def get_quantile_errors(data, quantiles):
    """Get arbitrary quantile values for the given data.

    This function uses the PypeR library (download from
    http://sourceforge.net/projects/rinpy/) to call commands from the
    R language, used here to calculate an arbitrary number of quantile
    intervals of a numpy array. For bi-dimensional numpy arrays, the
    statistics are calculated along the first axis, meaning that each
    line corresponds to an independent data set.

    Input:
     - data       numpy.ndarray : data input, each line being a data set (ndim=2,dtype=float)
     - quantiles  numpy.ndarray : percentages (ndim=1,dtype=float)
                                  Ex: quartiles : array([0.25,0.5,0.75])

    Output:
     - qtl_errors numpy.ndarray : quantile error values (ndim=2,dtype=float)

    ---
    """
    myR = pyper.R()
    myR['data'] = np.transpose(data)
    myR['quantiles'] = quantiles
    # calculate the quantiles for each data line
    myR("""qtls <- t(sapply(as.data.frame(data),
                            function(x) quantile(x, quantiles, names=FALSE)))""")
    qtl_errors = myR['qtls']
    del myR
    return qtl_errors
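# Usage sketch for get_quantile_errors with fabricated data.
import numpy as np

data = np.random.normal(size=(5, 1000))   # five independent data sets
quartiles = np.array([0.25, 0.5, 0.75])

qtls = get_quantile_errors(data, quartiles)
print(qtls.shape)  # one row of quantile values per input data set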
def plot_nearest_features(in_distances, out_png, test_out, window_size=20):
    """Plot a density of the distance to the nearest features."""
    print(out_png)
    print(test_out)
    R_script = r"""
png('%(out_png)s')
d <- read.table(file="%(in_data)s", header=TRUE, sep="\t")
d = d / 1000
library(lattice)
plot(density(unlist(d[1])[unlist(d[1]) < %(window_size)s & unlist(d[1]) > -%(window_size)s], na.rm=TRUE),
     main="Feature densities around peaks", xlab="Distance (kb)", ylab="Density",
     xlim=c(-%(window_size)s, %(window_size)s))
index = 1
r = rainbow(length(d))
for (i in d) {
    i = i[i < %(window_size)s & i > -%(window_size)s]
    lines(density(i, from=-%(window_size)s, to=%(window_size)s, na.rm=TRUE), col=r[index])
    index = index + 1
}
legend("topleft", legend=names(d), col=r, lty=1)
dev.off()
""" % dict(in_data=in_distances, out_png=out_png, window_size=window_size)
    print(R_script)
    r = pyper.R()
    r(R_script)
def plot_ttest_polya(in_ttest, out_png):
    """Plot the t-test averages as a scatter plot of the expression of polyA sites."""
    lhs, rhs = in_ttest.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_ttest.replace('.t_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    R_script = r"""
library(lattice)
d <- read.table(file="%(in_ttest)s", header=TRUE, sep="\t")
png('%(out_png)s')
exp1 <- unlist(lapply(lapply(strsplit(gsub("]", "", gsub("[", "", d$exp1_count,
    fixed=TRUE), fixed=TRUE), ", ", fixed=TRUE), as.numeric), mean))
exp2 <- unlist(lapply(lapply(strsplit(gsub("]", "", gsub("[", "", d$exp2_count,
    fixed=TRUE), fixed=TRUE), ", ", fixed=TRUE), as.numeric), mean))
sig_sites <- d$ttest_pvalue < .05
# upregulated means the t-statistic is positive => exp1 < exp2
exp1_bigger <- d$ttest_pvalue < .05 & d$ttest_stat > 0
exp2_bigger <- d$ttest_pvalue < .05 & d$ttest_stat < 0
exp1_sig = exp1[sig_sites]
exp2_sig = exp2[sig_sites]
plot(log2(exp1), log2(exp2), cex=.8, col='lightgray', pch=20,
     xlab="%(xlab)s", ylab="%(ylab)s",
     main="All sites for %(lhs)s vs. %(rhs)s in %(compare_type)s",
     sub=paste("R^2 is ", cor(exp1, exp2), "\nSig sites x > y: ", sum(exp1_bigger),
               "\nSig sites x < y: ", sum(exp2_bigger)))
points(log2(exp1_sig), log2(exp2_sig), col='red', cex=.8, pch=20)
dev.off()
""" % dict(plot_label=r'Poly-A for\n%s' % in_ttest,
           xlab="log2(%s)" % lhs, ylab="log2(%s)" % rhs,
           in_ttest=in_ttest, out_png=out_png, lhs=lhs, rhs=rhs,
           compare_type=compare_type)
    # print(R_script)
    r = pyper.R()
    r(R_script)
def post(self):
    # database
    db = pd.read_csv("data.csv")
    params = pd.read_csv("params.csv")
    # submitted answers (each item rated on a 6-point scale, 0-5)
    user_data = pd.DataFrame(
        [[int(self.get_argument("q{0}".format(i))) for i in range(1, 17)]
         + ["偏差値"]],
        columns=["V{0}".format(i) for i in range(1, 17)] + ["偏差値"])
    data = db.append(user_data, ignore_index=True)
    # analysis in R
    r = pyper.R(use_pandas=True)
    r.assign("data", data)
    r.assign("params", params)
    r("source('myfunc/myfunc.R')")
    r("library(psych)")
    r("library(irtoys)")
    r("library(ltm)")
    # drop the standard score in column 17 from the analysis data
    # and analyze only the most recent row
    r("data<-data[nrow(data),1:16]")
    # estimate the parameters
    r("a <- grm.theta(data,a=params[,6]/1.7,bc=params[,c(1,2,3,4,5)],D=1.7,method ='ML')")
    r("偏差値<-round(a[,1],4)*10+50")
    # the user's standard score
    value = r.get("偏差値")
    data.iat[len(data) - 1, 16] = value
    print(data)
    ranking = data.rank(ascending=False, method='max')
    print(ranking)
    # rank
    rank = ranking.iat[len(data) - 1, 16].astype(int)
    # number of test takers
    every = len(data)
    self.render("result.html", value=value, rank=rank, every=every)
@author: narrowly
"""
import numpy as np
import pandas as pd
import scipy.stats as sct
import random
import pyper
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# preprocessing
np.random.seed(4521)
N = 10000  # number of particles

r = pyper.R(use_numpy=True, use_pandas=True)
r("load('data/ArtifitialLocalLevelModel.RData')")
y = r.get('y').astype(np.float64)
t_max = r.get('t_max')
mod = r.get('mod')

# %% Code 11-1

# reshape the data
y = np.r_[np.nan, y]

# index sequence used for resampling
k = np.repeat(np.arange(N)[:, np.newaxis], t_max + 1, axis=1)

# set the prior distribution
# particles (realized values)
x = np.zeros(shape=(t_max + 1, N))
async def plottemp(ctx, self):
    r = pyper.R()
    r("source(file='/home/rito/Programming/Python/discord_bot/cogs/plot.R')")
    await self.send("Here are the room's temperature, humidity, and brightness.")
    await self.send(file=discord.File('/home/rito/image/plot.png'))
    x[i] = 0.4 * x[i - 1] + 0.8 * x[i - 1] * z[i - 1] + z[i]

halfWindowSize = 20
ret = {}
ret[0] = pyHampel.hampel(x, halfWindowSize, method="center")  # set the center of the window at the target value
ret[1] = ret[0]  # just copy
ret[2] = pyHampel.hampel(x, halfWindowSize, method="same")    # same window
ret[3] = pyHampel.hampel(x, halfWindowSize, method="ignore")  # ignore at the ends
ret[4] = pyHampel.hampel(x, halfWindowSize, method="nan")     # set NaN at the ends
print(ret[0][0])  # filtered data
print(ret[0][1])  # indices of outliers

# compare pyHampel with R
r = pyper.R()
r("library(pracma);")
r.assign("x", x)
r("omad <- hampel(x, k=20);")
retR = r.get("omad")

fig, ax = plt.subplots(5, 1, figsize=(8, 8))
ax[0].plot(t, x, "b", label="original data")
for i in range(5):
    ax[i].plot(t, retR["y"], "orange", label="R")
    ax[i].plot(t, ret[i][0], "r.", label="python")
    ax[i].legend(loc="lower center")
ax[0].set_title("overall")
ax[1].set_title("center")
ax[2].set_title("same")
ax[3].set_title("ignore")
# Windows path (commented out; only one of these should be active):
# data = pd.read_excel("F:\\GitHub\\FOF\\Global Allocation\\SBG_US_M.xlsx")
data = pd.read_excel("/Users/WangBin-Mac/FOF/Global Allocation/SBG_US_M.xlsx")
data = data.interpolate()
data = data.dropna().pct_change().dropna()
data
# data_W = data.pct_change().dropna()*100

rp_result_list = []
mu_result_list = []
index_list = []
r = pr.R(use_pandas=True)
r("library(MSwM)")
for each in range(119, len(data) - 1):
    # each = 95
    # data_M.index[each]
    # data_frame = data[:data.index[each]]
    # rolling 120-period window ending at data.index[each]
    data_frame = data[data.index[each - 119]:data.index[each]]
    # data_frame = data_frame[['SP500', 'Barclays_US_bond']]
    '''
    mu_wgt = Ms_MU(data_frame, {'SP500':True, 'Barclays_US_bond':False}, 2)
    print each
    print mu_wgt
    rp_wgt = Ms_RP(data_frame, {'SP500':True, 'Barclays_US_bond':False})