Example no. 1
    def predict(self, x):
        """
        Get the predicted values for the given input values
        Returns an n x m np.array with the predicted y values corresponding to the given x, where m is the number of dependent variables and n is the number of observations in x

        NOTE: assumes that the dimensions of the predicted y match what was seen during training, i.e. the same number of dependent variables
        """
        ## Check the estimator has been fit before calling this function
        check_is_fitted(self, "gammodels")

        # input is converted to an at-least-2-d numpy array by the sklearn utility function; this is needed to handle 1-dimensional x inputs correctly (otherwise the number of rows and columns may come out wrong)
        x = check_array(x, accept_sparse=["csr", "csc", "coo"])

        # Convert to R matrices
        if (
            x.ndim == 1
        ):  # If we're only looking at 1 x at a time, shape[1] will give an error for one-dimensional arrays. Sklearn input validation doesn't change that.
            rx = r.matrix(x, nrow=x.shape[0], ncol=1)
        else:
            rx = r.matrix(x, nrow=x.shape[0], ncol=x.shape[1])
        r.assign("newxdata", rx)  # Put data in R environment for the functions to use
        r("newxdataframe<-data.frame(newxdata)")

        # Use gammodels list to predict each dependent variable and put together in R matrix
        for i, gammodel in enumerate(self.gammodels):
            r.assign("gmodel", gammodel)
            if i == 0:  # array is empty
                r("predmatrix<-predict(gmodel, newxdataframe)")
            else:
                r("predmatrix<-cbind(predmatrix,predict(gmodel,newxdataframe))")
        result = np.asarray(r["predmatrix"])
        return result
Example no. 2
	def plot(self, filename):
		l = len(self.ts)
		r.assign('l', l)
		r.assign('rfilename', filename)
		r.assign('img', filename + '.png')
		
		#Label creation
		r('lbl <- rep(NA, l)')
		lastindex = dict()
		
		for key in range(0, l):
			if self.ts[key] > 0:
				value = self.ts[key]
				if not value in lastindex or (key - lastindex[value]) > 8:
					r.assign('stamp', self.getLabel(key))
					r.assign('i', key+1)	#Indexes start at 1 in R
					r('lbl[i] <- stamp')
					lastindex[value] = key
		
		
		r('temp <- scan(rfilename)')
		r('timeseries <- ts(temp)')
		r('png(img, width = 800, height = 800)')
		r('plot.ts(timeseries, type = "p")')
		r('text(timeseries, labels = lbl, pos = 3)')
		r('dev.off()')
Example no. 3
 def computeGAM(self, rX, ry):
     """
     Put together R code iteratively for getting GAM models for each dependent variable in ry
     Returns list of R GAM models    
     """
     # For every dependent variable, construct model and put into list
     models = list()
     r.assign("xdata", rX)  # Put data in R environment for the functions to use
     r.assign("ydata", ry)  # Put data in R environment for the functions to use
     r("alldataframe<-data.frame(cbind(xdata,ydata))")
     self.xcolnames = np.asarray(r("colnames(alldataframe)[1:ncol(xdata)]"))
     self.ycolnames = np.asarray(
         r("colnames(alldataframe)[-(1:ncol(xdata))]")
     )  # All the cols that are after xcol are ycol
     for ycolname in self.ycolnames:
         rcode = "gam(%s~" % ycolname.replace(
             "-", "."
         )  # replace "-" with "." because R does not allow "-" in column names (it converts them to ".")
         for xcolname in self.xcolnames:
             rcode += "s(%s" % xcolname.replace(
                 "-", "."
             )  # replace "-" with "." because R does not allow "-" in column names (it converts them to ".")
             n_unique = r("length(unique(alldataframe$%s))" % xcolname)[0]  # because R starts indexes at 1
             if n_unique < 10:  # If there aren't enough levels, need to set k lower
                 rcode += ",k=5"
             rcode += ")+"
         rcode = rcode[:-1]
         rcode += ",data=alldataframe)"
         gammodel = r(rcode)
         models.append(gammodel)
     return models
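As a hedged illustration of the R formula string the loop above assembles (the column names below are made up, and "body-mass" stands in for a column with fewer than 10 unique values):

xcolnames = ["age", "body-mass"]
ycolname = "risk-score"
few_levels = {"body-mass"}  # columns with fewer than 10 unique values in the data frame

rcode = "gam(%s~" % ycolname.replace("-", ".")
for xcolname in xcolnames:
    rcode += "s(%s" % xcolname.replace("-", ".")
    if xcolname in few_levels:
        rcode += ",k=5"
    rcode += ")+"
rcode = rcode[:-1] + ",data=alldataframe)"
print(rcode)  # gam(risk.score~s(age)+s(body.mass,k=5),data=alldataframe)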
Example no. 4
def test_1():
    x = generate_AR1(0.95, 1, 50**2+25, random_state=0)
    val = integrated_autocorr5(x, size='sqrt')[0]

    r.assign('x', x)
    ref = r('(bm(x)$se)^2 * length(x) / var(x)')[0]
    np.testing.assert_almost_equal(val, ref)
Example no. 5
 def gof(self, method='Sn', simulation='pb'):
     """
     Goodness-of-fit tests for copulas 
     gofCopula from the R copula package
     
     Input:
         method: "Sn" -> test statistic from Genest, Rémillard, Beaudoin (2009)
                 "SnB"-> test statistic from Genest, Rémillard, Beaudoin (2009)
                 "SnC" -> test statistic from Genest et al. (2009).
                 "AnChisq" -> Anderson-Darling test statistic
                 "AnGamma -> similar to "AnChisq" but based on the gamma distribution 
         simulation: "pb" -> parametric bootstrap
                     "mult" -> multiplier
     Output:
         statistic -> test statistic
         p_value -> p_value of the test
         parameter -> estimates of the parameters for the hypothesized copula family
         
     """
     family = self.family
             
     xy = self.xy
     r.assign('xy',xy.T)
     theta = self.theta
     r('foo <- gofCopula(%sCopula(%f), xy, estim.method="itau", method="%s", simulation="%s")'%(family,theta,method,simulation))
     p_value = float(r('foo$p.value')[0])
     statistic = float(r('foo$statistic')[0])
     parameter = float(r('foo$parameter')[0])
     
     self.p_value = p_value
     self.statistic = statistic 
     self.parameter = parameter
     
     return statistic, p_value, parameter
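A hedged sketch of the same gofCopula call issued directly through rpy2, outside the class (it assumes R's copula package is installed, that numpy arrays are transferred to R as in the class code, and it uses an illustrative Clayton parameter of 2.0 with only N=100 bootstrap replicates):

import numpy as np
from rpy2.robjects import r

r("library(copula)")
x = np.random.normal(size=200)
y = 0.5 * x + np.random.normal(size=200)
r.assign("xy", np.column_stack([x, y]))  # relies on the same numpy-to-R conversion used above
res = r('gofCopula(claytonCopula(2.0), as.matrix(xy), N=100, '
        'estim.method="itau", method="Sn", simulation="pb")')
print(float(res.rx2("statistic")[0]), float(res.rx2("p.value")[0]))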
Example no. 6
    def getCorrelations(self, dataframe):
        """
        Compute pair-wise correlations across samples
        from a dataframe of expression values

        Arguments
        ---------
        dataframe: pandas.DataFrame
          a dataframe containing gene IDs, sample IDs
          and gene expression values

        Returns
        -------
        corr_frame: pandas.DataFrame
          a dataframe of a pair-wise correlation matrix
          across samples.  Uses the Pearson correlation.
        """

        # set sample_id to index
        pivot = dataframe.pivot(index="sample_name", columns="transcript_id", values="TPM")
        transpose = pivot.T
        # why do I have to resort to R????
        r_df = py2ri.py2ri_pandasdataframe(transpose)
        R.assign("p.df", r_df)
        R("""p.mat <- apply(p.df, 2, as.numeric)""")
        R("""cor.df <- cor(p.mat)""")
        r_cor = R["cor.df"]
        py_cor = py2ri.ri2py_dataframe(r_cor)
        corr_frame = py_cor

        return corr_frame
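A hedged sketch of the long-format input this method expects (column names taken from the pivot call above; the instance name is hypothetical):

import pandas as pd

expr = pd.DataFrame({
    "sample_name":   ["s1", "s1", "s2", "s2"],
    "transcript_id": ["t1", "t2", "t1", "t2"],
    "TPM":           [1.0, 5.0, 2.0, 4.0],
})
# corr = clusterer.getCorrelations(expr)  # 'clusterer' is a hypothetical instance of the class above
# corr is then a samples x samples Pearson correlation matrix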
Example no. 7
def save_matrix_R(filename, matrix):

    rmatrix = npr.numpy2ri(matrix)

    r.assign('data', rmatrix)

    r.save('data', file=filename)    
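A hedged round-trip sketch: the matrix is saved under the R name 'data', so reloading the file in a later R session restores it under that name (the filename is illustrative, and the module-level rpy2/numpy2ri setup used above is assumed):

import numpy as np
from rpy2.robjects import r

save_matrix_R("mat.RData", np.eye(3))  # writes an R workspace file containing 'data'
r("load('mat.RData')")                 # restores the object named 'data'
print(r("dim(data)"))                  # [1] 3 3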
Example no. 8
def covarFilter(infile,
                time_points,
                replicates,
                quantile):
    '''
    Filter gene list based on the distribution of the
    sums of the covariance of each gene.  This is highly
    recommended to reduce the total number of genes used
    in the dynamic time warping clustering to reduce the
    computational time.  The threshold is placed at the
    intersection of the expected and observed value
    for the given quantile.
    '''

    time_points.sort()
    time_rep_comb = [x for x in itertools.product(time_points, replicates)]
    time_cond = ro.StrVector([x[0] for x in time_rep_comb])
    rep_cond = ro.StrVector([x[1] for x in time_rep_comb])
    df = pd.read_table(infile, sep="\t", header=0, index_col=0)

    df.drop(['replicates'], inplace=True, axis=1)
    df.drop(['times'], inplace=True, axis=1)
    df = df.fillna(0.0)

    R.assign('diff_data', df)

    E.info("loading data frame")

    # need to be careful about column headers and transposing data frames

    R('''trans_data <- data.frame(diff_data)''')
    R('''times <- c(%s)''' % time_cond.r_repr())
    R('''replicates <- c(%s)''' % rep_cond.r_repr())

    # calculate the covariance matrix for all genes
    # sum each gene's covariance vector

    E.info("calculating sum of covariance of expression")

    R('''covar.mat <- abs(cov(trans_data))''')
    R('''sum.covar <- rowSums(covar.mat)''')
    R('''exp.covar <- abs(qnorm(ppoints(sum.covar),'''
      '''mean=mean(sum.covar), sd=sd(sum.covar)))''')
    R('''sum.covar.quant <- quantile(sum.covar)''')
    R('''exp.covar.quant <- quantile(exp.covar)''')

    E.info("filter on quantile")

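    # In R, quantile() returns the five quartiles (0%, 25%, 50%, 75%, 100%), so the
    # %(quantile)i placeholder below indexes into that five-element vector (1-based)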
    R('''filtered_genes <- names(sum.covar[sum.covar > '''
      '''sum.covar.quant[%(quantile)i]'''
      ''' & sum.covar > exp.covar.quant[%(quantile)i]])''' % locals())
    R('''filtered_frame <- data.frame(diff_data[, filtered_genes],'''
      '''times, replicates)''')

    filtered_frame = com.load_data('filtered_frame').T

    return filtered_frame
Example no. 9
def test_1():
    r("require('coda')")

    random = np.random.RandomState(1)
    for i in range(10):
        x = generate_AR1(phi=0.95, sigma=1, n_steps=1000, c=0, y0=0, random_state=random)
        r.assign('x', x)
        tau = r('nrow(x)/effectiveSize(x)')[0]
        np.testing.assert_approx_equal(tau, integrated_autocorr2(x))
Example no. 10
def save_to_R(X, filename):
	import numpy as np
	from rpy2.robjects import r
	import pandas.rpy.common as com
	from pandas import DataFrame
	df = DataFrame(np.array(X))
	df = com.convert_to_r_dataframe(df)
	r.assign("X", df)
	r("save(X, file='%s.gz', compress=TRUE)"%(filename))
Example no. 11
def treeCutting(infile,
                expression_file,
                cluster_file,
                cluster_algorithm,
                deepsplit=False):
    '''
    Use dynamic tree cutting to derive clusters for each
    resampled distance matrix
    '''
    wgcna_out = "/dev/null"

    E.info("loading distance matrix")

    df = pd.read_table(infile, sep="\t",
                       header=0, index_col=0)
    df = df.fillna(0.0)
    genes = df.index
    genes_r = ro.StrVector([g for g in genes])

    # py2ri requires activation
    pandas2ri.activate()
    rdf = pandas2ri.py2ri(df)

    R.assign("distance_data", rdf)
    R.assign("gene_ids", genes_r)

    R('''sink(file='%(wgcna_out)s')''' % locals())
    R('''suppressPackageStartupMessages(library("WGCNA"))''')
    R('''suppressPackageStartupMessages(library("flashClust"))''')
    E.info("clustering data by %s linkage" % cluster_algorithm)
    R('''rownames(distance_data) <- gene_ids''')
    R('''clustering <- flashClust(as.dist(distance_data),'''
      ''' method='%(cluster_algorithm)s')''' % locals())
    if deepsplit:
        R('''cluster_cut <- cutreeDynamic(dendro=clustering, '''
          '''minClusterSize=50, deepSplit=T)''')
    else:
        R('''cluster_cut <- cutreeDynamic(dendro=clustering, '''
          '''minClusterSize=50, deepSplit=F)''')

    R('''color_cut <- labels2colors(cluster_cut)''')
    R('''write.table(color_cut, file = '%(cluster_file)s','''
      '''sep="\t")''' % locals())
    R('''cluster_matched <- data.frame(cbind(rownames(distance_data),'''
      '''color_cut))''')
    R('''colnames(cluster_matched) = c("gene_id", "cluster")''')
    R('''cluster_matched <- data.frame(cluster_matched$gene_id,'''
      '''cluster_matched$cluster)''')
    R('''sink(file=NULL)''')

    cluster_frame = pandas2ri.ri2py(R["cluster_matched"])
    cluster_frame.columns = ['gene_id', 'cluster']
    cluster_frame.index = cluster_frame['gene_id']
    cluster_frame.drop(['gene_id'], inplace=True, axis=1)

    return cluster_frame
Example no. 12
def test_1():
    y = generate_AR1(0.95, 1, 10000)
    tau = integrated_autocorr3(y)

    r.assign('x', y)
    r('popvar = (var(x)*(nrow(x)-1)/nrow(x))')
    r('init = initseq(x)')
    tau_ref = r('initseq(x)$var.pos / popvar')[0]
    print(tau, tau_ref)
    np.testing.assert_array_almost_equal(tau, tau_ref)
Example no. 13
def create_model(input_json):
    global hourly_volume

    #Loads JSON file
    print 'Loading Data...'
    json = loads(input_json)

    #Converts to Pandas Time Series Dataframe which can be converted to be used by R
    df = pd.DataFrame(json)
    df.columns = ['time']
    df['time'] = df['time'].apply(dateutil.parser.parse)
    df.set_index('time', inplace=True)
    df['t'] = 1

    #Resamples Dataframe into hourly (for weekly model) and daily (for monthly model) buckets
    hourly_volume = df.resample('1H', how=np.count_nonzero)
    daily_volume = df.resample('1D', how=np.count_nonzero)

    print 'Creating Model...'

    #Converts Pandas Dataframe to R Dataframe
    demand_data_daily = com.convert_to_r_dataframe(daily_volume)
    demand_data_hourly = com.convert_to_r_dataframe(hourly_volume)

    #Brings Dataframes into R workspace
    r.assign('train_data_hourly', demand_data_hourly)
    r.assign('train_data_daily', demand_data_daily)
    #Assigns values to required input variables in R
    r('start_index = ' + str(get_friday_index(hourly_volume)))
    r('month_index = ' + str(get_first_of_month(hourly_volume)))

    #Reorganizes hourly dataframe to seasonal time series w/ 168 hr weekly intervals starting at the 1st Fri
    r('train_data_ts <- ts(train_data_hourly[,1],start=c(1,(168-start_index+2)),frequency=168)'
      )
    #Adds 0.01 as model input data must be non-zero
    r('train_data_ts = train_data_ts+ 0.01')
    #R creates the hourly model; we set beta=0 as we assume no global trend (HOLT-WINTERS MODEL)
    r('hr_model <- HoltWinters(train_data_ts,beta=0,seasonal="m",start.periods=(168+start_index-1))'
      )

    #R creates a monthly model IFF there is enough data (min 8 weeks)
    r('dy_model = NULL')
    #1st Fri of hourly dataset translated for daily dataset
    r('start_index = (start_index-1)/24+1')
    if (r('length(train_data_daily[,1])>(28*2+start_index-1)')[0]):
        #if the first Friday of the month precedes the start date of the dataset, set it to the prior month's first Friday
        r('if(month_index<1){month_index = 28-month_index }')
        #Reorganizes daily dataframe to seasonal time series
        r('train_data_ts <- ts(train_data_daily[,1],start=c(1,month_index),frequency=28)'
          )
        #R creates monthly model, again we assume no global trend
        r('dy_model <- HoltWinters(train_data_ts,seasonal="m",start.periods=(28+start_index-1))'
          )

        print 'Model Created!'
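A hedged follow-up sketch (not part of the original function): once hr_model exists in the R session, an hourly forecast for the following week could be pulled back into Python with base R's predict method for HoltWinters fits:

import numpy as np
from rpy2.robjects import r

week_ahead = np.asarray(r('predict(hr_model, n.ahead = 168)'))  # next 168 hourly values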
Example no. 14
def treeCutting(infile,
                expression_file,
                cluster_file,
                cluster_algorithm,
                deepsplit=False):
    '''
    Use dynamic tree cutting to derive clusters for each
    resampled distance matrix
    '''
    wgcna_out = "/dev/null"

    E.info("loading distance matrix")

    df = pd.read_table(infile, sep="\t", header=0, index_col=0)
    df = df.fillna(0.0)
    genes = df.index
    genes_r = ro.StrVector([g for g in genes])

    # py2ri requires activation
    pandas2ri.activate()
    rdf = pandas2ri.py2ri(df)

    R.assign("distance_data", rdf)
    R.assign("gene_ids", genes_r)

    R('''sink(file='%(wgcna_out)s')''' % locals())
    R('''suppressPackageStartupMessages(library("WGCNA"))''')
    R('''suppressPackageStartupMessages(library("flashClust"))''')
    E.info("clustering data by %s linkage" % cluster_algorithm)
    R('''rownames(distance_data) <- gene_ids''')
    R('''clustering <- flashClust(as.dist(distance_data),'''
      ''' method='%(cluster_algorithm)s')''' % locals())
    if deepsplit:
        R('''cluster_cut <- cutreeDynamic(dendro=clustering, '''
          '''minClusterSize=50, deepSplit=T)''')
    else:
        R('''cluster_cut <- cutreeDynamic(dendro=clustering, '''
          '''minClusterSize=50, deepSplit=F)''')

    R('''color_cut <- labels2colors(cluster_cut)''')
    R('''write.table(color_cut, file = '%(cluster_file)s','''
      '''sep="\t")''' % locals())
    R('''cluster_matched <- data.frame(cbind(rownames(distance_data),'''
      '''color_cut))''')
    R('''colnames(cluster_matched) = c("gene_id", "cluster")''')
    R('''cluster_matched <- data.frame(cluster_matched$gene_id,'''
      '''cluster_matched$cluster)''')
    R('''sink(file=NULL)''')

    cluster_frame = pandas2ri.ri2py(R["cluster_matched"])
    cluster_frame.columns = ['gene_id', 'cluster']
    cluster_frame.index = cluster_frame['gene_id']
    cluster_frame.drop(['gene_id'], inplace=True, axis=1)

    return cluster_frame
Example no. 15
def save_simmat_R(filename, simmat):

    rmatrix = npr.numpy2ri(simmat.matrix)

    r.assign('data', rmatrix)
    
    r("rownames(%s) <- c%s" % ('data', tuple(simmat.labels)))

    r("colnames(%s) <- c%s" % ('data', tuple(simmat.labels)))

    r.save('data', file=filename)    
Example no. 17
def determineNumberOfFactorsToExtract(data):
    # Given some data, determine the number of factors you should extract.
    # Creates a graph and displays it so you can choose
    # Stolen from
    # http://www.statmethods.net/advstats/factor.html
    r('library(nFactors)')
    r.assign('data', data)
    r('ev = eigen(cor(data))')
    r('ap = parallel(subject=nrow(data),var=ncol(data),rep=100,cent=0.5)')
    r('nS = nScree(x=ev$values, aparallel=ap$eigen$qevpea)')
    r('plotnScree(nS)')
    return
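A hedged usage sketch (it assumes R's nFactors package is installed and that pandas2ri is activated so r.assign() can transfer the data frame; the column names are illustrative):

import numpy as np
import pandas as pd
from rpy2.robjects import pandas2ri

pandas2ri.activate()
survey = pd.DataFrame(np.random.normal(size=(200, 6)),
                      columns=['q%d' % i for i in range(1, 7)])
determineNumberOfFactorsToExtract(survey)  # displays the scree plot drawn by plotnScree()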
Example no. 18
    def testAgainstQValue(self):

        R.assign("pvalues", self.pvalues)
        qvalue = R('''qvalue( pvalues )''')
        r_qvalues = qvalue[2]
        r_pi0 = qvalue[1][0]

        new = Stats.doFDRPython(self.pvalues)
        self.assertTrue(getRelativeError(r_pi0, new.mPi0) < self.max_error)

        for a, b in zip(r_qvalues, new.mQValues):
            self.assertAlmostEqual(a, b, places=self.nplaces)
def plotFilteredSamples(infiles, outfiles):
    '''Create a plot of the SNP profiles for each filtered sample'''

    error_profile, otu_assignment = infiles

    otu_assignment = P.snip(otu_assignment, '.fasta') + '_up.txt'
    otu_dict = {}
    for row in open(otu_assignment):
        sample_id = row.split()[0].split(';')[0]
        otu_id = row.split().pop()
        otu_dict[sample_id] = otu_id

    def _fetch_loci(infile):
        # Some samples have no snps...
        if not open(infile).readline():
            L.warn('Sample %s has no SNPs' % infile)
            idx = [i for i in range(1, 1501)]
            snp = [
                0,
            ] * 1500
            df = pd.DataFrame([idx, snp]).transpose()
        else:
            df = pd.DataFrame(
                [x.split(',') for x in open(infile).readline().split('\t')])

        df.columns = ['Locus', 'Frequency']
        df = df.applymap(float)

        return df

    sample_id = P.snip(error_profile, '_true_snps.tsv', strip_path=True)
    otu_id = otu_dict[sample_id]
    outfile = os.path.join('14_filter_sample_error_profiles.dir',
                           otu_id + '_' + \
                           sample_id + '.pdf')

    R('''rm(list=ls())''')
    R('''require('ggplot2')''')
    df = _fetch_loci(error_profile)
    R.assign('df', df)

    R('''require('ggplot2')
         pl <- ggplot(df, aes(x=Locus, xend=Locus, y=0, yend=Frequency)) + geom_segment()
         pl <- pl + theme_bw() + theme(panel.grid=element_blank())
         pl <- pl + xlim(0, 1500) + scale_y_continuous(expand=c(0,0), limits=c(0, 100))
         pl <- pl + xlab('Position Along 16S Gene') + ylab('Frequency (%%)')
         pl <- pl + ggtitle('%s\n%s')
         pdf('%s', height=3, width=5)
         plot(pl)
         dev.off()
      ''' % (otu_id, sample_id, outfile))

    R('''rm(list=ls())''')
Example no. 20
 def kl_s(self, thres=float("inf")):
     r.assign('X', self.data_a)
     r.assign('Y', self.data_b)
     r_ret = r('''
             X = as.numeric(t(X))
             Y = as.numeric(t(Y))
             library(FNN)
             kl = na.omit(KL.divergence(X, Y, k = 10, algorithm=c("kd_tree", "cover_tree", "brute")))
             mean(kl[is.infinite(kl) == 0])
             ''')
      r_ret_str = str(r_ret)
      statistic = float(r_ret_str[4:])  # strip the leading "[1] " from R's printed value
      return d_close(statistic, thres), statistic
Example no. 21
def plot_mutrate(wt, ga):

    ratemat = np.zeros(( 7, 2 ))

    for i in range(7):
        ratemat[i,0] = mutation_rate(wt[:,:,i])
        ratemat[i,1] = mutation_rate(ga[:,:,i])

    d = numpy2ri(ratemat)
    r.assign('data', d)

    r(' source("src/R/figure_mutrate.R") ')
Example no. 22
    def testAgainstQValue(self):

        R.assign("pvalues", self.pvalues)
        qvalue = R('''qvalue( pvalues )''')
        r_qvalues = qvalue[2]
        r_pi0 = qvalue[1][0]

        new = Stats.doFDRPython(self.pvalues)
        self.assertTrue(getRelativeError(r_pi0, new.mPi0) < self.max_error)

        for a, b in zip(r_qvalues, new.mQValues):
            self.assertAlmostEqual(a, b, places=self.nplaces)
Example no. 23
def determineNumberOfFactorsToExtract(data):
	# Given some data, determine the number of factors you should extract.
	# Creates a graph and displays it so you can choose
	# Stolen from
	# http://www.statmethods.net/advstats/factor.html
	r('library(nFactors)')
	r.assign('data', data)
	r('ev = eigen(cor(data))')
	r('ap = parallel(subject=nrow(data),var=ncol(data),rep=100,cent=0.5)')
	r('nS = nScree(x=ev$values, aparallel=ap$eigen$qevpea)')
	r('plotnScree(nS)')
	return
Example no. 24
def misha2csv(misha=None, binning=DEFAULT_BINNING, output=None):

    from rpy2.robjects import r

    r_library_expression = """
    library("shaman");
    library("misha")
    """

    if misha is None:
        r_import_expression = """
        gsetroot(shaman_get_test_track_db());
        contact_map <- gextract("hic_obs", gintervals.2d(2, 175e06, 
        178e06, 2, 175e06, 178e06), colnames="score")
        """
    else:
        r.assign("path", misha)
        r_import_expression = """
        contact_map <- gextract("hic_obs", gintervals.2d(2, 0, 
        178e06, 2, 175e06, 178e06), colnames="score")
        """

    r(r_library_expression)
    r(r_import_expression)
    r("write.table(contact_map, 'exported_map.csv')")

    matrix = np.genfromtxt("exported_map.csv", dtype=None, skip_header=True)

    (_, _, start1, end1, _, start2, end2, contacts, _) = zip(*matrix)

    pos1 = (np.array(start1) + np.array(end1)) // 2
    pos2 = (np.array(start2) + np.array(end2)) // 2

    positions1 = np.array(pos1) // binning
    positions2 = np.array(pos2) // binning

    minimum = min(np.amin(positions1), np.amin(positions2))
    positions1 -= minimum
    positions2 -= minimum

    n = int(max(np.amax(positions1), np.amax(positions2))) + 1
    assert len(positions1) == len(
        positions2), "Mismatch between lengths {} and {}".format(
            len(positions1), len(positions2))
    sparse_matrix = sparse.coo_matrix((contacts, (positions1, positions2)),
                                      shape=(n, n))

    dense_matrix = np.array(sparse_matrix.todense(), dtype=np.int32)
    if output is not None:
        np.savetxt(output, dense_matrix, fmt="%i")

    return dense_matrix
Example no. 25
def do_multivariate_cox(time, censor, var, additional_vars, float_vars=False):
  df = pd.DataFrame({
    'time': time,
    'censor': censor,
    'var': var})
  df = df.join(additional_vars, how='inner')

  surv = importr('survival')
  r.assign('time',robjects.IntVector(np.array(df['time'])))
  r.assign('censor', robjects.IntVector(np.array(df['censor'])))
  r.assign('var', robjects.FloatVector(np.array(df['var'])))

  for i in additional_vars:
    if float_vars:
      r.assign(i, robjects.FloatVector(np.array(df[i])))
    else:
      r.assign(i, robjects.IntVector(np.array(df[i])))

  additional_vars_str = ' + '.join(additional_vars.columns)

  formula = 'Surv(time, censor) ~ var + ' + additional_vars_str
  try:
    coxuh_output = r('summary( coxph(formula = ' + formula + ', model=FALSE, x=FALSE, y=FALSE))')
  except ri.RRuntimeError as e:
    print(e)
    print(var.describe())
    print(additional_vars.columns)
    return {}

  coef_ind = list(coxuh_output.names).index('coefficients')
  coeffs = coxuh_output[coef_ind]

  patient_count_ind = list(coxuh_output.names).index('n')
  patient_count = coxuh_output[patient_count_ind][0]

  var_zscore = get_zscore('var', coeffs)
  var_pvalue = get_pvalue('var', coeffs)
  hazards_dict = get_hazards('var', coxuh_output)

  cox_dict = {
      'var-n': patient_count,
      'var-z': var_zscore,
      'var-p': var_pvalue,
      }
  cox_dict.update(hazards_dict)

  for i in additional_vars.columns:
    cox_dict[i + '-z'] = get_zscore(i, coeffs)
    cox_dict[i + '-p'] = get_pvalue(i, coeffs)
    cox_dict.update(get_hazards(i, coxuh_output))

  return cox_dict
Example no. 26
 def hell_s(self, thres=1):
     r.assign('X', self.data_a)
     r_ret = r('''
             X = as.numeric(t(X))
             Y = r{}({}, {})
             min2 = min(c(min(X),min(Y)))
             max2 = max(c(max(X),max(Y)))
             library(statip)
             hellinger(X, Y, min2, max2)
             '''.format(self.pyr_dist("r"), len(self.data_a),\
                     str(self.dist_args)[1:-1]))
      r_ret_str = str(r_ret)
      statistic = float(r_ret_str[4:])  # strip the leading "[1] " from R's printed value
      return d_close(statistic, thres), statistic
Example no. 27
 def _get_parameter(self):
     """ estimate the parameter (theta) of copula
     """     
     r.assign('tau.',self.tau)
     
     if self.family == 'clayton':
         self.theta = r('iTau(claytonCopula(), tau.)')[0]
         
     elif self.family == 'frank':
         self.theta = r('iTau(frankCopula(), tau.)')[0]
         
     elif self.family == 'gumbel':
         self.theta = r('iTau(gumbelCopula(), tau.)')[0]
Example no. 28
    def generate(self, S: int) -> np.ndarray:
        from rpy2.robjects import pandas2ri, r as R
        pandas2ri.activate()
        R.assign('Data', self.Data)
        R.assign('N', self.Data.shape[1])  # assumed: N = number of columns (series) in Data
        R("""
library(rmgarch)
xspec = ugarchspec(mean.model = list(armaOrder = c(1, 1)), variance.model = list(garchOrder = c(1,1), model = 'sGARCH'), distribution.model = 'norm')
uspec = multispec(replicate(N, xspec))
spec = dccspec(uspec = uspec, dccOrder = c(1, 1), distribution = 'mvnorm')
speca = dccspec(uspec = uspec, dccOrder = c(1, 1), model='aDCC', distribution = 'mvnorm')
fit_adcc = dccfit(speca, data = Data)
""")
Example no. 29
 def hell_s(self, thres=1):
     r.assign('X', self.data_a)
     r.assign('Y', self.data_b)
     r_ret = r('''
             X = as.numeric(t(X))
             Y = as.numeric(t(Y))
             min2 = min(c(min(X),min(Y)))
             max2 = max(c(max(X),max(Y)))
             library(statip)
             hellinger(X, Y, min2, max2)
             ''')
      r_ret_str = str(r_ret)
      statistic = float(r_ret_str[4:])  # strip the leading "[1] " from R's printed value
      return d_close(statistic, thres), statistic
Example no. 30
File: timeser.py Project: GSng/Uber
def create_model(input_json):
    global hourly_volume
    
    #Loads JSON file
    print 'Loading Data...'
    json = loads(input_json)
    
    #Converts to Pandas Time Series Dataframe which can be converted to be used by R
    df = pd.DataFrame(json)
    df.columns = ['time']
    df['time'] = df['time'].apply(dateutil.parser.parse)
    df.set_index('time', inplace=True)
    df['t'] = 1
    
    #Resamples Dataframe into hourly (for weekly model) and daily (for monthly model) buckets
    hourly_volume = df.resample('1H', how=np.count_nonzero)
    daily_volume =  df.resample('1D', how=np.count_nonzero)
    
    print 'Creating Model...'
	
    #Converts Pandas Dataframe to R Dataframe
    demand_data_daily = com.convert_to_r_dataframe(daily_volume)
    demand_data_hourly = com.convert_to_r_dataframe(hourly_volume)
    
    #Brings Dataframes into R workspace
    r.assign('train_data_hourly',demand_data_hourly)
    r.assign('train_data_daily',demand_data_daily)
	#Assigns values to required input variables in R
    r('start_index = ' +str(get_friday_index(hourly_volume)))
    r('month_index = ' +str(get_first_of_month(hourly_volume)))
    
    #Reorganizes hourly dataframe to seasonal time series w/ 168 hr weekly intervals starting at the 1st Fri    
    r('train_data_ts <- ts(train_data_hourly[,1],start=c(1,(168-start_index+2)),frequency=168)')
    #Adds 0.01 as model input data must be non-zero
    r('train_data_ts = train_data_ts+ 0.01')
    #R creates the hourly model; we set beta=0 as we assume no global trend (HOLT-WINTERS MODEL)
    r('hr_model <- HoltWinters(train_data_ts,beta=0,seasonal="m",start.periods=(168+start_index-1))')
    
    #R creates a monthly model IFF there is enough data (min 8 weeks)
    r('dy_model = NULL')
    #1st Fri of hourly dataset translated for daily dataset
    r('start_index = (start_index-1)/24+1')
    if (r('length(train_data_daily[,1])>(28*2+start_index-1)')[0]):
        #if the first Friday of the month precedes the start date of the dataset, set it to the prior month's first Friday
        r('if(month_index<1){month_index = 28-month_index }')
        #Reorganizes daily dataframe to seasonal time series
        r('train_data_ts <- ts(train_data_daily[,1],start=c(1,month_index),frequency=28)')
		#R creates monthly model, again we assume no global trend
        r('dy_model <- HoltWinters(train_data_ts,seasonal="m",start.periods=(28+start_index-1))')
    
        print 'Model Created!'
Example no. 31
 def getExpertsPrediction(self, verbose, tomo, day_after):
     long_sum = 0.0
     total_sum = 0.0
     test_file = 'experts.test'
     spec_file = 'spec.spec'
     
     # Create "test set" from r DB
     if verbose: print "Generating test-set file... \n"
     test_date=self.date
     r.assign('testDate', test_date.strftime('%Y-%m-%d'))
     r('testDB<-DB[testDate]')
     r('testDB<-subset(testDB, select = -c(ticker) )')
     r.assign('remoteFilename', test_file)
     r('write.table(testDB, file=remoteFilename,quote=FALSE,sep=",",eol=";\n",row.names=FALSE,col.names=FALSE)')
     for expert in self.experts:
         classifier = expert.module(test_file, spec_file)
         score = classifier.get_scores()[0][0]
         expert.prediction = score
         if score>0: long_sum+=expert.weight
         total_sum += expert.weight
     fraction_long = long_sum/float(total_sum)
     fraction_short = 1-fraction_long
     # Delete test file
     os.remove(test_file)
     # next close
     r.assign('remoteTomorrow',tomo.strftime('%Y-%m-%d'))
     next_close = float(r('DB[remoteTomorrow]$close')[0])
     r.assign('remoteDayAft',day_after.strftime('%Y-%m-%d'))
     close_after = float(r('DB[remoteDayAft]$close')[0])
     return fraction_long-fraction_short, next_close, close_after
Example no. 32
    def getExpertsPrediction(self, verbose, tomo, day_after):
        long_sum = 0.0
        total_sum = 0.0
        test_file = 'experts.test'
        spec_file = 'spec.spec'

        # Create "test set" from r DB
        if verbose: print "Generating test-set file... \n"
        test_date = self.date
        r.assign('testDate', test_date.strftime('%Y-%m-%d'))
        r('testDB<-DB[testDate]')
        r('testDB<-subset(testDB, select = -c(ticker) )')
        r.assign('remoteFilename', test_file)
        r('write.table(testDB, file=remoteFilename,quote=FALSE,sep=",",eol=";\n",row.names=FALSE,col.names=FALSE)'
          )
        for expert in self.experts:
            classifier = expert.module(test_file, spec_file)
            score = classifier.get_scores()[0][0]
            expert.prediction = score
            if score > 0: long_sum += expert.weight
            total_sum += expert.weight
        fraction_long = long_sum / float(total_sum)
        fraction_short = 1 - fraction_long
        # Delete test file
        os.remove(test_file)
        # next close
        r.assign('remoteTomorrow', tomo.strftime('%Y-%m-%d'))
        next_close = float(r('DB[remoteTomorrow]$close')[0])
        r.assign('remoteDayAft', day_after.strftime('%Y-%m-%d'))
        close_after = float(r('DB[remoteDayAft]$close')[0])
        return fraction_long - fraction_short, next_close, close_after
Example no. 33
def MHtest(data):
    """
    perform MH test in R (faster)
    """ 

    data = numpy2ri(data)
    r.assign('D', data)
    r(' result <- mantelhaen.test(D) ')
    r(' pval <- result$p.value ')
    p = np.array(r.pval)
    r(' odds <- result$estimate ')
    OR = np.array(r.odds)

    return round(p[0], 10), OR[0]
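A hedged usage sketch (assuming the module's numpy2ri/rpy2 imports above): R's mantelhaen.test() expects a 2 x 2 x K array of counts, one 2x2 contingency table per stratum; the counts below are illustrative.

import numpy as np

tables = np.zeros((2, 2, 2))          # dimensions: row, column, stratum
tables[:, :, 0] = [[10, 4], [3, 11]]
tables[:, :, 1] = [[8, 5], [4, 9]]
p, odds_ratio = MHtest(tables)
print(p, odds_ratio)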
Example no. 34
def plot_average_coverage(wt, ga):

    wt = np.sum(wt, 1)
    ga = np.sum(ga, 1)

    data_wt = numpy2ri(wt)
    data_ga = numpy2ri(ga)

    r.assign('wt', data_wt)
    r.assign('ga', data_ga)

    r(' wt <- as.matrix(wt) ')
    r(' ga <- as.matrix(ga) ')
    r(' source("src/R/figure_coverage.R") ')
Example no. 35
File: SSE.py Project: chaneyn/SSE
def Add_Semivariogram_Info(vg_clim,rtime):
 #Add semivariogram information to the climatology
 if vg_clim[rtime.month]['svg'] != 0:
  vg = vg_clim[rtime.month]['svg']
  vg_clim[rtime.month]['count'] = vg_clim[rtime.month]['count'] + 1.0
  r.assign("vold",vg)
  r("vnew<-rbind(vold,vnew)")
  #w2 = 1/vg_clim[rtime.month]['count']
  #w1 = 1-w2
  #r('vnew["gamma"] = ' + str(w1) + '*vold["gamma"] + ' + str(w2) + '*vnew["gamma"]')
 #r("vold<-vnew")
 vg = r("vnew")
 vg_clim[rtime.month]['svg'] = vg
 return vg_clim
Example no. 36
    def testHochberg(self):

        # code for checking
        R.assign("p", self.pvalues)
        R('''
        lp = length(p)
        n = length(p)
        i <- lp:1L
        o <- order(p, decreasing = TRUE)
        ro <- order(o)
        pmin(1, cummin((n - i + 1L) * p[o]))[ro]
        ''')

        self.check("hochberg")
def do_metagene_cox(time, censor, split, metagene):
    df = pd.DataFrame({'time': time, 'censor': censor, 'split': split})
    df = df.join(metagene, how='inner')

    surv = importr('survival')
    r.assign('time', robjects.IntVector(np.array(df['time'])))
    r.assign('censor', robjects.IntVector(np.array(df['censor'])))
    r.assign('split', robjects.FloatVector(np.array(df['split'])))
    r.assign('metagene', robjects.FloatVector(np.array(df['metagene'])))

    coxuh_output = r(
        'summary( coxph(formula = Surv(time, censor) ~ split + metagene, model=FALSE, x=FALSE, y=FALSE))'
    )

    coef_ind = list(coxuh_output.names).index('coefficients')
    coeffs = coxuh_output[coef_ind]

    patient_count_ind = list(coxuh_output.names).index('n')
    patient_count = coxuh_output[patient_count_ind][0]

    split_zscore = get_zscore('split', coeffs)
    split_pvalue = get_pvalue('split', coeffs)
    metagene_zscore = get_zscore('metagene', coeffs)
    metagene_pvalue = get_pvalue('metagene', coeffs)

    cox_dict = {
        'n': patient_count,
        'z': split_zscore,
        'p': split_pvalue,
        'metagene-z': metagene_zscore,
        'metagene-p': metagene_pvalue,
    }
    return cox_dict
Example no. 38
File: SSE.py Project: chaneyn/SSE
def Add_Semivariogram_Info(vg_clim, rtime):
    #Add semivariogram information to the climatology
    if vg_clim[rtime.month]['svg'] != 0:
        vg = vg_clim[rtime.month]['svg']
        vg_clim[rtime.month]['count'] = vg_clim[rtime.month]['count'] + 1.0
        r.assign("vold", vg)
        r("vnew<-rbind(vold,vnew)")
        #w2 = 1/vg_clim[rtime.month]['count']
        #w1 = 1-w2
        #r('vnew["gamma"] = ' + str(w1) + '*vold["gamma"] + ' + str(w2) + '*vnew["gamma"]')
    #r("vold<-vnew")
    vg = r("vnew")
    vg_clim[rtime.month]['svg'] = vg
    return vg_clim
Example no. 39
    def testHochberg(self):

        # code for checking
        R.assign("p", self.pvalues)
        R('''
        lp = length(p)
        n = length(p)
        i <- lp:1L
        o <- order(p, decreasing = TRUE)
        ro <- order(o)
        pmin(1, cummin((n - i + 1L) * p[o]))[ro]
        ''')

        self.check("hochberg")
Example no. 40
def test_1():
    r("require('coda')")

    random = np.random.RandomState(1)
    for i in range(10):
        x = generate_AR1(phi=0.95,
                         sigma=1,
                         n_steps=1000,
                         c=0,
                         y0=0,
                         random_state=random)
        r.assign('x', x)
        tau = r('nrow(x)/effectiveSize(x)')[0]
        np.testing.assert_approx_equal(tau, integrated_autocorr2(x))
Example no. 41
 def _r_tobit(self, data, xvars, rbar):
     """ Estimate tobit with function from r """
     r.assign('data', com.convert_to_r_dataframe(data))
     rhs = '+'.join(xvars)
     model = r("vglm(OverallRank ~ "+ rhs +", \
                       family=tobit(Upper=" + str(rbar) + ", Lower=1), \
                       data=data, crit='coeff')")
     if self.opts['verbose']:
         print(r.summary(model))
     out = r.coef(model, matrix=True)
     out = np.array(out)
     index = deepcopy(xvars)
     index.insert(0, 'const')
     beta = pd.Series(out[:, 0], index=index)
     return {'beta': beta, 'sigma': out[0, 1]}
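Note that vglm() and the tobit() family used in the formula come from R's VGAM package, so the embedded R session is assumed to have loaded it before _r_tobit() runs, e.g.:

from rpy2.robjects import r

r("suppressPackageStartupMessages(library(VGAM))")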
Example no. 42
def saveData(synthscRNAseq, cellTypesRecord, gc, projMatrix, constMatrix, cellSizeFactors, synthPseudotimes, copyFolder, targetDirectory, R_Directory):
	
	os.chdir(copyFolder)

	itemsToSave = [synthscRNAseq, cellTypesRecord, gc, projMatrix, constMatrix, cellSizeFactors, synthPseudotimes]
	itemNames = ["synthscRNAseq", "cellTypesRecord", "gc", "projMatrix", "constMatrix", "cellSizeFactors", "synthPseudotimes"]

	for i in range(len(itemsToSave)):
		np.save(itemNames[i], itemsToSave[i])
		np.save(targetDirectory+itemNames[i], itemsToSave[i])
		ro = numpy2ri(itemsToSave[i])
		r.assign(itemNames[i], ro)
		rSaveString = "save(" + itemNames[i] + ", file='"+ itemNames[i] +".gzip', compress=TRUE)"
		r(rSaveString)
		rSaveString = "save(" + itemNames[i] + ", file='" + R_Directory + ".gzip', compress=TRUE)"
		r(rSaveString)
def main(args):
    from rpy2.robjects import r
    raster = importr('raster')
    rgdal = importr('rgdal')

    # import the previously created watershed area into R
    r('frac_sum <- function(x, ...){sum(x, na.rm=TRUE)/225}')

    print 'Loading Catchment Raster'
    catch = raster.raster(args.catchment)
    
    print 'Creating Fraction File'
    fraction = raster.aggregate(catch, fact=15, fun=r('frac_sum'))
    r.assign('fraction', fraction)
    print '\tSaving Fraction File'
    raster.writeRaster(fraction, filename=os.path.join(args.outdir, 'fraction.asc'), format='ascii', overwrite=True, NAflag=0)
Example no. 44
 def smkl_s(self, thres=float("inf")):
     r.assign('X', self.data_a)
     r.assign('Y', self.data_b)
     try:
         r_ret = r('''
                 X = as.numeric(t(X))
                 Y = as.numeric(t(Y))
                 library(FNN)
                 klxy = na.omit(KL.divergence(X, Y, k = 10, algorithm=c("kd_tree", "cover_tree", "brute")))
                 klyx = na.omit(KL.divergence(Y, X, k = 10, algorithm=c("kd_tree", "cover_tree", "brute")))
                 mean(klxy[is.infinite(klxy) == 0]) + mean(klyx[is.infinite(klyx) == 0])
                 ''')
         r_ret_str = str(r_ret)
         statistics = float(r_ret_str[4:])
     except:
         statistics = np.nan
     return d_close(statistics, thres), statistics
Example no. 45
def r_MLtheta(y, mu, max_iter=10):
    '''Computes ML estimate for overdispersion theta in R, given predicted and observed counts.'''

    robjects.numpy2ri.activate()
    r.assign("y", y)
    r.assign("mu", mu)
    robjects.numpy2ri.deactivate()

    r_define_warning_handler()

    #convert mu and y to correct format and compute
    r("mu <- as.matrix(mu)")
    r("y <- as.matrix(y)")
    r("res = withWarnings(as.numeric(x = theta.ml(y = y, mu = mu, limit = %u)))"
      % (max_iter))

    return r_extract_results()
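The helpers r_define_warning_handler() and r_extract_results() are defined elsewhere in the module; as a hedged sketch (an assumption, not the project's actual code), the former presumably installs something like the standard withCallingHandlers idiom below, and theta.ml itself comes from R's MASS package:

from rpy2.robjects import r

def r_define_warning_handler():
    # Sketch: define an R withWarnings() that captures warnings alongside the value
    r("""
    library(MASS)  # provides theta.ml
    withWarnings <- function(expr) {
        warnings <- list()
        value <- withCallingHandlers(expr,
            warning = function(w) {
                warnings[[length(warnings) + 1]] <<- conditionMessage(w)
                invokeRestart("muffleWarning")
            })
        list(value = value, warnings = warnings)
    }
    """)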
Example no. 46
def save_counts(
    inFile,
    tag="taul",
    pcaFile=None,
    outFile=None,
    logscale=True,
    model=None,
    pathOut="/mnt/fhgfs_ribdata/user_worktmp/dominik.otto/PCa-2016",
    counts_df=None,
    tenplate=None,
):
    if outFile is None:
        outFile = inFile.replace("_params.hdf5", "_tumor_counts.csv")
    if counts_df is None:
        xt_df = get_counts(
            inFile,
            pcaFile=pcaFile,
            logscale=logscale,
            tumor_free=False,
            add_dev=False,
            xt_df=tenplate,
        )
    else:
        xt_df = counts_df
    xt_df.to_csv(outFile)
    if model is not None:
        # save as R data per cohort
        assembly = model.assembly
        pheno = model.pheno
        cohorts = pheno.CohortAbb.unique()
        for cohort in tqdm(cohorts, desc="cohorts"):
            print(f"Saving {cohort} ...")
            samp_names = pheno.index[pheno.CohortAbb == cohort]
            Counts = xt_df.loc[:, samp_names]
            outFile = (
                f"{pathOut}/"
                f"{tag}-normalized-none-{cohort}-{assembly}-kalGene-counts.csv"
            )
            print(f"... as {outFile}")
            Counts.to_csv(outFile)
            outFile = (f"{pathOut}/"
                       f"{tag}-normalized-none-{cohort}-{assembly}-"
                       f"kalGene-counts.RData")
            print(f"... as {outFile}")
            r.assign("Counts", Counts)
            r(f"save(Counts, file='{outFile}')")
Example no. 47
 def hell_s(self, thres=1):
     r.assign('X', self.data_a)
     r.assign('Y', self.data_b)
     try:
         r_ret = r('''
                 X = as.numeric(t(X))
                 Y = as.numeric(t(Y))
                 min2 = min(c(min(X),min(Y)))
                 max2 = max(c(max(X),max(Y)))
                 library(statip)
                 hellinger(X, Y, min2, max2)
                 ''')
         r_ret_str = str(r_ret)
         statistics = float(r_ret_str[4:])
     except:
         statistics = np.inf
     return d_close(statistics, thres), statistics
    def base(self, baset1=-50, baset2=300, binwidth=0.1):
        self.baset1 = np.max([self.GTI1, baset1])
        self.baset2 = np.min([self.GTI2, baset2])
        self.binwidth = binwidth
        self.tbins = np.arange(self.baset1, self.baset2 + self.binwidth,
                               self.binwidth)
        assert self.baset1 < self.baset2, self.bnname + ': Inappropriate base times!'
        if not os.path.exists(self.baseresultdir):
            os.makedirs(self.baseresultdir)
            expected_pvalue = norm_pvalue()
            f = h5py.File(self.baseresultdir + '/base.h5', mode='w')
            for i in range(14):  # 14 detectors
                grp = f.create_group(Det[i])  # create a group for this detector
                ttefile=glob(self.datadir+'/'+'glg_tte_'+Det[i]+'_'+\
                                        self.bnname+'_v*.fit')  # find the filename for the corresponding detector
                hdu = fits.open(ttefile[0])  # open the corresponding detector file
                trigtime = hdu['Primary'].header['TRIGTIME']
                data = hdu['EVENTS'].data
                timedata = data.field(0) - trigtime
                chdata = data.field(1)
                for ch in range(128):  # this is the per-energy-channel background subtraction
                    time_selected = timedata[chdata == ch]
                    histvalue, histbin = np.histogram(time_selected,
                                                      bins=self.tbins)
                    rate = histvalue / binwidth
                    r.assign('rrate', rate)
                    r("y=matrix(rrate,nrow=1)")
                    fillPeak_hwi = str(int(5 / binwidth))
                    fillPeak_int = str(int(len(rate) / 10))
                    r("rbase=baseline(y,lam = 6, hwi=" + fillPeak_hwi +
                      ", it=10,\
								 int =" + fillPeak_int + ", method='fillPeaks')")
                    r("bs=getBaseline(rbase)")
                    r("cs=getCorrected(rbase)")
                    bs = r('bs')[0]
                    cs = r('cs')[0]
                    corrections_index = (bs < 0)
                    bs[corrections_index] = 0  # force baseline values below 0 to 0
                    cs[corrections_index] = rate[
                        corrections_index]  # the background-subtracted part equals the original part
                    f['/' + Det[i] + '/ch' + str(ch)] = np.array(
                        [rate, bs, cs])  # save the background of every energy channel of every detector separately
            f.flush()
            f.close()
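Note that baseline(), getBaseline() and getCorrected() used above come from R's "baseline" package; the session is assumed to have loaded it beforehand, e.g.:

from rpy2.robjects import r

r("library(baseline)")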
Example no. 49
def plot_tediff_supplement(r1_hela, r1_human, r2_hela, r2_human):

    r1hela = numpy2ri(r1_hela)
    r.assign('r1hela', r1hela)
    r1human = numpy2ri(r1_human)
    r.assign('r1human', r1human)
    r2hela = numpy2ri(r2_hela)
    r.assign('r2hela', r2hela)
    r2human = numpy2ri(r2_human)
    r.assign('r2human', r2human)

    r(' source("src/R/figure_supplement_nopt.R") ')
Example no. 50
def do_cox(time, censor, variable, melk=[]):
    surv = importr("survival")

    time = np.array(time, dtype=np.float)
    censor = np.array(censor, dtype=np.float)
    variable = np.array(variable, dtype=np.float)
    if len(melk):
        melk = np.array(melk, dtype=np.float)
    # remove missing data
    skip_cols = []
    for i in range(len(variable)):
        if np.isnan(variable[i]):
            skip_cols.append(i)
        elif np.isnan(time[i]):
            skip_cols.append(i)
        elif np.isnan(censor[i]):
            skip_cols.append(i)
        elif len(melk) and np.isnan(melk[i]):
            skip_cols.append(i)

    variable = np.delete(variable, skip_cols)
    time = np.delete(time, skip_cols)
    censor = np.delete(censor, skip_cols)
    if len(melk):
        melk = np.delete(melk, skip_cols)
    r.assign('time', robjects.FloatVector(time))
    r.assign('censor', robjects.IntVector(censor))
    r.assign('variable', robjects.FloatVector(variable))
    if len(melk):
        r.assign('melk', robjects.FloatVector(melk))

    if len(melk):
        coxuh_output = r(
            'summary(coxph(formula = Surv(time, censor) ~ variable + melk))')
    else:
        coxuh_output = r(
            'summary(coxph(formula = Surv(time, censor) ~ variable))')

    coef_ind = list(coxuh_output.names).index('coefficients')
    coeffs = coxuh_output[coef_ind]

    patient_count_ind = list(coxuh_output.names).index('n')
    patient_count = coxuh_output[patient_count_ind][0]

    cox_dict = {
        'n': patient_count,
        'z': coeffs.rx('variable', 'z')[0],
        'p': coeffs.rx('variable', 'Pr(>|z|)')[0]
    }
    if len(melk):
        cox_dict['melk-z'] = coeffs.rx('melk', 'z')[0]
        cox_dict['melk-p'] = coeffs.rx('melk', 'Pr(>|z|)')[0]
    return cox_dict
def main(args):
    from rpy2.robjects import r
    raster = importr('raster')
    rgdal = importr('rgdal')

    print 'Loading Catchment Raster'
    fraction = raster.raster(args.fraction)
    r.assign('fraction', fraction)

    # Calculate Velocity Raster
    print 'Calculating Velocity Layer'
    threshold = 30
    velocity = fraction
    r.assign('velocity', velocity)
    r('velocity[which(velocity[]>0)] <- 2')

    print '\tLoading Vegetation Layer'
    d = os.path.dirname(args.vegetation)
    f = os.path.splitext(os.path.basename(args.vegetation))[0]
    veg_shape = rgdal.readOGR(d, f, stringsAsFactors=False)
    r.assign('veg_shape', veg_shape)
    lakes = r('veg_shape[veg_shape$GRIDCODE==20,]')
    lakes_raster = raster.rasterize(lakes, fraction, getCover=True)

    print '\tTransforming lakes layer into raster coordinates'
    r.assign('lakes_raster', lakes_raster)
    r('lakes_raster[which(is.na(fraction[]))] <- 0')
    r.assign('threshold', threshold)
    r('velocity[which(lakes_raster[]>threshold)] <- 0.3')
    velocity = r('velocity')

    print '\tSaving Velocity Layer'
    raster.writeRaster(velocity, filename=os.path.join(args.outdir, 'velocity.asc'), format='ascii', overwrite=True, NAflag=0)

    # Create Diffusion Raster #######
    print '\tCreating Diffusion File'
    r('diffusion <- velocity')
    r('diffusion[which(diffusion[]==2)] <- 2000')
    r('diffusion[which(diffusion[]==0.3)] <- 1300')
    print '\tSaving Diffusion File'
    diffusion = r('diffusion')
    raster.writeRaster(diffusion, filename=os.path.join(args.outdir, 'diffusion.asc'), format='ascii', overwrite=True, NAflag=0)
    print '\tDone saving diffusion file'
Example no. 52
 def __init__(self, X, Y, family):
     """ initialise the class with X and Y
     Input:
         X:        one dimensional numpy array
         Y:        one dimensional numpy array
         family:   clayton or frank or gumbel
         
         Note: the size of X and Y should be same
     """
     # check dimension of input arrays
     if not ((X.ndim==1) and (Y.ndim==1)):
         raise ValueError('The dimension of array should be one.')
     
     # input array should have same size
     if X.size != Y.size:
         raise ValueError('The size of both array should be same.')
     
     # check if the name of copula family correct
     copula_family = ['clayton', 'frank', 'gumbel']
     if family not in copula_family:
         raise ValueError('The family should be clayton or frank or gumbel')
     
     self.X = X
     self.Y = Y
     self.family = family
     
      # estimate Kendall's rank correlation
     xy = np.vstack([X,Y])
     r.assign('xy',xy.T)
     tau = r('tau. <- cor(xy, method="kendall")[1,2]')[0]
     self.xy = xy
     self.tau = tau          
     
     # estimate pearson R and spearman R
     self.pr = r('cor(xy, method="pearson")[1,2]')[0]
     self.sr = r('cor(xy, method="spearman")[1,2]')[0]
             
     # estimate the parameter of copula
     self._get_parameter()
     
     # set U and V to none
     self.U = None
     self.V = None
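A hedged usage sketch, assuming the surrounding class is importable as Copula (the name is an assumption), that R's copula package is attached for _get_parameter(), and that numpy arrays are transferred to R as in the constructor above:

import numpy as np

x = np.random.normal(size=500)
y = 0.6 * x + np.random.normal(size=500)
cop = Copula(x, y, family='clayton')
print(cop.tau, cop.theta)  # Kendall's tau and the implied Clayton parameter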
Example no. 53
    def render(self, dataframe, path):

        R.library('ggplot2')

        # add all indices as columns
        dataframe.reset_index(inplace=True)

        rframe = pandas.rpy.common.convert_to_r_dataframe(dataframe)

        # for the issue below, see:
        # http://stackoverflow.com/questions/12865218/getting-rid-of-asis-class-attribute
        unAsIs = R('''function (x) {
                         if(typeof(x) %in% c("integer","double")) {
                             class(x) <- "numeric"
                             return (x)}
                         else if (typeof(x) == "character") {
                             class(x) <- "character"
                             return (x) }
                         else {
                             return(x) } }''')

        rframe = R["as.data.frame"](R.lapply(rframe, unAsIs))
        R.assign("rframe", rframe)

        # start plot
        R('''gp = ggplot(rframe)''')

        # add aesthetics and geometries
        try:
            pp = R('''gp + %s ''' % self.statement)
        except ValueError as msg:
            raise ValueError(
                "could not interprete R statement: "
                "gp + %s; msg=%s" % (self.statement, msg))

        figname = re.sub('/', '_', path2str(path))
        r = ResultBlock('#$ggplot %s$#' % figname,
                        title=path2str(path))
        r.rggplot = pp
        r.figname = figname

        return ResultBlocks(r)
Example no. 54
def process(outf, dti_f, bval_f, python=False):
    """
    Take a list of lists of files DTI and b-val files, returns a
    gzip R file with all B0 data arrays stored on it.
    """
    if python:
        import collections
        b0s = collections.OrderedDict()

    for idx, scan in enumerate(bval_f):
        print scan
        basename = os.path.basename(scan)
        print basename
        bval = np.loadtxt(scan)
        bval[np.where(bval==np.min(bval))] = 0
        im = nb.load(dti_f[idx])
        b0_loc = np.where(bval==np.min(bval))[0][0]
        dti = im.get_data()[:,:,:,b0_loc]
        if python:
            b0s[basename] = np.ravel(dti)
        else:
            ro = numpy2ri(np.ravel(dti+1))
            rr = robj.Matrix(ro)
            if idx == 0:
                myl = r.list(basename=rr)
            else:
                myl = r.c(myl, r.list(basename=rr))
    if python:
        import pickle
        # write python dict to a file
        #mydict = {'a': 1, 'b': 2, 'c': 3}
        output = open(outf, 'wb')
        pickle.dump(b0s, output)
        output.close()

        # read python dict back from the file
        # pkl_file = open('myfile.pkl', 'rb')
        # mydict2 = pickle.load(pkl_file)
        # pkl_file.close()
    else:
        r.assign('bar', myl)
        r("save(bar, file='"+outf+"', compress=TRUE)")
Example no. 55
def mtcorrect(p_value_dict, **kwargs):
    """ Apply MT correction.  This is a wrapper for R's p.adjust function.

    Arguments:
     p_value_dict: a dict with keys = probe names, and values of p-values

    kwargs:
     method: MT correction method, from R.  See mtcorrect_methods.  Default is 'none'
      
    Returns:
     adjusted_p
     
 
    """
    continue_flag = True

    method = test_kwarg('method', kwargs, mtcorrect_methods)
    
    try:
        from rpy2.robjects import r
        from rpy2 import robjects
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()
    except ImportError:
        print "ImportError: networkstatistics.mtcorrect() requires a functional rpy2 and R, exiting..."
        continue_flag = False

    if continue_flag:
        row_names = [x for x in p_value_dict.keys()]
        p_values_list = [p_value_dict[id] for id in row_names]
        # need to create an r object first
        p_values_list_r = robjects.FloatVector(p_values_list)
        # need to assign the r object into the r namespace
        r.assign('p_values_list_r', p_values_list_r)
        method_r = mtcorrect_py_2_r_names[method]
        r('corrected_data = p.adjust(p_values_list_r, method = ' + str(method_r) + ')')
        adjusted_p = robjects.numpy2ri.ri2numpy(r('corrected_data'))
        adjusted_p = adjusted_p.tolist()
        adjusted_p = {id: adjusted_p[i] for i, id in enumerate(row_names)}
        return adjusted_p
    else:
        return {}
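A hedged usage sketch (it assumes 'BH' is listed in mtcorrect_methods and that mtcorrect_py_2_r_names maps it onto a quoted R method name, so the generated p.adjust() call is valid R):

pvals = {'probe_a': 0.001, 'probe_b': 0.02, 'probe_c': 0.8}
adjusted = mtcorrect(pvals, method='BH')
print(adjusted)  # e.g. {'probe_a': 0.003, 'probe_b': 0.03, 'probe_c': 0.8}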
Example no. 56
def stat(year):
    print "Calcul des statistiques individuelles"
    
    simul = "C:/til/output/simul.h5"
    simul = HDFStore(simul)
    
    df = simul['entities/register']  
    df = df.loc[df['period']==year]
    # export en R

    not_bool = df.dtypes[df.dtypes != bool]
    df = df.ix[:,not_bool.index]
    r_dataframe = com.convert_to_r_dataframe(df)
    name = 'result_sim'
    r.assign(name, r_dataframe)
    file_dir = "C:/Myliam2/output/" + name+ ".gzip"
    phrase = "save("+name+", file='" +file_dir+"', compress=TRUE)"
    r(phrase) 

    simul.close()
Example no. 57
 def createClassifiers(self, verbose):
     test_date=self.date
     for window in self.training_periods:
         training_start = test_date - datetime.timedelta(days = int(1.7*window))
         day_before = test_date - datetime.timedelta(days=1)
         filename = training_start.strftime('%Y%m%d') + test_date.strftime('%Y%m%d') + self.ticker
         
         # Generate features:
         # (make CSV from the R database [mustn't forget to drop the close column]) 
         if verbose:
             print('Generating feature csvs from R xts..\n')
             print('Current window = {}\n\n'.format(filename))
         r.assign('windowStart', training_start.strftime('%Y-%m-%d'))
         r.assign('windowEnd', day_before.strftime('%Y-%m-%d'))
         r.assign('testDate', test_date.strftime('%Y-%m-%d'))
         r('windowDB<-DB[paste(windowStart,windowEnd,sep="/")]')
         r('windowDB<-subset(windowDB, select = -c(ticker) )')
         r('testDB<-DB[testDate]')
         r('testDB<-subset(testDB, select = -c(ticker) )')
         r.assign('remoteFilename', filename)
         r('write.table(windowDB, file=paste(remoteFilename, "train", sep="."),quote=FALSE,sep=",",eol=";\n",row.names=FALSE,col.names=FALSE)')
         r('write.table(testDB, file=paste(remoteFilename, "test", sep="."),quote=FALSE,sep=",",eol=";\n",row.names=FALSE,col.names=FALSE)')
     
         # Run bash script to invoke learner and generate classifier
         if verbose: print("Creating classifier using Jboost with runADTree.sh bash script..\n\n")
         command_line = '{}/runADTree.sh '.format(os.getcwd()) + os.getcwd() + ' ' + filename
         process = subprocess.Popen([command_line], shell=True)
         retcode = process.wait()
         
         # import classifier from file made on the fly
         if verbose: print("Importing classifier...\n")
         test_file = filename + '.test'
         spec_file = 'spec.spec'
         classifier_name = '{}predict'.format(filename)
         m = __import__(classifier_name, globals(), locals(), ['ADTree'])
         
         # Add new expert to list of experts
         self.experts.append(Expert(getattr(m, 'ATree'), self.trading_days_seen, self.new_exp_freq, (len(self.experts)<=len(self.training_periods))))
         # remove oldest expert
         if len(self.experts)>=self.max_experts: self.experts.pop(0)
         
         # delete all files generated above
         os.remove(filename + '.info')
         os.remove(filename + '.log')
         os.remove(filename + '.output.tree')
         os.remove(filename + '.train')
         os.remove(filename + '.test')
         os.remove(filename + '.test.boosting.info')
         os.remove(filename + '.train.boosting.info')
         os.remove(filename + 'predict.py')
         os.remove(filename + 'predict.pyc')
Example no. 58
def calculate(subset, w0, w1, w2, sub):
	mat_im = subset[0]
	mat_spat = subset[1]
	mat_temp = subset[2]
	# print subset0[:3][:3]
	# print subset1[:3][:3]
	# print subset2[:3][:3]

	l = len(w1)
	dim = len(subset[0])
	new_mat = zeros((dim, dim))
	# print new_mat[:3][:3]
	for i in range(l):
		for y in xrange(dim):
			for x in xrange(dim):
				new_mat[y][x] = w0[i] * mat_im[y][x] + w1[i] * mat_spat[y][x] + w2[i] * mat_temp[y][x]

		mat = array(new_mat, dtype="float64") # <- convert to double precision numeric since R doesn't have unsigned ints
		ro = rpyn.numpy2ri(mat)
		r.assign("bar", ro)
		r("saveRDS(bar, file='./matrix_combined/S"+ str(sub) +"/combined_matrix_"+ str(w0[i]) +"_"+ str(w1[i]) +"_"+ str(w2[i]) +".rds')")