Example #1
import numpy as np
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri as rpyn

def xcorr(ts1, ts2=None, maxlag=72, freq=12):
	"""Replicates the MATLAB function xcorr using rpy2 (which may need to be installed).

	Input:	ts1 and ts2 are time series (1D arrays). If only one array is entered,
			the auto-correlation is calculated.
			maxlag: the maximum lag value.
			freq: for monthly data use freq=12; for annual data, freq=1.
	Output:	cor_out: the correlation value at each lag (numpy array).
			lags: an array with the values of the lags.
	To match the MATLAB output and make it easier to plot cross- and auto-correlations
	together, the auto-correlations are mirrored for lags < 0.
	"""

	#define R functions
	rts=robjects.r['ts']		# R function used to create timeseries
	rccf=robjects.r['ccf']	# R function to calculate cross-correlations
	racf=robjects.r['acf']	# R function to calculate auto-correlations

	#Convert python array to an R vector (Floatvector), then an R timeseries
	ts1_r=rts(robjects.FloatVector(ts1),frequency=freq)

	if ts2 is None:
		# Use autocorrelation if there's only one timeseries
		acf_ts1 = racf(ts1_r,lag_max=maxlag,plot=False)
		ac_ts1=rpyn.ri2numpy(acf_ts1[0])[:,0,0]										# Converts R array back to numpy array
		cor_out=np.concatenate([ac_ts1[::-1],ac_ts1[1:maxlag+1]])	# This mirrors the positive values of the auto-corr

	else:
		ts2_r=rts(robjects.FloatVector(ts2),frequency=freq)
		ccf_ts12 = rccf(ts1_r,ts2_r,lag_max=maxlag,plot=False)
		cc_ts12=rpyn.ri2numpy(ccf_ts12[0])[:,0,0]
		cor_out=cc_ts12

	lags=np.concatenate([np.linspace(-maxlag,-1,num=maxlag),np.linspace(0,maxlag,num=maxlag+1)])

	return cor_out, lags
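
A minimal usage sketch (illustrative only; the series is synthetic and assumes the imports above succeeded):

ts = np.sin(np.linspace(0, 12 * np.pi, 240))
cor, lags = xcorr(ts, maxlag=36, freq=12)  # auto-correlation, mirrored for negative lags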
Example #2
# Imports needed by this snippet (rpy2 2.x era API):
import rpy2.robjects as ro
import rpy2.robjects.numpy2ri as numpy2ri
from rpy2.robjects.vectors import DataFrame
from pandas import DataFrame as PandasDataFrame

def ri2pandas(o):
    if isinstance(o, DataFrame):
        # use the numpy converter
        recarray = numpy2ri.ri2numpy(o)
        res = PandasDataFrame.from_records(recarray)
    else:
        res = ro.default_ri2ro(o)
    return res
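
A hypothetical call (assumes an R data.frame created via robjects):

df_r = ro.r('data.frame(a = 1:3, b = c("x", "y", "z"))')
df_py = ri2pandas(df_r)  # pandas.DataFrame built from the recarray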
Example #3
import time
from rpy2 import rinterface, robjects
import rpy2.robjects.numpy2ri as npri

def getRasterValues(lon, lat, layer, buffervalue):
    startTime = time.time()
    rinterface.initr()
    r = robjects.r
    r.require('raster')
    ras = r.raster(layer)
    rasvalues = r.extract(ras, r.cbind(lon,lat), buffer=buffervalue, small=True)
    values = npri.ri2numpy(rasvalues[0])
    endTime = time.time()
    print(str(endTime - startTime))
    return values
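
An illustrative call (the file name and coordinates are made up; assumes the R 'raster' package is installed):

values = getRasterValues(-60.0, -3.0, 'elevation.tif', 1000)  # 1000 m buffer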
Example #4
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri as rpyn

def custom_pathways(gene_vals, kegg_file, pval):
	importr("KEGGREST")
	importr("org.Mm.eg.db")
	importr("GSEABase")
	result = annotate_ensembl(gene_vals)
	sigs = []
	univ = []
	for key in result:
		if float(gene_vals[key]) < float(pval):
			sigs.append(result[key])
		univ.append(result[key])
	ro.globalenv["sigs"] = sigs
	ro.globalenv["univ"] = univ
	sets = ro.r.getGmt(kegg_file)
	ro.globalenv["sets"] = sets
	ro.r('genes_pathway <- lapply(sets, geneIds)')
	ro.r('names(genes_pathway) <- names(sets)')
	ro.r('hyperg <- Category:::.doHyperGInternal')
	ro.r('''hyperg_test <- function(pathway_genes, significant_genes, all_genes, over=TRUE) {
			white_balls_drawn <- length(intersect(significant_genes, pathway_genes))
			white_balls_in_urn <- length(pathway_genes)
			total_balls_in_urn <- length(all_genes)
			black_balls_in_urn <- total_balls_in_urn - white_balls_in_urn
			balls_pulled <- length(significant_genes)
			hyperg(white_balls_in_urn, black_balls_in_urn, balls_pulled, white_balls_drawn, over) } ''')
	ro.r('pVals_pathway <- t(sapply(genes_pathway, hyperg_test, sigs, univ))')
	ro.r('pVals_pathway <- cbind(rownames(pVals_pathway), pVals_pathway)')
	pvals = ro.r('pVals_pathway')
	vector1 = rpyn.ri2numpy(pvals.rx(True, 1))
	vector2 = rpyn.ri2numpy(pvals.rx(True, 2))
	vector3 = rpyn.ri2numpy(pvals.rx(True, 3))
	vector4 = rpyn.ri2numpy(pvals.rx(True, 4))
	output = open("Hypergeo_pathways.txt", "w")
	output.write("Pathway\tP-value\tOddsRatio\tExpected\n")
	for i, j in enumerate(vector1[0]):
		output.write("{}\t{}\t{}\t{}\n".format(j, vector2[0][i], vector3[0][i], vector4[0][i]))
	output.close()
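
For reference, the urn-model p-value computed by hyperg_test above can also be obtained without R; a sketch using scipy.stats.hypergeom (assuming SciPy is acceptable here; all names are illustrative):

from scipy.stats import hypergeom

def hyperg_test_py(pathway_genes, significant_genes, all_genes):
    # Survival function at k-1 gives P(X >= k), the over-representation p-value.
    k = len(set(significant_genes) & set(pathway_genes))
    return hypergeom.sf(k - 1, len(all_genes), len(pathway_genes), len(significant_genes))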
Example #5
def manager(request):
    params = dict()
    params["queries"] = internal.ListQueries(request, {"projectID": [request.session["projectID"]]})
    if request.method == "POST":
        resp = dict()
        queryname = request.POST.get("query", None)
        dataset = request.POST.get("dataset", None)
        method = request.POST.get("method", None)
        category = request.POST.get("category", None)
        count = request.POST.get("count", 20)
        if not queryname or not dataset or not method or not category:
            return HttpResponse(
                json.dumps("Please check that the input form is complete."), content_type="application/json"
            )
        query = Query.objects.get(project=request.session["projectID"], name=queryname)
        from rpy2 import robjects
        import rpy2.robjects.numpy2ri as rpyn

        # fetch Analysis data of interest
        heatmapScript = SCRIPTPATH + "r/heatmapDataCreator.R"
        robjects.r.source(heatmapScript)
        heatmapCommand = robjects.r["heatmapDataCreator"]

        profiles1 = Analysis.objects.filter(
            project=request.session["projectID"],
            dataset=dataset,
            method=method,
            category=category,
            sample__in=query.expandsamples,
        ).values_list("sample", "entity", "profile")

        # format analysis data and load into R
        profiles = zip(*profiles1)
        profileRdata = robjects.DataFrame(
            {
                "samples": robjects.StrVector(profiles[0]),
                "entity": robjects.StrVector(profiles[1]),
                "profile": robjects.FloatVector(profiles[2]),
            }
        )
        processedMatrix = heatmapCommand(profileRdata, count)
        vector = rpyn.ri2numpy(processedMatrix)
        resp["rows"] = list(processedMatrix.rownames)
        resp["cols"] = list(processedMatrix.colnames)
        resp["maxVal"] = numpy.amax(vector)
        resp["minVal"] = numpy.amin(vector)
        resp["data"] = vector.tolist()
        return HttpResponse(json.dumps(resp), content_type="application/json")
    return render(request, "heatmap.html", params)
Example #6
import numpy as np
from rpy2.robjects.numpy2ri import ri2numpy

def from_dtw2dict(alignment):
    """Auxiliary function that transforms the useful information of R's dtw
    function (called via rpy2) into Python formats.
    """

    dtw_keys = list(alignment.names)
    bool_traceback = 'index1' in dtw_keys and 'index2' in dtw_keys
    bool_traceback = bool_traceback and 'stepsTaken' in dtw_keys

    ## Creating a dict to save all the information in python format
    dtw_dict = {}
    # Transformation into a dict
    dtw_dict['stepPattern'] = ri2numpy(alignment.rx('stepPattern'))
    dtw_dict['N'] = alignment.rx('N')[0]
    dtw_dict['M'] = alignment.rx('M')[0]
    dtw_dict['call'] = alignment.rx('call')
    dtw_dict['openEnd'] = alignment.rx('openEnd')[0]
    dtw_dict['openBegin'] = alignment.rx('openBegin')[0]
    dtw_dict['windowFunction'] = alignment.rx('windowFunction')
    dtw_dict['jmin'] = alignment.rx('jmin')[0]
    dtw_dict['distance'] = alignment.rx('distance')[0]
    dtw_dict['normalizedDistance'] = alignment.rx('normalizedDistance')[0]
    if bool_traceback:
        aux = np.array(ri2numpy(alignment.rx('index1')).astype(int))
        dtw_dict['index1'] = aux
        aux = np.array(ri2numpy(alignment.rx('index2')).astype(int))
        dtw_dict['index2'] = aux
        dtw_dict['stepsTaken'] = ri2numpy(alignment.rx('stepsTaken'))
    elif 'localCostMatrix' in dtw_keys:
        aux = np.array(ri2numpy(alignment.rx('localCostMatrix')))
        dtw_dict['localCostMatrix'] = aux
    elif 'reference' in dtw_keys and 'query' in dtw_keys:
        dtw_dict['reference'] = alignment.rx('reference')
        dtw_dict['query'] = alignment.rx('query')

    return dtw_dict
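
A hypothetical call sketch (assumes the R 'dtw' package is installed; the signals here are synthetic):

import rpy2.robjects as ro
from rpy2.robjects.packages import importr

dtw = importr('dtw')
query = ro.FloatVector(np.sin(np.linspace(0, 6, 100)))
reference = ro.FloatVector(np.cos(np.linspace(0, 6, 100)))
alignment = dtw.dtw(query, reference, keep_internals=True)
info = from_dtw2dict(alignment)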
Example #7
import sys
import rpy2.robjects.numpy2ri as rpyn

def matrix_to_normcount(matrix, samples):
    # read normalized count from matrix (row=clone; col=sample) and
    # update the samples' clones
    samplenames = matrix.colnames
    cloneids = matrix.rownames
    for sample in samples:
        if sample.name not in samplenames:
            sys.stderr.write("Warning: sample %s does not have a normalized "
                             "count.\n" % sample.name)
            continue
        for clone in sample.clones:
            normcount = 0.0
            for id in clone.vjseq_ids:
                assert id in cloneids
                nc = matrix.rx[id, sample.name]
                normcount = normcount + rpyn.ri2numpy(nc)[0]
            clone.set_normcount(normcount)
    return samples
Example #8
from rpy2.robjects import r
import rpy2.robjects.numpy2ri as rpyn

w0 = [1, 0.75, 0.75, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0]
w1 = [0, 0.25, 0, 0.5, 0.25, 0, 0.75, 0.5, 0.25, 0, 1, 0.75, 0.5, 0.25, 0]
w2 = [0, 0, 0.25, 0, 0.25, 0.5, 0, 0.25, 0.5, 0.75, 0, 0.25, 0.5, 0.75, 1]
path = './matrix_dist_norm/'
files = getfiles(path)

subset0 = []
subset1 = []
subset2 = []

print "Subset 0"
for index, file in enumerate(files[:3]):
	sub = 0
	m = r("readRDS('"+ str(file)+"')")
	mat = rpyn.ri2numpy(m)
	subset0.append(mat)

print "Calculating..."
calculate(subset0, w0, w1, w2, sub)	
print "end\n"

print "Subset 1"
for index, file in enumerate(files[3:6]):
	sub = 1
	m = r("readRDS('"+ str(file)+"')")
	mat = rpyn.ri2numpy(m)
	subset1.append(mat)

print "Calculating..."
calculate(subset1, w0, w1, w2, sub)	
Example #9
import rpy2.robjects as R
import rpy2.robjects.numpy2ri as rpyn

# some simple R things
print R.r.median(R.IntVector([1,2,3,4]))[0]

# create two R vectors and do correlation coefficient in R
a = R.IntVector([1,2,3,4])
b = R.IntVector([1,2,3,4])

# need the subscript to get authentic python type?
print R.r.cor(a,b,method="pearson")[0]

my_vec = R.IntVector([1,2,3,4])
my_chr_vec = R.StrVector(['aaa','bbb'])
my_float_vec = R.FloatVector([0.001,0.0002,0.003,0.4])

print "\nconvert to numpy array?"
vector = rpyn.ri2numpy(my_float_vec)
print vector

python_list = list(my_float_vec)
print python_list

bigger_vec = R.IntVector(a+b) # using multiple lists
print list(bigger_vec)


# linear regression
observed = R.FloatVector([1.1, 1.2, 1.3]) # native python list will not do!!
theoretical = R.FloatVector([1.15, 1.25, 1.35])
R.globalenv['observed'] = observed
R.globalenv['theoretical'] = theoretical
m = R.r.lm('theoretical ~ observed')
# R.r.abline(m, col='color')  # add regression line to an EXISTING plot
Example #10
def rx(data, var):
    # Extract the named element 'var' from an R object and convert it to numpy.
    return rpyn.ri2numpy(data.rx2(var))
Example #11
import numpy as np
import rpy2.robjects as ro
import rpy2.robjects.numpy2ri as n2r

n2r.activate()

r = ro.r
r.library('glmnet')

# input files (for this example) need to have header and NO index column
X = np.loadtxt('./x.csv', dtype=float, delimiter=',', skiprows=1)
y = np.loadtxt('./y.csv', dtype=int, delimiter=',', skiprows=1)
y = ro.FactorVector(list(y.transpose()))  # use factors

trained_model = r['cv.glmnet'](X, y, nfolds=3, family="binomial")
lambda_ = np.asanyarray(trained_model.rx2('lambda'))
cvm_ = np.asanyarray(trained_model.rx2('cvm'))
cvsd_ = np.asanyarray(trained_model.rx2('cvsd'))

lambda_min = np.asanyarray(trained_model.rx2('lambda.min'))[0]
min_cvm = cvm_[np.argwhere(lambda_ == lambda_min)[0][0]]

# models whose CV error is within 0.1 * cvsd of the minimum
idx = np.argwhere(cvm_ < min_cvm + 0.1 * cvsd_)
idx[0]  # first (largest-lambda) model inside the tolerance band

fit = trained_model.rx2('glmnet.fit')
beta = n2r.ri2numpy(r['as.matrix'](fit.rx2('beta')))

relvars = np.argwhere(beta[:, idx[0]].transpose()[0] > 1e-5)
print relvars.transpose()[0]
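
A hypothetical follow-up (reusing X, trained_model, and lambda_min from above): class predictions at the selected lambda via R's predict():

pred = r['predict'](trained_model, newx=X, s=lambda_min, type="class")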
Example #12
def testAtomicVectorToNumpy(self):
    v = robjects.vectors.IntVector((1, 2, 3))
    a = rpyn.ri2numpy(v)
    self.assertTrue(isinstance(a, numpy.ndarray))
    self.assertEqual(1, v[0])
Example #13
def testDataFrameToNumpy(self):
    df = robjects.vectors.DataFrame(dict((('a', 1), ('b', 2))))
    reca = rpyn.ri2numpy(df)
    self.assertTrue(isinstance(reca, numpy.recarray))
    self.assertEqual(1, reca.a[0])
    self.assertEqual(2, reca.b[0])
Example #14
def fa(source=False, use_filter="default", data_file="latest", participant_subset="", drop_metadata=True, drop=[], clean=7, factors=5, facecolor="#ffffff"):
    #gets config file:
    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    filter_dir = config.get('Paths', "filter_dir")
    filter_name = config.get("Filters", use_filter)
    #END IMPORT VARIABLES

    filter_path = path.dirname(path.realpath(__file__)) + '/' + filter_dir + filter_name + '.csv'

    filters = DataFrame.from_csv(filter_path, header=None).transpose() # transpose filters because of .csv file formatting
    all_data = DataFrame.from_csv(data_path + data_file + ".csv")
    all_data = all_data.reset_index(level=0)
    #~ print filters["metadata"]

    #clean data of respondents who only check extreme answers:
    all_data = all_data[map(lambda y: len(set(y)) > clean, np.array(all_data))]

    if drop_metadata:
        # drops metadata
        all_data = all_data.drop(filters["metadata"][Series.notnull(filters["metadata"])], axis=1)

    drop_list = []
    for drop_item in drop:
        # compile list of column names to be dropped:
        drop_list += list(filters[drop_item][Series.notnull(filters[drop_item])])
    #get unique column names (the list may contain duplicates if overlaying multiple filters):
    drop_list = list(set(drop_list))

    all_data = all_data.drop(drop_list, axis=1)

    if participant_subset == "odd":
        # selects only odd indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[1::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "even":
        # selects only even indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[0::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "male":
        # selects only male participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Male']
    elif participant_subset == "female":
        # selects only female participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Female']
    else:
        filtered_data = all_data

    #convert to correct type for analysis:
    filtered_data_array = np.array(filtered_data, dtype='float64')

    filtered_data_array = filtered_data_array / 100

    fit = r.factanal(filtered_data_array, factors, rotation='promax')
    load = r.loadings(fit)
    load = numpy2ri.ri2numpy(load)

    load = r.t(load)

    remapped_cmap = remappedColorMap(cm.PiYG, start=(np.max(load)-abs(np.min(load)))/(2*np.max(load)), midpoint=abs(np.min(load))/(np.max(load)+abs(np.min(load))), name='shrunk')

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(17.5, 5), facecolor=facecolor)
    graphic = ax.imshow(load, cmap = remapped_cmap, interpolation='none')
    ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.set_xticklabels([0]+filtered_data.columns.tolist(),fontsize=8,rotation=90)
    ax.set_yticklabels(np.arange(factors+1))
    ax.set_ylabel('Factors')
    ax.set_title("Question Loadings on Factors")

    #Recolor plot spines:
    for spine_side in ["bottom", "top", "left", "right"]:
        ax.spines[spine_side].set_color("#777777")

    #Remove ticks:
    plt.tick_params(axis='both', which='both', left="off", right="off", bottom='off', top='off')

    divider = make_axes_locatable(ax)
    #calculate width for cbar so that it is equal to the question column width:
    cbar_width = str(100/np.shape(load)[1])+ "%"
    cax = divider.append_axes("right", size=cbar_width, pad=0.05)
    cbar = colorbar(graphic, cax=cax, drawedges=True)

    #Limit the number of ticks:
    tick_locator = ticker.MaxNLocator(nbins=6)
    cbar.locator = tick_locator
    cbar.update_ticks()

    #Align ticklabels so that negative values are not misaligned (meaning right align):
    for t in cbar.ax.get_yticklabels():
        t.set_horizontalalignment('right')
        t.set_x(0.045*(np.shape(load)[1]+6))

    #Tweak color bar borders
    cbar.outline.set_color("#666666")
    cbar.dividers.set_linewidth(0)
Example #16
    def doMigration(self, ipSrc, movUsers, ipDst, dstUsers):

        cacheSize = self.getCacheSize(ipDst[0], ipSrc[0][0])

        dIntSrc = []

        for src in range(len(ipSrc)):
            dIntSrc.append(self.getMergedInterests(ipSrc[src], "1 months"))

        dIntDst = self.getMergedInterests(ipDst, "1 months")

        #Aux Lists
        auxB = []
        auxC = []
        auxL = []
        auxS = []

        numCells = len(dIntSrc) + 1

        # need mapping (-1)
        cnt = 1

        print "Processing 1st Source Interests"

        # Adding prefixes requested at 1st source
        for k,v in dIntSrc[0].items():
            popDst = dIntDst[k][1] if k in dIntDst else 0

            popSrcs = 0
            for i in range(numCells-1):
                popSrcs += (dIntSrc[i][k][1] if k in dIntSrc[i] else 0) * movUsers[i]

            popInt = (popSrcs + popDst*dstUsers) / numCells

            ### ID | Popularity
            auxB.extend([cnt, popInt])

            ### ID | File Size
            auxC.extend([cnt, v[0]])

            auxL.append(k)
            auxS.append(v[0])

            cnt += 1

        print "Processing other Sources Interests"

        for j in range(1,numCells-1):
            for k,v in dIntSrc[j].items():

                if k not in auxL:
                    popDst = dIntDst[k][1] if k in dIntDst else 0

                    popSrcs = 0
                    for i in range(j, numCells-1):
                        popSrcs += (dIntSrc[i][k][1] if k in dIntSrc[i] else 0) * movUsers[i]

                    popInt = (popSrcs + popDst*dstUsers) / numCells

                    ### ID | Popularity
                    auxB.extend([cnt, popInt])

                    ### ID | File Size
                    auxC.extend([cnt, v[0]])

                    auxL.append(k)
                    auxS.append(v[0])

                    cnt += 1

        print "Processing Destination Interests"

        # Adding interests requested at destination still missing
        for k,v in dIntDst.items():
            if k not in auxL:
                popInt = (v[1]*dstUsers) / numCells

                ### ID | Popularity
                auxB.extend([cnt, popInt])

                ### ID | File Size
                auxC.extend([cnt, v[0]])

                auxL.append(k)
                auxS.append(v[0])

                cnt += 1

        print "Going to run MADM"

        ## Create matrix Ben and matrix Cost
        mBen = r.matrix(auxB, ncol=2, byrow=True)
        mCost = r.matrix(auxC, ncol=2, byrow=True)

        vBen = ro.FloatVector([1.0])
        vCost = ro.FloatVector([1.0])

        output = MADM(mBen,mCost,vBen,vCost,1)

        if output == -1:
            return 200

        #print output

        outAux = rpyn.ri2numpy(output)

        if outAux[0] == -2:
            return 200
        elif outAux[0] == -1:
            code = self.sendMigrationData(auxL, ipDst)
            return code

        cacheList = []
        usedCache = 0

        for out in outAux:
            if auxS[int(out[0]) - 1] + usedCache < cacheSize:
                cacheList.append(auxL[int(out[0]) - 1])
                usedCache += auxS[int(out[0]) - 1]

            if usedCache == cacheSize:
                break

        #for ent in cacheList:
            #print ent

        code = self.sendMigrationData(cacheList, ipDst)

        return code
Example #18
# rpy2: convert a FloatVector or Matrix back to a Python array or list?
import rpy2.robjects.numpy2ri as rpyn
vector = rpyn.ri2numpy(vector_R)
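
On recent rpy2 (3.x), ri2numpy is gone and conversion goes through a converter context instead; a sketch of the replacement pattern, assuming rpy2 3.x (vector_R as above):

import rpy2.robjects as ro
from rpy2.robjects import numpy2ri
from rpy2.robjects.conversion import localconverter

with localconverter(ro.default_converter + numpy2ri.converter):
    arr = ro.conversion.rpy2py(vector_R)  # numpy.ndarray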
Example #19
parfOrig_r = IntVector(parfOrig)
parfOrig_rc = r.circular(parfOrig_r, units='degrees', template='geographics')
# optimal bandwidth determined based on a VonMises distribution
# using circular.bw_nrd_circular()
if reg == 'andes':
    bandwidth = 2.0
elif reg == 'qf':
    bandwidth = 3.5
elif reg == 'amazon':
    bandwidth = 1.0
# calculate circular density
print 'calculating density for ' + paramOrig + ', please wait...'
dens = r.density(parfOrig_rc, bw=bandwidth, kernel='vonmises')
print 'density for ' + paramOrig + ' OK'
# get density values back to python
densX = rpyn.ri2numpy(dens[1])
densY = rpyn.ri2numpy(dens[2])
gc.collect()
# densityOrig = ss.kde.gaussian_kde(parfOrig)
# x = np.arange(0., np.max(parfOrig), .1)
# yOrig = densityOrig(x)
# plot
# cartesian plot
fig = reg + '_aspect_compass_original_density_circular.svg'
print 'fig will be: ' + fig
plt.ylabel('density')
plt.xlabel('aspect_compass')
plt.plot(densX, densY, label=paramOrig)
plt.xticks((-270, -180, -90, 0, 90), ('E', 'S', 'W', 'N', 'E'))
print 'cartesian plot for ' + paramOrig + ' OK'
gc.collect()