# Pythontest_ryp.py
import pyximport; pyximport.install()
import my_cython_functions #cython code used in this script
import rpy2 #allows me to call R functions from python
import rpy2.robjects as robj
from rpy2.robjects.numpy2ri import numpy2ri #submodules not imported automatically
#the following import will allow me to import arbitrary R code as a package
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import os, sys
import cPickle as pickle
import pymzml
import numpy as np
import math
import matplotlib
matplotlib.use('Agg')
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import time
def screw_around():
    """Scratch experiments with rpy2.

    Prints R's ``pi`` object in several forms, wraps a small fake binned
    array in an R data.frame, then defines two R functions from an inline
    R source string and prints the result of calling each of them.
    """
    # Single-argument print(...) is used throughout; it prints the same
    # thing whether it is parsed as a statement or as a function call.
    r_pi = robj.r['pi']
    print(r_pi)
    print(r_pi+2)
    print(r_pi[0])
    print(r_pi[0]+2)

    # Fake binned array: 5 x 10 float64 holding 0..49 in row-major order.
    nrow = 5
    ncol = 10
    binned = np.arange(nrow * ncol, dtype="float64").reshape((nrow, ncol))

    # Get the binned array into an R data.frame.
    print(numpy2ri(binned))
    rdf = robj.r['data.frame'](numpy2ri(binned), code="ID1000")

    # Now see if we can get R to use this data.frame.
    myRcode = """
square <- function(rdf) {
myv = rdf$X2 + rdf$X3
return(myv)
}
doit <- function() {
source("/srv/scratch/carolyn/Dengue_code/Rtest_rpy.R")
run_test_wrap(3)
}
"""
    print("wwwwah")
    powerpack = SignatureTranslatedAnonymousPackage(myRcode, "powerpack")
    print(powerpack._rpy2r.keys())  # reveals the functions within powerpack
    print(powerpack.square(rdf))  # runs the R function "square"
    print(powerpack.doit())
def build_row_of_lcms_matrix(binned, respD, nrow, ncol, filecount, filename):
    """Fill one row of `respD` with a patient ID and the flattened bins.

    Column 0 of row `filecount` receives "ID" followed by the four
    characters that come right after the "Nicaserhilic" marker in
    `filename`. Columns 1 .. nrow*ncol receive binned[row, col] in
    row-major order.

    Parameters:
        binned    -- 2-D array indexed as binned[row, col]
        respD     -- 2-D array to fill; modified in place
        nrow      -- number of rows of `binned` to copy
        ncol      -- number of columns of `binned` to copy
        filecount -- index of the row of `respD` to fill
        filename  -- path of the data file; must contain "Nicaserhilic"

    Returns:
        `respD` (the same object that was passed in).

    Raises:
        ValueError -- if `filename` does not contain the marker, so no ID
                      can be derived. `respD` is left untouched in that case.
    """
    marker = "Nicaserhilic"
    print(os.path.abspath(filename))

    # First column should contain "code", which is IDXXXX.
    start_pos = filename.find(marker)
    if start_pos < 0:
        # str.find returns -1 when the marker is missing; slicing from that
        # offset would silently produce a bogus ID.
        raise ValueError(
            "filename %r does not contain %r; cannot derive patient ID"
            % (filename, marker))
    id_start = start_pos + len(marker)
    IDcode = "ID" + filename[id_start:id_start + 4]
    # Two spaces on purpose: matches the output of `print "ID: " , IDcode`.
    print("ID:  " + IDcode)
    respD[filecount, 0] = IDcode

    # Remaining columns: the binned values, one cell at a time, row by row.
    cell = 1
    for row in range(nrow):
        for col in range(ncol):
            respD[filecount, cell] = binned[row, col]
            cell += 1
    return respD
###############################################################################
def parse_arguments():
    """Split the command line into input filenames and an output directory.

    When running from the command prompt, expect filename(s) followed by
    the output directory.
    Ex: python /srv/scratch/carolyn/Dengue_code/process_raw_data_and_do_prediction.py
            /srv/scratch/carolyn/mzml_serumR1/Ni*.mzML
            /srv/scratch/carolyn/Results/

    Returns:
        (filenames, outdir) -- `filenames` is the list of every argument
        except the last; `outdir` is the last argument as an absolute path.

    Raises:
        SystemExit -- if no arguments were given. Without this check the
                      script's own path would be returned as `outdir`.
    """
    args = sys.argv[1:]
    if not args:
        raise SystemExit("usage: python <script> FILE [FILE ...] OUTDIR")
    return args[:-1], os.path.abspath(args[-1])
def main():
    """Build fake binned data, turn it into R data.frames and call R on it.

    Reads the input filenames and output directory from the command line,
    changes into the output directory, then tries two ways of assembling a
    matrix made of a patient-ID column followed by the flattened bins. The
    second matrix is converted to an R data.frame and passed to the R
    function `doit` defined inline below.
    """
    filenames, outdir = parse_arguments()  # filenames will be a list
    os.chdir(outdir)  # change pwd to output directory

    # Will need to get intensity_2D_binned into an R data.frame.
    # intensity_2D_binned was created in my_cython_functions.pyx as follows:
    #   cdef np.ndarray[np.float_t, ndim=2] my2Da
    #   my2Da = np.zeros((rt_grid_size, mz_grid_size))

    # Fake binned array: 5 x 10 float64 holding 0..49 in row-major order.
    nrow = 5
    ncol = 10
    nele = nrow * ncol
    binned = np.arange(nele, dtype="float64").reshape((nrow, ncol))

    ### Option 1 ###
    # Turn each binned array into one row of what will be an R data.frame,
    # then add an ID to each row and combine the rows.
    flat = np.reshape(binned, nele)
    labelled_rows = [
        np.hstack((np.array([code]), flat))  # concatenate also works
        for code in ("ID1001", "ID1002")
    ]
    stacked = np.vstack(labelled_rows)  # 2 by 51
    rrdf1 = robj.r['data.frame'](numpy2ri(stacked))

    ### Option 2 ###
    # Build an empty array that is 5 (# mzml files) by 51 (# rt/mz bins + 1
    # for patient ID), then fill each row with the binned data.
    id_column = np.zeros((5, 1), dtype='a6')  # a6: 6 character string
    bin_columns = np.zeros((5, nele), dtype="float64")
    respD = np.hstack((id_column, bin_columns))
    print(respD.shape)
    # Only the first two files are processed.
    for filecount, filename in enumerate(filenames[:2]):
        respD = build_row_of_lcms_matrix(
            binned, respD, nrow, ncol, filecount, filename)
    df2 = robj.r['data.frame'](numpy2ri(respD))
    print(df2)

    # R wrapper for the prediction code; currently not executed (see the
    # commented-out calls underneath).
    prediction_rcode = """
doR <- function(python_respD, lcms_run) {
source("/srv/scratch/carolyn/Dengue_code/prediction_with_LCMS_from_python.R")
run_predictions_wrap(python_respD, lcms_run)
}
"""
    #Rpack = SignatureTranslatedAnonymousPackage(prediction_rcode, "Rpack")
    #print Rpack._rpy2r.keys() #to reveal the functions within Rpack
    #print Rpack.doR(df2, 1) #to run the function found in Rpack

    # Now see if we can get R to use this data.frame.
    test_rcode = """
square <- function(rdf) {
myv = rdf$X2 + rdf$X3
return(myv)
}
doit <- function(input) {
source("/srv/scratch/carolyn/Dengue_code/Rtest_rpy.R")
run_test_wrap(input)
}
"""
    print("wwwwah")
    powerpack = SignatureTranslatedAnonymousPackage(test_rcode, "powerpack")
    #print powerpack._rpy2r.keys() #to reveal the functions within powerpack
    #print powerpack.square(df2) #to run the function "square" found in powerpack
    print(powerpack.doit(df2))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()