def main():
    """Bin the LC-MS data of every input file and hand the matrix to R.

    Steps, in order:
      1. Read the input file list and output directory from
         parse_arguments() and make the output directory the working
         directory.
      2. Allocate a matrix with one row per input file and
         1 + rt_grid_size * mz_grid_size columns (a 6-character ID column
         followed by one column per rt/mz bin).
      3. Let fill_row_of_lcms_matrix() populate one row per file (only the
         first 1000 files are processed).
      4. Convert the matrix to an R data.frame and run the R function
         run_predictions_wrap() on it with lcms_run=2, printing its result.

    Elapsed wall-clock time is reported through log_statement() before the
    R section and again at the end.
    """
    filenames, outdir = parse_arguments()  # filenames will be a list
    os.chdir(outdir)  # everything below runs relative to the output directory

    started_at = time.time()

    def minutes_elapsed():
        # Wall-clock minutes since main() began its work.
        return (time.time() - started_at) / 60.

    rt_grid_size = 50
    mz_grid_size = 50
    max_files = 1000  # rows at or beyond this index are left unfilled
    n_files = len(filenames)

    log_statement("Number of mzml patient files: {}".format(n_files))

    # Build the empty array that will hold the LCMS values for the R
    # prediction: (# mzml files) x (# rt/mz bins + 1 for patient ID).
    id_column = np.zeros((n_files, 1), dtype='S6')  # 'S6' == 'a6': 6-char str
    bin_columns = np.zeros((n_files, rt_grid_size * mz_grid_size),
                           dtype=float)  # "i vals" (presumably intensities)
    # NOTE(review): stacking a bytes column with float columns yields one
    # array with a single common dtype -- confirm that
    # fill_row_of_lcms_matrix and the R code expect that representation.
    respD = np.hstack((id_column, bin_columns))

    # Fill the array, one row per input file.
    for row_index, mzml_path in enumerate(filenames):
        if row_index >= max_files:
            break
        respD = fill_row_of_lcms_matrix(
            respD, rt_grid_size, mz_grid_size, row_index, mzml_path)

    # Single-argument print(...) emits the same text as the two-item
    # Python 2 print statement (items were joined by one space).
    preview = respD[0:5, 0:20]
    print("\n Data to use in R: " + " " + str(preview))

    log_statement(
        "Time till beginning of R section: {} minutes".format(
            minutes_elapsed()))

    # Convert the numpy array into a data.frame recognized by R.
    make_r_dataframe = robj.r['data.frame']
    Rdf = make_r_dataframe(numpy2ri(respD))

    # R wrapper that sources the prediction script and runs it on the
    # data.frame built above.
    myRcode = """
    doR <- function(python_respD, lcms_run) {
        source("/srv/scratch/carolyn/Dengue_code/prediction_with_LCMS_from_python.R")
        run_predictions_wrap(python_respD, lcms_run)
    }
    """
    Rpack = SignatureTranslatedAnonymousPackage(myRcode, "Rpack")

    # Run the function doR, found in Rpack, and show what it returns.
    prediction_output = Rpack.doR(Rdf, lcms_run=2)
    print(prediction_output)

    log_statement(
        "Total execution time: {} minutes".format(minutes_elapsed()))
    # Original author's note: 40 min to create binned data using pickles.