def test_push_dataframe(self):
    """Push a pandas DataFrame to R with ``%Rpush`` and inspect it from R."""
    records = [
        {'a': 1, 'b': 'bar'},
        {'a': 5, 'b': 'foo', 'c': 20},
    ]
    df = pd.DataFrame(records)
    self.ip.push({'df': df})
    self.ip.run_line_magic('Rpush', 'df')

    # This is converted to factors, which are currently converted back to
    # Python as integers, so for now we test its representation in R.
    captured = StringIO()
    rinterface.set_writeconsole_regular(captured.write)
    try:
        r('print(df$b[1])')
        self.assertIn('[1] bar', captured.getvalue())
    finally:
        rinterface.set_writeconsole_regular(None)

    # Values come packaged in arrays, so we unbox them to test.
    second_a = r('df$a[2]')[0]
    self.assertEqual(second_a, 5)
    # Column 'c' is absent from the first record, so a NaN is expected.
    missing = r('df$c[1]')[0]
    assert np.isnan(missing), missing
def setUp(self):
    """Remember the current regular-console writer, then install a no-op one."""
    self.console = rinterface.get_writeconsole_regular()
    # Discard whatever R writes to its regular console from here on.
    rinterface.set_writeconsole_regular(lambda _text: None)
def eval(self, code):
    '''
    Parse and evaluate a line of R code with rpy2.

    Returns the output to R's stdout() connection,
    the value generated by evaluating the code, and a
    boolean indicating whether the return value would be
    visible if the line of code were evaluated in an R REPL.

    R Code evaluation and visibility determination are done via an R call
    of the form withVisible(code_string), and this entire expression needs
    to be evaluated in R (we can't use rpy2 function proxies here, as
    withVisible is a LISPy R function).

    Raises RInterpreterError when rpy2 raises RRuntimeError or ValueError;
    the console output flushed at that point is attached to the error.
    The previously installed console writer is restored on every exit
    path, including the error path.
    '''
    old_writeconsole_regular = ri.get_writeconsole_regular()
    ri.set_writeconsole_regular(self.write_console_regular)
    try:
        try:
            # Need the newline in case the last line in code is a comment
            value, visible = ro.r("withVisible({%s\n})" % code)
        except (ri.RRuntimeError, ValueError) as exception:
            # otherwise next return seems to have copy of error
            warning_or_other_msg = self.flush()
            raise RInterpreterError(code, str_to_unicode(str(exception)),
                                    warning_or_other_msg)
        text_output = self.flush()
    finally:
        # Without this, a failing evaluation left our writer installed.
        ri.set_writeconsole_regular(old_writeconsole_regular)
    return text_output, value, visible[0]
def R_package_retrieve(package, not_exists_ok=True):
    """
    Return the rpy2 handle for an R package, importing it on first use.

    Successfully imported packages are stored in the module-level
    ``R_packages`` mapping and served from there on later calls.

    Example package names:
    ["dynamicTreeCut", "fastcluster", "phyloseq", "philr", "ape", "edgeR"]

    Parameters
    ----------
    package : str
        Name of the R package, passed to ``importr``.
    not_exists_ok : bool
        When True (default) a package that cannot be imported is reported
        on stderr and ``None`` is returned; when False ``ImportError`` is
        raised instead.
    """
    from rpy2.robjects.packages import importr

    if rpy2_version_major == 2:
        from rpy2.rinterface import RRuntimeError
        importing_error = RRuntimeError
        pandas2ri.activate()
        ri.set_writeconsole_regular(None)  # How do I do this v3?
    else:
        # Previously only `== 3` was handled, which left `importing_error`
        # unbound for any other major version and turned a missing package
        # into a NameError.
        from rpy2.robjects.packages import PackageNotInstalledError
        importing_error = PackageNotInstalledError

    try:
        return R_packages[package]
    except KeyError:
        pass

    try:
        R_packages[package] = importr(package)
    except importing_error as err:
        msg = f"{package} is not available"
        if not not_exists_ok:
            raise ImportError(msg) from err
        print(msg, file=sys.stderr)
        return None
    return R_packages[package]
def eval(self, code):
    """
    Parse and evaluate a line of R code with rpy2.

    Returns the output to R's stdout() connection,
    the value generated by evaluating the code, and a
    boolean indicating whether the return value would be
    visible if the line of code were evaluated in an R REPL.

    R Code evaluation and visibility determination are done via an R call
    of the form withVisible(code_string), and this entire expression needs
    to be evaluated in R (we can't use rpy2 function proxies here, as
    withVisible is a LISPy R function).

    Raises RInterpreterError when rpy2 raises RRuntimeError or ValueError;
    the console output flushed at that point is attached to the error.
    The previously installed console writer is restored on every exit
    path, including the error path.
    """
    old_writeconsole_regular = ri.get_writeconsole_regular()
    ri.set_writeconsole_regular(self.write_console_regular)
    try:
        try:
            # Need the newline in case the last line in code is a comment
            value, visible = ro.r("withVisible({%s\n})" % code)
        except (ri.RRuntimeError, ValueError) as exception:
            # otherwise next return seems to have copy of error
            warning_or_other_msg = self.flush()
            raise RInterpreterError(code, str(exception),
                                    warning_or_other_msg)
        text_output = self.flush()
    finally:
        # Without this, a failing evaluation left our writer installed.
        ri.set_writeconsole_regular(old_writeconsole_regular)
    return text_output, value, visible[0]
def redirect_stdout(self):
    """Start collecting R's regular console output in ``self.stdout``."""
    self.stdout = []
    # self.stdout is looked up on each call, so a later reassignment of the
    # attribute is honoured by the installed writer.
    rinterface.set_writeconsole_regular(
        lambda chunk: self.stdout.append(chunk))
def set_r_writeconsole(callback):
    """
    Sets the (regular) R console to output to the callback function instead

    With rpy2 3 the writer is replaced by assigning to
    ``rpy2.rinterface_lib.callbacks.consolewrite_print``; otherwise the
    callback is handed to ``set_writeconsole_regular``.
    """
    if RPY2_MAJOR_VERSION == 3:
        # The function should be overridden with an assignment. It has to be
        # an assignment on the callbacks module: binding a local name (as
        # the previous code did) has no effect on rpy2.
        from rpy2.rinterface_lib import callbacks
        callbacks.consolewrite_print = callback
    else:
        # The function should be called with the callback as an argument
        set_writeconsole_regular(callback)
def testChooseFileWithError(self):
    """An exception raised in the choose-file callback surfaces as RRuntimeError."""
    # Silence the regular console; reverted by the tearDown method.
    rinterface.set_writeconsole_regular(lambda _text: None)

    def failing_chooser(prompt):
        raise Exception("Doesn't work.")

    rinterface.set_choosefile(failing_chooser)
    file_choose = rinterface.baseenv["file.choose"]
    self.assertRaises(rinterface.RRuntimeError, file_choose)
    # The Python-side exception is expected to be left in sys.last_value.
    self.assertEqual("Doesn't work.", str(sys.last_value))
def testSetFlushConsole(self):
    """A custom flush-console callback is registered and called by R's flush.console()."""
    flush = {'count': 0}

    def f():
        flush['count'] = flush['count'] + 1

    rinterface.set_flushconsole(f)
    try:
        self.assertEqual(rinterface.get_flushconsole(), f)
        rinterface.baseenv.get("flush.console")()
        self.assertEqual(1, flush['count'])
    finally:
        # Restore the *flush* callback. The previous code passed
        # consoleFlush to set_writeconsole_regular, which left `f`
        # installed as the flush callback and replaced the console writer.
        rinterface.set_flushconsole(rinterface.consoleFlush)
def testSetWriteConsoleRegular(self):
    """A custom regular-console writer receives what R's print() emits."""
    chunks = []

    def record(text):
        chunks.append(text)

    rinterface.set_writeconsole_regular(record)
    self.assertEqual(rinterface.get_writeconsole_regular(), record)

    r_print = rinterface.baseenv["print"]
    r_print(rinterface.SexpVector(["3"], rinterface.STRSXP))
    self.assertEqual('[1] "3"\n', ''.join(chunks))
def testSetWriteConsoleRegular(self):
    """Install a console writer, print an R string vector, and check the captured text."""
    written = []

    def collect(piece):
        written.append(piece)

    rinterface.set_writeconsole_regular(collect)
    installed = rinterface.get_writeconsole_regular()
    self.assertEqual(installed, collect)

    string_vector = rinterface.SexpVector(["3"], rinterface.STRSXP)
    rinterface.baseenv["print"](string_vector)
    captured = ''.join(written)
    self.assertEqual('[1] "3"\n', captured)
def testChooseFileWithError(self):
    """file.choose must raise RRuntimeError when the Python callback fails."""
    def discard(_text):
        return None

    # reverted by the tearDown method
    rinterface.set_writeconsole_regular(discard)

    def broken_callback(prompt):
        raise Exception("Doesn't work.")

    rinterface.set_choosefile(broken_callback)
    choose = rinterface.baseenv["file.choose"]
    with self.assertRaises(rinterface.RRuntimeError):
        choose()
    last_error_text = str(sys.last_value)
    self.assertEqual("Doesn't work.", last_error_text)
def testWriteConsoleRegularWithError(self):
    """An exception raised inside the console writer ends up in sys.last_value."""
    def f(x):
        raise CustomException("Doesn't work.")

    rinterface.set_writeconsole_regular(f)
    saved_stderr = sys.stderr
    # The context manager closes the temporary file; previously it was
    # never closed.
    with tempfile.NamedTemporaryFile() as tmp_file:
        sys.stderr = tmp_file
        try:
            code = rinterface.SexpVector(["3"], rinterface.STRSXP)
            rinterface.baseenv["print"](code)
        finally:
            # Restore stderr on every exit path, not only for Exception.
            sys.stderr = saved_stderr
        tmp_file.flush()
        tmp_file.seek(0)
    self.assertEqual("Doesn't work.", str(sys.last_value))
def test_push_dataframe(self):
    """Check ``%Rpush`` with a DataFrame whose first row lacks column 'c'."""
    rows = [{'a': 1, 'b': 'bar'}, {'a': 5, 'b': 'foo', 'c': 20}]
    frame = pd.DataFrame(rows)
    self.ip.push({'df': frame})
    self.ip.run_line_magic('Rpush', 'df')

    # This is converted to factors, which are currently converted back to
    # Python as integers, so for now we test its representation in R.
    console_text = StringIO()
    rinterface.set_writeconsole_regular(console_text.write)
    try:
        r('print(df$b[1])')
        printed = console_text.getvalue()
        self.assertIn('[1] bar', printed)
    finally:
        rinterface.set_writeconsole_regular(None)

    # Values come packaged in arrays, so we unbox them to test.
    self.assertEqual(r('df$a[2]')[0], 5)
    missing = r('df$c[1]')[0]
    assert np.isnan(missing), missing
def testWriteConsoleRegularWithError(self):
    """A failing console writer must leave its exception in sys.last_value."""
    def f(x):
        raise CustomException("Doesn't work.")

    rinterface.set_writeconsole_regular(f)
    original_stderr = sys.stderr
    # Use a context manager so the temporary file is closed; it used to be
    # left open.
    with tempfile.NamedTemporaryFile() as tmp_file:
        sys.stderr = tmp_file
        try:
            code = rinterface.SexpVector(["3"], rinterface.STRSXP)
            rinterface.baseenv["print"](code)
        finally:
            # Always put the real stderr back, whatever was raised.
            sys.stderr = original_stderr
        tmp_file.flush()
        tmp_file.seek(0)
    self.assertEqual("Doesn't work.", str(sys.last_value))
def select(self):
    """
    Run R's ``knockoff.filter`` on ``self.X`` / ``self.Y`` at level ``self.q``.

    A custom R ``knockoffs`` function is defined first; it adds Gaussian
    noise multiplied by ``self.knockoff_chol`` to X. The statistic is
    chosen by ``self.forward_step`` / ``self.sqrt_lasso`` (the filter's
    default is used when neither is set).

    Returns
    -------
    tuple of two integer arrays, both built from the selected indices
    (shifted by -1 on the R side when the selection is non-empty).
    """
    numpy2ri.activate()
    try:
        rpy.r.assign('chol_k', self.knockoff_chol)
        rpy.r('''
        knockoffs = function(X) {
           mu = rep(0, ncol(X))
           mu_k = X # sweep(X, 2, mu, "-") %*% SigmaInv_s
           X_k = mu_k + matrix(rnorm(ncol(X) * nrow(X)), nrow(X)) %*% chol_k
           return(X_k)
        }
        ''')

        rpy.r.assign('X', self.X)
        rpy.r.assign('Y', self.Y)
        rpy.r.assign('q', self.q)
        if self.forward_step:
            rpy.r(
                'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs, stat=stat.forward_selection)$selected'
            )
        elif self.sqrt_lasso:
            # Silence R's console for this statistic, and make sure the
            # default printer comes back even if R raises.
            rinterface.set_writeconsole_regular(null_print)
            try:
                rpy.r(
                    'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs, stat=stat.sqrt_lasso)$selected'
                )
            finally:
                rinterface.set_writeconsole_regular(rinterface.consolePrint)
        else:
            rpy.r(
                'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs)$selected'
            )
        rpy.r('if (length(V) > 0) {V = V-1}')
        V = rpy.r('V')
    finally:
        # Always undo the numpy conversion, also when R fails.
        numpy2ri.deactivate()
    # `np.int` was an alias of the builtin `int` and is gone from recent
    # NumPy releases; the builtin yields the same dtype.
    return np.asarray(V, int), np.asarray(V, int)
def R(self, line, cell=None, local_ns=None):
    """
    Execute code in R, optionally returning results to the Python runtime.

    In line mode, this will evaluate an expression and convert the returned
    value to a Python object.  The return value is determined by rpy2's
    behaviour of returning the result of evaluating the final expression.

    Multiple R expressions can be executed by joining them with semicolons::

        In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
        Out[9]: array([ 4.25])

    In cell mode, this will run a block of R code. The resulting value
    is printed if it would be printed when evaluating the same code
    within a standard R REPL.

    Nothing is returned to python by default in cell mode::

        In [10]: %%R
           ....: Y = c(2,4,3,9)
           ....: summary(lm(Y~X))

        Call:
        lm(formula = Y ~ X)

        Residuals:
            1     2     3     4
         0.88 -0.24 -2.28  1.64

        Coefficients:
                    Estimate Std. Error t value Pr(>|t|)
        (Intercept)   0.0800     2.3000   0.035    0.975
        X             1.0400     0.4822   2.157    0.164

        Residual standard error: 2.088 on 2 degrees of freedom
        Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
        F-statistic: 4.651 on 1 and 2 DF,  p-value: 0.1638

    In the notebook, plots are published as the output of the cell::

        %R plot(X, Y)

    will create a scatter plot of X vs Y.

    If cell is not None and line has some R code, it is prepended to
    the R code in cell.

    Objects can be passed back and forth between rpy2 and python via the
    -i -o flags in line::

        In [14]: Z = np.array([1,4,5,10])

        In [15]: %R -i Z mean(Z)
        Out[15]: array([ 5.])

        In [16]: %R -o W W=Z*mean(Z)
        Out[16]: array([  5.,  20.,  25.,  50.])

        In [17]: W
        Out[17]: array([  5.,  20.,  25.,  50.])

    The return value is determined by these rules:

    * If the cell is not None (i.e., has contents), the magic returns None.

    * If the final line results in a NULL value when evaluated
      by rpy2, then None is returned.

    * No attempt is made to convert the final value to a structured array.
      Use %Rget to push a structured array.

    * If the -n flag is present, there is no return value.

    * A trailing ';' will also result in no return value as the last
      value in the line is an empty string.

    A NameError is raised when a converter or input name cannot be found in
    the local or user namespace, and a ValueError when the named converter
    is not a Converter instance.
    """
    args = parse_argstring(self.R, line)

    # arguments 'code' in line are prepended to
    # the cell lines
    if cell is None:
        code = ''
        return_output = True
        line_mode = True
    else:
        code = cell
        return_output = False
        line_mode = False

    code = ' '.join(args.code) + code

    # if there is no local namespace then default to an empty dict
    if local_ns is None:
        local_ns = {}

    if args.converter is None:
        converter = self.converter
    else:
        # Look the converter up by name: local namespace first, then the
        # shell's user namespace.
        try:
            converter = local_ns[args.converter]
        except KeyError:
            try:
                converter = self.shell.user_ns[args.converter]
            except KeyError:
                raise NameError("name '%s' is not defined" % args.converter)
        if not isinstance(converter, Converter):
            # Report the type of the object that was looked up (the message
            # used to show the type of the `localconverter` function).
            raise ValueError(
                "'%s' must be a %s object (but it is a %s)."
                % (args.converter, Converter, type(converter)))

    if args.input:
        for name in ','.join(args.input).split(','):
            try:
                val = local_ns[name]
            except KeyError:
                try:
                    val = self.shell.user_ns[name]
                except KeyError:
                    raise NameError("name '%s' is not defined" % name)
            with localconverter(converter):
                ro.r.assign(name, val)

    tmpd = self.setup_graphics(args)

    # Bound up front so that caching below cannot hit an unbound name when
    # no png/svg graphics were produced.
    display_data = []
    text_output = ''
    try:
        if line_mode:
            for line in code.split(';'):
                text_result, result, visible = self.eval(line)
                text_output += text_result
            if text_result:
                # the last line printed something to the console so we
                # won't return it
                return_output = False
        else:
            text_result, result, visible = self.eval(code)
            text_output += text_result
            if visible:
                old_writeconsole_regular = ri.get_writeconsole_regular()
                ri.set_writeconsole_regular(self.write_console_regular)
                try:
                    ro.r.show(result)
                    text_output += self.flush()
                finally:
                    # Put the previous writer back even if show() fails.
                    ri.set_writeconsole_regular(old_writeconsole_regular)

    except RInterpreterError as e:
        # TODO: Maybe we should make this red or something?
        print(e.stdout)
        if not e.stdout.endswith(e.err):
            print(e.err)
        if tmpd:
            rmtree(tmpd)
        return
    finally:
        if self.device in ['png', 'svg']:
            ro.r('dev.off()')

    if text_output:
        # display_data.append(('RMagic.R', {'text/plain':text_output}))
        publish_display_data(data={'text/plain': text_output},
                             source='RMagic.R')
    # publish the R images
    if self.device in ['png', 'svg']:
        display_data, md = self.publish_graphics(tmpd, args.isolate_svgs)

        for tag, disp_d in display_data:
            publish_display_data(data=disp_d, source=tag, metadata=md)

        # kill the temporary directory - currently created only for "svg"
        # and "png" (else it's None)
        rmtree(tmpd)

    if args.output:
        with localconverter(converter):
            for output in ','.join(args.output).split(','):
                output_ipy = ro.globalenv.get(output)
                self.shell.push({output: output_ipy})

    # this will keep a reference to the display_data
    # which might be useful to other objects who happen to use
    # this method
    if self.cache_display_data:
        self.display_cache = display_data

    # We're in line mode and return_output is still True,
    # so return the converted result
    if return_output and not args.noreturn:
        if result is not ri.NULL:
            with localconverter(converter) as cv:
                res = cv.ri2py(result)
            return res
import rpy2 import rpy2.robjects as robjects from rpy2 import rinterface from rpy2.robjects import pandas2ri from rpy2.robjects.functions import SignatureTranslatedFunction from collections import OrderedDict from analysis.helpers import constants as a_consts from analysis import execute from misconceptions.common import datatypes from misconceptions.rUtils import generator, dataframer from utils import cache pandas2ri.activate() rinterface.set_writeconsole_warnerror(None) rinterface.set_writeconsole_regular(None) r_source = robjects.r['source'] R_GEN_PREFIX = "gen_func_r_" FUNC_BODY_REGEX = r'function\s*\(.*?\)\s*((.|\s)+)' FUNCTION_STORE = "/Users/panzer/Raise/ProgramRepair/CodeSeer/code/src/main/python/expt/r_functions.pkl" def get_R_error_message(exception): return exception.message.strip() def get_env_variables(r_file_path): try: robjects.r(''' source('%s')
def annotate_peaks(dict_of_dfs, folder, genome, log_file, db='UCSC', check=False, run_main=False):
    '''
    Annotate a dictionary of dataframes from bed files to the genome using
    ChIPseeker and Ensembl annotations.

    Inputs
    ------
    dict_of_dfs: dictionary of dataframes read from bed files. The first
        three columns are treated as chr/start/end; the column labels of
        each dataframe are overwritten in place.
    folder: output folder
    genome: hg38, hg19, mm10
    log_file: passed through to output()
    db: default UCSC, but can also accept Ensembl
    check: bool. checks whether annotation file already exists
    run_main: passed through to output()

    Returns
    -------
    dictionary of annotated bed files as dataframe, keyed '<key>_annotated'

    Raises
    ------
    ValueError if db is not UCSC/Ensembl or genome is not mm10/hg38/hg19.
    '''
    pandas2ri.activate()

    ri.set_writeconsole_regular(rout_write)
    ri.set_writeconsole_warnerror(rout_write)

    folder = make_folder(folder)

    chipseeker = importr('ChIPseeker')
    # Imported for its side effect of loading the R package.
    importr('GenomicFeatures')
    makeGR = ro.r("makeGRangesFromDataFrame")

    genome_key = genome.lower()
    db_key = db.lower()

    def annotated_path(key):
        # Location of the cached annotation spreadsheet for one entry.
        return f'{folder}{key.replace(" ", "_")}_annotated.xlsx'

    check_df = {key: os.path.isfile(annotated_path(key)) for key in dict_of_dfs}

    # Everything is already on disk: load it and skip the R work entirely.
    if check and all(check_df.values()):
        return {f'{key}_annotated': pd.read_excel(annotated_path(key))
                for key in dict_of_dfs}

    if db_key == 'ucsc':
        species = 'Mmusculus' if genome_key == 'mm10' else 'Hsapiens'
        # Load the TxDb package so the object is visible to the R call below.
        importr(f'TxDb.{species}.UCSC.{genome_key}.knownGene')
        txdb = ro.r(f'txdb <- TxDb.{species}.UCSC.{genome_key}.knownGene')
    elif db_key == 'ensembl':
        # NOTE(review): placeholder path, Ensembl support looks unfinished.
        pwd = 'todo'
        loadDb = ro.r('loadDb')
        txdb = loadDb(pwd.format(genome_key))
    else:
        raise ValueError('UCSC or Ensembl only.')

    if genome_key == 'mm10':
        importr('org.Mm.eg.db')
        anno = 'org.Mm.eg.db'
    elif genome_key in ('hg38', 'hg19'):
        importr('org.Hs.eg.db')
        anno = 'org.Hs.eg.db'
    else:
        # Previously `anno` stayed unbound here and the loop below failed
        # with a NameError.
        raise ValueError('genome must be one of mm10, hg38 or hg19.')

    return_dict = {}

    output('Annotating Peaks...', log_file=log_file, run_main=run_main)
    for key, df in dict_of_dfs.items():
        result_key = f'{key}_annotated'
        if check and check_df[key]:
            # pandas has no `from_excel`; `read_excel` is the reader.
            return_dict[result_key] = pd.read_excel(annotated_path(key))
        else:
            col_len = len(df.columns)
            df.columns = ["chr", "start", "end"] + list(range(col_len - 3))
            GR = makeGR(df)
            GR_anno = chipseeker.annotatePeak(GR, overlap='all', TxDb=txdb, annoDb=anno)
            return_dict[result_key] = ro.pandas2ri.ri2py(
                chipseeker.as_data_frame_csAnno(GR_anno))
            return_dict[result_key].to_excel(annotated_path(key), index=None)

    return return_dict
def tearDown(self):
    """Reinstall the regular-console writer stored in ``self.console``."""
    saved_writer = self.console
    rinterface.set_writeconsole_regular(saved_writer)
def tearDown(self):
    """Restore rpy2's default console callbacks and clear ``sys.last_value``."""
    rinterface.set_writeconsole_regular(rinterface.consolePrint)
    rinterface.set_readconsole(rinterface.consoleRead)
    # The flush callback goes through set_flushconsole. The previous code
    # passed consoleFlush to set_readconsole, which overwrote the read
    # callback restored on the line above and never reset the flush one.
    rinterface.set_flushconsole(rinterface.consoleFlush)
    rinterface.set_choosefile(rinterface.chooseFile)
    sys.last_value = None
def reset_redirects(self):
    """Point both R console writers (warn/error and regular) back at consolePrint."""
    default_writer = rinterface.consolePrint
    rinterface.set_writeconsole_warnerror(default_writer)
    rinterface.set_writeconsole_regular(default_writer)
def call_gsoa(request):
    """
    Run GSOA on the files named in ``request``, render the report and email it.

    R console output is collected in a local buffer while the job runs.
    Any exception is reported through ``email_error`` together with that
    buffer. Returns "no data" when the data file path is shorter than two
    characters; otherwise returns None.
    """
    # data from task tiger
    print("request: {}".format(request))
    local_buffer = []
    try:
        gsoa = importr('GSOA')

        # NOTE(review): the copy is not used afterwards; the pops only fail
        # (KeyError) when a necessary field is absent - confirm intent.
        args = request.copy()
        for field in NECESSARY_FIELDS:
            args.pop(field)

        data_file = request.get('dataFilePath')
        if len(str(data_file)) < 2:
            return "no data"

        email_stem = request.get('email', 'results_txt').replace('.com', '').strip()
        data_stem = request.get('dataFilePath').split(".")[0]
        outFilePath = "/data/{}_{}.txt".format(email_stem, data_stem)
        print("email: {}".format(request.get('email', 'results_txt')))

        # redirect everything from R into the python console (local buffer)
        rinterface.set_writeconsole_warnerror(local_buffer.append)
        rinterface.set_writeconsole_regular(local_buffer.append)

        result = gsoa.GSOA_ProcessFiles(
            dataFilePath=request.get('dataFilePath', ''),
            classFilePath=request.get('classFilePath', ''),
            gmtFilePath=request.get('gmtFilePath', ''),
            outFilePath=outFilePath,
            numCores=multiprocessing.cpu_count(),
            numRandomIterations=request.get('numRandomIterations', ''),
            classificationAlgorithm=request.get('classificationAlgorithm', ''),
            numCrossValidationFolds=request.get('numCrossValidationFolds', ''),
            removePercentLowestExpr=request.get('removePercentLowestExpr', ''),
            removePercentLowestVar=request.get('removePercentLowestVar', ''))

        print("Writing RMarkdown")
        outFilePath_html = outFilePath.replace('txt', 'html')
        report_params = ListVector({
            'data1': outFilePath,
            'alg': request.get('classificationAlgorithm', 'svm'),
            'class': request.get('classFilePath', ''),
            'crossval': request.get('numCrossValidationFolds', ''),
            'data_files': request.get('dataFilePath', ''),
            'genesets': request.get('gmtFilePath', ''),
            'iterations': request.get('numRandomIterations', ''),
            'lowexpress': request.get('removePercentLowestExpr', ''),
            'var': request.get('removePercentLowestVar', ''),
        })
        rmarkdown.render('/app/GSOA_Report.Rmd',
                         output_file=outFilePath_html,
                         params=report_params)
        email_report(request.get('email'), outFilePath)
    except Exception as e:
        email_error(request.get('email'), e, local_buffer)
    finally:
        # Hand the R console back to rpy2's default printer in every case.
        rinterface.set_writeconsole_warnerror(rinterface.consolePrint)
        rinterface.set_writeconsole_regular(rinterface.consolePrint)
def R(self, line, cell=None, local_ns=None):
    """
    Execute code in R, optionally returning results to the Python runtime.

    In line mode, this will evaluate an expression and convert the returned
    value to a Python object.  The return value is determined by rpy2's
    behaviour of returning the result of evaluating the final expression.

    Multiple R expressions can be executed by joining them with semicolons::

        In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
        Out[9]: array([ 4.25])

    In cell mode, this will run a block of R code. The resulting value
    is printed if it would be printed when evaluating the same code
    within a standard R REPL.

    Nothing is returned to python by default in cell mode::

        In [10]: %%R
           ....: Y = c(2,4,3,9)
           ....: summary(lm(Y~X))

        Call:
        lm(formula = Y ~ X)

        Residuals:
            1     2     3     4
         0.88 -0.24 -2.28  1.64

        Coefficients:
                    Estimate Std. Error t value Pr(>|t|)
        (Intercept)   0.0800     2.3000   0.035    0.975
        X             1.0400     0.4822   2.157    0.164

        Residual standard error: 2.088 on 2 degrees of freedom
        Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
        F-statistic: 4.651 on 1 and 2 DF,  p-value: 0.1638

    In the notebook, plots are published as the output of the cell::

        %R plot(X, Y)

    will create a scatter plot of X vs Y.

    If cell is not None and line has some R code, it is prepended to
    the R code in cell.

    Objects can be passed back and forth between rpy2 and python via the
    -i -o flags in line::

        In [14]: Z = np.array([1,4,5,10])

        In [15]: %R -i Z mean(Z)
        Out[15]: array([ 5.])

        In [16]: %R -o W W=Z*mean(Z)
        Out[16]: array([  5.,  20.,  25.,  50.])

        In [17]: W
        Out[17]: array([  5.,  20.,  25.,  50.])

    The return value is determined by these rules:

    * If the cell is not None (i.e., has contents), the magic returns None.

    * If the final line results in a NULL value when evaluated
      by rpy2, then None is returned.

    * No attempt is made to convert the final value to a structured array.
      Use %Rget to push a structured array.

    * If the -n flag is present, there is no return value.

    * A trailing ';' will also result in no return value as the last
      value in the line is an empty string.

    A NameError is raised when a converter or input name cannot be found in
    the local or user namespace, and a ValueError when the named converter
    is not a Converter instance.
    """
    args = parse_argstring(self.R, line)

    # arguments 'code' in line are prepended to
    # the cell lines
    if cell is None:
        code = ''
        return_output = True
        line_mode = True
    else:
        code = cell
        return_output = False
        line_mode = False

    code = ' '.join(args.code) + code

    # if there is no local namespace then default to an empty dict
    if local_ns is None:
        local_ns = {}

    if args.converter is not None:
        # Look the converter up by name: local namespace first, then the
        # shell's user namespace.
        try:
            localconverter = local_ns[args.converter]
        except KeyError:
            try:
                localconverter = self.shell.user_ns[args.converter]
            except KeyError:
                raise NameError("name '%s' is not defined" % args.converter)
        if not isinstance(localconverter, Converter):
            # The name was never interpolated before, so the message showed
            # a literal '%s'.
            raise ValueError("'%s' must be a Converter object."
                             % args.converter)

    if args.input:
        for name in ','.join(args.input).split(','):
            try:
                val = local_ns[name]
            except KeyError:
                try:
                    val = self.shell.user_ns[name]
                except KeyError:
                    raise NameError("name '%s' is not defined" % name)
            if args.converter is None:
                ro.r.assign(name, self.pyconverter(val))
            else:
                ro.r.assign(name, localconverter.py2ri(val))

    tmpd = self.setup_graphics(args)

    # Bound up front so that caching below cannot hit an unbound name when
    # no png/svg graphics were produced.
    display_data = []
    text_output = ''
    try:
        if line_mode:
            for line in code.split(';'):
                text_result, result, visible = self.eval(line)
                text_output += text_result
            if text_result:
                # the last line printed something to the console so we
                # won't return it
                return_output = False
        else:
            text_result, result, visible = self.eval(code)
            text_output += text_result
            if visible:
                old_writeconsole_regular = ri.get_writeconsole_regular()
                ri.set_writeconsole_regular(self.write_console_regular)
                try:
                    ro.r.show(result)
                    text_output += self.flush()
                finally:
                    # Put the previous writer back even if show() fails.
                    ri.set_writeconsole_regular(old_writeconsole_regular)

    except RInterpreterError as e:
        # XXX - Maybe we should make this red or something?
        print(e.stdout)
        if not e.stdout.endswith(e.err):
            print(e.err)
        if tmpd:
            rmtree(tmpd)
        return
    finally:
        if self.device in ['png', 'svg']:
            ro.r('dev.off()')

    if text_output:
        # display_data.append(('RMagic.R', {'text/plain':text_output}))
        publish_display_data(data={'text/plain': text_output},
                             source='RMagic.R')
    # publish the R images
    if self.device in ['png', 'svg']:
        display_data, md = self.publish_graphics(tmpd, args.isolate_svgs)

        for tag, disp_d in display_data:
            publish_display_data(data=disp_d, source=tag, metadata=md)

        # kill the temporary directory - currently created only for "svg"
        # and "png" (else it's None)
        rmtree(tmpd)

    if args.output:
        for output in ','.join(args.output).split(','):
            if args.converter is None:
                output_ipy = converter.ri2py(ri.globalenv.get(output))
            else:
                output_ipy = localconverter.ri2py(ri.globalenv.get(output))
            self.shell.push({output: output_ipy})

    # this will keep a reference to the display_data
    # which might be useful to other objects who happen to use
    # this method
    if self.cache_display_data:
        self.display_cache = display_data

    # We're in line mode and return_output is still True,
    # so return the converted result
    if return_output and not args.noreturn:
        if result is not ri.NULL:
            if args.converter is None:
                return converter.ri2py(result)
            else:
                return localconverter.ri2py(result)