def create_rserve_connection(cls, host_IN=None, port_IN=None):

    # return reference
    value_OUT = None

    # declare variables
    got_host = False
    my_host = ""
    got_port = False
    my_port = -1

    # make Rserve Connection instance.

    # got a host?
    if (host_IN is not None) and (host_IN != ""):
        # yes
        my_host = host_IN
        got_host = True
    else:
        # no
        my_host = cls.DEFAULT_HOST
        got_host = False
    # -- END check to see if we have a host. --#

    # got a port?
    if (port_IN is not None) and (port_IN != "") and (port_IN > 0):
        # yes
        my_port = port_IN
        got_port = True
    else:
        # no
        my_port = cls.DEFAULT_PORT
        got_port = False
    # -- END check to see if we have a port. --#

    # do we have either a host or a port?
    if got_host or got_port:
        # we do. Include in connect() call.
        value_OUT = pyRserve.connect(host=my_host, port=my_port)
    else:
        # no. Just call connect().
        value_OUT = pyRserve.connect()
    # -- END check to see if host or port. --#

    return value_OUT
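A minimal usage sketch for the factory above. The class name RserveHelper is hypothetical; it assumes the method is defined as a classmethod on a class that provides DEFAULT_HOST and DEFAULT_PORT:

# Hypothetical owner class; create_rserve_connection and the DEFAULT_* attributes
# are assumed to live on it as shown above.
conn = RserveHelper.create_rserve_connection()                    # falls back to the class defaults
remote = RserveHelper.create_rserve_connection("10.0.0.5", 6312)  # explicit host and port
print(remote.eval("1 + 1"))  # pyRserve returns R numerics as Python floats: 2.0
remote.close()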
def getRConnection():
    global conn
    if conn and type(conn) is pyRserve.rconn.RConnector and not conn.isClosed:
        return conn.r
    conn = pyRserve.connect(host=rServeHost, port=6311)
    conn.eval(rFuncStr)
    return conn.r
def __init__(self):
    self.conn = pyRserve.connect()
    self.conn.voidEval('setwd("' + config.SCRIPTS_DIR + '")')
    for pkg in reqd_packages:
        self.conn.voidEval('library("' + pkg + '")')
    for src in reqd_sources:
        self.conn.voidEval('source("' + src + '")')
def rzindex_wrapper(reqId, cand):
    conn = pyRserve.connect()
    conn.voidEval('source("/home/pandera/RCode/zindex_main.r", chdir=T)')
    score = conn.r.zindex_main(reqId, 'r', cand)
    j = len(score[0])
    zindex = []
    for num in range(0, j):
        zend = []
        dist = {}
        dist["candidate_id"] = score[0][num]
        dist["requisition_id"] = score[4][num]
        dist["zindex_score"] = score[5][num]
        dist1 = {"name": "Experience", "score": score[3][num]}
        zend.append(dist1)
        dist1 = {"name": "Skills", "score": score[1][num]}
        zend.append(dist1)
        dist1 = {"name": "Job Fit", "score": score[2][num]}
        zend.append(dist1)
        dist["zindex_distribution"] = zend
        zindex.append(dist)
    conn.close()
    return zindex
def manual_ISA_gen_seeds(binary_mat, est_col_width, pre_exclude_gene_indices):
    num_row = binary_mat.shape[0]
    num_col = binary_mat.shape[1]
    num_seeds = int(num_col / est_col_width)
    print 'in manual_ISA num_col = ' + str(num_col)
    print 'in manual_ISA num_seeds = ' + str(num_seeds)
    conn = pyRserve.connect()
    conn.r('require("isa2")')
    seeds_mat = conn.r('generate.seeds(' + str(num_col) + ', count = ' + str(num_seeds) +
                       ', sparsity = ' + str(est_col_width) + ')')
    seeds_list = []
    num_seeds = seeds_mat.shape[1]
    # make an ndarray that has 1 at pre_excluded indices
    pre_exclude_mask = numpy.zeros(num_col)
    for pre_exclude_gene_index in pre_exclude_gene_indices:
        pre_exclude_mask[pre_exclude_gene_index] = 1
    for i in range(num_seeds):
        curr_seed_vec = seeds_mat[:, i]
        # if curr_seed_vec contains pre_excluded gene indices, remove this seed;
        # the dot product is greater than 0 only when an index matches
        if numpy.dot(curr_seed_vec, pre_exclude_mask) > 0:
            continue
        seeds_list.append(curr_seed_vec)
    return seeds_list
def __call__(self):
    conn = pyRserve.connect(host=self.host, port=self.port)
    if conn.eval("1+1") != 2:
        raise IOError("Unable to execute on R connection")
    return RConnection(conn)
def get(self, session_id):
    if session_mgr.is_ok(session_id):
        try:
            # Call the mlrMBO R script to actually propose a point.
            conn = pyRserve.connect()
            conn.eval('setwd("..")')
            # Prepare call in R
            commandline = 'propose("%s")' % str(session_id)
            # Run that function
            point = conn.eval(commandline)
            # Close connection
            conn.close()
            # Debugging
            point = json.loads(point)
        except Exception as e:
            print(e)
            session_mgr.close(session_id)
            return error_mgr.internal_error()
        return point, 200
    else:
        return error_mgr.no_session()
def rzindex_wrapper(ReqId, cand):
    zindex = {}
    conn = None
    try:
        conn = pyRserve.connect()
        conn.voidEval(r_conn_string)
        score = conn.r.zindex_main(ReqId, 'c', cand)
        conn.close()
        j = len(score[0])
        for num in range(0, j):
            candidate_id = score[0][num]
            zindex[candidate_id] = {
                "zindex_distribution": [
                    {"name": "Experience", "score": score[3][num]},
                    {"name": "Skills", "score": score[1][num]},
                    {"name": "Job Fit", "score": score[2][num]}
                ],
                "zindex_score": score[5][num]
            }
    except Exception as e:
        DebugException(e)
        # guard: connect() itself may have failed, leaving conn unset
        if conn is not None:
            conn.close()
    return zindex
def det_syncer():
    # only have to sync if changes were made to our two inventory forms
    if request.form['instrument'] == 'pbmc' or request.form['instrument'] == 'cytokine':
        # connect to our Rserve instance
        conn = pyRserve.connect()
        # set our REDCap token appropriately
        if request.form['project_id'] == '65':
            conn.eval("tok <- readRDS('~/.redcap/controls/token.rds')")
        elif request.form['project_id'] == '52':
            conn.eval("tok <- readRDS('~/.redcap/ecmo/token.rds')")
        else:
            print 'No token for this project id...'
            print str(request.form)
            return make_response(jsonify({'result': 'failed'}), 400)
        print 'Syncing the following data:'
        print str(request.form)
        print '----------'
        # fill in our script & run it
        to_run = sync_template.format(**request.form)
        conn.eval(to_run)
    else:
        print 'Nothing to sync...'
        print str(request.form)
        print '----------'
    return make_response(jsonify({'result': 'success'}), 201)
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from Rserve. '''
    try:
        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        mid1 = filters.get('marker', 'rs2476601')
        dataset = filters.get('dataset', 'EUR').replace('-', '')
        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]),
                             sources=['seqid', 'start'])
        elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
        doc = elastic.search().docs[0]
        seqid = getattr(doc, 'seqid')
        rserve = getattr(settings, 'RSERVE')
        conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
        pop_str = conn.r.get_pop(dataset, seqid, mid1)
        pops = json.loads(str(pop_str))
        populations = []
        for pop in pops:
            pops[pop]['population'] = pop
            populations.append(pops[pop])
        conn.close()
        return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
    except (TypeError, ValueError, IndexError, ConnectionError):
        return [ElasticObject(initial={'populations': None, 'marker': mid1})]
def compute_importance_step1(vectors, limit):
    conn = pyRserve.connect()
    try:
        conn.eval('library(\'relaimpo\')', void=True)
        conn.eval('df <- data.frame()', void=True)
        size = 0
        anomalies = []
        results = {}
        for i in range(len(vectors)):
            v = vectors[i]
            if v[-1] <= limit:
                conn.r.x = v
                conn.eval('df <- rbind(df,x)', void=True)
                size += 1
                if size == 1:
                    col_names = map(lambda x: 'API' + str(x), range(len(v) - 1))
                    col_names.append('Total')
                    conn.r.df_names = col_names
                    conn.eval('names(df) <- df_names', void=True)
                if size > len(v):
                    results[i] = compute_importance_internal(conn)
                else:
                    results[i] = 'Insufficient Data'
            else:
                anomalies.append(i)
        return anomalies, results
    finally:
        conn.close()
def __init__(self, port=6311, host='localhost'):
    self.port = int(port)
    self.host = str(host)
    try:
        self.conn = pyRserve.connect(host=host, port=port)
    except RConnectionRefused:
        # try to start Rserve on localhost if possible
        if host == "localhost":
            os.system("R CMD Rserve --RS-port %i --no-save" % port)
            self.conn = pyRserve.connect(host=host, port=port)
        else:
            raise RConnectionRefused
    res = self.conn.eval("1+1")
    if res != 2:
        raise IOError("Unable to execute on R connection")
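A usage sketch, assuming the __init__ above belongs to a wrapper class (called RServeSession here purely for illustration); the constructor transparently boots a local Rserve if nothing is listening on the port:

session = RServeSession(port=6311)     # hypothetical class name; auto-starts Rserve on localhost if needed
print(session.conn.eval("sum(1:10)"))  # -> 55.0
session.conn.close()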
def __init__(self, conn=None):
    """Creates connection to R server and loads HE library containing FV."""
    if conn is None:
        self.conn = pyRserve.connect()
    else:
        self.conn = conn
    self.conn.r('library("HomomorphicEncryption")', void=True)
def createRServeConn():
    global Rconn
    global RconnStatus
    Rconn = pyRserve.connect(host=RServeIP, port=6311)
    Rconn.voidEval('library("quantmod")')
    Rconn.voidEval('library("PerformanceAnalytics")')
    Rconn.voidEval('library("RMySQL")')
    Rconn.voidEval('library("xts")')
    RconnStatus = True
def crossvalidate(profiles, true_group_name, holdout_group_name=None, sva=False,
                  train=NNClassifier):
    profiles.assert_not_isnan()
    keys = profiles.keys()
    true_labels = regroup(profiles, true_group_name)
    labels = list(set(true_labels.values()))
    if holdout_group_name:
        holdouts = regroup(profiles, holdout_group_name)
    else:
        holdouts = dict((k, k) for k in keys)
    confusion = {}
    for ho in set(holdouts.values()):
        test_set_mask = np.array([tuple(holdouts[k]) == ho for k in keys], dtype=bool)
        training_features = profiles.data[~test_set_mask, :]
        test_features = profiles.data[test_set_mask, :]
        training_labels = [labels.index(true_labels[tuple(k)])
                           for k, m in zip(keys, ~test_set_mask) if m]
        if sva:
            import pyRserve
            conn = pyRserve.connect()
            conn.r.traininglabels = np.array(training_labels, dtype='i4')
            conn.r.trainData = training_features.ravel().reshape(training_features.shape, order='F').T
            conn.r.testData = test_features.ravel().reshape(test_features.shape, order='F').T
            #import pdb; pdb.set_trace()
            #assert conn.r('trainData[1,2]') == training_features[1, 0]
            #assert conn.r('testData[1,2]') == test_features[1, 0]
            conn.r('library(sva)')
            conn.r('trainData <- as.matrix(trainData)')
            conn.r('testData <- as.matrix(testData)')
            conn.r('trainpheno <- data.frame(label=traininglabels)')
            #conn.r('write.table(trainData, "/tmp/trainData.txt")')
            #conn.r('write.table(testData, "/tmp/testData.txt")')
            #conn.r('write.table(trainpheno, "/tmp/trainpheno.txt")')
            conn.r('trainMod <- model.matrix(~as.factor(label), trainpheno)')
            nsv = conn.r('num.sv(trainData, trainMod)')
            print nsv, 'surrogate variables'
            conn.r('trainMod0 <- model.matrix(~1, trainpheno)')
            conn.r('trainSv <- sva(trainData, trainMod, trainMod0, B=1)')
            conn.r('fsvaobj <- fsva(trainData, trainMod, trainSv, testData)')
            filtered_train = getattr(conn.r, 'fsvaobj$db').T
            filtered_test = getattr(conn.r, 'fsvaobj$new').T
        else:
            filtered_train = training_features
            filtered_test = test_features
        model = train(filtered_train, training_labels)
        for k, f, m in zip(keys, profiles.data, test_set_mask):
            if not m:
                continue
            true = true_labels[k]
            predicted = labels[model.classify(f)]
            confusion[true, predicted] = confusion.get((true, predicted), 0) + 1
    return confusion
def rzindex_wrapper_insert(ReqId, cand):
    conn = None
    try:
        conn = pyRserve.connect()
        conn.voidEval(r_conn_string)
        status = conn.r.zindex_main(ReqId, 'c', cand)
        conn.close()
        print("Success")
    except Exception as e:
        DebugException(e)
        # guard: connect() itself may have failed, leaving conn unset
        if conn is not None:
            conn.close()
def rzindex_candidate(Candlist):
    conn = None
    try:
        conn = pyRserve.connect()
        conn.voidEval(r_conn_candidate)
        status = conn.r.candidate_incremental(Candlist)
        conn.close()
    except Exception as e:
        print("HERE - %s" % e)
        DebugException(e)
        # guard: connect() itself may have failed, leaving conn unset
        if conn is not None:
            conn.close()
def rzindex_wrapper_newreq(ReqId):
    conn = None
    try:
        conn = pyRserve.connect()
        conn.voidEval(r_conn_requisition)
        status = conn.r.reqScoring(ReqId)
        conn.close()
        print("Success")
    except Exception as e:
        DebugException(e)
        # guard: connect() itself may have failed, leaving conn unset
        if conn is not None:
            conn.close()
def get_connection():
    try:
        return True, pyRserve.connect(host='localhost', port=6311, atomicArray=True)
    except Exception as ex:
        return False, {
            'error': 'Unable to connect to R server!',
            'code': 500,
            'details': str(ex)
        }
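Because get_connection returns an (ok, payload) pair instead of raising, callers have to branch on the flag before touching the connection; a minimal sketch (the helper name run_r is hypothetical):

def run_r(expr):
    ok, result = get_connection()
    if not ok:
        return result  # the error dict built above, ready to serialize
    try:
        return result.eval(expr)
    finally:
        result.close()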
def run_hmm(series, nStates):
    conn = pyRserve.connect()
    srcpath = os.getcwd()  # assumes that hmm.r is in the same directory as this python file
    conn.eval('setwd("' + config.CORE_DIR + '")')
    conn.eval('source("hmm.r")')
    conn.eval('library("RHmm")')
    conn.r.sc = series
    conn.r.nStates = nStates
    streval = 'v <- run_hmm(sc, n_states= nStates )'
    conn.eval(streval)
    return conn.r.v
def get_r_plot(self):
    self.cursor.execute(sql.profits)
    res = self.cursor.fetchall()
    self.arr = []
    for i in res:
        self.arr.append(float(i[0]))
    conn = pyRserve.connect()
    conn.r.xvar = self.arr
    return conn.eval(rprog.prog)
def manual_ISA(args):
    start_time = time.time()
    #pdb.set_trace()
    binary_mat = args[0]
    abs_cutoff = args[1]
    per_cutoff = args[2]
    converge_epsilon = args[3]
    converge_depth = args[4]
    seed0 = args[5]
    #print '\nthis thread seed0 = ' + str(numpy.nonzero(seed0))
    num_row = binary_mat.shape[0]
    num_col = binary_mat.shape[1]
    if False:  # when using threadPool, seed0 will be passed in
        conn = pyRserve.connect()
        conn.r('require("isa2")')
        seeds = conn.r('generate.seeds(' + str(num_col) + ',count = 1)')
        num_seeds = seeds.shape[1]
        len_each_seed = seeds.shape[0]
        seed0 = seeds[:, 0]
    prev_cols = seed0
    curr_depth = 0
    #print 'seed0 = ' + str(numpy.nonzero(seed0))
    while True:
        curr_rows = manual_ISA_filter_row(binary_mat, prev_cols, abs_cutoff, per_cutoff)
        curr_cols = manual_ISA_filter_col(binary_mat, curr_rows, abs_cutoff, per_cutoff)
        if not numpy.any(curr_cols):  # if converged to an empty hole, terminate early
            print 'xxx ABORTION, ALL ZERO'
            break
        if converge(curr_cols, prev_cols, converge_epsilon):
            print '$$$ REAL CONVERGE num_rows = ' + str(numpy.count_nonzero(curr_rows)) + \
                  ' num_cols = ' + str(numpy.count_nonzero(curr_cols))
            #pdb.set_trace()
            a = 1
            break
        elif curr_depth > converge_depth:
            print 'xxx TIME CONVERGE'
            #pdb.set_trace()
            a = 1
            break
        else:
            prev_cols = curr_cols  # iterate
            curr_depth = curr_depth + 1
    #pdb.set_trace()
    #print 'this thread takes {} seconds'.format(time.time() - start_time)
    return curr_rows, curr_cols
def main(job_id, params):
    params = copy.deepcopy(params)
    np.random.seed(NUM_EXP)
    # Parsing parameters.
    tests = ["cor", "zf", "mi-g", "mi-g-sh"]
    test = tests[np.argmax(params['b_test'])]
    alpha = np.power(10, float(params['c_alpha']))
    # Experiments to be carried out.
    total_SHD = 0.0
    size_nodes = np.array([25, 50, 75, 100])
    size_neighbors = np.array([2, 8])
    size_samples = np.array([10, 50, 100, 500])
    total_experiments = size_nodes.shape[0] * size_neighbors.shape[0] * size_samples.shape[0]
    true_bn_fr = ""
    sample_bn_fr = ""
    # Connecting to pyRserve; this raises an exception if Rserve is not listening on the port.
    conn = pyRserve.connect(port=PORT)
    i = 0
    for node_example in size_nodes:
        for neighbor_example in size_neighbors:
            true_bn_fr = str(node_example) + "_" + str(neighbor_example) + "_r" + str(NUM_EXP) + ".rds"
            for sample_example in size_samples:
                script = "library(\"bnlearn\"); "
                script += "bn_true <- readRDS(\"" + DATA_ROUTE + "/" + true_bn_fr + "\"); "
                sample_bn_fr = (str(node_example) + "_" + str(neighbor_example) + "_r" +
                                str(NUM_EXP) + "_" + str(sample_example) + ".rds")
                script += "bn_data <- readRDS(\"" + DATA_ROUTE + "/" + sample_bn_fr + "\"); "
                script += ("bn_learned <- bnlearn::pc.stable(x = bn_data, test = \"" + test +
                           "\", alpha = " + str(alpha) + "); ")
                script += "result <- shd(bn_learned, bn_true);"
                # We send the script and wait for evaluation.
                conn.eval(script)
                # Once the script is finished, we retrieve the result variable.
                shd = conn.eval("result")
                shd_norm = shd / float(node_example * (node_example - 1) / 2.0)
                total_SHD += shd_norm
                i += 1
                print i
    conn.close()
    return {'shd': total_SHD / float(total_experiments)}
def gwr_initialize(request):
    if request.method == "POST":
        data = json.loads(request.body)
        shapefile_filename = data['namespace']
        shapefile_object = Shapefile.objects.get(name=shapefile_filename)
        conn = pyRserve.connect()
        # get the path to shapefile
        shapefile_filename = shapefile_object.get_full_path() + "projected"
        # load the function
        functionFile = open(settings.BASE_DIR + '/fileupload/new.r')
        ## windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\new.r')
        functionContent = functionFile.read()
        conn.voidEval(functionContent)
        try:
            # get the list of columns in this shapefile
            nameslist = conn.r.getshpheader(shapefile_filename)
            nameslist = list(nameslist)
        except:
            message = "error running function in r"
            return HttpResponse(json.dumps({"status": "error", "message": message}),
                                content_type="application/json")
        finally:
            conn.close()
        response_obj = {}
        response_obj['status'] = "success"
        response_obj['variables'] = nameslist
        return HttpResponse(json.dumps(response_obj), content_type="application/json")
    else:
        form = GWRInitializeForm()  # an empty, unbound form
        return render_to_response('gwr_initialize_form.html', {'form': form},
                                  context_instance=RequestContext(request))
def filter_queryset(self, request, queryset, view):
    """ Override this method to request just the documents required from Rserve. """
    try:
        filterable = getattr(view, "filter_fields", [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        mid1 = filters.get("m1")
        if mid1 is None or mid1 == "":
            return [ElasticObject(initial={"error": "No marker ID provided."})]
        dataset = filters.get("dataset", "EUR").replace("-", "")
        mid2 = filters.get("m2")
        window_size = int(filters.get("window_size", 1000000))
        dprime = filters.get("dprime", 0.0)
        rsq = filters.get("rsq", 0.8)
        maf = filters.get("maf", False)
        if maf:
            maf = True
        build_version = filters.get("build", "GRCh38").lower()
        pos = filters.get("pos", False)
        if pos:
            pos = True
        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]),
                             sources=["seqid", "start"])
        elastic = Search(search_query=query, idx=ElasticSettings.idx("MARKER", "MARKER"), size=1)
        doc = elastic.search().docs[0]
        seqid = getattr(doc, "seqid")
        rserve = getattr(settings, "RSERVE")
        conn = pyRserve.connect(host=rserve.get("HOST"), port=rserve.get("PORT"))
        ld_str = conn.r.ld_run(dataset, seqid, mid1, marker2=mid2,
                               window_size=window_size, dprime=dprime,
                               rsq=rsq, maf=maf, position=pos,
                               build_version=build_version)
        ld_str = ld_str.replace("D.prime", "dprime").replace("R.squared", "rsquared")
        conn.close()
        return [ElasticObject(initial=json.loads(str(ld_str)))]
    except (TypeError, ValueError, IndexError, ConnectionError):
        raise Http404
def getCorrelationMatrix(*args):
    # connect to Rserve (running as daemon process - configured for port 9999)
    # to start, run on command line 'R CMD Rserve --RS-port 9999'
    conn = pyRserve.connect(host='localhost', port=9999)
    # combine streams by column - will create a matrix with dimensions = #ofstreams by number of elements
    #inputDataStreams = conn.r.cbind(stream1, stream2, stream3, stream4)
    inputDataStreams = conn.r.cbind(*args)
    #print(inputDataStreams)
    # Create a correlation matrix based on input streams
    corrMatrix = conn.r.cor(inputDataStreams)
    #print(corrMatrix)
    # Always good to close connections
    conn.close()
    return corrMatrix
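A usage sketch with two correlated numpy streams (assumes an Rserve instance is listening on port 9999, as noted in the comments above):

import numpy as np

s1 = np.random.randn(100)
s2 = 2 * s1 + 0.1 * np.random.randn(100)  # strongly correlated with s1
cm = getCorrelationMatrix(s1, s2)         # 2x2 matrix from R's cor(); off-diagonal close to 1
print(cm)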
def __init__(self, dump_stdout=True, r_exe=None, do_log=False, port=None, **kw):
    if port is None:
        s = socket.socket()
        s.bind(("127.0.0.1", 0))
        port = s.getsockname()[1]
        s.close()
    rip = RInterpreter(r_exe=r_exe, **kw)
    rip.execute("""if("Rserve" %in% rownames(installed.packages()) == FALSE) {
                       install.packages("Rserve", repos="http://cran.rstudio.com");
                   }
                """)
    cmd = """library(Rserve);
             Rserve::run.Rserve(port=%d);
          """ % port
    thread.start_new_thread(rip.execute, (cmd,))
    self.__dict__["dump_stdout"] = dump_stdout
    for _ in range(10):
        try:
            self.__dict__["conn"] = pyRserve.connect(host="127.0.0.1", port=port)
        except pyRserve.rexceptions.RConnectionRefused:
            time.sleep(0.1)
        else:
            break
    else:
        raise Exception("connection failed after 10 trials over one second in total")

    def on_die(killed_ref, conn=self.conn):
        # we pass conn here because access to self is not allowed in this handler.
        # further we import socket inside because this function may be called when the Python
        # interpreter is shut down and a globally imported module socket might not be available
        # any more
        import socket
        try:
            conn.shutdown()
        except socket.error:
            pass

    # instead of implementing __del__ to force shutdown if the object dies, we use the
    # weakref trick to trigger shutdown at the end of self's life:
    self.__dict__["_del_ref"] = weakref.ref(self, on_die)
def compute(input):
    try:
        conn = pyRserve.connect()
    except:
        print "RServe not running... execute Rserve"
        return
    quiz = """
    collatz <- function(n, acc=0) {
        if(n==1) return(acc);
        collatz(ifelse(n%%2==0, n/2, 3*n +1), acc+1)
    }
    quiz<-collatz(""" + str(input) + ")"
    print "computing collatz(" + str(input) + ")"
    output = conn.eval(quiz)
    conn.close()
    return output
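For example, evaluating the Collatz step count for 27 (which takes 111 steps to reach 1) would look like this in a REPL, assuming Rserve is listening on the default port:

>>> compute(27)
computing collatz(27)
111.0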
def connect_to_rserve(host, port, wait_time=2, wait_loop=10):
    logging.info("Connecting to Rserve at %s:%d" % (host, port))
    i = 0
    conn = None
    exception = None
    while i < wait_loop:
        i += 1
        logging.info("Connection attempt %d of %d " % (i, wait_loop))
        try:
            conn = pyRserve.connect(host=host, port=port)
            break
        except pyRserve.rexceptions.RConnectionRefused as e:
            exception = e
            time.sleep(wait_time)
    if conn is None:
        raise exception
    logging.info("Connection to Rserve successful.")
    return conn
def predict_logs(molecule_file_path, email_address):
    print molecule_file_path
    print email_address
    logger.debug('TESTING LOGGING FROM CELERY')
    conn = pyRserve.connect()
    conn.eval('library(smpredict)')
    logger.debug(conn.eval('PredictPropertytoCSV("LogS", csv.file="predictions.csv", structures.file="' +
                           molecule_file_path + '", error.variance=TRUE)'))
    logger.debug(conn.eval('getwd()'))
    mail = EmailMessage('Your LogS Predictions',
                        """Dear User

Thank you for using our service. Here are your LogS predictions.

Kind regards
smpredict team""",
                        'smpredict', [email_address])
    mail.attach_file('' + conn.eval('getwd()') + '/predictions.csv')
    mail.send()
def consultar_hive(consulta):
    conn = pyRserve.connect(host='10.71.1.30', port=6311)
    conn.atomicArray = True
    R_Loadder = """library(rJava)
library(RJDBC)
.jinit()
#Sys.setenv(HADOOP_JAR= paste0("", collapse=.Platform$path.sep))
driverclass = "org.apache.hive.jdbc.HiveDriver"
classPath = c("/usr/lib/hive/apache-hive-1.2.1-bin/lib/hive-jdbc-1.2.1-standalone.jar",
              "/etc/hadoop-2.7.1/share/hadoop/common/lib/commons-configuration-1.6.jar",
              "/etc/hadoop-2.7.1/share/hadoop/common/hadoop-common-2.7.1.jar")
dr2 = JDBC(driverclass, classPath, identifier.quote = "`")
Sys.setenv(HADOOP_JAR= paste0(classPath, collapse=.Platform$path.sep))
url = paste0("jdbc:hive2://", "10.71.1.30", ":", "10000", "/default", ";auth=noSasl")
dbConnect(dr2, url) -> conn"""
    # Open the connection
    print "...Connecting..."
    conn.eval(R_Loadder)
    print "...Querying..."
    conn.eval(sql_query('hive_rqt', consulta))
    print conn.eval('hive_rqt')
    respuesta = conn.eval('hive_rqt')
    return str(respuesta)
def getRConnObject():
    """Establish a connection with Rserve and load it with our program"""
    global conn
    rServeHost = 'localhost'
    rServePort = 6311
    # Check if a connection already exists
    if conn and type(conn) is pyRserve.rconn.RConnector and not conn.isClosed:
        # Return the existing connection
        return conn
    # There's no connection yet: establish one
    try:
        conn = pyRserve.connect(host=rServeHost, port=rServePort)
    except:
        # This probably means that Rserve is not running
        return None
    # Load the function that needs to be there
    conn.eval(rFuncStr)
    # Return the connection, which now contains our function
    return conn
def eval(self, expr, conn=0):
    """Evaluate an R expression on a particular connection and return a list
    of (result, type) pairs, where type is the actual R type of the result.
    In the case that expr contains multiple expressions (e.g. '1+1;2+2'),
    each expression will be evaluated independently.
    """
    if conn not in self.connections:
        # Create connection if it doesn't exist
        self.connections[conn] = pyRserve.connect()
    elif self.connections[conn].isClosed:
        # Re-open the connection if it is closed
        self.connections[conn].connect()
    results = []
    for chunk in RChunker(expr).chunk():
        try:
            result = self.connections[conn].eval(chunk)
            if isinstance(result, pyRserve.rparser.Closure):
                # Result is most likely a function, I have not currently
                # come up with a good way to handle this situation yet.
                type_ = '__closure__'
            else:
                # Set the .Last.value variable since pyRserve or Rserve
                # doesn't do this for some reason.
                self.connections[conn].r.__setattr__('.Last.value', result)
                # Get the type of .Last.value
                type_ = self.connections[conn].eval('class(.Last.value)')
                # Re-set .Last.value
                self.connections[conn].r.__setattr__('.Last.value', result)
        except pyRserve.rexceptions.REvalError as error:
            result = str(error)
            if not result:
                result = 'Error: unable to parse R code'
            type_ = '__error__'
        results.append((result, type_))
    return results
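A usage sketch for the chunked evaluator above (the owner class name REvaluator and its connections dict are assumptions, and the printed output is illustrative only):

evaluator = REvaluator()   # hypothetical: initializes self.connections = {}
for result, rtype in evaluator.eval('x <- 1:3; mean(x)'):
    print(result, rtype)
# e.g. [1 2 3] integer
#      2.0     numeric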
def R_discover_sub_clusters_PLAID(gene_p_qs, binarize_cutoff):
    start_time = time.time()
    #p_m = R_build_matrix(gene_p_qs)
    binary_mat = R_build_numpy_matrix_from_gene_p_qs(gene_p_qs, binarize_cutoff)
    conn = pyRserve.connect()
    conn.r('require("biclust")')
    R_args = {
        'x': binary_mat,
        'method': 'BCPlaid',
        'cluster': 'b',
        # 'fit.model': 'y~m+a+b',
        #'background': False,
        'row.release': 0.2,
        'col.release': 0.2,
        #'shuffle': 3,
        #'back.fit': 0,
        #'max.layers': 20,
        #'iter.startup': 5,
        #'iter.layer': 10,
        #'verbose': True,
    }
    result = conn.r.biclust(**R_args)
    result = conn.r.biclust(binary_mat, method="BCPlaid")
    attr = result.lexeme.attr
    #pdb.set_trace()
    disease_names = gene_p_qs.keys()  # FIXME temporarily comment off for testing p_m
    clusters = R_parse_cluster_result(attr, disease_names)
    print 'found ' + str(len(clusters)) + ' clusters\n'
    #clusters = R_filter_clusters(clusters, gene_p_qs, row_percent, row_cutoff, col_percent, col_cutoff)
    print("sub_clustering took --- %s seconds ---" % (time.time() - start_time))
    return clusters
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from Rserve. '''
    try:
        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        mid1 = filters.get('m1', 'rs2476601')
        dataset = filters.get('dataset', 'EUR').replace('-', '')
        mid2 = filters.get("m2")
        window_size = int(filters.get('window_size', 1000000))
        dprime = filters.get("dprime", 0.)
        rsq = filters.get("rsq", 0.8)
        maf = filters.get("maf", False)
        if maf:
            maf = True
        build_version = filters.get("build", 'GRCh38').lower()
        pos = filters.get("pos", False)
        if pos:
            pos = True
        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]),
                             sources=['seqid', 'start'])
        elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
        doc = elastic.search().docs[0]
        seqid = getattr(doc, 'seqid')
        rserve = getattr(settings, 'RSERVE')
        conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
        ld_str = conn.r.ld_run(dataset, seqid, mid1, marker2=mid2,
                               window_size=window_size, dprime=dprime,
                               rsq=rsq, maf=maf, position=pos,
                               build_version=build_version)
        ld_str = ld_str.replace('D.prime', 'dprime').replace('R.squared', 'rsquared')
        conn.close()
        return [ElasticObject(initial=json.loads(str(ld_str)))]
    except (TypeError, ValueError, IndexError, ConnectionError):
        return [ElasticObject(initial={'ld': None})]
def connect(self):
    self.conn = R.connect()
def predict_NCI60(molecule_file_path, email_address):
    #################################################################################################
    # 1. Load molecules
    #################################################################################################
    import repo.bioalerts as bioalerts
    import os
    import numpy as np
    import sklearn
    from sklearn.ensemble import RandomForestRegressor
    try:
        print "Reading input file.\n"
        molecules = bioalerts.LoadMolecules(molecule_file_path, verbose=False)
        molecules.ReadMolecules()
        print "Total number of input molecules correctly processed: ", len(molecules.mols)
    except:
        print "ERROR: The input molecules could not be processed.\n The extension of the input file might not be supported\n"
        mail = EmailMessage('NCI60 Sensitivity Predictions',
                            """Dear User,

The requested cell line sensitivity predictions on the NCI60 panel could not be calculated.
It is likely that (i) the input file was corrupted or (ii) the format of the input molecules is not supported.

Kind regards
Cancer Cell Line Profiler team""",
                            'CancerCellLineProfiler', [email_address])
        mail.send()
    # Check whether the file is huge..
    if (os.path.getsize(molecule_file_path) >> 20) > 1:
        mail = EmailMessage('NCI60 Sensitivity Predictions',
                            """Dear User,

The requested cell line sensitivity predictions on the NCI60 panel could not be calculated
because the size of the file was higher than 1Mb (maximum input file size supported).

Kind regards
Cancer Cell Line Profiler team""",
                            'CancerCellLineProfiler', [email_address])
        mail.send()
    if len(molecules.mols) == 0:
        print "ERROR: None of the input molecules was processed successfully\n"
        mail = EmailMessage('NCI60 Sensitivity Predictions',
                            """Dear User,

The requested cell line sensitivity predictions on the NCI60 panel could not be calculated,
because the input file was empty or none of the input molecules was processed correctly.

Kind regards
Cancer Cell Line Profiler team""",
                            'CancerCellLineProfiler', [email_address])
        mail.send()
        raise
    #################################################################################################
    # 2. Calculate Morgan fps for the input molecules
    #################################################################################################
    print "Calculating Morgan fingerprints for the input molecules\n"
    mols_info = bioalerts.GetDataSetInfo()
    #mols_info.extract_substructure_information(radii=[0,1,2], mols=molecules.mols)
    fps_input_molecules = bioalerts.CalculateFPs(mols=molecules.mols, radii=[0, 1, 2])
    fps_input_molecules.calculate_hashed_fps(nBits=256)
    #hashed_binary = fps_input_molecules.fps_hashed_binary
    hashed_counts = fps_input_molecules.fps_hashed_counts
    mean_fps = np.load("./NCI60/server_model/mean_fps_server_NCI60.npy")
    std_fps = np.load("NCI60/server_model/std_fps_server_NCI60.npy")
    hashed_counts = (hashed_counts - mean_fps) / std_fps
    #################################################################################################
    # 3. Load cell line descriptors (pathways 1000)
    #################################################################################################
    nb_input_mols = len(molecules.mols)
    cell_descs = np.genfromtxt('./NCI60/pathway_descriptors_most_var.csv', delimiter=",", skiprows=1)
    cell_names = np.genfromtxt('./NCI60/pathway_descriptors_most_var_CELL_NAMES.csv', skiprows=0, dtype="|S40")
    mean_cell_descs = np.mean(cell_descs, axis=0)
    std_cell_descs = np.std(cell_descs, axis=0)
    cell_descs = (cell_descs - mean_cell_descs) / std_cell_descs
    #cell_descs = np.repeat(cell_descs, molecules.mols, axis=0)
    # tile and repeat the cell line and compound descriptors
    hashed_counts = np.tile(hashed_counts, (59, 1))
    input_mols_names = np.tile(molecules.mols_ids, (59, 1))
    cell_descs = np.repeat(cell_descs, nb_input_mols, axis=0)
    cell_names = np.repeat(cell_names, nb_input_mols, axis=0)
    X = np.hstack((hashed_counts, cell_descs))
    #################################################################################################
    # 4. Load point prediction and error models
    #################################################################################################
    from sklearn.externals import joblib
    point_prediction_model = joblib.load('./NCI60/server_model/point_prediction_model_NCI60.pkl')
    error_prediction_model = joblib.load('./NCI60/server_model/error_prediction_model_NCI60.pkl')
    #################################################################################################
    # 5. Predict the activities
    #################################################################################################
    point_predictions = point_prediction_model.predict(X)
    error_prediction = error_prediction_model.predict(X)
    #################################################################################################
    # 6. Calculate the confidence intervals (70, 80, 90%)
    #################################################################################################
    alphas = np.load("./NCI60/server_model/alphas_NCI60.npy")
    alpha_70 = alphas[np.round(len(alphas) * 0.7, decimals=0)]
    alpha_80 = alphas[np.round(len(alphas) * 0.8, decimals=0)]
    alpha_90 = alphas[np.round(len(alphas) * 0.9, decimals=0)]
    confi_70 = error_prediction * alpha_70
    confi_80 = error_prediction * alpha_80
    confi_90 = error_prediction * alpha_90
    #################################################################################################
    # 7. Write predictions to .csv
    #################################################################################################
    fich = open("./NCI60/predictions_NCI60.csv", "w")
    fich.write("Cell_line\tCompound_ID\tPredicted_pGI50\tCI_70\tCI_80\tCI_90\n")
    for i in range(0, len(input_mols_names)):
        fich.write("%s\t%s\t%f\t%f\t%f\t%f\n" % (cell_names[i], input_mols_names[i][0],
                                                 point_predictions[i], confi_70[i],
                                                 confi_80[i], confi_90[i]))
    fich.close()
    #################################################################################################
    # 8. Generate plot with R of the barplot for the NCI60
    #################################################################################################
    conn = pyRserve.connect()
    logger.debug(conn.eval('source("barplot_NCI60.R")'))
    mail = EmailMessage('NCI60 Sensitivity Predictions',
                        """Dear User,

Thank you for using our service.
Here are the (i) predicted pGI50 values, and (ii) the 70, 80 and 90% confidence intervals
calculated with conformal prediction for your input molecules.
In addition, you will find a pdf displaying the bioactivity profile of each input molecule
across the NCI60 panel.

Kind regards
Cancer Cell Line Profiler team""",
                        'CancerCellLineProfiler', [email_address])
    mail.attach_file('./NCI60/predictions_NCI60.csv')
    mail.attach_file('./NCI60/predicted_profiles_NCI60.pdf')
    mail.send()
    #################################################################################################
    # 9. Remove generated files
    #################################################################################################
    import os, os.path
    if os.path.exists('./NCI60/predictions_NCI60.csv'):
        os.remove('./NCI60/predictions_NCI60.csv')
def gwr_plot(request):
    if request.method == "POST":
        data = json.loads(request.body)
        shapefile_filename = data['namespace']
        dependent = data['dependent']
        independent = data['independent']
        # get the filepath to this shapefile
        shapefile_object = Shapefile.objects.get(name=shapefile_filename)
        # get the path to shapefile
        shapefile_file = shapefile_object.get_full_path() + "projected"
        # based on the dependent and independent variables, prepare the formula
        prepared_formula = dependent + " ~ "
        for variable in independent:
            prepared_formula = prepared_formula + variable + " + "
        # remove the last +
        prepared_formula = prepared_formula[:-3]
        # get a connection to rserve
        conn = pyRserve.connect()
        # prepare the r function
        functionFile = open(settings.BASE_DIR + '/fileupload/plotGWR.r')
        ## windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\plotGWR.r')
        functionContent = functionFile.read()
        conn.voidEval(functionContent)
        # set the file path for the output shapefile
        output_path = os.path.join(settings.BASE_DIR, 'gwroutputs')
        ## windows path
        # output_path = settings.BASE_DIR + '\\gwroutputs\\'
        output_name = shapefile_filename + str(uuid.uuid4()).replace("-", "")[:10]
        try:
            # get the statistics of this particular input
            # this function will also create a shapefile
            function_output = conn.r.gwr_function(shapefile_file, prepared_formula,
                                                  output_path, output_name)
            # print 'variables'
            # print list(function_output['variables'])
            # print 'significance'
            # print function_output['significance']
            # print 'variance_inflation_factors'
            # print function_output['variance_inflation_factors']
            variables = list(function_output)
        except:
            message = "error running function in r"
            return HttpResponse(json.dumps({"status": "error", "message": message}),
                                content_type="application/json")
        finally:
            conn.close()
        # convert to geojson
        source_filename = os.path.join(output_path, output_name + ".shp")
        output_filename = os.path.join(output_path, output_name + ".geojson")
        print subprocess.call("ogr2ogr -f GeoJSON -s_srs EPSG:4326 -t_srs EPSG:4326 " +
                              output_filename + " " + source_filename)
        # output the geojson
        with open(os.path.join(output_path, output_name + ".geojson"), "rb") as geojsonfile:
            outputgeojson = json.loads(geojsonfile.read().replace('\n', ''))
        geojsonfile.close()
        # prepare response
        response = {}
        response['variables'] = variables
        response['outputgeojson'] = outputgeojson
        return HttpResponse(json.dumps(response), content_type="application/json")
    else:
        return HttpResponse("coming soon")
def kde_function(request):
    if request.method == "POST":
        data = json.loads(request.body)
        point_filename = data['point']
        window_filename = data['window']
        bandwidth = float(data['bandwidth'])
        # get the relevant files
        point_object = Shapefile.objects.get(name=point_filename)
        window_object = Shapefile.objects.get(name=window_filename)
        conn = pyRserve.connect()
        # read the shapefile
        window_filename = window_object.get_full_path() + "projected"
        point_filename = point_object.get_full_path() + "projected"
        # print window_filename
        # load the function
        functionFile = open(settings.BASE_DIR + '/fileupload/kdefunction.r')
        ## windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\kdefunction.r')
        print functionFile
        functionContent = functionFile.read()
        conn.voidEval(functionContent)
        # output_path = settings.BASE_DIR + '/kdeoutputs/'
        ## windows path
        output_path = os.path.join(settings.BASE_DIR, 'kdeoutputs')
        output_name = str(uuid.uuid4()).replace("-", "")
        print output_path
        print output_name
        try:
            # note that KDE function only returns the status
            # it creates the shapefile of contour lines
            resultsJson = conn.r.KDE_function(window_filename, point_filename, bandwidth,
                                              output_path, output_name)
            # convert to geojson
            source_filename = os.path.join(output_path, output_name + ".shp")
            output_filename = os.path.join(output_path, output_name + ".geojson")
            print subprocess.call("ogr2ogr -f GeoJSON -s_srs EPSG:4326 -t_srs EPSG:4326 " +
                                  output_filename + " " + source_filename)
            # output the geojson
            with open(os.path.join(output_path, output_name + ".geojson"), "rb") as geojsonfile:
                outputgeojson = json.loads(geojsonfile.read().replace('\n', ''))
            geojsonfile.close()
        except:
            message = "error running kde function in r"
            return HttpResponse(json.dumps({"status": "error", "message": message}),
                                content_type="application/json")
        finally:
            conn.close()
        response = json.dumps(outputgeojson, indent=2)
        finalresponse = HttpResponse(response, content_type="application/json")
        finalresponse["Access-Control-Allow-Origin"] = "*"
        finalresponse["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS"
        finalresponse["Access-Control-Max-Age"] = "1000"
        finalresponse["Access-Control-Allow-Headers"] = "*"
        return finalresponse
    else:
        form = KfunctionKDEInitializeForm()  # an empty, unbound form
        return render_to_response('kfunction_kde.html', {'form': form},
                                  context_instance=RequestContext(request))
#print "made connection shuttin down port to reopen" #s.shutdown(socket.SHUT_RDWR) #s.close() #validConnection = True #except Exception as inst: #print str(inst) #i+=1 i = getOpenPort() print "Starting R with port %s" % str(i) startRserve(i) import time con = None while con==None: try: con = pyRserve.connect(host = 'localhost', port=i) except: print "R connection not active" time.sleep(1) #print 'done importing conversion' import redRLog #print 'Rsession loaded' # import redRi18n mutex = QMutex() def assign(name, object): try: rpy.r.assign(name, object) redRLog.log(redRLog.R, redRLog.DEBUG, _('Assigned object to %s') % name)
def fitNegBinom_Rserve(countsByDistance, plot_distribution=False, per_chr=False):
    """
    Fits a negative binomial distribution to the counts found at each different
    distance. The fitting is attempted first using a python method, and if this
    fails R is used through Rserve. For the fitting, the outliers are removed.

    Outliers are defined as those having a z-score higher than 3.4. This number
    was chosen after exploring different values of z-scores and estimating the
    best goodness of fit. The HiC data is expected to contain outliers but they
    are problematic to fit and test the goodness of fit of a distribution,
    that's why they are removed.
    """
    # if the counts are per chromosome,
    # use the function recursively
    if per_chr:
        size = {}
        prob = {}
        for chrom in countsByDistance.keys():
            sys.stderr.write('computing negative binomial for '
                             '{}\n'.format(chrom))
            size[chrom], prob[chrom] = \
                fitNegBinom_Rserve(countsByDistance[chrom],
                                   plot_distribution=plot_distribution)
        return size, prob

    import pyRserve
    import matplotlib.pyplot as plt
    try:
        conn = pyRserve.connect()
        conn.r('library("MASS")')
    except:
        print "Could not connect to Rserve. Check that Rserve is up and running"
        exit(1)
    size = {}
    mu = {}
    prob = {}
    pval = {}
    good = 0
    bad = 0
    for dist in np.sort(countsByDistance.keys()):
        if dist == -1:  # skip intra chromosomal counts
            continue
        size[dist] = np.nan
        mu[dist] = np.nan
        prob[dist] = np.nan
        if sum(countsByDistance[dist]) == 0.0:
            print "no counts for bins at distance {}".format(dist)
            continue
        if np.any(np.isnan(countsByDistance[dist])) is True:
            exit("ERROR: matrix contains NaN values\n")
        counts = remove_outliers(countsByDistance[dist])
        if len(counts) <= 20:
            continue
        # the values in countsByDistance of a corrected matrix
        # are float values, but integers are needed for
        # the negative binomial fitting in R.
        counts_int = np.round(counts).astype('int')
        # try first using the python fit for the
        # negative binomial
        try:
            size[dist], prob[dist] = fit_nbinom(counts)
        except ValueError:
            # try with R..
            try:
                res = conn.r.fitdistr(counts_int, 'negative binomial')
            except:
                continue
            size[dist] = res[0]['size']
            mu[dist] = res[0]['mu']
            if np.isnan(size[dist]) or np.isnan(mu[dist]):
                sys.stderr.write("for dist={}, size={}, mu={}, "
                                 "len={}\n".format(dist, size[dist], mu[dist],
                                                   len(counts)))
                continue
            # The output from 'fitdistr' are size and mu,
            # but the scipy function that is based on the negative binomial
            # needs size and probability as parameters. However,
            # prob = size / ( size + mu )
            prob[dist] = size[dist] / (size[dist] + mu[dist])
        sys.stderr.write(".")  # print a . to show progress
        # evaluate fit of the counts distribution with respect to
        # the negative binomial distribution using the parameters
        # returned by R
        fitted_dist = scipy.stats.nbinom.rvs(size[dist], prob[dist],
                                             size=len(counts) * 2)
        pval[dist] = scipy.stats.ks_2samp(counts_int, fitted_dist)[1]
        # pval[dist] = scipy.stats.wilcoxon(counts, fitted_dist)[1]
        if pval[dist] < 0.01:
            bad += 1
            sys.stderr.write(
                "\nThe fit p-value {} for {} is too low to consider "
                "the distribution negative binomial".format(pval[dist], dist))
        else:
            good += 1
        if (plot_distribution and
                dist in [50000] + range(0, max(countsByDistance.keys()), 1000000)):
            # actual and fitted distributions are plotted
            # next to each other
            diff = counts.max() - counts.min()
            if diff >= 1000:
                nbins = 50
            elif 1000 > diff >= 100:
                nbins = 30
            elif 100 > diff >= 50:
                nbins = diff / 2
            else:
                nbins = (counts.max() - counts.min())
            freq, bins = np.histogram(counts.astype(int), nbins, normed=True)
            plt.hist(counts, bins, linewidth=0.1, alpha=0.8, normed=True)
            # plt.hist(fitted_dist, bins, histtype='step', linestyle='solid',
            #          linewidth=1.5, color='black', normed=True)
            pdf_fitted = scipy.stats.nbinom.pmf(bins.astype('int'), size[dist], prob[dist])
            plt.plot(bins.astype(int), pdf_fitted,
                     label='fitted nbinom gf={:.3f}'.format(pval[dist]))
            fig_name = '/tmp/fitt_{}_{}.png'.format('nbinom', dist)
            plt.title('{} bp; size: {}, prob: {}'.format(dist, size[dist], prob[dist]))
            plt.ylim(0, np.max(freq) + np.max(freq) * 0.2)
            plt.legend()
            plt.savefig(fig_name, dpi=200)
            plt.close()
            sys.stderr.write("check {}".format(fig_name))
    sys.stderr.write("good {}, bad {}\n".format(good, bad))
    return size, prob
import pyRserve

rcmd = pyRserve.connect(host='localhost', port=6311)
print(rcmd('rnorm(20, mean=2, sd=0.1)'))
rcmd('b <- c(1,3,5,7,9)')
print(rcmd('b'))
rcmd('a <- c("COL1","COL1","COL1","COL2","COL2","COL2","COL3","COL3")')
rcmd('b <- c("Item1","Item1","Item2","Item2","Item3","Item3","Item3","Item3")')
rcmd('results <- table(a,b)')
print(rcmd('results'))
rcmd('x <- seq(-20,20,by=.5)')
rcmd('y <- dt(x,df=10)')
rcmd('plot(x,y)')

This is the output (the graph is plotted in the R window and not shown here):

[ 2.02055894  2.05137019  1.97653928  1.99654565  2.08948691  2.07250623
  1.95475797  2.11145948  1.97653835  2.01341228  2.05299939  2.14354837
  2.06876532  1.94614396  1.9924665   2.08839507  1.87483786  2.08817775
  1.97000129  2.26570712]
[ 1.  3.  5.  7.  9.]
[[2 0 0]
 [1 1 0]
 [0 2 2]]
# -*- coding: utf-8 -*-
# Start Rserve first: http://blog.fens.me/r-rserve-server/

import pyRserve

conn = pyRserve.connect()
conn.eval('''source('test.R')''')
conn.r.testLoadRecord('600016')
def checkJobResult(client_ip, volunteer_session, jobId, RData_output, RData_input):
    mid = volunteerAuth.volunteer_sessions[volunteer_session]["mid"]
    machines = None
    ##### Validate if this machine was assigned for the job
    try:
        cur.execute("""SELECT mid,status FROM machine_job WHERE jobId=%s""" % jobId)
        machines = cur.fetchall()
    except:
        return "Error getting machines for the job " + str(jobId)
    if machines == None:
        return "There are no volunteers assigned to job " + str(jobId)
    legitMachine = False
    for entry in machines:
        if entry[0] == mid:
            legitMachine = True
            break
    if not legitMachine:
        return "The machine " + mid + " was not assigned to job with id: " + str(jobId)
    #### Get the number of machines assigned for the job
    quorum_machines = len(machines)
    ### Get validated jobs for this job ID
    machines_for_job[jobId][mid] = {
        "mid": mid,
        "status": "Computing",
        "vars": None,
        "filename": None
    }
    ### Collect the jobs already checked
    checked_jobs_machines = []
    for machineID in machines_for_job[jobId]:
        machine = machines_for_job[jobId][machineID]
        if machine["status"] == "Error" or machine["status"] == "Wrong" or machine["status"] == "Success":
            checked_jobs_machines.append(machine)
    # Verify RData_output.
    # If there is no RData_output then there was an execution error.
    if RData_output == None:
        try:
            query = "UPDATE machine_job SET status = 'Error' WHERE mid = " + str(
                volunteerAuth.volunteer_sessions[volunteer_session]["mid"]) + " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            con.rollback()
            return "Error executing query: " + query
        if quorum_machines == 1:
            try:
                query = "UPDATE job SET Status = 'Error' WHERE jobId = " + str(jobId)
                cur.execute(query)
                con.commit()
            except:
                print "Could not execute query: " + query
                con.rollback()
        machines_for_job[jobId][mid]["status"] = "Error"
        checked_jobs_machines.append(machines_for_job[jobId][mid])
        if quorum_machines == len(checked_jobs_machines) and quorum_machines > 1:
            ## majority report
            majorityReport(checked_jobs_machines)
        ### Release the machine to receive more jobs
        volunteerAuth.volunteer_sessions[volunteer_session]["State"] = "FREE"
        return
    ## If there is an output RData then start an Rserve session to validate the result
    # connect to R
    try:
        conn = pyRserve.connect()
    except:
        print "RServe not running... execute Rserve"
        return
    path = conn.eval('getwd()')
    filename = str(path) + "/" + str(jobId) + "_" + str(mid) + "_output.RData"
    handle = open(filename, "wb")
    handle.write(RData_output.data)
    handle.close()
    ## Clean all variables from R environment
    conn.eval("rm(list=ls())")
    ### Run the file with the R code
    conn.voidEval('load("' + filename + '")')
    ### Extract value from the quiz variable
    quiz = conn.eval("quiz")
    # Job validation
    # criteria1 - validate the quiz variable
    machines_for_job[jobId][mid]["filename"] = filename
    ### Get the expected output for the quiz
    try:
        query = ("""SELECT output FROM market_quiz INNER JOIN job_quiz """
                 """ON market_quiz.input=job_quiz.input WHERE jobId=""" + str(jobId))
        cur.execute(query)
        quiz_output = cur.fetchone()[0]
    except:
        return "Error executing query: " + query
    ### compare the quiz with the expected quiz result
    if quiz == quiz_output:
        criteria1 = True
    else:
        criteria1 = False
    # criteria2 - validate that the variables updated/created are available in output.RData.
    # This method can generate false positives because the test passes if no new variables
    # were created in this job computation. However, it does not generate false negatives:
    # if the expected variables are not in the RData file then the job execution is
    # corrupted or failed.
    variables = conn.eval("ls()[!sapply(ls(), function(x) is.function(get(x)))]")
    if set(jobBuffer[jobId]["vars"]).issubset(set(variables)):
        criteria2 = True
    else:
        criteria2 = False
    #del jobBuffer[jobId]
    vars = dict()
    for var in jobBuffer[jobId]["vars"]:
        vars[var] = conn.eval(var)
    machines_for_job[jobId][mid]["vars"] = vars
    ### If both criteria are granted then the job was computed successfully
    if criteria1 and criteria2:
        print "The computed job with the id " + str(jobId) + " was successfully validated!"
        try:
            query = """SELECT InitTime FROM job WHERE jobId= """ + str(jobId)
            cur.execute(query)
            initTime = cur.fetchone()[0]
        except:
            print "error with query: " + query
        try:
            execTime = time.time() - initTime
            query = "UPDATE job SET ExecTime = " + str(execTime) + " WHERE jobId = " + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            print "Could not execute query: " + query
            con.rollback()
        try:
            query = "UPDATE machine_job SET status = 'Success' WHERE mid = " + str(mid) + \
                    " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            print "Could not execute query: " + query
            con.rollback()
        if quorum_machines == 1:
            try:
                update_credibility(cur, con, mid, "Success")
                query = "UPDATE job SET Status = 'Success' WHERE jobId = " + str(jobId)
                cur.execute(query)
                con.commit()
            except:
                print "Could not execute query: " + query
                con.rollback()
        machines_for_job[jobId][mid]["status"] = "Success"
        checked_jobs_machines.append(machines_for_job[jobId][mid])
        if quorum_machines == len(checked_jobs_machines) and quorum_machines > 1:
            ## majority report
            checked_jobs_machines = majorityReport(checked_jobs_machines)
    else:
        # the volunteer computed a wrong result
        # scj is now set to 1
        # credibility is updated with the new scj
        try:
            query = "UPDATE machine_job SET status = 'Wrong' WHERE mid = " + str(mid) + \
                    " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            print "Could not execute query: " + query
            con.rollback()
        if quorum_machines == 1:
            try:
                update_credibility(cur, con, mid, "Wrong")
                query = "UPDATE job SET Status = 'Wrong' WHERE jobId = " + str(jobId)
                cur.execute(query)
                con.commit()
            except:
                print "Could not execute query: " + query
                con.rollback()
        machines_for_job[jobId][mid]["status"] = "Wrong"
        checked_jobs_machines.append(machines_for_job[jobId][mid])
        if quorum_machines == len(checked_jobs_machines) and quorum_machines > 1:
            ## majority report
            checked_jobs_machines = majorityReport(checked_jobs_machines)
    conn.close()
    if conn.isClosed:
        print "Rserve connection is closed"
    if quorum_machines == 1:
        try:
            query = "UPDATE job SET RDataPath = '" + str(machine["filename"]) + \
                    "' WHERE jobId = " + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            print "Could not execute query: " + query
            con.rollback()
    # update volunteer state to FREE
    volunteerAuth.volunteer_sessions[volunteer_session]["State"] = "FREE"
def open_rserv():
    global conn
    conn = pyRserve.connect()
def fitDistribution(countsByDistance, distribution, plot_distribution=False):
    """
    Generic method to fit continuous distributions to the HiC countsByDistance.
    The distribution names are the ones supported by scipy.
    """
    mu = {}
    sigma = {}
    pval = {}
    good = 0
    bad = 0
    good_nb = 0
    bad_nb = 0
    import pyRserve
    try:
        conn = pyRserve.connect()
        conn.r('library("MASS")')
    except:
        print "Could not connect to Rserve. Check that Rserve is up and running"
        exit(1)
    import sys
    for distnc in np.sort(countsByDistance.keys()):
        if distnc == -1:  # skip intra chromosomal counts
            continue
        if sum(countsByDistance[distnc]) == 0.0:
            print "no counts for bins at distance {}".format(distnc)
            continue
        if len(countsByDistance[distnc]) <= 2:
            continue
        sys.stderr.write('.')
        ### TEMP code to compare with negative binomial ###
        # the values in countsByDistance of a corrected matrix
        # are float values, but integers are needed for
        # the negative binomial.
        counts_nb = remove_outliers(np.round(countsByDistance[distnc]).astype('int'))
        # try first using the python fit for the
        # negative binomial
        try:
            size, prob = fit_nbinom(remove_outliers(countsByDistance[distnc]))
        except ValueError:
            # try with R..
            res = conn.r.fitdistr(counts_nb, 'negative binomial')
            size = res[0]['size']
            mu_ = res[0]['mu']
            if np.isnan(size) or np.isnan(mu_):
                print "for dist={}, size={}, mu={}, len={}".format(
                    distnc, size, mu_, len(counts_nb))
                continue
            prob = size / (size + mu_)
        nbin = scipy.stats.nbinom(size, prob)
        #####
        counts = remove_outliers(countsByDistance[distnc])
        counts[counts == 0] = 0.01
        dist = getattr(scipy.stats, distribution)
        param = dist.fit(counts, floc=0)
        if np.any(np.isnan(param)):
            sys.stderr.write('\n{} no params computed'.format(distnc))
            import ipdb
            ipdb.set_trace()
        mu[distnc] = param[-1]
        sigma[distnc] = param[0]
        # estimate the goodness of fit pvalue
        fitted_dist = dist.rvs(*param[:-2], loc=param[-2], scale=param[-1],
                               size=len(counts) * 2)
        pval[distnc] = scipy.stats.ks_2samp(counts, fitted_dist)[1]
        fitted_dist_nb = scipy.stats.nbinom.rvs(size, prob, size=len(counts_nb) * 2)
        pval_nb = scipy.stats.ks_2samp(counts_nb, fitted_dist_nb)[1]
        if pval[distnc] < 0.01:
            bad += 1
        else:
            good += 1
        if pval_nb < 0.01:
            bad_nb += 1
        else:
            good_nb += 1
        if pval[distnc] < 0.01:
            sys.stderr.write("\nproblem with {}, p-value for "
                             "{} fit: {} (NB fit: {})".format(distnc, distribution,
                                                              pval[distnc], pval_nb))
        if (plot_distribution and
                distnc in range(50000, max(countsByDistance.keys()), 500000)):
            import matplotlib.pyplot as plt
            freq, bins = np.histogram(counts, 30, normed=True)
            plt.close()  # to avoid overlaps
            plt.hist(counts, bins, linewidth=0.1, alpha=0.8, normed=True)
            # plt.hist(fitted_dist, bins, histtype='step', linestyle='solid',
            #          linewidth=1.5, color='black', normed=True)
            # plt.hist(fitted_dist_nb, bins, histtype='step', linestyle='solid',
            #          linewidth=1.5, color='grey', normed=True)
            pdf_fitted = dist.pdf(bins, *param[:-2], loc=param[-2], scale=param[-1])
            plt.plot(bins.astype(int), nbin.pmf(bins.astype('int')),
                     label='NB {:.2f}'.format(pval_nb))
            plt.plot(bins, pdf_fitted,
                     label='{} {:.2f}'.format(distribution, pval[distnc]))
            fig_name = '/tmp/fitt_{}_{}.png'.format(distribution, distnc)
            plt.title('{} bp'.format(distnc))
            plt.ylim(0, np.max(freq) + np.max(freq) * 0.2)
            plt.legend()
            plt.savefig(fig_name, dpi=200)
            plt.close()
            print "check {}".format(fig_name)
    print "good {}, bad {}, good_nb {}, bad_nb {}".format(good, bad, good_nb, bad_nb)
    return (mu, sigma)
def h_measure(true_class, probability): conn = pyRserve.connect() conn.voidEval(''' relabel <- function(labels){ if (length(levels(as.factor(labels)))==1){ stop('Only one class is present in the dataset. Need both classes to be represented.') } if (length(levels(as.factor(labels)))>2){ stop('More than two classes present, but code can only handle binary classification.') } labels <- as.factor(as.character(labels)) input.labels <- levels(labels) cond.temp <- ( identical(input.labels,c('case','non-case')) | identical(input.labels,c('Case','Non-case')) | identical(input.labels,c('case','noncase')) | identical(input.labels,c('Case','Non-case')) ) if (cond.temp) { levels(labels) <- c('1', '0') message('Class labels have been switched from (', paste(input.labels[1],input.labels[2], sep=','), ') to (', paste('1', '0', sep=','), ')') labels <- as.factor(labels) labels <- 2-as.numeric(labels) # turn into numeric array of 0s and 1s } else { levels(labels) <- c('0', '1') if (!(identical(input.labels,c('0', '1')))){ message('Class labels have been switched from (', paste(input.labels[1],input.labels[2], sep=','), ') to (', paste('0', '1', sep=','), ')') } labels <- as.factor(labels) labels <- as.numeric(labels)-1 # turn into numeric array of 0s and 1s } return(labels) } misclassCounts <- function(predicted.class,true.class){ true.class <- as.array(true.class) predicted.class <- as.array(predicted.class) # make sure the same convention is employed for both true and predicted # check <- relabel(c(true.class,predicted.class)) # l <- length(check) # true.class <- check[1:(l/2)] # predicted.class <- check[(l/2+1):l] TP <- sum(predicted.class == 1 & true.class == 1) FP <- sum(predicted.class == 1 & true.class == 0) TN <- sum(predicted.class == 0 & true.class == 0) FN <- sum(predicted.class == 0 & true.class == 1) conf.matrix <- data.frame(pred.1=c(TP,FP),pred.0=c(FN,TN)) row.names(conf.matrix) <- c('actual.1','actual.0') ER <- (FP + FN)/(TP+FP+TN+FN) Sens <- TP/(TP+FN) Spec <- TN/(TN+FP) Precision <- TP/(TP+FP) Recall <- Sens TPR <- Recall FPR <- 1-Spec F <- 2/(1/Precision+1/Sens) Youden <- Sens + Spec -1 metrics <- data.frame(ER=ER, Sens=Sens,Spec=Spec,Precision=Precision, Recall=Recall, TPR=TPR, FPR=FPR, F=F, Youden=Youden) return(list(conf.matrix=conf.matrix,metrics=metrics)) } HMeasure <- function(true.class, scores, severity.ratio=NA, threshold=0.5, level=0.95 ){ #################### ### INPUT CHECKS ### #################### # try to catch mistaken order of arguments if (is.matrix(true.class) || is.data.frame(true.class)){ stop( 'True class should be a vector, not a matrix / data frame. Consider the order of the arguments.' 
            )
        }
        # no missing values in the labels allowed
        if (any(is.na(true.class))){
            stop('Missing values in class labels are not allowed.')
        }
        # relabel, and make sure there are only 2 class labels
        true.class <- relabel(true.class)
        # row names can confuse and are otherwise useless - remove them
        rownames(scores) <- NULL
        rownames(true.class) <- NULL
        # turn scores into a data frame (if it were not one already)
        if (is.vector(scores)){
            scores <- as.data.frame(scores)
            # message('Scores coerced from vector to data frame')
        }
        if (is.matrix(scores)){
            n <- dim(scores)[1]
            k <- dim(scores)[2]
            # in the case of a matrix, throw a warning if columns (classifiers) > rows (data)
            if (n < k) {
                warning(gettextf(
                    'Consider transposing score matrix: number of classifiers (columns) = %d exceeds number %d of datapoints (rows)',
                    k, n), domain = NA)
            }
            scores <- as.data.frame(scores)
            # message('Scores coerced from matrix to data frame')
        }
        if (dim(scores)[1]!=length(true.class)){
            stop('Label vector provided has different length than respective classifier scores')
        }
        # only look at complete cases in the score data frame
        if (any(is.na(scores))){
            warning(
                'Missing entries detected in matrix of scores. Respective entries will be disregarded'
            )
        }
        complete.rows <- complete.cases(scores)
        scores <- subset(scores,subset=complete.rows)
        true.class <- subset(true.class,subset=complete.rows)
        rownames(scores) <- NULL
        rownames(true.class) <- NULL
        # now that format is correct, get sample size and number of classifiers
        n <- dim(scores)[1]
        k <- dim(scores)[2]
        # THRESHOLD - if only one value for the threshold has been provided
        # (e.g., the default of 0.5), use the same for all classifiers;
        # else check that the array of thresholds has one per classifier
        if (length(threshold) == 1){
            threshold <- rep(threshold,k)
        } else {
            if (length(threshold)<k){
                warning(
                    'Threshold must either be a single value, or a vector of length equal to the number of classifiers employed. The default value of 0.5 will be used.')
                threshold <- rep(0.5,k)
            }
        }
        ############################
        ### INPUT CHECK COMPLETE ###
        ############################

        #############################################
        ### SINGLE CLASSIFIER FUNCTION DEFINITION ###
        #############################################
        # to keep the code tidy, we implement an internal function for a single classifier
        HMeasure.single <- function(y, s, classifier.name=NULL,
                                    severity.ratio=severity.ratio,
                                    threshold=threshold, level=level){
            # PROCESSING
            n <- length(s)
            # this is a numeric version of the class labels
            n1 <- sum(y)
            n0 <- n-n1
            pi0 <- n0/n
            pi1 <- n1/n
            # retrieve severity ratio - set to default if absent
            if (is.na(severity.ratio)){
                severity.ratio <- pi1/pi0
            }
            # order data into increasing scores
            zord <- order(s)
            sc <- s[zord]
            # note: we make no assumptions about the range of s

            # COMPUTE ROC CURVE
            # Calculate raw ROC, replacing any tied sequences by a diagonal
            # Raw ROC starts at F0[1]=0, F1[1]=0, and ends at F0[K1]=1, F1[K1]=1.
            Get.Score.Distributions <- function(y,s,n1,n0){
                # tapply(y,s,sum) counts the instances of each unique score, and ranks them by score
                s1 <- unname(tapply(y, s, sum))/n1
                s1 <- c(0,s1,1-sum(s1))  # make sure to add the points 0,0 and 1,1
                s0 <- unname(tapply(1-y, s, sum))/n0
                s0 <- c(0,s0,1-sum(s0))  # make sure to add the points 0,0 and 1,1
                # number of unique scores
                S <- length(s1)
                # what were r0i and r1i in the ML paper are now the empirical cdfs
                F1 <- cumsum(s1)
                F0 <- cumsum(s0)
                return(list(F1=F1,F0=F0,s1=s1,s0=s0,S=S))
            }
            out.scores <- Get.Score.Distributions(y=y,s=s,n1=n1,n0=n0)
            AUC <- 1 - sum(out.scores$s0 * (out.scores$F1 - 0.5 * out.scores$s1))
            # if the AUC < .5, switch signs and repeat
            switched <- FALSE
            the.criterion <- AUC < 0.5
            if (the.criterion){
                switched <- TRUE
                s <- 1-s
                out.scores <- Get.Score.Distributions(y,s,n1,n0)
                if (is.null(classifier.name)){
                    warning('ROC curve mostly lying under the diagonal. Switching scores.',
                            domain = NA)
                } else {
                    warning(gettextf(
                        'ROC curve of %s mostly lying under the diagonal. Switching scores.',
                        classifier.name), domain = NA)
                }
            }
            F1 <- out.scores$F1
            F0 <- out.scores$F0
            s0 <- out.scores$s0
            s1 <- out.scores$s1
            S <- out.scores$S

            # get misclassification statistics
            misclass.out <- misclassCounts(as.numeric(s>threshold),true.class)
            misclass.metrics <- misclass.out$metrics
            temp <- misclass.out$conf.matrix
            misclass.conf <- data.frame(
                TP=temp[1,1], FP=temp[2,1],
                TN=temp[2,2], FN=temp[1,2])

            # get aggregate statistics:
            # (replacing tied scores by a diagonal)
            AUC <- 1 - sum(s0 * (F1 - 0.5 * s1))
            Gini <- 2*AUC - 1
            KS <- max(abs(F0 - F1))
            cost.parameter <- severity.ratio/(1+severity.ratio)
            MER <- min(pi0*(1-F0)+pi1*F1)
            MWL <- 2*min(cost.parameter*pi0*(1-F0)+(1-cost.parameter)*pi1*F1)

            Look.Up.AUC <- function(xcurve,ycurve,x=0){
                # assumes the curve is monotonic
                result <- NA
                if (all(diff(xcurve) >= 0)){
                    ind <- which(xcurve-x>0)[1]
                    x1 <- xcurve[ind-1]
                    x2 <- xcurve[ind]
                    y1 <- ycurve[ind-1]
                    y2 <- ycurve[ind]
                    if (x2-x1 > 0) {
                        pos <- (x2-x)/(x2-x1)
                        result <- (1-pos)*y1 + pos*y2
                    } else {result <- y2}
                }
                return(result)
            }
            SensFixed <- matrix(NA,1,length(level))
            SpecFixed <- matrix(NA,1,length(level))
            temp <- array(NA,length(level))
            for (l in 1:length(level)){
                SensFixed[l] <- c(Look.Up.AUC(F0,1-F1,x=level[l]))
                temp[l] <- paste('Sens.Spec',floor(level[l]*100),sep='')
            }
            SensFixed <- as.data.frame(SensFixed)
            colnames(SensFixed) <- temp
            for (l in 1:length(level)){
                SpecFixed[l] <- Look.Up.AUC(F1,F0,x=1-level[l])
                temp[l] <- paste('Spec.Sens',floor(level[l]*100),sep='')
            }
            SpecFixed <- as.data.frame(SpecFixed)
            colnames(SpecFixed) <- temp

            # restrict to upper convex hull by considering ROC above diagonal only
            chull.points <- chull(1-F0,pmax(1-F1,1-F0))
            G0 <- 1-F0[chull.points]
            G1 <- 1-F1[chull.points]
            hc <- length(chull.points)
            sG0 <- c(0,G0[c(2:length(G0))] - G0[c(1:(length(G0)-1))])
            sG1 <- c(0,G1[c(2:length(G1))] - G1[c(1:(length(G1)-1))])
            AUCH <- sum(sG0 * (G1 - 0.5 * sG1))

            # get sorted scoring densities
            s.class0 <- sort(s[y==0])
            s.class1 <- sort(s[y==1])

            # Calculate the LHshape1 value
            cost <- c(1:(hc+1))
            b0 <- c(1:(hc+1))
            b1 <- c(1:(hc+1))
            # extract shape
            if (severity.ratio > 0){
                shape1 <- 2
                shape2 <- 1+(shape1-1)*1/severity.ratio
            }
            if (severity.ratio < 0){
                shape1 <- pi1+1
                shape2 <- pi0+1
            }
            cost[1] <- 0
            cost[hc+1] <- 1
            b00 <- beta(shape1,shape2)
            b10 <- beta(1+shape1,shape2)
            b01 <- beta(shape1,1+shape2)
            b0[1] <- pbeta(cost[1], shape1=(1+shape1), shape2=shape2)*b10/b00
            b1[1] <- pbeta(cost[1], shape1=shape1, shape2=(1+shape2))*b01/b00
            b0[hc+1] <- pbeta(cost[hc+1], shape1=(1+shape1), shape2=shape2)*b10/b00
            b1[hc+1] <- pbeta(cost[hc+1], shape1=shape1, shape2=(1+shape2))*b01/b00
            ### NB: can become massively faster
            for (i in 2:hc){
                cost[i] <- pi1*(G1[i]-G1[i-1]) /
                    (pi0*(G0[i]-G0[i-1]) + pi1*(G1[i]-G1[i-1]))
                b0[i] <- pbeta(cost[i], shape1=(1+shape1), shape2=shape2)*b10/b00
                b1[i] <- pbeta(cost[i], shape1=shape1, shape2=(1+shape2))*b01/b00
            }
            LHshape1 <- 0
            for (i in 1:hc){
                LHshape1 <- LHshape1 + pi0*(1-G0[i])*(b0[(i+1)]-b0[i]) +
                    pi1*G1[i]*(b1[(i+1)]-b1[i])
            }
            B0 <- pbeta(pi1, shape1=(1+shape1), shape2=shape2)*b10/b00
            B1 <- pbeta(1, shape1=shape1, shape2=(1+shape2))*b01/b00 -
                pbeta(pi1, shape1=shape1, shape2=(1+shape2))*b01/b00
            H <- 1 - LHshape1/(pi0*B0 + pi1*B1)
            data <- list(F0=F0, F1=F1, G0=G0, G1=G1, cost=cost,
                         pi1=pi1, pi0=pi0, n0=n0, n1=n1, n=n, hc=hc,
                         s.class0=s.class0, s.class1=s.class1,
                         severity.ratio=severity.ratio)
            metrics <- data.frame(H=H, Gini=Gini, AUC=AUC, AUCH=AUCH, KS=KS,
                                  MER=MER, MWL=MWL)
            metrics <- cbind(metrics,SpecFixed,SensFixed)
            metrics <- cbind(metrics,misclass.metrics,misclass.conf)
            return(list(data=data,metrics=metrics))
        }
        #############################################
        ### SINGLE CLASSIFIER DEFINITION complete ###
        #############################################

        ######################################
        ### PROCESS CLASSIFIERS ONE BY ONE ###
        ######################################
        data <- list()
        for (count in 1:k){
            name.now <- colnames(scores)[count]
            s <- scores[,count]
            threshold.now <- threshold[count]
            output <- HMeasure.single(y=true.class, s=s, classifier.name=name.now,
                                      severity.ratio=severity.ratio,
                                      threshold=threshold.now, level=level)
            if (count == 1){
                metrics <- output$metrics
            }
            if (count > 1){metrics <- rbind(metrics,output$metrics)}
            # retrieve data for plotting purposes
            data[[count]] <- output$data
        }
        # name the rows by classifier
        rownames(metrics) <- colnames(scores)
        # name the data output by classifier
        names(data) <- colnames(scores)
        # construct output
        hmeasure <- list(metrics=metrics)
        attr(hmeasure,'data') <- data
        class(hmeasure) <- 'hmeasure'
        # return(hmeasure)
        new.object <- unclass(hmeasure)
        return(as.data.frame(new.object$metrics))
    }
    ''')
    trueclass_string = str(tuple(true_class))
    scores_string = str(tuple(probability))
    trueclass = conn.eval('c' + trueclass_string)
    scores = conn.eval('c' + scores_string)
    results = conn.r.HMeasure(trueclass, scores)
    conn.close()
    return results[0]
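# ---------------------------------------------------------------------------
# A minimal usage sketch for h_measure() with hypothetical data; it assumes
# a local Rserve instance is listening on the default port so that
# pyRserve.connect() succeeds. The caller receives the first element of the
# converted metrics data frame:
if __name__ == '__main__':
    labels = [0, 0, 0, 1, 1, 1, 0, 1]                  # binary ground truth
    scores = [0.1, 0.3, 0.2, 0.8, 0.7, 0.9, 0.4, 0.6]  # classifier scores
    print(h_measure(labels, scores))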
def performModel(input_files, tool_config, client, subtool_name=False):
    '''
    input_files is the set of data to analyze from the NMTK server
    tool_config is the "header" part of the input
    client is an object of type NMTK_apps.helpers.server_api.NMTKClient
    subtool_name is provided if the tool manages multiple configurations
    '''
    logger = performModel.get_logger()
    logger.debug("input_files: %s" % (input_files,))
    logger.debug("tool_config\n%s\n" % (tool_config,))
    # Use exception handling to generate "error" results -- everything that
    # doesn't generate good results should throw an exception. Use the extra
    # 'with' syntax to ensure temporary files are promptly cleaned up after
    # tool execution. With luck, the tool server will also do periodic garbage
    # collection on tools that don't pick up after themselves.

    # AccessR - Dispatch to subtools
    with Config.Job(input_files, tool_config) as job:
        try:
            job.setup()
            job.logger = logger  # in case we need it...
            job.tempfiles = []
            job.R = pyRserve.connect()
            if subtool_name in doSubTool:
                results = doSubTool[subtool_name](job, client)
                if results:
                    client.updateResults(result_field=results.get("field", None),
                                         units=results.get("units", None),
                                         result_file=results.get("result_file", None),
                                         files=results.get("files", None)
                                         )
                else:
                    raise Exception("No results returned from subtool '%s'" % (subtool_name,))
            else:
                raise Exception("SubTool not found: " + subtool_name)
        except Exception as e:
            # Every failure should result in an Exception;
            # use job.fail to add additional failure messages before
            # raising the Exception, as illustrated here
            msg = 'Job failed.'
            logger.exception(msg)
            logger.exception(str(e))
            job.fail(msg)
            job.fail(str(e))
            client.updateResults(payload={'errors': job.failures},
                                 failure=True,
                                 files={}
                                 )
        finally:
            if hasattr(job, "tempfiles"):
                if not hasattr(job, "R"):  # low likelihood...
                    job.R = pyRserve.connect()
                for file in job.tempfiles:
                    job.R.r.unlink(file)
            if hasattr(job, "R") and job.R:
                job.R.close()
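# ---------------------------------------------------------------------------
# The dispatcher above looks handlers up in doSubTool, which this snippet
# does not define. A plausible shape for it -- purely illustrative, with a
# hypothetical handler name -- is a module-level dict mapping subtool names
# to functions taking (job, client) and returning the results dict that
# performModel() unpacks:
def doSummarize(job, client):
    # ... run the R work through job.R and assemble output files ...
    return {"field": None, "units": None,
            "result_file": "summary", "files": {}}


doSubTool = {
    "Summarize": doSummarize,  # hypothetical subtool name
}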
def __init__(self):
    self.connection = pyRserve.connect()
    self.checkConnection(True)
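# ---------------------------------------------------------------------------
# checkConnection() is not shown in this snippet. A minimal sketch of such a
# liveness check (hypothetical implementation, assuming the pyRserve
# connection object is kept on self.connection):
def checkConnection(self, reconnect=False):
    try:
        alive = (not self.connection.isClosed
                 and self.connection.eval('1 + 1') == 2)
    except Exception:
        alive = False
    if not alive and reconnect:
        self.connection = pyRserve.connect()  # raises if Rserve is down
        alive = True
    return alive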
def connect(): return R.connect()
import sys

import numpy
import pyRserve
from flask import request, render_template, send_file
from PIL import Image

from . import main
from . import model

THREE_MONTH_AGO = '2017-02-01'
SIX_MONTH_AGO = '2016-11-01'
ONE_YEAR_AGO = '2016-05-01'
TWO_YEAR_AGO = '2015-05-01'

rConn = pyRserve.connect(host='localhost', port=6311)
model = model.Lottery()


@main.route('/histogram/')
def serve_diagram():
    duration = request.args.get('duration')
    d = None
    if duration == '3m':
        d = THREE_MONTH_AGO
    elif duration == '6m':
        d = SIX_MONTH_AGO
    elif duration == '1y':
        d = ONE_YEAR_AGO
    elif duration == '2y':
        d = TWO_YEAR_AGO
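# ---------------------------------------------------------------------------
# The remainder of the route is truncated here. A sketch of how such a
# handler might pass the cutoff date to R and return the plot that R writes
# to disk; the R-side function name (draw_histogram) and the output path are
# hypothetical, not taken from the original code:
@main.route('/histogram/sketch/')
def serve_diagram_sketch():
    cutoff = request.args.get('since', ONE_YEAR_AGO)
    out_path = '/tmp/histogram.png'
    # Assumes draw_histogram() has been sourced into the Rserve session and
    # writes a PNG histogram of the draws since `cutoff` to the given path.
    rConn.eval('draw_histogram("%s", "%s")' % (cutoff, out_path))
    return send_file(out_path, mimetype='image/png')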
def performModel(input_files, tool_config, client, subtool_name=False):
    """
    input_files is the set of data to analyze from the NMTK server
    tool_config is the "header" part of the input
    client is an object of type NMTK_apps.helpers.server_api.NMTKClient
    subtool_name is provided if the tool manages multiple configurations
    """
    logger = performModel.get_logger()
    logger.debug("input_files: %s" % (input_files,))
    logger.debug("tool_config\n%s\n" % (tool_config,))
    # Use exception handling to generate "error" results -- everything that
    # doesn't generate good results should throw an exception. Use the extra
    # 'with' syntax to ensure temporary files are promptly cleaned up after
    # tool execution. With luck, the tool server will also do periodic garbage
    # collection on tools that don't pick up after themselves.

    # Prepare a connection to R
    R = None
    with Config.Job(input_files, tool_config) as job:
        try:
            # Initialize the job setup (can't do in __init__ as we would need
            # to try too hard)
            job.setup()

            # Set up a master directory of parameters
            parameters = {}
            compute = parameters["compute"] = {}
            raster = parameters["raster"] = {}
            image = parameters["image"] = {}

            ########################################
            # Computation (Python/R)
            compute_factors = job.getParameters("computation_params")
            logger.debug(compute_factors)

            # Determine specific computational engines to use, if any
            computetype = compute["type"] = compute_factors.get("computetype", "None")

            # Default the engine flags so later stages can test them even
            # when no computation was requested
            compute_R = compute["with_R"] = False
            compute_Python = compute["with_Python"] = False

            # Notify the user via a status update
            if computetype != "None":
                compute_R = compute["with_R"] = computetype in ["R", "Both"]
                compute_Python = compute["with_Python"] = computetype in ["Python", "Both"]
                if compute_R or compute_Python:
                    computemsg = "Computation will occur using"
                    if compute_R:
                        computemsg += " R"
                        if compute_Python:
                            computemsg += " and"
                    if compute_Python:
                        computemsg += " Python"
                else:
                    computemsg = "Computation will not occur"
                # Determine parameter; default is to square it, same as /tool_config
                compute["power"] = compute_factors.get("raisetopower", 2)
                # Determine input (file/constant data); we'll iterate later
                compute_file = job.getFeatures("computation")
                # Determine what to return (result file)
                compute_output = job.getParameters("computation_output")
                compute["PythonName"] = compute_output.get("python_result", "PowerOfPython")
                compute["RName"] = compute_output.get("r_result", "PowerOfR")
            else:
                computemsg = "Computation was not requested."
            client.updateStatus(computemsg)

            ########################################
            # Rasterization (desired, input file provided, default to use instead)
            raster_factors = job.getParameters("rasterization_params")
            # Set up the default files
            default_vector_name = os.path.join(settings.STATIC_ROOT,
                                               "Configurator/Vector_Test.geojson")
            default_raster_file = os.path.join(settings.STATIC_ROOT,
                                               "Configurator/Raster_Test.tif")
            # Check if rasterization was requested
            raster["do"] = raster_factors.get("dorasterize", 0)
            # Get the filename to rasterize, substituting in a default if no
            # file is provided. We won't load the file data since we're just
            # going to hand the file path to R for processing.
            try:
                logger.debug("File: %s" % (job.datafile("rasterize")))
                raster["vectorfile"] = job.datafile("rasterize")  # the file name
            except Exception as e:
                # No file provided, so we'll pull out the default
                logger.debug(str(e))
                logger.debug("Using default vector file for rasterizing")
                raster["vectorfile"] = default_vector_name
            # Pull the rastervalue from the job configuration. We don't care
            # if it's a literal numeric value or a property name.
            # The R function to rasterize the file will use the value as a
            # constant if provided, or will use a string as the name of a
            # feature attribute to provide the raster value for that feature.
            raster_value_set = job.getParameters("rasterize")
            raster["value"] = raster_value_set.get("rastervalue", 1)
            raster["x_dim"] = raster_value_set.get("raster_x", 300)
            raster["y_dim"] = raster_value_set.get("raster_y", 300)
            # raster["proportional"] = raster_value_set.get('proportional',0)
            # raster["smoothing"] = raster_value_set.get('smoothing',0)

            # Set output format (Rdata-RDS, Erdas IMAGINE, geoTIFF)
            raster_output = job.getParameters("rasterization_output")
            raster["returnvector"] = raster_output.get("return_vector", 0)
            raster["format"] = raster_output.get("return_raster", "geoTIFF")
            rasterformat = RasterFormatTable.get(raster["format"], {})
            if not rasterformat:
                # Did not request return of a raster
                raster["returnraster"] = 0
                msg = "Invalid raster format %s" % (raster["format"],)
                client.updateStatus(msg)
                raster["rasterfile"] = ""
                raster["mimetype"] = ""
                raster["displayname"] = ""
                raster["savefunc"] = ""
                raster["loadfunc"] = ""
            else:
                raster["returnraster"] = 1
                rasterbasename = raster_output.get("raster_basename", "raster")
                if not raster["do"]:
                    raster["format"] = "geoTIFF"
                    rasterformat = RasterFormatTable.get(raster["format"], {})
                raster["rasterfile"] = os.tempnam() + rasterformat["extension"]
                raster["mimetype"] = rasterformat["mimetype"]
                # The name to offer when the raw raster is sent back
                raster["displayname"] = rasterbasename + rasterformat["extension"]
                if raster["do"]:
                    # R function to save a dataset to rastername in the
                    # selected format
                    raster["savefunc"] = rasterformat["save"] % (raster["rasterfile"],)
                else:
                    # don't save over the default file (shouldn't be called,
                    # but just in case!)
                    raster["savefunc"] = "savefunc<-function(obj){invisible(0)}"
raster["loadfunc"] = rasterformat["load"] % ( raster["rasterfile"], ) # R Function to load a raster for plotting if raster["do"]: # don't bother setting up unless rasterization requested client.updateStatus("Rasterization successfully configured.") else: client.updateStatus("Rasterization was not requested") ######################################## # Image Generation (desired, output format) image_selection = job.getParameters("imaging_params") image["vector"] = image_selection.get("imagevector", 0) image["raster"] = image_selection.get("imageraster", 0) image_output = job.getParameters("image_output") image["format"] = image_output["imageformat"] imageformat = ImageFormatTable.get(image["format"][0:3], {}) if not imageformat: # Unknown format, don't do images image["vector"] = image["raster"] = 0 msg = "Invalid image format: %s (%s)" % (image["format"], image["format"][0:3]) client.updateStatus(msg) if image["vector"] or image["raster"]: client.updateStatus("Imaging successfully configured.") else: client.updateStatus("Imaging was not requested.") client.updateStatus("Parameter & data file validation complete.") ################################### # Now perform the requested actions ################################### # Configuration Summary # Assemble an output file of what was configured (essentially for debugging) config_summary = StringIO.StringIO() dw = csv.DictWriter(config_summary, fieldnames=("Description", "Value"), extrasaction="ignore") dw.writeheader() for section in ["compute", "raster", "image"]: if section in parameters: dw.writerow({"Description": "Section", "Value": section}) for description, value in parameters[section].iteritems(): dw.writerow({"Description": "Parameter-%s-%s" % (section, description), "Value": str(value)}) del dw ################################### # Computation # Remember that all parameters, regardless of their stated type, arrive # in the tool as string representations (the promise is just that the # string will probably convert successfully to the tool_config type). # Thus all the computation code should perform idempotent conversions... if compute_Python: pyPower = decimal.Decimal(str(compute["power"])) if compute_R: if not R: R = pyRserve.connect() else: R.connect() R.r.rpower = compute["power"] # R.r.r... # The JSON parser (used in displaying NMTK results) chokes on a NaN # returned directly from R because it doesn't recognize an unquoted # NaN as numeric and sees it as a string without quotes; We'll # account for that in the R function and return a string R.r( """ # Fun with R closure magic: convert the power from string to number # once then embed that in a function and return the function, which # we promptly call with the power to make the actual computational # function. Note parenthetical priorities... 
                    compute <- (function(rp) {
                        rpower <- as.numeric(rp)
                        function(value) {
                            result <- as.numeric(value) ** rpower
                            if (is.nan(result)||is.na(result)) result <- "NaN-R"
                            result
                        }
                    })(rpower)
                    # Later, just call compute(value)
                    """,
                    void=True,
                )
            if compute_R or compute_Python:
                for row in compute_file:  # Loop over the rows in the input file
                    for field, value in row.iteritems():
                        if compute_Python:
                            try:
                                pyValue = decimal.Decimal(str(value))
                            except Exception:
                                pyValue = decimal.Decimal.from_float(float("nan"))
                            if not pyValue.is_nan():
                                pyResult = pyValue ** pyPower
                            else:
                                pyResult = "NaN-Python"
                            compute_file.addResult(compute["PythonName"] + "_" + field, pyResult)
                        if compute_R:
                            Rresult = R.r.compute(value)
                            logger.debug(
                                "Computed R result for field %s, Result %s of value %s ** power %s"
                                % (field, Rresult, value, R.r.rpower)
                            )
                            compute_file.addResult(compute["RName"] + "_" + field, Rresult)
            if R:
                R.close()
            client.updateStatus("Done with computations")

            ###################################
            # Rasterization
            # If requested, take the input vector (either a supplied or
            # default file) and pass it through the R rasterization.
            # If NOT requested, but imaging of a raster was requested, just
            # use the default raster from the world of static data
            if raster["do"]:
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
                R.r.vectorfile = raster["vectorfile"]  # File to rasterize
                # Note that the output file is built into "savefunc"
                R.r.xdim = raster["x_dim"]  # Desired raster resolution, x and y
                R.r.ydim = raster["y_dim"]
                # Value for raster cells, either text/fieldname or numeric value
                R.r.rastervalue = raster["value"]
                # Load the function to save the raster in the desired format
                R.r(raster["savefunc"])
                # Actions:
                #   Load vector file
                #   Create extent from the file
                #   Create a blank raster with the right resolution (use default values)
                #   Rasterize the input file; raster.field can flexibly be a field name or a value
                #   Write it out in a suitable format for later plotting
                R.r(
                    """
                    require(rgdal)
                    require(sp)
                    require(raster)
                    input.file <- readOGR(vectorfile,layer="OGRGeoJSON")
                    e <- extent(input.file)
                    t <- raster(e,nrows=ydim,ncols=xdim)
                    rsa <- rasterize(input.file,t,field=rastervalue)
                    savefunc(rsa)
                    """,
                    void=True,
                )
                if R:
                    R.close()

            ###################################
            # Imaging
            # If requested, take either the vector, the rasterized result or
            # both and pass them through R
            # Image file is raster["vectorfile"]
            image["vectorplotfile"] = ""
            image["rasterplotfile"] = ""
            if image["vector"] or image["raster"]:
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
                # TODO: Include basic plot parameters (e.g. title of what we're plotting)
                R.r.plotformat = imageformat["R-device"]  # Select R image output device
                R.r(
                    """
                    plotfunc <- function(to.plot, outfile) {
                        plotdev <- get(plotformat)
                        plotdev(file=outfile)
                        plot(to.plot)
                        dev.off()
                    }
                    """,
                    void=True,
                )
                if image["vector"]:
                    try:
                        R.r.plotfile = raster["vectorfile"]
                        R.r.outfile = image["vectorplotfile"] = os.tempnam()
                        R.r(
                            """
                            library(sp)
                            library(rgdal)
                            to.plot <- readOGR(plotfile,layer="OGRGeoJSON")
                            plotfunc(to.plot,outfile)
                            """,
                            void=True,
                        )
                    except Exception as e:
                        logger.debug(str(e))
                        client.updateStatus("Imaging failure (vector): " + str(e))
                if image["raster"]:
                    try:
                        # Use the RasterFormatTable load function to obtain
                        # the to.plot dataset; install the load function for
                        # a raster in the requested format
                        R.r(raster["loadfunc"])
                        R.r.outfile = image["rasterplotfile"] = os.tempnam()
                        R.r(
                            """
                            library(raster)
                            to.plot <- loadfunc()
                            plotfunc(to.plot,outfile)
                            """,
                            void=True,
                        )
                    except Exception as e:
                        logger.debug(str(e))
                        client.updateStatus("Imaging failure (raster): " + str(e))
                if R:
                    R.close()

            ###################################
            # Prepare results
            outfiles = {}
            main_result = "summary"
            comp_result = "computations"
            vector_input = "vectorinput"
            raster_file = "rasterfile"
            vector_plot = "vectorplotfile"
            raster_plot = "rasterplotfile"
            # Result files are a dictionary with a key (the multi-part POST
            # slug), plus a 3-tuple consisting of the recommended file name,
            # the file data, and a MIME type
            outfiles[main_result] = ("summary.csv", config_summary.getvalue(), "text/csv")
            if compute_R or compute_Python:
                outfiles[comp_result] = (
                    "computation.%s" % (compute_file.extension,),
                    compute_file.getDataFile(),
                    compute_file.content_type,
                )
            if image["vectorplotfile"] or raster["do"] or image["rasterplotfile"]:
                # There really should always be an "R" in this case
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
            if raster["returnvector"]:
                try:
                    vecbase = open(raster["vectorfile"])
                    outfiles[vector_input] = ("vectorbase.geojson", vecbase.read(), "application/json")
                    client.updateStatus("Returning input vector file as geojson")
                    vecbase.close()
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Return vector failure: " + str(e))
            if image["vectorplotfile"]:
                try:
                    vecimg = open(image["vectorplotfile"], "rb")
                    outfiles[vector_plot] = (
                        "vectorplot.%s" % (imageformat["extension"],),
                        vecimg.read(),
                        imageformat["mimetype"],
                    )
                    vecimg.close()
                    client.updateStatus("Removing temporary vector file: " + image["vectorplotfile"])
                    # Get R to unlink the temporary file so we have permission
                    R.r.unlink(image["vectorplotfile"])
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing vector image output file failed: " + str(e))
            if raster["returnraster"]:
                # if we are expected to return a raster
                try:
                    rasterfile = open(raster["rasterfile"], "rb")
                    outfiles[raster_file] = (raster["displayname"], rasterfile.read(), raster["mimetype"])
                    rasterfile.close()
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing raw raster output file failed: " + str(e))
            if raster["do"]:
                # clean up the temporary rasterization file (may have done
                # this without returning the raw file)
                try:
                    client.updateStatus("Removing temporary raster file: " + raster["rasterfile"])
                    # Get R to unlink the temporary file so we have permission
                    R.r.unlink(raster["rasterfile"])
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Removing temporary raster file failed: " + str(e))
            if image["rasterplotfile"]:
                try:
                    rstimg = open(image["rasterplotfile"], "rb")
                    outfiles[raster_plot] = (
                        "rasterplot.%s" % (imageformat["extension"],),
                        rstimg.read(),
                        imageformat["mimetype"],
                    )
                    rstimg.close()
                    client.updateStatus("Removing temporary raster file: " + image["rasterplotfile"])
                    # Get R to unlink the temporary file so we have permission
                    R.r.unlink(image["rasterplotfile"])
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing raster image output file failed: " + str(e))
            if outfiles:
                client.updateResults(
                    result_field=None,        # Default field to thematize in result
                    units=None,               # Text legend describing the units of 'result_field'
                    result_file=main_result,  # Supply the file 'key' (see outfiles above)
                    files=outfiles,           # Dictionary of tuples providing result files
                )
            if R:
                R.close()
        except Exception as e:
            msg = "Job failed."
            logger.exception(msg)
            logger.exception(str(e))
            job.fail(msg)
            job.fail(str(e))
            client.updateResults(payload={"errors": job.failures}, failure=True, files={})
    # Clean up R after all is done (harmless if R is None, cleans up
    # connection to Rserve otherwise)
    del R
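# ---------------------------------------------------------------------------
# The connect / reconnect / close dance wrapped around each R phase above
# could be packaged once. A minimal sketch (not part of the original tool)
# of a context manager that yields a live pyRserve connection and always
# closes it afterwards:
import contextlib

import pyRserve


@contextlib.contextmanager
def rserve_session(host='localhost', port=6311):
    conn = pyRserve.connect(host=host, port=port)
    try:
        yield conn
    finally:
        conn.close()

# Usage: each phase gets a fresh connection that is reliably closed.
# with rserve_session() as R:
#     R.r.xdim = 300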