Code example #1
    def create_rserve_connection(cls, host_IN=None, port_IN=None):

        # return reference
        value_OUT = None

        # declare variables
        got_host = False
        my_host = ""
        got_port = False
        my_port = -1

        # make Rserve Connection instance.

        # got a host?
        if (host_IN is not None) and (host_IN != ""):

            # yes
            my_host = host_IN
            got_host = True

        else:

            # no
            my_host = cls.DEFAULT_HOST
            got_host = False

        # -- END check to see if we have a host. --#

        # got a port?
        if (port_IN is not None) and (port_IN != "") and (port_IN > 0):

            # yes
            my_port = port_IN
            got_port = True

        else:

            # no
            my_port = cls.DEFAULT_PORT
            got_port = False

        # -- END check to see if we have a port. --#

        # do we have either a host or a port?
        if got_host or got_port:

            # we do.  Include in connect() call.
            value_OUT = pyRserve.connect(host=my_host, port=my_port)

        else:

            # no.  Just call connect().
            value_OUT = pyRserve.connect()

        # -- END check to see if host or port. --#

        return value_OUT
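
The helper above simply falls back to class-level defaults when no host or port is supplied. The same behavior fits in a few lines; a minimal standalone sketch (the DEFAULT_HOST/DEFAULT_PORT values here are assumptions, matching pyRserve's own defaults of localhost:6311):

import pyRserve

DEFAULT_HOST = "localhost"
DEFAULT_PORT = 6311

def connect_with_defaults(host=None, port=None):
    # fall back to the defaults when no explicit host/port is given
    host = host if host else DEFAULT_HOST
    port = port if (port and port > 0) else DEFAULT_PORT
    return pyRserve.connect(host=host, port=port)

conn = connect_with_defaults()
print(conn.eval("1 + 1"))  # 2.0
conn.close()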
Code example #2
File: views.py Project: shabab7/DjangoRpyDemo
 def getRConnection():
     global conn
     if conn and type(conn) is pyRserve.rconn.RConnector and not conn.isClosed:
         return conn.r
     conn = pyRserve.connect(host=rServeHost, port=6311)
     conn.eval(rFuncStr)
     return conn.r
Code example #3
 def __init__(self):
     self.conn = pyRserve.connect()
     self.conn.voidEval('setwd("' + config.SCRIPTS_DIR + '")')
     for pkg in reqd_packages:
         self.conn.voidEval('library("' + pkg + '")')
     for src in reqd_sources:
         self.conn.voidEval('source("' + src + '")')
Code example #4
def rzindex_wrapper(reqId, cand):
	conn=pyRserve.connect()
	conn.voidEval('source("/home/pandera/RCode/zindex_main.r",chdir=T)')
	score=conn.r.zindex_main(reqId,'r',cand)
	j=len(score[0])
	zindex=[]
	for num in range(0,j):
		zend=[]
		dist={}
		dist1={}
		dist["candidate_id"]=score[0][num]
		dist["requisition_id"]=score[4][num]
		dist["zindex_score"]=score[5][num]
		dist1["name"]="Experience"
		dist1["score"]=score[3][num]
		zend.append(dist1)
		dist1={}
		dist1["name"]="Skills"
		dist1["score"]=score[1][num]
		zend.append(dist1)
		dist1={}
		dist1["name"]="Job Fit"
		dist1["score"]=score[2][num]
		zend.append(dist1)
		dist["zindex_distribution"]=zend
		zindex.append(dist)
	conn.close()
	return zindex
Code example #5
def manual_ISA_gen_seeds(binary_mat, est_col_width, pre_exclude_gene_indices):
    num_row = binary_mat.shape[0]
    num_col = binary_mat.shape[1]
    num_seeds = int(num_col / est_col_width)
    print('in manual_ISA num_col = ' + str(num_col))
    print('in manual_ISA num_seeds = ' + str(num_seeds))
    conn = pyRserve.connect()
    conn.r('require("isa2")')
    seeds_mat = conn.r('generate.seeds(' + str(num_col) + ',count = ' +
                       str(num_seeds) + ',sparsity=' + str(est_col_width) +
                       ')')
    seeds_list = []
    num_seeds = seeds_mat.shape[1]

    #make a ndarray that having 1 at pre_excluded_indices
    pre_exclude_mask = numpy.zeros(num_col)
    for pre_exclude_gene_index in pre_exclude_gene_indices:
        pre_exclude_mask[pre_exclude_gene_index] = 1

    for i in range(num_seeds):
        curr_seed_vec = seeds_mat[:, i]
        #if curr_seed_vec contains pre_excluded gene_indices, remove this seed
        #the product is greater than 0 only when index matches
        if numpy.dot(curr_seed_vec, pre_exclude_mask) > 0:
            continue
        seeds_list.append(curr_seed_vec)

    return seeds_list
Code example #6
File: rconnection.py Project: erickramer/sparrow
    def __call__(self):
        conn = pyRserve.connect(host=self.host, port=self.port)

        if conn.eval("1+1") != 2:
            raise IOError("Unable to execute on R connection")

        return RConnection(conn)
Code example #7
    def get(self, session_id):
        if session_mgr.is_ok(session_id):
            try:
                # Call the mlrMBO R script to actually propose a point.
                conn = pyRserve.connect()
                conn.eval('setwd("..")')

                # Prepare call in R
                commandline = 'propose("%s")' % str(session_id)

                # Run that function
                point = conn.eval(commandline)

                # Close connection
                conn.close()

                # Parse the JSON returned by R
                point = json.loads(point)

            except Exception as e:
                print(e)
                session_mgr.close(session_id)
                return error_mgr.internal_error()
            return point, 200
        else:
            return error_mgr.no_session()
Code example #8
def rzindex_wrapper(ReqId, cand):
	zindex={}
	conn = None
	try:
		conn=pyRserve.connect()
		conn.voidEval(r_conn_string)
		score=conn.r.zindex_main(ReqId,'c',cand)
		conn.close()
		j=len(score[0])
		for num in range(0,j):
			candidate_id=score[0][num]
			zindex[candidate_id] = {
				"zindex_distribution" : [
					{
						"name" : "Experience",
						"score" : score[3][num]
					},
					{
						"name" : "Skills",
						"score" : score[1][num]
					},
					{
						"name" : "Job Fit",
						"score" : score[2][num]
					}
				],
				"zindex_score" : score[5][num]
			}
	except Exception as e:
		DebugException(e)
		if conn is not None:
			conn.close()

	return zindex
Code example #9
File: app.py Project: JovingeLabSoftware/det-handler
def det_syncer():

    # only have to sync if changes were made to our two inventory forms
    if request.form['instrument'] in ('pbmc', 'cytokine'):

        # connect to our Rserve instance
        conn = pyRserve.connect()

        # set our REDCap token appropriately
        if request.form['project_id'] == '65':
            conn.eval("tok <- readRDS('~/.redcap/controls/token.rds')")
        elif request.form['project_id'] == '52':
            conn.eval("tok <- readRDS('~/.redcap/ecmo/token.rds')")
        else:
            print('No token for this project id...')
            print(str(request.form))
            return make_response(jsonify({'result': 'failed'}), 400)


        print('Syncing the following data:')
        print(str(request.form))
        print('----------')


        # fill in our script & run it
        to_run = sync_template.format(**request.form)
        conn.eval(to_run)

    else:
        print('Nothing to sync...')
        print(str(request.form))
        print('----------')

    return make_response(jsonify({'result': 'success'}), 201)
Code example #10
File: rserve_resources.py Project: D-I-L/pydgin
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from Rserve. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get('marker', 'rs2476601')
            dataset = filters.get('dataset', 'EUR').replace('-', '')
            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
            elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, 'seqid')

            rserve = getattr(settings, 'RSERVE')
            conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
            pop_str = conn.r.get_pop(dataset, seqid, mid1)

            pops = json.loads(str(pop_str))
            populations = []
            for pop in pops:
                pops[pop]['population'] = pop
                populations.append(pops[pop])
            conn.close()
            return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
        except (TypeError, ValueError, IndexError, ConnectionError):
            return [ElasticObject(initial={'populations': None, 'marker': mid1})]
Code example #11
def compute_importance_step1(vectors, limit):
    conn = pyRserve.connect()
    try:
        conn.eval('library(\'relaimpo\')', void=True)
        conn.eval('df <- data.frame()', void=True)
        size = 0
        anomalies = []
        results = {}
        for i in range(len(vectors)):
            v = vectors[i]
            if v[-1] <= limit:
                conn.r.x = v
                conn.eval('df <- rbind(df,x)', void=True)
                size += 1
                if size == 1:
                    col_names = ['API' + str(x) for x in range(len(v) - 1)]
                    col_names.append('Total')
                    conn.r.df_names = col_names
                    conn.eval('names(df) <- df_names', void=True)
                if size > len(v):
                    results[i] = compute_importance_internal(conn)
                else:
                    results[i] = 'Insufficient Data'
            else:
                anomalies.append(i)
        return anomalies, results
    finally:
        conn.close()
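
The conn.r.x = v and conn.r.df_names = col_names assignments above use pyRserve's attribute-style variable transfer into the R session. A minimal standalone sketch of that round trip (the variable names and values here are illustrative):

import pyRserve

conn = pyRserve.connect()
conn.r.x = [1.0, 2.0, 3.0]           # copy a Python list into R as 'x'
conn.eval('y <- x * 2', void=True)   # evaluate in R without transferring a result
print(conn.r.y)                      # fetch 'y' back into Python
conn.close()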
Code example #12
File: rconnection.py Project: erickramer/sparrow
    def __init__(self, port=6311, host='localhost'):
        self.port = int(port)
        self.host = str(host)

        try:
            self.conn = pyRserve.connect(host=host, port=port)
        except RConnectionRefused:
            # try to start Rserver on localhost if possible
            if host == "localhost":
                os.system("R CMD Rserve --RS-port %i --no-save" % port)
                self.conn = pyRserve.connect(host=host, port=port)
            else:
                raise RConnectionRefused

        res = self.conn.eval("1+1")
        if res != 2:
            raise IOError("Unable to execute on R connection")
Code example #13
File: FV.py Project: BenJamesbabala/Syft
    def __init__(self,conn=None):
        """Creates connection to R server and loads HE library containing FV."""

        if conn is None:
            self.conn = pyRserve.connect()
        else:
            self.conn = conn
        self.conn.r('library("HomomorphicEncryption")',void=True)
Code example #14
def createRServeConn():
    global Rconn
    global RconnStatus
    Rconn = pyRserve.connect(host=RServeIP, port=6311)
    Rconn.voidEval('library("quantmod")')
    Rconn.voidEval('library("PerformanceAnalytics")')
    Rconn.voidEval('library("RMySQL")')
    Rconn.voidEval('library("xts")')
    RconnStatus = True
Code example #15
def crossvalidate(profiles, true_group_name, holdout_group_name=None, 
                  sva=False, train=NNClassifier):
    profiles.assert_not_isnan()
    keys = profiles.keys()
    true_labels = regroup(profiles, true_group_name)
    labels = list(set(true_labels.values()))

    if holdout_group_name:
        holdouts = regroup(profiles, holdout_group_name)
    else:
        holdouts = dict((k, k) for k in keys)

    confusion = {}
    for ho in set(holdouts.values()):
        test_set_mask = np.array([tuple(holdouts[k]) == ho for k in keys], 
                                 dtype=bool)
        training_features = profiles.data[~test_set_mask, :]
        test_features = profiles.data[test_set_mask, :]
        training_labels = [labels.index(true_labels[tuple(k)]) 
                           for k, m in zip(keys, ~test_set_mask) if m]
        if sva:
            import pyRserve
            conn = pyRserve.connect()
            conn.r.traininglabels = np.array(training_labels, dtype='i4')
            conn.r.trainData = training_features.ravel().reshape(training_features.shape, order='F').T
            conn.r.testData = test_features.ravel().reshape(test_features.shape, order='F').T
            #import pdb; pdb.set_trace()
            #assert conn.r('trainData[1,2]') == training_features[1, 0]
            #assert conn.r('testData[1,2]') == test_features[1, 0]
            conn.r('library(sva)')
            conn.r('trainData <- as.matrix(trainData)')
            conn.r('testData <- as.matrix(testData)')
            conn.r('trainpheno <- data.frame(label=traininglabels)')
            #conn.r('write.table(trainData, "/tmp/trainData.txt")')
            #conn.r('write.table(testData, "/tmp/testData.txt")')
            #conn.r('write.table(trainpheno, "/tmp/trainpheno.txt")')
            conn.r('trainMod <- model.matrix(~as.factor(label), trainpheno)')
            nsv = conn.r('num.sv(trainData, trainMod)')
            print(nsv, 'surrogate variables')
            conn.r('trainMod0 <- model.matrix(~1, trainpheno)')
            conn.r('trainSv <- sva(trainData, trainMod, trainMod0, B=1)')
            conn.r('fsvaobj <- fsva(trainData, trainMod, trainSv, testData)')
            filtered_train = getattr(conn.r, 'fsvaobj$db').T
            filtered_test = getattr(conn.r, 'fsvaobj$new').T
        else:
            filtered_train = training_features
            filtered_test = test_features

        model = train(filtered_train, training_labels)
        for k, f, m in zip(keys, profiles.data, test_set_mask):
            if not m:
                continue
            true = true_labels[k]
            predicted = labels[model.classify(f)]
            confusion[true, predicted] = confusion.get((true, predicted), 0) + 1
    return confusion
Code example #16
def rzindex_wrapper_insert(ReqId, cand):
	conn = None
	try:
		conn=pyRserve.connect()
		conn.voidEval(r_conn_string)
		status=conn.r.zindex_main(ReqId,'c',cand)
		conn.close()
		print("Success")
	except Exception as e:
		DebugException(e)
		if conn is not None:
			conn.close()
Code example #17
def rzindex_candidate(Candlist):
	conn = None
	try:
		conn=pyRserve.connect()
		conn.voidEval(r_conn_candidate)
		status=conn.r.candidate_incremental(Candlist)
		conn.close()
	except Exception as e:
		print("HERE - %s" % e)
		DebugException(e)
		if conn is not None:
			conn.close()
Code example #18
def rzindex_wrapper_newreq(ReqId):
	conn = None
	try:
		conn=pyRserve.connect()
		conn.voidEval(r_conn_requisition)
		status=conn.r.reqScoring(ReqId)
		conn.close()
		print("Success")
	except Exception as e:
		DebugException(e)
		if conn is not None:
			conn.close()
Code example #19
def get_connection():
    try:
        return True, pyRserve.connect(host='localhost',
                                      port=6311,
                                      atomicArray=True)
    except Exception as ex:
        return False, {
            'error': 'Unable to connect to R server!',
            'code': 500,
            'details': str(ex)
        }
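
A short usage sketch for the helper above; callers unpack the (success, payload) pair and only treat the payload as a connection when the flag is true:

ok, result = get_connection()
if ok:
    try:
        print(result.eval("R.version.string"))
    finally:
        result.close()
else:
    # result is the error dictionary built in the except branch
    print(result['error'], result['details'])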
Code example #20
def run_hmm(series, nStates):
    conn = pyRserve.connect()
    srcpath = os.getcwd()  # unused; hmm.r is actually sourced from config.CORE_DIR below
    conn.eval('setwd("' + config.CORE_DIR + '")')
    conn.eval('source("hmm.r")')
    conn.eval('library("RHmm")')
    conn.r.sc = series
    conn.r.nStates = nStates
    streval= 'v <- run_hmm(sc, n_states= nStates )'
    conn.eval(streval)
    return conn.r.v
Code example #21
File: analytics.py Project: ranjanprj/simfin
    def get_r_plot(self):
        self.cursor.execute(sql.profits)

        res = self.cursor.fetchall()
        self.arr = []
        for i in res:
            self.arr.append(float(i[0]))


        conn = pyRserve.connect()
        conn.r.xvar = self.arr
        return conn.eval(rprog.prog)
Code example #22
def manual_ISA(args):
    start_time = time.time()
    #pdb.set_trace()
    binary_mat = args[0]
    abs_cutoff = args[1]
    per_cutoff = args[2]
    converge_epsilon = args[3]
    converge_depth = args[4]
    seed0 = args[5]
    #print '\nthis thread seed0 = '+ str(numpy.nonzero(seed0))

    num_row = binary_mat.shape[0]
    num_col = binary_mat.shape[1]
    if False:  #using threadPool, seed0 will be passed in
        conn = pyRserve.connect()
        conn.r('require("isa2")')
        seeds = conn.r('generate.seeds(' + str(num_col) + ',count = 1)')
        num_seeds = seeds.shape[1]
        len_each_seed = seeds.shape[0]
        seed0 = seeds[:, 0]

    prev_cols = seed0
    curr_depth = 0
    #print 'seed0 = ' + str(numpy.nonzero(seed0))
    while True:
        curr_rows = manual_ISA_filter_row(binary_mat, prev_cols, abs_cutoff,
                                          per_cutoff)
        curr_cols = manual_ISA_filter_col(binary_mat, curr_rows, abs_cutoff,
                                          per_cutoff)
        if not numpy.any(curr_cols):  # if converged to an empty set, terminate early
            print('xxx ABORTION, ALL ZERO')
            break
        if converge(curr_cols, prev_cols, converge_epsilon):
            print('$$$ REAL CONVERGE num_rows = ' +
                  str(numpy.count_nonzero(curr_rows)) + '     num_cols = ' +
                  str(numpy.count_nonzero(curr_cols)))
            #pdb.set_trace()
            a = 1
            break
        elif curr_depth > converge_depth:
            print('xxx TIME CONVERGE')
            #pdb.set_trace()
            a = 1
            break
        else:
            prev_cols = curr_cols  #iterate
            curr_depth = curr_depth + 1

    #pdb.set_trace()
    #print 'this thread takes {} seconds'.format(time.time() - start_time)
    return curr_rows, curr_cols
Code example #23
File: wrapper.py Project: EduardoGarrido90/bopc
def main(job_id, params):

    params = copy.deepcopy(params)

    np.random.seed(NUM_EXP)

    #Parsing parameters.
    tests = ["cor", "zf", "mi-g", "mi-g-sh"]
    test = tests[np.argmax(params['b_test'])]
    alpha = np.power(10, float(params['c_alpha']))

    #Experiments to be carried out.
    total_SHD = 0.0
    size_nodes = np.array([25, 50, 75, 100])
    size_neighbors = np.array([2, 8])
    size_samples = np.array([10, 50, 100, 500])
    total_experiments = size_nodes.shape[0] * size_neighbors.shape[
        0] * size_samples.shape[0]
    true_bn_fr = ""
    sample_bn_fr = ""
    #Connect via pyRserve; this raises an exception if Rserve is not listening on the port.
    conn = pyRserve.connect(port=PORT)
    i = 0
    for node_example in size_nodes:
        for neighbor_example in size_neighbors:
            true_bn_fr = str(node_example) + "_" + str(
                neighbor_example) + "_r" + str(NUM_EXP) + ".rds"
            for sample_example in size_samples:
                script = "library(\"bnlearn\"); "
                script += "bn_true <- readRDS(\"" + DATA_ROUTE + "/" + true_bn_fr + "\"); "
                sample_bn_fr = str(node_example) + "_" + str(
                    neighbor_example) + "_r" + str(NUM_EXP) + "_" + str(
                        sample_example) + ".rds"
                script += "bn_data <- readRDS(\"" + DATA_ROUTE + "/" + sample_bn_fr + "\"); "
                script += (
                    "bn_learned <- bnlearn::pc.stable(x = bn_data, test = \"" +
                    test + "\", alpha = " + str(alpha) + "); ")
                script += "result <- shd(bn_learned, bn_true);"
                #We send the script and wait for evaluation.
                conn.eval(script)
                #Once the script is finished, we retrieve the result variable.
                shd = conn.eval("result")
                shd_norm = shd / float(
                    (node_example * (node_example - 1) / 2.0))
                total_SHD += shd_norm
                i += 1
                print(i)

    conn.close()

    return {'shd': total_SHD / float(total_experiments)}
Code example #24
File: views.py Project: alanpoon/rgis
def gwr_initialize(request):

    if request.method == "POST":

        data = json.loads(request.body)

        shapefile_filename = data['namespace']

        shapefile_object = Shapefile.objects.get(name=shapefile_filename)

        conn = pyRserve.connect()

        # get the path to shapefile
        shapefile_filename = shapefile_object.get_full_path() + "projected"

        # load the function
        functionFile = open(settings.BASE_DIR + '/fileupload/new.r')
        ##windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\new.r')
        functionContent = functionFile.read()

        conn.voidEval(functionContent)

        try:
            # get the list of columns in this shapefile
            nameslist = conn.r.getshpheader(shapefile_filename)

            nameslist = list(nameslist)

        except:
            message = "error running function in r"
            return HttpResponse(json.dumps({"status":"error", "message":message}), content_type="application/json")

        finally:
            conn.close()

        response_obj = {}
        response_obj['status'] = "success"
        response_obj['variables'] = nameslist

        return HttpResponse(json.dumps(response_obj), content_type="application/json")

    else:
        form = GWRInitializeForm() # an empty, unbound form

        return render_to_response(
            'gwr_initialize_form.html',
            {'form': form},
            context_instance=RequestContext(request)
        )
Code example #25
    def filter_queryset(self, request, queryset, view):
        """ Override this method to request just the documents required from Rserve. """
        try:
            filterable = getattr(view, "filter_fields", [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get("m1")
            if mid1 is None or mid1 == "":
                return [ElasticObject(initial={"error": "No marker ID provided."})]

            dataset = filters.get("dataset", "EUR").replace("-", "")
            mid2 = filters.get("m2")
            window_size = int(filters.get("window_size", 1000000))
            dprime = filters.get("dprime", 0.0)
            rsq = filters.get("rsq", 0.8)
            maf = filters.get("maf", False)
            if maf:
                maf = True
            build_version = filters.get("build", "GRCh38").lower()
            pos = filters.get("pos", False)
            if pos:
                pos = True

            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=["seqid", "start"])
            elastic = Search(search_query=query, idx=ElasticSettings.idx("MARKER", "MARKER"), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, "seqid")

            rserve = getattr(settings, "RSERVE")
            conn = pyRserve.connect(host=rserve.get("HOST"), port=rserve.get("PORT"))
            ld_str = conn.r.ld_run(
                dataset,
                seqid,
                mid1,
                marker2=mid2,
                window_size=window_size,
                dprime=dprime,
                rsq=rsq,
                maf=maf,
                position=pos,
                build_version=build_version,
            )
            ld_str = ld_str.replace("D.prime", "dprime").replace("R.squared", "rsquared")
            conn.close()

            return [ElasticObject(initial=json.loads(str(ld_str)))]
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
Code example #26
def getCorrelationMatrix(*args):
    # connect to Rserve (running as daemon process - configured for port 9999)
    # to start run on command line 'R CMD Rserve --RS-port 9999'
    conn = pyRserve.connect(host='localhost', port=9999)

    # combine streams by column - will create a matrix with dimensions = #ofstreams by number of elements
    #inputDataStreams = conn.r.cbind(stream1,stream2,stream3,stream4)
    inputDataStreams = conn.r.cbind(*args)
    #print(inputDataStreams)

    # Create a correlation matrix based on input streams
    corrMatrix = conn.r.cor(inputDataStreams)
    #print(corrMatrix)

    # Always good to close connections
    conn.close()
    return corrMatrix
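
A usage sketch, assuming Rserve is already running on port 9999 as the comment describes; pyRserve accepts numpy arrays (and plain lists) as arguments, so each stream can be passed directly:

import numpy as np

stream1 = np.array([1.0, 2.0, 3.0, 4.0])
stream2 = np.array([2.0, 4.0, 6.0, 8.0])
stream3 = np.array([4.0, 3.0, 2.0, 1.0])

corr_matrix = getCorrelationMatrix(stream1, stream2, stream3)
print(corr_matrix)  # three 4-element streams -> 3x3 correlation matrix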
Code example #27
    def __init__(self, dump_stdout=True, r_exe=None, do_log=False, port=None, **kw):

        if port is None:
            s = socket.socket()
            s.bind(("127.0.0.1", 0))
            port = s.getsockname()[1]
            s.close()
        rip = RInterpreter(r_exe=r_exe, **kw)
        rip.execute("""if("Rserve" %in% rownames(installed.packages()) == FALSE)
                        {
                          install.packages("Rserve", repos="http://cran.rstudio.com");
                        }
                    """)
        cmd = """library(Rserve);
                 Rserve::run.Rserve(port=%d);
              """ % port
        thread.start_new_thread(rip.execute, (cmd,))
        self.__dict__["dump_stdout"] = dump_stdout

        for _ in range(10):
            try:
                self.__dict__["conn"] = pyRserve.connect(host="127.0.0.1", port=port)
            except pyRserve.rexceptions.RConnectionRefused:
                time.sleep(0.1)
            else:
                break
        else:
            raise Exception("connection failed after 10 trials over one second in total")

        def on_die(killed_ref, conn=self.conn):
            # we pass conn here because access to self is not allowed in this handler.
            # we also import socket inside this function because it may be called while
            # the Python interpreter is shutting down, when the globally imported socket
            # module might no longer be available
            import socket
            try:
                conn.shutdown()
            except socket.error:
                pass

        # instead of implementing __del__ to force shutdown if the object dies, we use the
        # weakref trick to trigger shutdown at the end of self's life:
        self.__dict__["_del_ref"] = weakref.ref(self, on_die)
Code example #28
File: PopulateQuiz.py Project: ric1280/VC-Market
def compute(input):
    try:
        conn = pyRserve.connect()
    except:
        print("RServe not running... execute Rserve")
        return

    quiz = """\n collatz <- function(n, acc=0) {
                if(n==1) return(acc);
                collatz(ifelse(n%%2==0, n/2, 3*n +1), acc+1)
                }
                
                quiz<-collatz(""" + str(input) + ")"

    print "computing collatz(" + str(input) + ")"

    output = conn.eval(quiz)

    conn.close()
    return output
Code example #29
def connect_to_rserve(host, port, wait_time=2, wait_loop=10):
  logging.info("Connecting to Rserve at %s:%d" % (host, port))
  i = 0
  conn = None
  exception = None

  while i < wait_loop:
    i += 1
    logging.info("Connection attempt %d of %d " % (i, wait_loop))
    try:
      conn = pyRserve.connect(host=host, port=port)
      break
    except pyRserve.rexceptions.RConnectionRefused as e:
      exception = e
    time.sleep(wait_time)
  if conn is None:
    raise exception

  logging.info("Connection to Rserve successful.")
  return conn
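
A short usage sketch; with the defaults above the helper makes up to ten attempts, sleeping two seconds between them, and re-raises the last RConnectionRefused if none succeed:

import logging

logging.basicConfig(level=logging.INFO)

conn = connect_to_rserve("localhost", 6311)
try:
    print(conn.eval("Sys.time()"))
finally:
    conn.close()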
Code example #30
File: tasks.py Project: dsmurrell/smmpdb
def predict_logs(molecule_file_path, email_address):
    print(molecule_file_path)
    print(email_address)

    logger.debug('TESTING LOGGING FROM CELERY')

    conn = pyRserve.connect()
    conn.eval('library(smpredict)')
    logger.debug(conn.eval('PredictPropertytoCSV("LogS", csv.file="predictions.csv", structures.file="' + molecule_file_path + '", error.variance=TRUE)'))
    logger.debug(conn.eval('getwd()'))

    mail = EmailMessage('Your LogS Predictions',
    """Dear User

Thank you for using our service.
Here are your LogS predictions.

Kind regards
smpredict team""", 'smpredict', [email_address])
    mail.attach_file('' + conn.eval('getwd()') + '/predictions.csv')
    mail.send()
Code example #31
File: server.py Project: P1R/RHPy
def consultar_hive(consulta) :
	conn = pyRserve.connect(host='10.71.1.30', port=6311)
	conn.atomicArray = True
	R_Loadder = """library(rJava)
	library(RJDBC)
	.jinit()
	#Sys.setenv(HADOOP_JAR= paste0("", collapse=.Platform$path.sep))
	driverclass = "org.apache.hive.jdbc.HiveDriver"
	classPath = c("/usr/lib/hive/apache-hive-1.2.1-bin/lib/hive-jdbc-1.2.1-standalone.jar", "/etc/hadoop-2.7.1/share/hadoop/common/lib/commons-configuration-1.6.jar", "/etc/hadoop-2.7.1/share/hadoop/common/hadoop-common-2.7.1.jar")
	dr2 = JDBC(driverclass,classPath, identifier.quote = "`")
	Sys.setenv(HADOOP_JAR= paste0(classPath, collapse=.Platform$path.sep))
	url = paste0("jdbc:hive2://", "10.71.1.30", ":", "10000", "/default",";auth=noSasl")
	dbConnect(dr2, url) -> conn"""
	# Open the connection
	print("...Connecting...")
	conn.eval(R_Loadder)
	print("...Querying...")
	conn.eval(sql_query('hive_rqt', consulta))
	respuesta = conn.eval('hive_rqt')
	print(respuesta)
	return str(respuesta)
Code example #32
File: views.py Project: ErwinKomen/RU-nstylo
def getRConnObject():
    """Establish a connection with Rserve and load it with our program"""

    global conn
    rServeHost = 'localhost'
    rServePort = 6311

    # Check if a connection already exists
    if conn and type(conn) is pyRserve.rconn.RConnector and not conn.isClosed:
        # Return the existing connection
        return conn
    # There's no connection yet: establish one
    try:
        conn = pyRserve.connect(host=rServeHost, port=rServePort)
    except:
        # This probably means that Rserve is not running
        return None
    # Load the function that needs to be there
    conn.eval(rFuncStr)
    # Return the connection, which now contains our function
    return conn
Code example #33
File: r.py Project: atbentley/rpad
    def eval(self, expr, conn=0):
        """Evaluate an R expression on a particular connection and
        return a list of (result, type) pairs, where type is the
        actual R type of the result.

        In the case that expr contains multiple expressions
        (e.g. '1+1;2+2'), each expression will be evaluated independently.
        """
        if conn not in self.connections:
            # Create connection if it doesn't exist
            self.connections[conn] = pyRserve.connect()
        elif self.connections[conn].isClosed:
            # Re-open the connection if it is closed
            self.connections[conn].connect()

        results = []
        for chunk in RChunker(expr).chunk():
            try:
                result = self.connections[conn].eval(chunk)
                if isinstance(result, pyRserve.rparser.Closure):
                    # Result is most likely a function, I have not currently
                    # come up with a good way to handle this situation yet.
                    type_ = '__closure__'
                else:
                    # Set the .Last.value variable since pyRserve or Rserve
                    # doesn't do this for some reason.
                    self.connections[conn].r.__setattr__('.Last.value', result)
                    # Get the type of .Last.value
                    type_ = self.connections[conn].eval('class(.Last.value)')
                    # Re-set .Last.value
                    self.connections[conn].r.__setattr__('.Last.value', result)
            except pyRserve.rexceptions.REvalError as error:
                result = str(error)
                if not result:
                    result = 'Error: unable to parse R code'
                type_ = '__error__'
            results.append((result, type_))
        return results
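
A hypothetical usage sketch for the wrapper above, assuming r is an instance of the enclosing class with an empty self.connections dict; each expression in the input comes back as its own (result, type) pair:

results = r.eval('1 + 1; c(1, 2, 3)')
for value, rtype in results:
    print(rtype, value)
# e.g. numeric 2.0
#      numeric [1. 2. 3.]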
Code example #34
def R_discover_sub_clusters_PLAID(gene_p_qs, binarize_cutoff):
    start_time = time.time()
    #p_m = R_build_matrix(gene_p_qs)
    binary_mat = R_build_numpy_matrix_from_gene_p_qs(gene_p_qs,
                                                     binarize_cutoff)
    conn = pyRserve.connect()
    conn.r('require("biclust")')
    R_args = {
        'x': binary_mat,
        'method': 'BCPlaid',
        'cluster': 'b',
        # 'fit.model':'y~m+a+b',
        #'background':False,
        'row.release': 0.2,
        'col.release': 0.2,
        #'shuffle':3,
        #'shuffle':3,
        #'back.fit':0,
        #'max.layers':20,
        #'iter.startup':5,
        #'iter.layer':10,
        #'verbose':True,
    }
    result = conn.r.biclust(**R_args)

    attr = result.lexeme.attr

    #pdb.set_trace()
    disease_names = list(gene_p_qs.keys())  #FIXME temporarily comment off for testing p_m
    clusters = R_parse_cluster_result(attr, disease_names)

    print('found ' + str(len(clusters)) + ' clusters\n')
    #clusters = R_filter_clusters(clusters,gene_p_qs,row_percent,row_cutoff,col_percent,col_cutoff)
    print("sub_clustering took --- %s seconds ---" %
          (time.time() - start_time))
    return clusters
Code example #35
File: rserve_resources.py Project: D-I-L/pydgin
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from Rserve. '''
        try:
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])

            mid1 = filters.get('m1', 'rs2476601')
            dataset = filters.get('dataset', 'EUR').replace('-', '')
            mid2 = filters.get("m2")
            window_size = int(filters.get('window_size', 1000000))
            dprime = filters.get("dprime", 0.)
            rsq = filters.get("rsq", 0.8)
            maf = filters.get("maf", False)

            if maf:
                maf = True
            build_version = filters.get("build", 'GRCh38').lower()
            pos = filters.get("pos", False)
            if pos:
                pos = True

            query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
            elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
            doc = elastic.search().docs[0]
            seqid = getattr(doc, 'seqid')

            rserve = getattr(settings, 'RSERVE')
            conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
            ld_str = conn.r.ld_run(dataset, seqid, mid1, marker2=mid2,
                                   window_size=window_size, dprime=dprime,
                                   rsq=rsq, maf=maf, position=pos, build_version=build_version)
            ld_str = ld_str.replace('D.prime', 'dprime').replace('R.squared', 'rsquared')
            conn.close()
            return [ElasticObject(initial=json.loads(str(ld_str)))]
        except (TypeError, ValueError, IndexError, ConnectionError):
            return [ElasticObject(initial={'ld': None})]
Code example #36
 def connect(self):
     self.conn = R.connect()
Code example #37
File: isidro_tasks.py Project: dsmurrell/smmpdb
def predict_NCI60(molecule_file_path, email_address):

#################################################################################################   
   # 1. Load molecules
#################################################################################################   
   import repo.bioalerts as bioalerts
   import os
   import numpy as np
   import sklearn
   from sklearn.ensemble import RandomForestRegressor
   
   try:
      print "Reading input file.\n"
      molecules = bioalerts.LoadMolecules(molecule_file_path, verbose=False)
      molecules.ReadMolecules()
      print "Total number of input molecules correctly processed: ", len(molecules.mols)   
   except:
      print "ERROR: The input molecules could not be processed.\n The extension of the input file might not be supported\n"
      mail = EmailMessage('NCI60 Sensitivity Predictions',
      """Dear User,
    
      The requested cell line sensitivity predictions on the NCI60 panel could
      not be calculated. 
      
      It is likely that (i) the input file was corrupted or (ii) the format of the input molecules not supported.

      Kind regards
      Cancer Cell Line Profiler team""", 'CancerCellLineProfiler', [email_address])
      mail.send()
   # Check whether the file is larger than the 1 MB limit
   if (os.path.getsize(molecule_file_path) >> 20) > 1:
      mail = EmailMessage('NCI60 Sensitivity Predictions',
      """Dear User,
    
      The requested cell line sensitivity predictions on the NCI60 panel could
      not be calculated because the input file was larger than 1 MB (the maximum supported size).
      
      Kind regards
      Cancer Cell Line Profiler team""", 'CancerCellLineProfiler', [email_address])
      mail.send()
   
   if len(molecules.mols) == 0:
          print "ERROR: None of the input molecules was processed successfully\n"
          mail = EmailMessage('NCI60 Sensitivity Predictions',
          """Dear User,
    
          The requested cell line sensitivity predictions on the NCI60 panel could
          not be calculated, because the input file was empty or none of the input molecules
          was processed correctly.

          Kind regards
          Cancer Cell Line Profiler team""", 'CancerCellLineProfiler', [email_address])
          mail.send()
          raise ValueError("no input molecules could be processed")
#################################################################################################   
   # 2. Calculate Morgan fps for the input molecules
#################################################################################################   
   print "Calculating Morgan fingerprints for the input molecules\n"
   mols_info = bioalerts.GetDataSetInfo()
   #mols_info.extract_substructure_information(radii=[0,1,2],mols=molecules.mols)
   fps_input_molecules = bioalerts.CalculateFPs(mols=molecules.mols,radii=[0,1,2])
   fps_input_molecules.calculate_hashed_fps(nBits=256)
   #hashed_binary = fps_input_molecules.fps_hashed_binary
   hashed_counts = fps_input_molecules.fps_hashed_counts
   mean_fps = np.load("./NCI60/server_model/mean_fps_server_NCI60.npy")
   std_fps = np.load("NCI60/server_model/std_fps_server_NCI60.npy")
   hashed_counts = (hashed_counts - mean_fps) / std_fps

   
#################################################################################################   
   # 3. load cell line descriptors (pathways 1000)
#################################################################################################   
   nb_input_mols = len(molecules.mols)
   cell_descs = np.genfromtxt('./NCI60/pathway_descriptors_most_var.csv', delimiter=",", skip_header=1)
   cell_names = np.genfromtxt('./NCI60/pathway_descriptors_most_var_CELL_NAMES.csv', skip_header=0, dtype="|S40")
   mean_cell_descs = np.mean(cell_descs,axis=0)
   std_cell_descs = np.std(cell_descs,axis=0)
   cell_descs = (cell_descs-mean_cell_descs) / std_cell_descs
   #cell_descs = np.repeat(cell_descs,molecules.mols,axis=0)                                                                                                                      
   # tile and repeat the cell line and compound descriptors
   hashed_counts = np.tile(hashed_counts,(59,1))  
   input_mols_names = np.tile(molecules.mols_ids,(59,1))
   cell_descs = np.repeat(cell_descs,nb_input_mols,axis=0) 
   cell_names = np.repeat(cell_names,nb_input_mols,axis=0)
   
   X = np.hstack((hashed_counts,cell_descs))

#################################################################################################   
   # 4. Load point prediction and error models
#################################################################################################      
   from sklearn.externals import joblib  
   point_prediction_model = joblib.load('./NCI60/server_model/point_prediction_model_NCI60.pkl')
   error_prediction_model = joblib.load('./NCI60/server_model/error_prediction_model_NCI60.pkl')
   
#################################################################################################   
   # 5. Predict the activities
#################################################################################################   
   point_predictions = point_prediction_model.predict(X)
   error_prediction = error_prediction_model.predict(X)
   
#################################################################################################   
   # 6. Calculate the confidence intervals (70, 80, 90%)
#################################################################################################   
   alphas = np.load("./NCI60/server_model/alphas_NCI60.npy")
   alpha_70 = alphas[int(round(len(alphas) * 0.7))]
   alpha_80 = alphas[int(round(len(alphas) * 0.8))]
   alpha_90 = alphas[int(round(len(alphas) * 0.9))]
   
   confi_70 = error_prediction * alpha_70
   confi_80 = error_prediction * alpha_80
   confi_90 = error_prediction * alpha_90
   
#################################################################################################   
   # 7. Write predictions to .csv
#################################################################################################   
   fich = open("./NCI60/predictions_NCI60.csv","w")
   fich.write("Cell_line\tCompound_ID\tPredicted_pGI50\tCI_70\tCI_80\tCI_90\n" %())
   for i in range(0,len(input_mols_names)):
     fich.write("%s\t%s\t%f\t%f\t%f\t%f\n" %(cell_names[i],input_mols_names[i][0],point_predictions[i],confi_70[i],confi_80[i],confi_90[i]))

   fich.close()

#################################################################################################   
   # 8. Generate plot with R of the barplot for the NCI60
#################################################################################################   
   conn = pyRserve.connect()
   logger.debug(conn.eval('source("barplot_NCI60.R")'))
   
   mail = EmailMessage('NCI60 Sensitivity Predictions',
   """Dear User,

   Thank you for using our service.
   Here are the (i) predicted pGI50 values, and
   (ii) the 70, 80 and 90% confidence intervals calculated with conformal prediction
   for your input molecules.
   
   In addition, you will find a pdf displaying the bioactivity profile of each input molecule across the NCI60 panel.

   Kind regards
   Cancer Cell Line Profiler team""", 'CancerCellLineProfiler', [email_address])
   mail.attach_file('./NCI60/predictions_NCI60.csv')
   mail.attach_file('./NCI60/predicted_profiles_NCI60.pdf')
   mail.send()

#################################################################################################   
   # 9. Remove generated files
#################################################################################################   
   import os, os.path
   if os.path.exists('./NCI60/predictions_NCI60.csv'):
       os.remove('./NCI60/predictions_NCI60.csv')
Code example #38
File: views.py Project: alanpoon/rgis
def gwr_plot(request):

    if request.method == "POST":

        data = json.loads(request.body)

        shapefile_filename = data['namespace']
        dependent = data['dependent']
        independent = data['independent']

        # get the filepath to this shapefile
        shapefile_object = Shapefile.objects.get(name=shapefile_filename)

        # get the path to shapefile
        shapefile_file = shapefile_object.get_full_path() + "projected"

        # based on the dependent and independent variables, prepare the formula
        prepared_formula = dependent + " ~ "

        for variable in independent:
            prepared_formula = prepared_formula + variable + " + "

        # remove the last +
        prepared_formula = prepared_formula[:-3]

        # get a connection to rserve
        conn = pyRserve.connect()

        # prepare the r function
        functionFile = open(settings.BASE_DIR + '/fileupload/plotGWR.r')
        ##windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\plotGWR.r')
        functionContent = functionFile.read()

        conn.voidEval(functionContent)

        # set the file path for the output shapefile
        output_path = os.path.join(settings.BASE_DIR, 'gwroutputs')
        ##windows path
        # output_path = settings.BASE_DIR + '\\gwroutputs\\'
        output_name = shapefile_filename + str(uuid.uuid4()).replace("-", "")[:10]

        try:
            # get the statistics of this particular input
            # this function will also create a shapefile
            function_output = conn.r.gwr_function(shapefile_file, prepared_formula, output_path, output_name)
            # print 'variables'
            # print list(function_output['variables'])
            # print 'significance'
            # print function_output['significance']
            # print 'variance_inflation_factors'
            # print function_output['variance_inflation_factors']
            variables = list(function_output)

        except:
            message = "error running function in r"
            return HttpResponse(json.dumps({"status":"error", "message":message}), content_type="application/json")
        finally:
            conn.close()

        # convert to geojson
        source_filename = os.path.join(output_path, output_name + ".shp")
        output_filename = os.path.join(output_path, output_name + ".geojson")
        print(subprocess.call("ogr2ogr -f GeoJSON -s_srs EPSG:4326 -t_srs EPSG:4326 " + output_filename + " " + source_filename))

        # output the geojson
        with open(os.path.join(output_path, output_name + ".geojson"), "r") as geojsonfile:
            outputgeojson = json.loads(geojsonfile.read().replace('\n', ''))
        # prepare response
        response = {}
        response['variables'] = variables
        response['outputgeojson'] = outputgeojson

        return HttpResponse(json.dumps(response), content_type="application/json")

    else:
        return HttpResponse("coming soon")
Code example #39
File: views.py Project: alanpoon/rgis
def kde_function(request):

    if request.method == "POST":

        data = json.loads(request.body)

        point_filename = data['point']
        window_filename = data['window']
        bandwidth = float(data['bandwidth'])

        # get the relevant files
        point_object = Shapefile.objects.get(name=point_filename)
        window_object = Shapefile.objects.get(name=window_filename)

        conn = pyRserve.connect()

        # read the shapefile
        window_filename = window_object.get_full_path() + "projected"
        point_filename = point_object.get_full_path() + "projected"
        # print window_filename

        # load the function
        functionFile = open(settings.BASE_DIR + '/fileupload/kdefunction.r')
        ##windows path
        # functionFile = open(settings.BASE_DIR + '\\fileupload\\kdefunction.r')
        print(functionFile)
        functionContent = functionFile.read()

        conn.voidEval(functionContent)

        # output_path = settings.BASE_DIR + '/kdeoutputs/'
        ##windows path
        output_path = os.path.join(settings.BASE_DIR,'kdeoutputs')
        output_name = str(uuid.uuid4()).replace("-", "")

        print(output_path)
        print(output_name)

        try:
            # note that KDE function only returns the status
            # it creates the shapefile of contour lines
            resultsJson = conn.r.KDE_function(window_filename, point_filename, bandwidth, output_path, output_name)

            # convert to geojson
            source_filename = os.path.join(output_path, output_name + ".shp")
            output_filename = os.path.join(output_path, output_name + ".geojson")
            print(subprocess.call("ogr2ogr -f GeoJSON -s_srs EPSG:4326 -t_srs EPSG:4326 " + output_filename + " " + source_filename))

            # output the geojson
            with open(os.path.join(output_path, output_name + ".geojson"), "r") as geojsonfile:
                outputgeojson = json.loads(geojsonfile.read().replace('\n', ''))

        except:
            message = "error running kde function in r"
            return HttpResponse(json.dumps({"status":"error", "message":message}), content_type="application/json")
        finally:
            conn.close()

        response = json.dumps(outputgeojson, indent=2)

        finalresponse = HttpResponse(response, content_type="application/json")

        finalresponse["Access-Control-Allow-Origin"] = "*"
        finalresponse["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS"
        finalresponse["Access-Control-Max-Age"] = "1000"
        finalresponse["Access-Control-Allow-Headers"] = "*"

        return finalresponse

    else:
        form = KfunctionKDEInitializeForm() # an empty, unbound form

        return render_to_response(
            'kfunction_kde.html',
            {'form': form},
            context_instance=RequestContext(request)
        )
Code example #40
File: RSession.py Project: MatthewASimonson/r-orange
        #print "made connection shuttin down port to reopen"
        #s.shutdown(socket.SHUT_RDWR)
        #s.close()
        #validConnection = True
    #except Exception as inst:
        #print str(inst)
        #i+=1
i = getOpenPort()

print "Starting R with port %s" % str(i)
startRserve(i)
import time
con = None
while con is None:
    try:
        con = pyRserve.connect(host='localhost', port=i)
    except:
        print("R connection not active")
        time.sleep(1)
#print 'done importing conversion'
import redRLog
#print 'Rsession loaded'
# import redRi18n

mutex = QMutex()


def assign(name, object):
    try:
        rpy.r.assign(name, object)
        redRLog.log(redRLog.R, redRLog.DEBUG, _('Assigned object to %s') % name)
Code example #41
File: utilities.py Project: wonaya/HiCExplorer
def fitNegBinom_Rserve(countsByDistance,
                       plot_distribution=False,
                       per_chr=False):
    """
    Fits a negative binomial distribution to the counts
    found at each different distance.

    The fitting is attempted first using a python method, and
    if this fails R is used through Rserve.

    For the fitting, the outliers are removed. Outliers are
    defined as those having a z-score higher than 3.4. This
    number was chosen after exploring different z-score values
    and estimating the best goodness of fit.
    HiC data is expected to contain outliers, but they make
    fitting and testing the goodness of fit of a distribution
    problematic, which is why they are removed.
    """

    # if the counts are per chromosome,
    # use the function recursively
    if per_chr:
        size = {}
        prob = {}
        for chrom in countsByDistance.keys():
            sys.stderr.write('computing negative binomial for '
                             '{}\n'.format(chrom))
            size[chrom], prob[chrom] = \
                fitNegBinom_Rserve(countsByDistance[chrom],
                                   plot_distribution=plot_distribution)
        return size, prob

    import pyRserve
    import matplotlib.pyplot as plt
    try:
        conn = pyRserve.connect()
        conn.r('library("MASS")')

    except:
        print("Could not connect to Rserve. Check that Rserve is up and running")
        exit(1)
    size = {}
    mu = {}
    prob = {}
    pval = {}
    good = 0
    bad = 0

    for dist in np.sort(list(countsByDistance.keys())):
        if dist == -1:  # skip intra chromosomal counts
            continue
        size[dist] = np.nan
        mu[dist] = np.nan
        prob[dist] = np.nan
        if sum(countsByDistance[dist]) == 0.0:
            print "no counts for bins at distance {}".format(dist)
            continue
        if np.any(np.isnan(countsByDistance[dist])):
            exit("ERROR: matrix contains NaN values\n")

        counts = remove_outliers(countsByDistance[dist])
        if len(counts) <= 20:
            continue
        # the values in countsByDistance of a corrected matrix
        # are float values, but integers are needed for
        # the negative binomial fitting in R.
        counts_int = np.round(counts).astype('int')

        # try first using the python fit for the
        # negative binomial
        try:
            size[dist], prob[dist] = fit_nbinom(counts)
        except ValueError:
            # try with R..
            try:
                res = conn.r.fitdistr(counts_int, 'negative binomial')
            except:
                continue
            size[dist] = res[0]['size']
            mu[dist] = res[0]['mu']

            if np.isnan(size[dist]) or np.isnan(mu[dist]):
                sys.stderr.write("for dist={}, size={}, mu={}, "
                                 "len={}\n".format(dist, size[dist], mu[dist],
                                                   len(counts)))
                continue

            # The output from 'fitdistr' are size and mu.
            # but the scipy function that is based on the negative binomial
            # needs size and probability as parameters. However,
            # prob = size / ( size + mu )
            prob[dist] = size[dist] / (size[dist] + mu[dist])

        sys.stderr.write(".")  # print a . to show progress

        # evaluate fit of the counts distribution with respect to
        # the negative binomial  distribution using the parameters
        # returned by R
        fitted_dist = scipy.stats.nbinom.rvs(size[dist],
                                             prob[dist],
                                             size=len(counts) * 2)
        pval[dist] = scipy.stats.ks_2samp(counts_int, fitted_dist)[1]

        #        pval[dist] = scipy.stats.wilcoxon(counts, fitted_dist)[1]
        if pval[dist] < 0.01:
            bad += 1
            sys.stderr.write(
                "\nThe fit p-value {} for {} is too low to consider "
                "the distribution negative binomial".format(pval[dist], dist))
        else:
            good += 1

        if (plot_distribution and dist
                in [50000] + range(0, max(countsByDistance.keys()), 1000000)):
            # actual and fitted distributions are plotted
            # next to each other

            diff = counts.max() - counts.min()
            if diff >= 1000:
                nbins = 50
            elif 1000 > diff >= 100:
                nbins = 30
            elif 100 > diff >= 50:
                nbins = diff // 2
            else:
                nbins = (counts.max() - counts.min())
            freq, bins = np.histogram(counts.astype(int), nbins, normed=True)
            plt.hist(counts, bins, linewidth=0.1, alpha=0.8, normed=True)
            # plt.hist(fitted_dist, bins, histtype='step', linestyle='solid',
            #          linewidth=1.5, color='black', normed=True)
            pdf_fitted = scipy.stats.nbinom.pmf(bins.astype('int'), size[dist],
                                                prob[dist])
            plt.plot(bins.astype(int),
                     pdf_fitted,
                     label='fitted nbinom gf={:.3f}'.format(pval[dist]))

            fig_name = '/tmp/fitt_{}_{}.png'.format('nbinom', dist)
            plt.title('{} bp; size: {}, prob: {}'.format(
                dist, size[dist], prob[dist]))
            plt.ylim(0, np.max(freq) + np.max(freq) * 0.2)
            plt.legend()
            plt.savefig(fig_name, dpi=200)
            plt.close()
            sys.stderr.write("check {}".format(fig_name))

    sys.stderr.write("good {}, bad {}\n".format(good, bad))

    return size, prob
Code example #42
File: RfromPY.py Project: CfTO/hackbikeshareTO
import pyRserve

rcmd = pyRserve.connect(host='localhost', port=6311)



print(rcmd('rnorm(20, mean=2, sd=0.1)'))

rcmd('b <- c(1,3,5,7,9)')
print(rcmd('b'))

rcmd('a <- c("COL1","COL1","COL1","COL2","COL2","COL2","COL3","COL3")')
rcmd('b <- c("Item1","Item1","Item2","Item2","Item3","Item3","Item3","Item3")')
rcmd('results <- table(a,b)')
print(rcmd('results'))

rcmd('x <- seq(-20,20,by=.5)')
rcmd('y <- dt(x,df=10)')
rcmd('plot(x,y)')

This is the output (the graph is plotted in the R window and is not shown here):
[ 2.02055894  2.05137019  1.97653928  1.99654565  2.08948691  2.07250623
  1.95475797  2.11145948  1.97653835  2.01341228  2.05299939  2.14354837
  2.06876532  1.94614396  1.9924665   2.08839507  1.87483786  2.08817775
  1.97000129  2.26570712]
[ 1.  3.  5.  7.  9.]
[[2 0 0]
 [1 1 0]
 [0 2 2]]
Code example #48
# -*- coding: utf-8 -*-
# Start Rserve first: http://blog.fens.me/r-rserve-server/
import pyRserve
conn = pyRserve.connect()
conn.eval('''source('test.R')''')
conn.r.testLoadRecord('600016')
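# (Hedged note: this assumes test.R, in Rserve's working directory, defines
# testLoadRecord(); the stock code '600016' is specific to this project.)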


Code example #49
def checkJobResult(client_ip, volunteer_session, jobId, RData_output,
                   RData_input):
    mid = volunteerAuth.volunteer_sessions[volunteer_session]["mid"]
    machines = None

    #####Validate if this machine was assigned for the job
    try:
        cur.execute("""SELECT mid,status FROM machine_job WHERE jobId=%s""" %
                    jobId)
        machines = cur.fetchall()
    except:
        return "Error getting machines for the job " + str(jobId)

    if not machines:  # fetchall() returns an empty sequence when no rows match
        return "There are no volunteers assigned to job " + str(jobId)

    legitMachine = False
    for entry in machines:
        if entry[0] == mid:
            legitMachine = True
            break

    if not legitMachine:
        return ("The machine " + str(mid) + " was not assigned to job with id: "
                + str(jobId))

    ####Get the number of machines assigned for the job
    quorum_machines = len(machines)

    ###Get validated jobs for this job ID

    machines_for_job[jobId][mid] = {
        "mid": mid,
        "status": "Computing",
        "vars": None,
        "filename": None
    }

    ###Collect the jobs already checked
    checked_jobs_machines = []
    for machineID in machines_for_job[jobId]:
        machine = machines_for_job[jobId][machineID]
        if machine["status"] == "Error" or machine[
                "status"] == "Wrong" or machine["status"] == "Success":
            checked_jobs_machines.append(machine)

    #Verify RData_output
    #If there is no RData_output then there was an execution error
    if RData_output is None:
        try:
            query = "UPDATE machine_job SET status = 'Error' WHERE mid = " + str(
                volunteerAuth.volunteer_sessions[volunteer_session]
                ["mid"]) + " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            con.rollback()
            return "Error executing query: " + query

        if quorum_machines == 1:
            try:
                query = "UPDATE job SET Status = 'Error' WHERE jobId = " + str(
                    jobId)
                cur.execute(query)
                con.commit()

            except:
                print "Could not execute query: " + query
                con.rollback()

        machines_for_job[jobId][mid]["status"] = "Error"
        checked_jobs_machines.append(machines_for_job[jobId][mid])

        if quorum_machines == len(
                checked_jobs_machines) and quorum_machines > 1:
            ##majority report
            majorityReport(checked_jobs_machines)

        ###Release the machine to receive more jobs
        volunteerAuth.volunteer_sessions[volunteer_session]["State"] = "FREE"
        return

    ## If there is an output RData, start an Rserve session to validate the result

    #connect to R
    try:
        conn = pyRserve.connect()
    except:
        print "RServe not running... execute Rserve"
        return

    path = conn.eval('getwd()')
    filename = str(path) + "/" + str(jobId) + "_" + str(mid) + "_output.RData"
    handle = open(filename, "wb")

    handle.write(RData_output.data)
    handle.close()

    ##Clean all variables from R environment
    conn.eval("rm(list=ls())")

    ###Run the file with the R code
    conn.voidEval('load("' + filename + '")')

    ### Extract value from the quiz variable
    quiz = conn.eval("quiz")

    #Job validation
    #criteria1 - validate the quiz variable

    machines_for_job[jobId][mid]["filename"] = filename

    ### Get the expected output for the quiz
    try:
        query = """SELECT output FROM market_quiz INNER JOIN job_quiz ON market_quiz.input=job_quiz.input WHERE jobId=""" + str(
            jobId)
        cur.execute(query)
        quiz_output = cur.fetchone()[0]

    except:
        return "Error executing query: " + query

    ### compare the quiz with the expected quiz result
    if quiz == quiz_output:
        criteria1 = True
    else:
        criteria1 = False

    #criteria2 - validate that the variables updated/created are available in output.RData
    #This method can generate false positives, because the test passes if no new variables were created by this job's computation.
    #However, it does not generate false negatives: if the expected variables are not in the RData file, then the job
    #execution is corrupted or failed
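    # (The R expression below returns every object in the R global environment
    # that is not a function, i.e. the data variables produced by the job.)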

    variables = conn.eval(
        "ls()[!sapply(ls(), function(x) is.function(get(x)))]")

    if (set(jobBuffer[jobId]["vars"]).issubset(set(variables))):
        criteria2 = True
    else:
        criteria2 = False

    #del jobBuffer[jobId]
    vars = dict()
    for var in jobBuffer[jobId]["vars"]:
        vars[var] = conn.eval(var)

    machines_for_job[jobId][mid]["vars"] = vars

    ###If both criteria are met then the job was computed successfully
    if criteria1 and criteria2:
        print "The computed job with the id " + str(
            jobId) + " was successfully validated!"

        try:
            query = """SELECT InitTime FROM job WHERE jobId= """ + str(jobId)
            cur.execute(query)
            initTime = cur.fetchone()[0]

        except:
            print "error with query: " + query

        try:
            execTime = time.time() - initTime
            query = "UPDATE job SET ExecTime = " + str(
                execTime) + " WHERE jobId = " + str(jobId)
            cur.execute(query)
            con.commit()

        except:
            print "Could not execute query: " + query
            con.rollback()

        try:
            query = "UPDATE machine_job SET status = 'Success' WHERE mid = " + str(
                mid) + " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()

        except:
            print "Could not execute query: " + query
            con.rollback()

        if quorum_machines == 1:
            try:
                update_credibility(cur, con, mid, "Success")
                query = "UPDATE job SET Status = 'Success' WHERE jobId = " + str(
                    jobId)
                cur.execute(query)
                con.commit()

            except:
                print "Could not execute query: " + query
                con.rollback()

        machines_for_job[jobId][mid]["status"] = "Success"
        checked_jobs_machines.append(machines_for_job[jobId][mid])

        if quorum_machines == len(
                checked_jobs_machines) and quorum_machines > 1:
            ##majority report
            checked_jobs_machines = majorityReport(checked_jobs_machines)

    else:

        #the volunteer computed a wrong result
        # scj is now set to 1
        # credibility is updated with the new scj

        try:
            query = "UPDATE machine_job SET status = 'Wrong' WHERE mid = " + str(
                mid) + " AND jobId =" + str(jobId)
            cur.execute(query)
            con.commit()
        except:
            print "Could not execute query: " + query
            con.rollback()

        if quorum_machines == 1:
            try:
                update_credibility(cur, con, mid, "Wrong")
                query = "UPDATE job SET Status = 'Wrong' WHERE jobId = " + str(
                    jobId)
                cur.execute(query)
                con.commit()
            except:
                print "Could not execute query: " + query
                con.rollback()

        machines_for_job[jobId][mid]["status"] = "Wrong"
        checked_jobs_machines.append(machines_for_job[jobId][mid])

        if quorum_machines == len(
                checked_jobs_machines) and quorum_machines > 1:
            ##majority report
            checked_jobs_machines = majorityReport(checked_jobs_machines)

    conn.close()
    if conn.isClosed:
        print "Rserve connection is closed"

    if quorum_machines == 1:
        try:
            query = "UPDATE job SET RDataPath = '" + str(
                machine["filename"]) + "' WHERE jobId = " + str(jobId)
            cur.execute(query)
            con.commit()

        except:
            print "Could not execute query: " + query
            con.rollback()

    #update volunteer state to FREE
    volunteerAuth.volunteer_sessions[volunteer_session]["State"] = "FREE"
Code example #50
File: __init__.py  Project: jehoons/sbie_weinberg
def open_rserv():
    global conn 
    conn = pyRserve.connect()
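# (Hedged note: callers then use the module-level conn, e.g. conn.eval("1+1"),
# and should release it with conn.close() when done.)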
Code example #51
File: utilities.py  Project: wonaya/HiCExplorer
def fitDistribution(countsByDistance, distribution, plot_distribution=False):
    """
    Generic method to fit continuous
    distributions to the  HiC countsByDistance
    The distribution names are the ones supported
    by scipy.
    """
    mu = {}
    sigma = {}
    pval = {}
    good = 0
    bad = 0
    good_nb = 0
    bad_nb = 0
    import pyRserve
    try:
        conn = pyRserve.connect()
        conn.r('library("MASS")')
    except:
        print "Could not connect to Rserve. Check that Rserve is up and running"
        exit(1)

    import sys
    for distnc in np.sort(countsByDistance.keys()):
        if distnc == -1:  # skip intra chromosomal counts
            continue
        if sum(countsByDistance[distnc]) == 0.0:
            print "no counts for bins at distance {}".format(distnc)
            continue
        if len(countsByDistance[distnc]) <= 2:
            continue
        sys.stderr.write('.')
        # TEMP code to compare with negative binomial ###

        # the values in countsByDistance of a corrected matrix
        # are float values, but integers are needed for
        # the negative binomial.

        counts_nb = remove_outliers(
            np.round(countsByDistance[distnc]).astype('int'))

        # try first using the python fit for the
        # negative binomial
        try:
            size, prob = fit_nbinom(remove_outliers(countsByDistance[distnc]))
        except ValueError:
            # try with R..
            res = conn.r.fitdistr(counts_nb, 'negative binomial')
            size = res[0]['size']
            mu_ = res[0]['mu']

            if np.isnan(size) or np.isnan(mu_):
                print "for dist={}, size={}, mu={}, len={}".format(
                    distnc, size, mu_, len(counts_nb))
                continue

            prob = size / (size + mu_)
        nbin = scipy.stats.nbinom(size, prob)
        #####

        counts = remove_outliers(countsByDistance[distnc])
        counts[counts == 0] = 0.01
        dist = getattr(scipy.stats, distribution)
        param = dist.fit(counts, floc=0)
        if np.any(np.isnan(param)):
            sys.stderr.write('\n{} no params computed'.format(distnc))
            import ipdb
            ipdb.set_trace()
        mu[distnc] = param[-1]
        sigma[distnc] = param[0]

        # estimate the goodness of fit pvalue
        fitted_dist = dist.rvs(*param[:-2],
                               loc=param[-2],
                               scale=param[-1],
                               size=len(counts) * 2)
        pval[distnc] = scipy.stats.ks_2samp(counts, fitted_dist)[1]
        fitted_dist_nb = scipy.stats.nbinom.rvs(size,
                                                prob,
                                                size=len(counts_nb) * 2)

        pval_nb = scipy.stats.ks_2samp(counts_nb, fitted_dist_nb)[1]
        if pval[distnc] < 0.01:
            bad += 1
        else:
            good += 1

        if pval_nb < 0.01:
            bad_nb += 1
        else:
            good_nb += 1
        if pval[distnc] < 0.01:
            sys.stderr.write("\nproblem with {}, p-value for "
                             "{} fit: {} (NB fit: {})".format(
                                 distnc, distribution, pval[distnc], pval_nb))

        if (plot_distribution and distnc in range(
                50000, max(countsByDistance.keys()), 500000)):

            import matplotlib.pyplot as plt
            freq, bins = np.histogram(counts, 30, normed=True)
            plt.close()  # to avoid overlaps
            plt.hist(counts, bins, linewidth=0.1, alpha=0.8, normed=True)
            #            plt.hist(fitted_dist, bins, histtype='step', linestyle='solid',
            #                      linewidth=1.5, color='black', normed=True)
            #            plt.hist(fitted_dist_nb, bins, histtype='step', linestyle='solid',
            #                      linewidth=1.5, color='grey', normed=True)
            pdf_fitted = dist.pdf(bins,
                                  *param[:-2],
                                  loc=param[-2],
                                  scale=param[-1])
            ##
            plt.plot(bins.astype(int),
                     nbin.pmf(bins.astype('int')),
                     label='NB {:.2f}'.format(pval_nb))
            ##
            plt.plot(bins,
                     pdf_fitted,
                     label='{} {:.2f}'.format(distribution, pval[distnc]))
            fig_name = '/tmp/fitt_{}_{}.png'.format(distribution, distnc)
            plt.title('{} bp'.format(distnc))
            plt.ylim(0, np.max(freq) + np.max(freq) * 0.2)
            plt.legend()
            plt.savefig(fig_name, dpi=200)
            plt.close()
            print "check {}".format(fig_name)
    print "good {}, bad {}, good_nb {}, bad_nb {}".format(
        good, bad, good_nb, bad_nb)
    return (mu, sigma)
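
A minimal usage sketch (hedged: synthetic counts stand in for a real HiC
matrix, Rserve must already be running with the MASS package available, and
remove_outliers/fit_nbinom come from the same module):

import numpy as np
# map genomic distance (bp) -> array of observed contact counts
countsByDistance = {10000: np.random.negative_binomial(5, 0.3, 500).astype(float)}
mu, sigma = fitDistribution(countsByDistance, 'gamma')
print mu, sigma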
Code example #52
def h_measure(true_class, probability):

    conn = pyRserve.connect()

    conn.voidEval('''
    relabel <- function(labels){
    
    
      if (length(levels(as.factor(labels)))==1){
        stop('Only one class is present in the dataset. Need both classes to be represented.')
      }
      if (length(levels(as.factor(labels)))>2){
        stop('More than two classes present, but code can only handle binary classification.')
      }
      
    
    
       labels <- as.factor(as.character(labels))
       input.labels <- levels(labels)
       
    
       cond.temp <- (	identical(input.labels,c('case','non-case')) |
                        identical(input.labels,c('Case','Non-case')) |
                        identical(input.labels,c('case','noncase'))  |
                        identical(input.labels,c('Case','Noncase')) )
    
       if (cond.temp) {
          levels(labels) <- c('1', '0')
          message('Class labels have been switched from (',
            paste(input.labels[1],input.labels[2], sep=','), ') to (', 
            paste('1', '0', sep=','), ')')   
            labels <- as.factor(labels)
            labels <- 2-as.numeric(labels) # turn into numeric array of 0s and 1s
       } else {
          levels(labels) <- c('0', '1')
          if (!(identical(input.labels,c('0', '1')))){
              message('Class labels have been switched from (',
                paste(input.labels[1],input.labels[2], sep=','), ') to (', 
                paste('0', '1', sep=','), ')')
                }
          labels <- as.factor(labels)
          labels <- as.numeric(labels)-1 # turn into numeric array of 0s and 1s
    
        } 
    
        return(labels)	
    }
    
    
    misclassCounts <- function(predicted.class,true.class){
     
        true.class <- as.array(true.class)
        predicted.class <- as.array(predicted.class)
        
        # make sure the same convention is employed for both true and predicted
    #    check <- relabel(c(true.class,predicted.class))
    #	l <- length(check)
    #	true.class <- check[1:(l/2)]
    #	predicted.class <- check[(l/2+1):l]
        
    
        TP <- sum(predicted.class == 1 & true.class == 1)
        FP <- sum(predicted.class == 1 & true.class == 0)
        TN <- sum(predicted.class == 0 & true.class == 0)
        FN <- sum(predicted.class == 0 & true.class == 1)
    
        conf.matrix <- data.frame(pred.1=c(TP,FP),pred.0=c(FN,TN))
        row.names(conf.matrix) <- c('actual.1','actual.0')
    
        ER <- (FP + FN)/(TP+FP+TN+FN)
    
        Sens <- TP/(TP+FN)
        Spec <- TN/(TN+FP)
    
        Precision <- TP/(TP+FP)
        Recall <- Sens 
    
        TPR <- Recall
        FPR <- 1-Spec
    
        F <- 2/(1/Precision+1/Sens)
        Youden <- Sens + Spec -1
        metrics <- data.frame(ER=ER,
            Sens=Sens,Spec=Spec,Precision=Precision,
            Recall=Recall, TPR=TPR, FPR=FPR, F=F, Youden=Youden)
        return(list(conf.matrix=conf.matrix,metrics=metrics))
    }
    
    HMeasure <- function(true.class, scores,
                         severity.ratio=NA, threshold=0.5, level=0.95
                         ){
    
    
      ####################
      ### INPUT CHECKS ###
      ####################
    
      # try to catch mistaken order of arguments
      if (is.matrix(true.class) || is.data.frame(true.class)){
        stop(	'True class should be a vector, not a matrix / data frame.  Consider the order of the arguments.'	)
      }
    
      # no missing values in the labels allowed
      if (any(is.na(true.class))){
        stop('Missing values in class labels are not allowed.')}
    
    
      # relabel, and make sure there are only 2 class labels
      true.class <- relabel(true.class) 
      
      # row names can confuse and are otherwise useless - remove them
      rownames(scores) <- NULL
      rownames(true.class) <- NULL
     
      # turn scores into a data frame (if it were not one already)
      if (is.vector(scores)){
        scores <- as.data.frame(scores)
        # message('Scores coerced from vector to data frame')
        
      }
    
      if (is.matrix(scores)){
        n <- dim(scores)[1]
        k <- dim(scores)[2]
      
      # in the case of a matrix, throw a warning if columns (classifiers) > rows (data)
        if (n < k) {
          warning(gettextf(
            'Consider transposing score matrix: number of classifiers (columns) = %d exceeds number %d of datapoints (rows)', 
            k, n), domain = NA)
        }
    
        scores <- as.data.frame(scores)
        # message('Scores coerced from matrix to data frame')
        
      }
      
    
      
      if (dim(scores)[1]!=length(true.class)){
        stop('Label vector provided has different length than respective classifier scores')
      }
    
      # only look at complete cases in the score data frame
      if (any(is.na(scores))){
        warning(	'Missing entries detected in matrix of scores. Respective entries will be disregarded'	)
      }
      complete.rows <- complete.cases(scores)
      scores <- subset(scores,subset=complete.rows)
      true.class <- subset(true.class,subset=complete.rows)
      rownames(scores) <- NULL
      rownames(true.class) <- NULL
    
      # now that format is correct, get sample size and number of classifiers  
      n <- dim(scores)[1]
      k <- dim(scores)[2]
         
       
         
      # THRESHOLD - if only one value for the threshold has been provided
      # (e.g., the default of 0.5), use the same for all classifiers
      # else check that the array of thresholds has one per classifier
      
      if (length(threshold) == 1){
        threshold <- rep(threshold,k)
      } else {  
        if (length(threshold)<k){
            warning(	'Threshold must either be a single value, or a vector of length equal to the number of classifiers employed. The default value of 0.5 will be used.')
        }
      }
    
      ############################
      ### INPUT CHECK COMPLETE ###
      ############################
    
      #############################################
      ### SINGLE CLASSIFIER FUNCTION DEFINITION ###
      #############################################
      # to keep the code tidy, we implement an internal function for a single classifier 
      HMeasure.single <- function(y, s, classifier.name=NULL,
                    severity.ratio=severity.ratio,
                    threshold=threshold, level=level	){
    
        # PROCESSING 
        n <- length(s)
    
        
        # this is a numeric version of the class labels
        n1 <- sum(y) 
        n0 <- n-n1
        pi0 <- n0/n
        pi1 <- n1/n
      
        # retrieve severity ratio - set to default if absent
        if (is.na(severity.ratio)){
          severity.ratio <- pi1/pi0
        } 
    
    
    
        # order data into increasing scores
        zord <- order(s)
        sc <- s[zord]
    
        # note: we make no assumptions about the range of s
    
        # COMPUTE ROC CURVE
    
        # Calculate raw ROC, replacing any tied sequences by a diagonal
        # Raw ROC starts at F0[1]=0, F1[1]=0, and ends at F0[K1]=1, F1[K1]=1.
        Get.Score.Distributions <- function(y,s,n1,n0){
          # tapply(y,s,sum) counts the instances of each unique score, and ranks them by score
          s1 <- unname(tapply(y, s, sum))/n1
          s1 <- c(0,s1,1-sum(s1)) # make sure to add the points 0,0 and 1,1
          s0 <- unname(tapply(1-y, s, sum))/n0
          s0 <- c(0,s0,1-sum(s0)) # make sure to add the points 0,0 and 1,1
          
          # number of unique scores
          S <- length(s1)
          # what were r0i and r1i in ML paper are now the empirical cdfs
          F1 <- cumsum(s1)
          F0 <- cumsum(s0)
          return(list(F1=F1,F0=F0,s1=s1,s0=s0,S=S))
        }
    
    
    
        out.scores <- Get.Score.Distributions(y=y,s=s,n1=n1,n0=n0)
        AUC <- 1- sum(out.scores$s0 * (out.scores$F1 - 0.5 * out.scores$s1))
        # if the AUC < .5, switch signs and repeat
        switched <- FALSE
        the.criterion <- AUC < 0.5
        if (the.criterion){
          switched <- TRUE      
          s <- 1-s
          out.scores <- Get.Score.Distributions(y,s,n1,n0)
          if (is.null(classifier.name)){
          warning('ROC curve mostly lying under the diagonal. Switching scores.', domain = NA)
        } else {
          warning(gettextf( 'ROC curve of %s mostly lying under the diagonal. Switching scores.',
            classifier.name), domain = NA)
        }
        }
        F1 <- out.scores$F1
        F0 <- out.scores$F0 
        s0 <- out.scores$s0
        s1 <- out.scores$s1
        S <- out.scores$S
    
    
        # get misclassification statistics
        misclass.out <- misclassCounts(as.numeric(s>threshold),true.class)
        misclass.metrics <- misclass.out$metrics
        temp <- misclass.out$conf.matrix
        misclass.conf <- data.frame(
            TP=temp[1,1], FP=temp[2,1],
            TN=temp[2,2], FN=temp[1,2])
                    
        
        # get aggregate statistics:
        AUC <- 1- sum(s0 * (F1 - 0.5 * s1)) # REPLACING TIED SCORES BY A DIAGONAL
        Gini <- 2*AUC - 1
        KS <- max(abs(F0 - F1))
        cost.parameter <- severity.ratio/(1+severity.ratio)
        MER <- min(pi0*(1-F0)+pi1*F1)
        MWL <- 2*min(cost.parameter*pi0*(1-F0)+(1-cost.parameter)*pi1*F1)
    
    
    
        Look.Up.AUC <- function(xcurve,ycurve,x=0){
          # assumes the curve is monotonic
          result <- NA
          if (all(diff(xcurve) >= 0)){
            ind <- which(xcurve-x>0)[1]
            x1 <- xcurve[ind-1]
            x2 <- xcurve[ind]
            y1 <- ycurve[ind-1]
            y2 <- ycurve[ind]
    
            if (x2-x1 > 0) {
              pos <- (x2-x)/(x2-x1)
              result <- (1-pos)*y1 + pos*y2         
            } else {result <- y2}
          }
          return(result)
        }
    
        SensFixed <- matrix(NA,1,length(level))
        SpecFixed <- matrix(NA,1,length(level))
        temp <- array(NA,length(level))
        for (l in 1:length(level)){
          SensFixed[l] <- c(Look.Up.AUC(F0,1-F1,x=level[l]))
          temp[l] <- paste('Sens.Spec',floor(level[l]*100),sep='')
        } 
        SensFixed <- as.data.frame(SensFixed)
        colnames(SensFixed) <- temp
    
        for (l in 1:length(level)){
          SpecFixed[l] <- Look.Up.AUC(F1,F0,x=1-level[l])
          temp[l] <- paste('Spec.Sens',floor(level[l]*100),sep='')
        }
        SpecFixed <- as.data.frame(SpecFixed)
        colnames(SpecFixed) <- temp
    
        # restrict to upper convex hull by considering ROC above diagonal only
        chull.points <- chull(1-F0,pmax(1-F1,1-F0))
        G0 <- 1-F0[chull.points]
        G1 <- 1-F1[chull.points] 
        hc <- length(chull.points)
        sG0 <- c(0,G0[c(2:length(G0))] - G0[c(1:(length(G0)-1))])
        sG1 <- c(0,G1[c(2:length(G1))] - G1[c(1:(length(G1)-1))])
        AUCH <- sum(sG0 * (G1 - 0.5 * sG1))
    
    
        # get sorted scoring densities
        s.class0 <- sort(s[y==0])
        s.class1 <- sort(s[y==1])
    
    
        # Calculate the LHshape1 value
        cost <- c(1:(hc+1))
    b0 <- c(1:(hc+1))
    b1 <- c(1:(hc+1))
        
        # extract shape
        if (severity.ratio > 0){
          shape1 <- 2
          shape2 <- 1+(shape1-1)*1/severity.ratio
        }
        if (severity.ratio < 0){
          shape1 <- pi1+1
          shape2 <- pi0+1
        }
        cost[1] <- 0
        cost[hc+1] <- 1
    
        b00 <- beta(shape1,shape2)
        b10 <- beta(1+shape1,shape2)
        b01 <- beta(shape1,1+shape2)
    
    
        b0[1] <-
          pbeta(cost[1], shape1=(1+shape1), shape2=shape2)*b10/b00
    
        b1[1] <-
          pbeta(cost[1], shape1=shape1, shape2=(1+shape2))*b01/b00
    
        b0[hc+1] <-
          pbeta(cost[hc+1], shape1=(1+shape1), shape2=shape2)*b10/b00
    
        b1[hc+1] <-
          pbeta(cost[hc+1], shape1=shape1, shape2=(1+shape2))*b01/b00
    
        ### NB: can become massively faster
        for (i in 2:hc){
          cost[i] <- pi1*(G1[i]-G1[i-1]) / 
          (pi0*(G0[i]-G0[i-1]) + pi1*(G1[i]-G1[i-1]))
    
          b0[i] <-
            pbeta(cost[i], shape1=(1+shape1), shape2=shape2)*b10/b00
    
          b1[i] <-
            pbeta(cost[i], shape1=shape1, shape2=(1+shape2))*b01/b00
        }
    
        LHshape1 <- 0
        for (i in 1:hc){
          LHshape1 <- LHshape1 + pi0*(1-G0[i])*(b0[(i+1)]-b0[i]) + pi1*G1[i]*(b1[(i+1)]-b1[i])
        }
    
        B0 <- 
          pbeta(pi1, shape1=(1+shape1), shape2=shape2)*b10/b00
    
        B1 <-
          pbeta(1, shape1=shape1, shape2=(1+shape2))*b01/b00 -
          pbeta(pi1, shape1=shape1, shape2=(1+shape2))*b01/b00
    
        H <- 1 - LHshape1/(pi0*B0 + pi1*B1)
    
        data <- list(F0=F0, F1=F1, G0=G0, G1=G1, cost=cost,
                     pi1=pi1, pi0=pi0, n0=n0, n1=n1, n=n, hc=hc,
                     s.class0=s.class0, s.class1=s.class1,
                     severity.ratio=severity.ratio)
    
        metrics <- data.frame(H=H, Gini=Gini, AUC=AUC, AUCH=AUCH, KS=KS, MER=MER, MWL=MWL)
        metrics <- cbind(metrics,SpecFixed,SensFixed)
        metrics <- cbind(metrics,misclass.metrics,misclass.conf)
    
        return(list(data=data,metrics=metrics))
      }
    
    
      #############################################
      ### SINGLE CLASSIFIER DEFINITION complete ###
      #############################################
    
    
      ######################################
      ### PROCESS CLASSIFIERS ONE BY ONE ###
      ######################################
    
        
      data <- list()
      for (count in 1:k){
        name.now <- colnames(scores)[count]
        s <- scores[,count]
        threshold.now <- threshold[count]
        output <- HMeasure.single(y=true.class, s=s, classifier.name=name.now,
                                  severity.ratio=severity.ratio, 
                                  threshold=threshold.now,level=level)
    
        if (count == 1){
          metrics <- output$metrics
        }
        if (count > 1){metrics <- rbind(metrics,output$metrics)}
        
        # retrieve data for plotting purposes
        data[[count]] <- output$data
            
      }
    
      # name the rows by classifier
      rownames(metrics) <- colnames(scores)
      # name the data output by classifier
      names(data) <- colnames(scores)
    
      # construct output
      hmeasure <- list(metrics=metrics)
      attr(hmeasure,'data') <- data
      class(hmeasure) <- 'hmeasure'
    #  return(hmeasure)
    
      new.object <- unclass(hmeasure)
      return(as.data.frame(new.object$metrics))
    } ''')

    trueclass_string = str(tuple(true_class))
    scores_string = str(tuple(probability))

    trueclass = conn.eval('c' + trueclass_string)
    scores = conn.eval('c' + scores_string)
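    # (Hedged note: str(tuple(...)) happens to read as valid R c(...) syntax
    # for two or more values, but a single-element tuple renders as "(x,)",
    # which R rejects; the eval round-trip assumes at least two observations.)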

    results = conn.r.HMeasure(trueclass, scores)

    conn.close()
    return (results[0])
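
A minimal usage sketch (hedged: toy labels and scores chosen for illustration;
Rserve must already be running, and the value printed is the first field of
the metrics row computed above):

true_class = [0, 0, 1, 1, 1]
probability = [0.10, 0.40, 0.35, 0.80, 0.95]
print h_measure(true_class, probability)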
Code example #53
File: tasks.py  Project: jrawbits/AccessR
def performModel(input_files,
                 tool_config,
                 client,
                 subtool_name=False):
    '''
    input_files is the set of data to analyze from the NMTK server
    tool_config is the "header" part of the input
    client is an object of type NMTK_apps.helpers.server_api.NMTKClient
    subtool_name is provided if the tool manages multiple configurations
    '''
    logger = performModel.get_logger()
    logger.debug("input_files: %s" % (input_files,))
    logger.debug("tool_config\n%s\n" % (tool_config,))

    # Use exception handling to generate "error" results -- everything that
    # doesn't generate good results should throw an exception Use the extra
    # 'with' syntax to ensure temporary files are promptly cleaned up after tool
    # execution.  With luck, the tool server will also do periodic garbage
    # collection on tools that don't pick up after themselves.

    # AccessR - Dispatch to subtools
    with Config.Job(input_files,tool_config) as job:
        try:
            job.setup()
            job.logger = logger  # in case we need it...
            job.tempfiles = []
            job.R = pyRserve.connect()
            if subtool_name in doSubTool:
                results = doSubTool[subtool_name](job,client)
                if results:
                    client.updateResults(result_field=results.get("field",None),
                                         units=results.get("units",None),
                                         result_file=results.get("result_file",None),
                                         files=results.get("files",None)
                                     )
                else:
                    raise Exception("No results returned from subtool '%s'"%(subtool_name,))
            else:
                raise Exception("SubTool not found: "+subtool_name)

        except Exception as e:
            # Every failure should result in an Exception
            # use job.fail to add additional failure messages before
            # raising the Exception, as illustrated here
            msg = 'Job failed.'
            logger.exception(msg)
            logger.exception(str(e))
            job.fail(msg)
            job.fail(str(e))
            client.updateResults(payload={'errors': job.failures },
                                 failure=True,
                                 files={}
                             )
        finally:
            if hasattr(job,"tempfiles"):
                if not hasattr(job,"R"): # low likelihood...
                    job.R = pyRserve.connect()
                for file in job.tempfiles:
                    job.R.r.unlink(file)
            if hasattr(job,"R")and job.R:
                job.R.close()
Code example #54
File: RModule.py  Project: ckoryom/MiningGitHub
 def __init__(self):
     self.connection = pyRserve.connect()
     self.checkConnection(True)
Code example #55
def connect():
    return R.connect()
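# (Hedged note: this assumes the module imported pyRserve under the alias R,
# e.g. "import pyRserve as R", so each call opens a fresh Rserve connection.)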
Code example #56
import sys

import numpy
import pyRserve
from flask import request, render_template, send_file
from PIL import Image

from . import main
from . import model

THREE_MONTH_AGO = '2017-02-01'
SIX_MONTH_AGO = '2016-11-01'
ONE_YEAR_AGO = '2016-05-01'
TWO_YEAR_AGO = '2015-05-01'

rConn = pyRserve.connect(host='localhost', port=6311)

model = model.Lottery()


@main.route('/histogram/')
def serve_diagram():
    duration = request.args.get('duration')

    d = None
    if duration == '3m':
        d = THREE_MONTH_AGO
    elif duration == '6m':
        d = SIX_MONTH_AGO
    elif duration == '1y':
        d = ONE_YEAR_AGO
Code example #57
File: tasks.py  Project: jrawbits/Configurator
def performModel(input_files, tool_config, client, subtool_name=False):
    """
    input_files is the set of data to analyze from the NMTK server
    tool_config is the "header" part of the input
    client is an object of type NMTK_apps.helpers.server_api.NMTKClient
    subtool_name is provided if the tool manages multiple configurations
    """
    logger = performModel.get_logger()
    logger.debug("input_files: %s" % (input_files,))
    logger.debug("tool_config\n%s\n" % (tool_config,))

    # Use exception handling to generate "error" results -- everything that
    # doesn't generate good results should throw an exception Use the extra
    # 'with' syntax to ensure temporary files are promptly cleaned up after tool
    # execution.  With luck, the tool server will also do periodic garbage
    # collection on tools that don't pick up after themselves.

    # Prepare a connection to R
    R = None

    with Config.Job(input_files, tool_config) as job:

        try:
            # Initialize the job setup (can't do it in __init__ as we would
            # need to try too hard)
            job.setup()

            # Set up a master directory of parameters
            parameters = {}
            compute = parameters["compute"] = {}
            raster = parameters["raster"] = {}
            image = parameters["image"] = {}

            ########################################
            # Computation (Python/R)
            compute_factors = job.getParameters("computation_params")
            logger.debug(compute_factors)

            #   Determine specific computational engines to use, if any
            computetype = compute["type"] = compute_factors.get("computetype", "None")

            #   Notify the user via a status update
            if computetype != "None":
                compute_R = compute["with_R"] = computetype in ["R", "Both"]
                compute_Python = compute["with_Python"] = computetype in ["Python", "Both"]

                if compute_R or compute_Python:
                    computemsg = "Computation will occur using"
                    if compute_R:
                        computemsg += " R"
                        if compute_Python:
                            computemsg += " and"
                    if compute_Python:
                        computemsg += " Python"
                else:
                    computemsg = "Computation will not occur"

                #   Determine parameter; default is to square it same as /tool_config
                compute["power"] = compute_factors.get("raisetopower", 2)

                #   Determine input (file/constant data) / we'll iterate later
                compute_file = job.getFeatures("computation")

                #   Determine what to return (result file)
                compute_output = job.getParameters("computation_output")
                compute["PythonName"] = compute_output.get("python_result", "PowerOfPython")
                compute["RName"] = compute_output.get("r_result", "PowerOfR")
            else:
                # make sure the flags and input exist even when computation is
                # skipped, so the loop over compute_file below degrades to a no-op
                compute_R = compute["with_R"] = False
                compute_Python = compute["with_Python"] = False
                compute_file = []
                computemsg = "Computation was not requested."

            client.updateStatus(computemsg)

            ########################################
            # Rasterization (desired, input file provided, default to use instead)
            raster_factors = job.getParameters("rasterization_params")

            # Set up the default files
            default_vector_name = os.path.join(settings.STATIC_ROOT, "Configurator/Vector_Test.geojson")
            default_raster_file = os.path.join(settings.STATIC_ROOT, "Configurator/Raster_Test.tif")

            #   Check if rasterization was requested
            raster["do"] = raster_factors.get("dorasterize", 0)

            # Get the filename to rasterize, substituting in a default if no file is
            # provided.  We won't load the file data since we're just going to hand
            # the file path to R for processing.
            try:
                logger.debug("File: %s" % (job.datafile("rasterize")))
                raster["vectorfile"] = job.datafile("rasterize")  # the file name
            except Exception as e:  # No file provided, so we'll pull out the default
                logger.debug(str(e))
                logger.debug("Using default vector file for rasterizing")
                raster["vectorfile"] = default_vector_name

            # Pull the rastervalue from the job configuration.  We don't care if it's
            # a literal numeric value or a property name.  The R function to
            # rasterize the file will use the value as a constant if provided, or
            # will use a string as the name of a feature attribute to provide the
            # raster value for that feature.
            raster["value"] = 1
            raster_value_set = job.getParameters("rasterize")
            if "rastervalue" in raster_value_set:
                raster["value"] = raster_value_set.get("rastervalue", 1)
            else:
                raster["value"] = 1
            raster["x_dim"] = raster_value_set.get("raster_x", 300)
            raster["y_dim"] = raster_value_set.get("raster_y", 300)
            # raster["proportional"] = raster_value_set.get('proportional',0)
            # raster["smoothing"] = raster_value_set.get('smoothing',0)

            #   Set output format (Rdata-RDS, Erdas IMAGINE, geoTIFF)
            raster_output = job.getParameters("rasterization_output")
            raster["returnvector"] = raster_output.get("return_vector", 0)
            raster["format"] = raster_output.get("return_raster", "geoTIFF")
            rasterformat = RasterFormatTable.get(raster["format"], {})
            if not rasterformat:  # Did not request return of raster
                raster["returnraster"] = 0
                msg = "Invalid raster format %s" % (raster["format"],)
                client.updateStatus(msg)
                raster["rasterfile"] = ""
                raster["mimetype"] = ""
                raster["displayname"] = ""
                raster["savefunc"] = ""
                raster["loadfunc"] = ""
            else:
                raster["returnraster"] = 1
                rasterbasename = raster_output.get("raster_basename", "raster")
                if not raster["do"]:
                    raster["format"] = "geoTIFF"
                    rasterformat = RasterFormatTable.get(raster["format"], {})
                raster["rasterfile"] = os.tempnam() + rasterformat["extension"]
                raster["mimetype"] = rasterformat["mimetype"]
                raster["displayname"] = (
                    rasterbasename + rasterformat["extension"]
                )  # The name to offer when the raw raster is sent back
                if raster["do"]:
                    raster["savefunc"] = rasterformat["save"] % (
                        raster["rasterfile"],
                    )  # R Function to save a dataset to rastername in selected format
                else:
                    raster[
                        "savefunc"
                    ] = (
                        "savefunc<-function(obj){invisible(0)}"
                    )  # don't save over default file (shouldn't call, but just in case!)
                raster["loadfunc"] = rasterformat["load"] % (
                    raster["rasterfile"],
                )  # R Function to load a raster for plotting

            if raster["do"]:  # don't bother setting up unless rasterization requested
                client.updateStatus("Rasterization successfully configured.")
            else:
                client.updateStatus("Rasterization was not requested")

            ########################################
            # Image Generation (desired, output format)
            image_selection = job.getParameters("imaging_params")
            image["vector"] = image_selection.get("imagevector", 0)
            image["raster"] = image_selection.get("imageraster", 0)

            image_output = job.getParameters("image_output")
            image["format"] = image_output["imageformat"]
            imageformat = ImageFormatTable.get(image["format"][0:3], {})
            if not imageformat:
                # Unknown format, don't do images
                image["vector"] = image["raster"] = 0
                msg = "Invalid image format: %s (%s)" % (image["format"], image["format"][0:3])
                client.updateStatus(msg)
            if image["vector"] or image["raster"]:
                client.updateStatus("Imaging successfully configured.")
            else:
                client.updateStatus("Imaging was not requested.")

            client.updateStatus("Parameter & data file validation complete.")

            ###################################
            # Now perform the requested actions

            ###################################
            # Configuration Summary
            # Assemble an output file of what was configured (essentially for debugging)
            config_summary = StringIO.StringIO()
            dw = csv.DictWriter(config_summary, fieldnames=("Description", "Value"), extrasaction="ignore")
            dw.writeheader()
            for section in ["compute", "raster", "image"]:
                if section in parameters:
                    dw.writerow({"Description": "Section", "Value": section})
                    for description, value in parameters[section].iteritems():
                        dw.writerow({"Description": "Parameter-%s-%s" % (section, description), "Value": str(value)})
            del dw

            ###################################
            # Computation

            # Remember that all parameters, regardless of their stated type, arrive
            # in the tool as string representations (the promise is just that the
            # string will probably convert successfully to the tool_config type).
            # Thus all the computation code should perform idempotent conversions...
            if compute_Python:
                pyPower = decimal.Decimal(str(compute["power"]))
            if compute_R:
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
                R.r.rpower = compute["power"]  # R.r.r...
                # The JSON parser (used in displaying NMTK results) chokes on a NaN
                # returned directly from R because it doesn't recognize an unquoted
                # NaN as numeric and sees it as a string without quotes; We'll
                # account for that in the R function and return a string
                R.r(
                    """
                # Fun with R closure magic: convert the power from string to number
                # once then embed that in a function and return the function, which
                # we promptly call with the power to make the actual computational
                # function.  Note parenthetical priorities...

                compute <- (function(rp) {
                    rpower <- as.numeric(rp)
                    function(value) {
                        result <- as.numeric(value) ** rpower
                        if (is.nan(result)||is.na(result)) result<-"Nan-R"
                        result
                    }
                })(rpower)
                # Later, just call compute(value)
                """,
                    void=True,
                )
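            # (Hedged aside: the R closure above follows the same pattern as
            # this Python sketch -- convert the exponent once, then capture it
            # in the returned function:
            #     def make_compute(rp):
            #         rpower = float(rp)
            #         return lambda value: float(value) ** rpower
            # )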

            for row in compute_file:  # Loop over the rows in the input file
                for field, value in row.iteritems():
                    if compute_Python:
                        try:
                            pyValue = decimal.Decimal(str(value))
                        except:
                            pyValue = decimal.Decimal.from_float(float("nan"))
                        if not pyValue.is_nan():
                            pyResult = pyValue ** pyPower
                        else:
                            pyResult = "NaN-Python"
                        compute_file.addResult(compute["PythonName"] + "_" + field, pyResult)
                    if compute_R:
                        Rresult = R.r.compute(value)
                        logger.debug(
                            "Computed R result for field %s, Result %s of value %s ** power %s"
                            % (field, Rresult, value, R.r.rpower)
                        )
                        compute_file.addResult(compute["RName"] + "_" + field, Rresult)
            if R:
                R.close()

            client.updateStatus("Done with computations")

            ###################################
            # Rasterization

            # If requested, take the input vector (either a supplied or default
            # file) and pass it through the R rasterization
            # If NOT requested, but imaging of a raster was presented, just
            # use the default raster from the world of static data

            if raster["do"]:
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
                R.r.vectorfile = raster["vectorfile"]  # File to rasterize
                # Note that output file is built into "savefunc"
                R.r.xdim = raster["x_dim"]  # Desired raster resolution, x and y
                R.r.ydim = raster["y_dim"]
                R.r.rastervalue = raster["value"]  # Value for raster cells,  either text/fieldname or numeric value
                R.r(raster["savefunc"])  # Load the function to save the raster in desired format
                # Actions:
                #   Load vector file
                #   Create extent from the file
                #   Create a blank raster with the right resolution (use default values)
                #   Rasterize the input file; raster.field can flexibly be a field name or a value
                #   Write it out in a suitable format for later plotting
                R.r(
                    """
                    require(rgdal)
                    require(sp)
                    require(raster)
                    input.file <- readOGR(vectorfile,layer="OGRGeoJSON")
                    e <- extent(input.file)
                    t <- raster(e,nrows=ydim,ncols=xdim)
                    rsa <- rasterize(input.file,t,field=rastervalue)
                    savefunc(rsa)
                    """,
                    void=True,
                )
                if R:
                    R.close()

            ###################################
            # Imaging

            # If requested, take either the vector, the rasterized result or both
            # and pass them through R

            # Image file is raster["vectorfile"]
            image["vectorplotfile"] = ""
            image["rasterplotfile"] = ""
            if image["vector"] or image["raster"]:
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()
                # TODO: Include basic plot parameters (e.g title of what we're plotting)
                R.r.plotformat = imageformat["R-device"]  # Select R image output device
                R.r(
                    """
                plotfunc <- function(to.plot, outfile) {
                    plotdev <- get(plotformat)
                    plotdev(file=outfile)
                    plot(to.plot)
                    dev.off()
                }
                """,
                    void=True,
                )

                if image["vector"]:
                    try:
                        R.r.plotfile = raster["vectorfile"]
                        R.r.outfile = image["vectorplotfile"] = os.tempnam()
                        R.r(
                            """
                        library(sp)
                        library(rgdal)
                        to.plot <- readOGR(plotfile,layer="OGRGeoJSON")
                        plotfunc(to.plot,outfile)
                        """,
                            void=True,
                        )
                    except Exception as e:
                        logger.debug(str(e))
                        client.updateStatus("Imaging failure(vector): " + str(e))

                if image["raster"]:
                    try:
                        # Change to use RasterFormatTable Load function to obtain the to.plot dataset
                        R.r(raster["loadfunc"])  # install load function for raster in requested format
                        R.r.outfile = image["rasterplotfile"] = os.tempnam()
                        R.r(
                            """
                        library(raster)
                        to.plot <- loadfunc()
                        plotfunc(to.plot,outfile)
                        """,
                            void=True,
                        )
                    except Exception as e:
                        logger.debug(str(e))
                        client.updateStatus("Imaging failure(raster): " + str(e))
                if R:
                    R.close()

            ###################################
            # Prepare results
            outfiles = {}
            main_result = "summary"
            comp_result = "computations"
            vector_input = "vectorinput"
            raster_file = "rasterfile"
            vector_plot = "vectorplotfile"
            raster_plot = "rasterplotfile"

            # Result files are a dictionary with a key (the multi-part POST slug),
            # plus a 3-tuple consisting of the recommended file name, the file data,
            # and a MIME type
            outfiles[main_result] = ("summary.csv", config_summary.getvalue(), "text/csv")
            if compute_R or compute_Python:
                outfiles[comp_result] = (
                    "computation.%s" % (compute_file.extension,),
                    compute_file.getDataFile(),
                    compute_file.content_type,
                )

            if image["vectorplotfile"] or raster["do"] or image["rasterplotfile"]:
                # There really should always be an "R" in this case
                if not R:
                    R = pyRserve.connect()
                else:
                    R.connect()

            if raster["returnvector"]:
                try:
                    vecbase = open(raster["vectorfile"])
                    outfiles[vector_input] = ("vectorbase.geojson", vecbase.read(), "application/json")
                    client.updateStatus("Returning input vector file as geojson")
                    vecbase.close()
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Return vector failure: " + str(e))
            if image["vectorplotfile"]:
                try:
                    vecimg = open(image["vectorplotfile"], "rb")
                    outfiles[vector_plot] = (
                        "vectorplot.%s" % (imageformat["extension"],),
                        vecimg.read(),
                        imageformat["mimetype"],
                    )
                    vecimg.close()
                    client.updateStatus("Removing temporary vector file: " + image["vectorplotfile"])
                    R.r.unlink(image["vectorplotfile"])  # Get R to unlink the temporary file so we have permission
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing vector image output file failed: " + str(e))
            if raster["returnraster"]:  # if we are expected to return a raster
                try:
                    rasterfile = open(raster["rasterfile"], "rb")
                    outfiles[raster_file] = (raster["displayname"], rasterfile.read(), raster["mimetype"])
                    rasterfile.close()
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing raw raster output file failed: " + str(e))
            if raster["do"]:  # clean up the temporary rasterization file (may have done this without return raw file)
                try:
                    client.updateStatus("Removing temporary raster file: " + raster["rasterfile"])
                    R.r.unlink(raster["rasterfile"])  # Get R to unlink the temporary file so we have permission
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing raw raster output file failed: " + str(e))
            if image["rasterplotfile"]:
                try:
                    rstimg = open(image["rasterplotfile"], "rb")
                    outfiles[raster_plot] = (
                        "rasterplot.%s" % (imageformat["extension"],),
                        rstimg.read(),
                        imageformat["mimetype"],
                    )
                    rstimg.close()
                    client.updateStatus("Removing temporary raster file: " + image["rasterplotfile"])
                    R.r.unlink(image["rasterplotfile"])  # Get R to unlink the temporary file so we have permission
                except Exception as e:
                    logger.debug(str(e))
                    client.updateStatus("Preparing raster image output file failed: " + str(e))

            if outfiles:
                client.updateResults(
                    result_field=None,  # Default field to thematize in result_file
                    units=None,  # Text legend describing the units of 'result_field'
                    result_file=main_result,  # Supply the file 'key' (see outfiles above)
                    files=outfiles,  # Dictionary of tuples providing result files
                )
            if R:
                R.close()

        except Exception as e:
            msg = "Job failed."
            logger.exception(msg)
            logger.exception(str(e))
            job.fail(msg)
            job.fail(str(e))
            client.updateResults(payload={"errors": job.failures}, failure=True, files={})

    # Clean up R after all is done (harmless if R is None, cleans up
    # connection to Rserve otherwise)
    del R