def _call_surveillance_algo(self, sts, detection_range):
    control = r.list(
        range=detection_range,
        k=self.threshold,
        ret=self.upperbound_statistic,
        maxUpperboundCases=self.max_upperbound_cases,
    )
    surv = surveillance.outbreakP(sts, control=control)
    return surv
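# For orientation: the _call_surveillance_algo wrappers in this section all follow
# the same pattern - build an R control list with r.list(...) and pass it to the
# matching function of the R "surveillance" package. As a rough sketch, the call
# above corresponds to something like the following written directly in R (the
# concrete values are illustrative only, not defaults taken from the source):
#
#   surveillance::outbreakP(sts, control = list(
#       range = 200:290, k = 100, ret = "cases", maxUpperboundCases = 1e5))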
def _call_surveillance_algo(self, sts, detection_range):
    control = r.list(
        range=detection_range,
        b=self.years_back,
        m=self.window_half_width,
        alpha=self.alpha,
    )
    surv = surveillance.algo_cdc(sts, control=control)
    return surv
def _call_surveillance_algo(self, sts, detection_range):
    control = r.list(
        range=detection_range,
        b=self.years_back,
        w=self.window_half_width,
        actY=self.include_recent_year,
    )
    surv = surveillance.rki(sts, control=control)
    return surv
def _call_surveillance_algo(self, sts, detection_range):
    control = r.list(
        range=detection_range,
        method=self.method,
        baseline=self.baseline,
        minSigma=self.min_sigma,
        alpha=self.alpha,
    )
    surv = surveillance.earsC(sts, control=control)
    return surv
def process(outf, dti_f, bval_f, python=False):
    """
    Take lists of DTI and b-value files and write a gzipped R data file
    (or a Python pickle when ``python=True``) with all B0 data arrays stored in it.
    """
    if python:
        import collections
        b0s = collections.OrderedDict()

    for idx, scan in enumerate(bval_f):
        print(scan)
        basename = os.path.basename(scan)
        print(basename)
        bval = np.loadtxt(scan)
        bval[np.where(bval == np.min(bval))] = 0
        im = nb.load(dti_f[idx])
        b0_loc = np.where(bval == np.min(bval))[0][0]
        dti = im.get_data()[:, :, :, b0_loc]
        if python:
            b0s[basename] = np.ravel(dti)
        else:
            ro = numpy2ri(np.ravel(dti + 1))
            rr = robj.Matrix(ro)
            if idx == 0:
                myl = r.list(basename=rr)
            else:
                myl = r.c(myl, r.list(basename=rr))

    if python:
        import pickle
        # write python dict to a file
        # mydict = {'a': 1, 'b': 2, 'c': 3}
        output = open(outf, 'wb')
        pickle.dump(b0s, output)
        output.close()
        # read python dict back from the file
        # pkl_file = open('myfile.pkl', 'rb')
        # mydict2 = pickle.load(pkl_file)
        # pkl_file.close()
    else:
        r.assign('bar', myl)
        r("save(bar, file='" + outf + "', compress=TRUE)")
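# A minimal usage sketch for process() (the file names are purely illustrative,
# not taken from the source); each entry of dti_f is paired with the b-value
# file at the same index of bval_f:
#
#   dti_files = ["subj01_dwi.nii.gz", "subj02_dwi.nii.gz"]
#   bval_files = ["subj01.bval", "subj02.bval"]
#   process("b0s.pkl", dti_files, bval_files, python=True)   # pickle output
#   process("b0s.RData", dti_files, bval_files)               # R save() output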
def _call_surveillance_algo(self, disprog_obj, detection_range):
    control = r.list(
        range=detection_range,
        Mtilde=self.n_observations,
        noStates=self.n_hidden_states,
        trend=self.trend,
        noHarmonics=self.n_harmonics,
        covEffectEqual=self.equal_covariate_effects,
    )
    surv = surveillance.algo_hmm(disprog_obj, control=control)
    return surv
def _call_surveillance_algo(self, disprog_obj, detection_range):
    control = r.list(
        range=detection_range,
        b=self.years_back,
        w=self.window_half_width,
        reweight=self.reweight,
        alpha=self.alpha,
        trend=self.trend,
        limit54=r.c(self.min_cases_in_past_periods, self.past_period_cutoff),
        powertrans=self.power_transform,
    )
    surv = surveillance.algo_farrington(disprog_obj, control=control)
    return surv
def _call_surveillance_algo(self, sts, detection_range): control = r.list( **{ "range": detection_range, "c.ARL": self.glr_test_threshold, "m0": robjects.NULL, # Mtilde is set to 1, since that is the only valid value for "epi" and "intercept" "Mtilde": 1, "M": self.m, "change": self.change, # Role of theta: If NULL then the GLR scheme is used. If not NULL the prespecified value for κ or λ is used in a recursive LR scheme, which is faster.""" "theta": robjects.NULL, "dir": r.c(*self.direction), "ret": self.upperbound_statistic, }) surv = surveillance.glrpois(sts, control=control) return surv
def _call_surveillance_algo(self, sts, detection_range):
    control = r.list(
        range=detection_range,
        b=self.years_back,
        w=self.window_half_width,
        reweight=self.reweight,
        weightsThreshold=self.weights_threshold,
        alpha=self.alpha,
        trend=self.trend,
        trend_threshold=self.trend_threshold,
        limit54=r.c(self.min_cases_in_past_periods, self.past_period_cutoff),
        powertrans=self.power_transform,
        pastWeeksNotIncluded=self.past_weeks_not_included,
        thresholdMethod=self.threshold_method,
    )
    surv = surveillance.farringtonFlexible(sts, control=control)
    return surv
def _call_surveillance_algo(self, sts, detection_range): control = r.list( **{ "range": detection_range, "c.ARL": self.glr_test_threshold, "m0": robjects.NULL, "alpha": self.alpha, # Mtilde is set to 1, since that is the only valid value for "epi" and "intercept" "Mtilde": 1, "M": self.m, "change": self.change, "theta": robjects.NULL, "dir": r.c(*self.direction), "ret": self.upperbound_statistic, "xMax": self.x_max, }) surv = surveillance.glrnb(sts, control=control) return surv
def _call_surveillance_algo(self, sts, detection_range): try: importr("INLA") except RRuntimeError: raise ImportError( "For the Boda algortihm to run you need the INLA package (http://www.r-inla.org/). " 'Install it by running install.packages("INLA", repos = c(getOption("repos"), INLA = "https://inla.r-inla-download.org/R/stable"), dep = TRUE) ' "in the R console.") control = r.list( **{ "range": detection_range, "X": robjects.NULL, "trend": self.trend, "season": self.season, "prior": self.prior, "alpha": self.alpha, "mc.munu": self.mc_munu, "mc.y": self.mc_y, "samplingMethod": self.sampling_method, "quantileMethod": self.quantile_method, }) surv = surveillance.boda(sts, control=control) return surv
def convert_genes_2_GO(genes_2_GO):
    """@return: An R object that can be used to map genes to GO identifiers."""
    return r.list(**genes_2_GO)
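# A minimal usage sketch (assumed import; the gene names and GO identifiers are
# illustrative only, and wrapping the values as StrVector objects is an assumption
# about how the mapping is supplied):
from rpy2 import robjects

genes_2_GO_example = {
    "geneA": robjects.StrVector(["GO:0000001", "GO:0000002"]),
    "geneB": robjects.StrVector(["GO:0000003"]),
}
go_mapping = convert_genes_2_GO(genes_2_GO_example)  # R named list keyed by gene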
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-a", "--gtf-a", dest="gtf_a", type="string",
                      help="supply a gtf file - will compress uncompressed files")
    parser.add_option("-b", "--gtf-b", dest="gtf_b", type="string",
                      help="supply a second gtf file - will compress uncompressed files")
    parser.add_option("-s", "--scripts-dir", dest="scripts_dir", type="string",
                      help="supply a location for accessory scripts")
    parser.add_option("--no-venn", dest="no_venn", action="store_true",
                      help="set if no venn is to be drawn")

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = P.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.run()
        elif gtf.endswith(".gtf"):
            outfile = P.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.run()
        else:
            raise ValueError("cannot perform merge on %s: is not a gtf file" % gtf)

    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")

    # intersect the resulting merged files
    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b]) + ".intersection.gtf.gz"
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                   | python %(scriptsdir)s/gtf2gtf.py --merge-transcripts --log=log
                   | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..."
               % (options.gtf_a, options.gtf_b))

        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print "counts for gtf-a: ", count_gtf_merged_a

        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print "counts for gtf-b: ", count_gtf_merged_b

        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print "counts for intersection: ", count_intersection

        # this is the important bit - basically take an arbitrary list of numbers
        # to represent the list of lincrna in the refnoncoding set
        # then use the intersection count to represent the overlapping section in
        # the lincrna set and add a set of random numbers to this
        # set to make up the remaining - non-overlapping set
        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {
            "gtf-b": map(str, xrange(count_gtf_merged_b)),
            "gtf-a": map(str, xrange(count_intersection)) + map(
                str, [random.random() for i in range(count_intersection, count_gtf_merged_a)])}

        R_source = os.path.join(os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")

        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)
        E.info("drawing venn diagram to %s" % (prefix_a + "_vs_" + prefix_b + ".overlap.png"))

        R["venn.diagram2"](R.list(A=result[gtf_pair]["gtf-a"],
                                  B=result[gtf_pair]["gtf-b"]),
                           prefix_a + "_vs_" + prefix_b + ".overlap.png",
                           **{'cat.cex': 1.5,
                              'main.fontfamily': "Arial",
                              'cat.pos': FloatVector((0, 0)),
                              'cat.fontfamily': "Arial",
                              'main.cex': 1.8,
                              'height': 1000,
                              'width': 1000,
                              'cex': 2,
                              'fontfamily': "Arial",
                              'lwd': R.c(1, 1),
                              'fill': R.c(R.rgb(0, 0, 0.5, 0.5), R.rgb(0.5, 0, 0, 0.5)),
                              'category.names': R.c(prefix_a, prefix_b),
                              'margin': R.c(0.1, 0.1, 0.1, 0.1)})

    ## write footer and output benchmark information.
    E.Stop()
# get reference window data for training
print("\nGetting reference windows...\n")
rw, tw = getRefWindow(1, 7, test_date)

# move the dataframes over to R
rw = rw.reshape((rw.shape[0], ))
r_rw = ro.numpy2ri.py2ri(rw)
r_tw = pandas2ri.py2ri(tw)

# note: need to swap order of these later!
# also: need to set N to proper length for each rw!!
r_rwlist = r.list(N=780, x=r_rw)

# convert R code into python...
# train <- simulate(model, c(100,200), seed=1234, rand.emis=rnorm.hsmm)
#
# plot(train,xlim=c(0,100))
# init0 <- rep(1/J,J)
# P0 <- matrix(1/J,nrow=J,ncol=J)
# b0 <- list(mu=c(-3,1,3),sigma=c(1,1,1))
# startval <- hmmspec(init=init0, trans=P0,parms.emission=b0,dens.emission=dnorm.hsmm)
#
# h1 = hmmfit(train,startval,mstep=mstep.norm)

# rstring="""
# function(rw_list){
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a", "--first-gtf-file", dest="gtf_a", type="string",
        help="supply a gtf file - will compress uncompressed files")

    parser.add_option(
        "-b", "--second-gtf-file", dest="gtf_b", type="string",
        help="supply a second gtf file - will compress uncompressed files")

    parser.add_option("-s", "--scripts-dir", dest="scripts_dir", type="string",
                      help="supply a location for accessory scripts")

    parser.add_option("--no-venn", dest="no_venn", action="store_true",
                      help="set if no venn is to be drawn")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = IOTools.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(IOTools.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --method=merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.execute(statement)
        elif gtf.endswith(".gtf"):
            outfile = IOTools.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(IOTools.snip(gtf, ".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --method=merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            E.execute(statement)
        else:
            raise ValueError(
                "cannot perform merge on %s: is not a gtf file" % gtf)

    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")

    # intersect the resulting merged files
    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b]) + ".intersection.gtf.gz"
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                   | python %(scriptsdir)s/gtf2gtf.py --method=merge-transcripts --log=log
                   | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..."
               % (options.gtf_a, options.gtf_b))

        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(
            IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print("counts for gtf-a: ", count_gtf_merged_a)

        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print("counts for gtf-b: ", count_gtf_merged_b)

        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print("counts for intersection: ", count_intersection)

        # this is the important bit - basically take an arbitrary list of numbers
        # to represent the list of lincrna in the refnoncoding set
        # then use the intersection count to represent the overlapping section in
        # the lincrna set and add a set of random numbers to this
        # set to make up the remaining - non-overlapping set
        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {
            "gtf-b": list(map(str, range(count_gtf_merged_b))),
            "gtf-a": list(map(str, range(count_intersection))) + list(
                map(str, [
                    random.random()
                    for i in range(count_intersection, count_gtf_merged_a)
                ]))
        }

        R_source = os.path.join(
            os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")

        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)
        E.info("drawing venn diagram to %s" %
               (prefix_a + "_vs_" + prefix_b + ".overlap.png"))

        R["venn.diagram2"](R.list(A=result[gtf_pair]["gtf-a"],
                                  B=result[gtf_pair]["gtf-b"]),
                           prefix_a + "_vs_" + prefix_b + ".overlap.png",
                           **{
                               'cat.cex': 1.5,
                               'main.fontfamily': "Arial",
                               'cat.pos': FloatVector((0, 0)),
                               'cat.fontfamily': "Arial",
                               'main.cex': 1.8,
                               'height': 1000,
                               'width': 1000,
                               'cex': 2,
                               'fontfamily': "Arial",
                               'lwd': R.c(1, 1),
                               'fill': R.c(R.rgb(0, 0, 0.5, 0.5),
                                           R.rgb(0.5, 0, 0, 0.5)),
                               'category.names': R.c(prefix_a, prefix_b),
                               'margin': R.c(0.1, 0.1, 0.1, 0.1)
                           })

    # write footer and output benchmark information.
    E.Stop()
def dict_to_named_list(dct):
    if (isinstance(dct, dict)
            or isinstance(dct, Parameter)
            or isinstance(dct, pd.core.series.Series)):
        return r.list(**{key: val for key, val in dct.items()})
    return dct
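# A minimal usage sketch (assumed import; the parameter names and values are
# illustrative only). A mapping is expanded into keyword arguments of r.list,
# producing an R named list; non-mapping inputs pass through unchanged.
from rpy2.robjects import r

r_params = dict_to_named_list({"mu": 0.5, "sigma": 1.0})
print(list(r_params.names))               # ['mu', 'sigma']
passthrough = dict_to_named_list([1, 2])  # not a dict/Parameter/Series, returned as-is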