Beispiel #1
0
 def _call_surveillance_algo(self, sts, detection_range):
     """Call ``surveillance.outbreakP`` on ``sts`` and return its result.

     The control list passed to the algorithm contains ``detection_range``
     as ``range`` together with this instance's ``threshold`` (as ``k``),
     ``upperbound_statistic`` (as ``ret``) and ``max_upperbound_cases``
     (as ``maxUpperboundCases``).
     """
     control_args = {
         "range": detection_range,
         "k": self.threshold,
         "ret": self.upperbound_statistic,
         "maxUpperboundCases": self.max_upperbound_cases,
     }
     r_control = r.list(**control_args)
     return surveillance.outbreakP(sts, control=r_control)
Beispiel #2
0
 def _call_surveillance_algo(self, sts, detection_range):
     """Call ``surveillance.algo_cdc`` on ``sts`` and return its result.

     The control list carries ``detection_range`` as ``range`` plus this
     instance's ``years_back`` (as ``b``), ``window_half_width`` (as ``m``)
     and ``alpha``.
     """
     control_args = {
         "range": detection_range,
         "b": self.years_back,
         "m": self.window_half_width,
         "alpha": self.alpha,
     }
     r_control = r.list(**control_args)
     return surveillance.algo_cdc(sts, control=r_control)
Beispiel #3
0
    def _call_surveillance_algo(self, sts, detection_range):
        """Call ``surveillance.rki`` on ``sts`` and return its result.

        The control list carries ``detection_range`` as ``range`` plus this
        instance's ``years_back`` (as ``b``), ``window_half_width`` (as
        ``w``) and ``include_recent_year`` (as ``actY``).
        """
        control_args = {
            "range": detection_range,
            "b": self.years_back,
            "w": self.window_half_width,
            "actY": self.include_recent_year,
        }
        r_control = r.list(**control_args)
        return surveillance.rki(sts, control=r_control)
Beispiel #4
0
 def _call_surveillance_algo(self, sts, detection_range):
     """Call ``surveillance.earsC`` on ``sts`` and return its result.

     The control list carries ``detection_range`` as ``range`` plus this
     instance's ``method``, ``baseline``, ``min_sigma`` (as ``minSigma``)
     and ``alpha``.
     """
     control_args = {
         "range": detection_range,
         "method": self.method,
         "baseline": self.baseline,
         "minSigma": self.min_sigma,
         "alpha": self.alpha,
     }
     r_control = r.list(**control_args)
     return surveillance.earsC(sts, control=r_control)
def process(outf, dti_f, bval_f, python=False):
    """
    Extract one B0 volume per DTI scan and store all of them in ``outf``.

    For every b-value file in ``bval_f`` the image at the same index in
    ``dti_f`` is loaded, the volume at the first position holding the
    smallest b-value is selected and flattened with ``np.ravel``.

    Parameters
    ----------
    outf : str
        Path of the output file.
    dti_f : sequence of str
        Paths of the DTI images, parallel to ``bval_f``.
    bval_f : sequence of str
        Paths of the b-value text files (read with ``np.loadtxt``).
    python : bool, optional
        If true, pickle an ``OrderedDict`` that maps the basename of each
        b-value file to its flattened B0 array.  Otherwise build an R list
        and write it with R's ``save(bar, ..., compress=TRUE)``; in that
        mode 1 is added to every voxel before the conversion to R.

    An empty ``bval_f`` produces an empty dict / empty R list.
    """
    if python:
        import collections
        b0s = collections.OrderedDict()
    else:
        # Start from an empty R list: every scan is then appended the same
        # way and ``myl`` is defined even when there is nothing to process.
        myl = r.list()

    for idx, scan in enumerate(bval_f):
        print(scan)
        basename = os.path.basename(scan)
        print(basename)
        bval = np.loadtxt(scan)
        # Force the smallest b-value to 0, then locate its first occurrence.
        bval[np.where(bval == np.min(bval))] = 0
        im = nb.load(dti_f[idx])
        b0_loc = np.where(bval == np.min(bval))[0][0]
        dti = im.get_data()[:, :, :, b0_loc]
        if python:
            b0s[basename] = np.ravel(dti)
        else:
            ro = numpy2ri(np.ravel(dti + 1))
            rr = robj.Matrix(ro)
            # NOTE(review): every element is literally named "basename" in
            # the R list (keyword name, not the variable's value).  If the
            # file name was intended, this should be ``**{basename: rr}``;
            # kept as is so the saved file layout does not change.
            myl = r.c(myl, r.list(basename=rr))

    if python:
        import pickle
        # The context manager closes the file even if pickling fails.
        # Read back with ``pickle.load(open(outf, 'rb'))``.
        with open(outf, 'wb') as output:
            pickle.dump(b0s, output)
    else:
        r.assign('bar', myl)
        # NOTE(review): ``outf`` is pasted into R source; a path containing
        # a single quote would break this statement.
        r("save(bar, file='" + outf + "', compress=TRUE)")
Beispiel #6
0
 def _call_surveillance_algo(self, disprog_obj, detection_range):
     """Call ``surveillance.algo_hmm`` on ``disprog_obj`` and return its result.

     The control list carries ``detection_range`` as ``range`` plus this
     instance's ``n_observations`` (as ``Mtilde``), ``n_hidden_states``
     (as ``noStates``), ``trend``, ``n_harmonics`` (as ``noHarmonics``)
     and ``equal_covariate_effects`` (as ``covEffectEqual``).
     """
     control_args = {
         "range": detection_range,
         "Mtilde": self.n_observations,
         "noStates": self.n_hidden_states,
         "trend": self.trend,
         "noHarmonics": self.n_harmonics,
         "covEffectEqual": self.equal_covariate_effects,
     }
     r_control = r.list(**control_args)
     return surveillance.algo_hmm(disprog_obj, control=r_control)
Beispiel #7
0
    def _call_surveillance_algo(self, disprog_obj, detection_range):
        """Call ``surveillance.algo_farrington`` on ``disprog_obj``.

        The control list carries ``detection_range`` as ``range`` plus this
        instance's ``years_back`` (``b``), ``window_half_width`` (``w``),
        ``reweight``, ``alpha``, ``trend`` and ``power_transform``
        (``powertrans``).  ``limit54`` is the R vector built from
        ``min_cases_in_past_periods`` and ``past_period_cutoff``.

        Returns whatever ``algo_farrington`` returns.
        """
        control_args = {
            "range": detection_range,
            "b": self.years_back,
            "w": self.window_half_width,
            "reweight": self.reweight,
            "alpha": self.alpha,
            "trend": self.trend,
            "limit54": r.c(self.min_cases_in_past_periods,
                           self.past_period_cutoff),
            "powertrans": self.power_transform,
        }
        r_control = r.list(**control_args)
        return surveillance.algo_farrington(disprog_obj, control=r_control)
Beispiel #8
0
    def _call_surveillance_algo(self, sts, detection_range):
        """Call ``surveillance.glrpois`` on ``sts`` and return its result.

        The control entries are collected in a dict first because ``c.ARL``
        is not a valid Python keyword name and can only be passed through
        ``**`` unpacking.
        """
        control_args = {
            "range": detection_range,
            "c.ARL": self.glr_test_threshold,
            "m0": robjects.NULL,
            # Fixed to 1: the only valid value for "epi" and "intercept".
            "Mtilde": 1,
            "M": self.m,
            "change": self.change,
            # theta = NULL selects the GLR scheme; a non-NULL value would be
            # used as the prespecified kappa/lambda in a (faster) recursive
            # LR scheme.
            "theta": robjects.NULL,
            "dir": r.c(*self.direction),
            "ret": self.upperbound_statistic,
        }
        r_control = r.list(**control_args)
        return surveillance.glrpois(sts, control=r_control)
Beispiel #9
0
    def _call_surveillance_algo(self, sts, detection_range):
        """Call ``surveillance.farringtonFlexible`` on ``sts``.

        The control list carries ``detection_range`` as ``range`` and the
        instance settings under the names the R function is given here
        (``b``, ``w``, ``reweight``, ``weightsThreshold``, ``alpha``,
        ``trend``, ``trend_threshold``, ``limit54``, ``powertrans``,
        ``pastWeeksNotIncluded``, ``thresholdMethod``).

        Returns whatever ``farringtonFlexible`` returns.
        """
        control_args = {
            "range": detection_range,
            "b": self.years_back,
            "w": self.window_half_width,
            "reweight": self.reweight,
            "weightsThreshold": self.weights_threshold,
            "alpha": self.alpha,
            "trend": self.trend,
            "trend_threshold": self.trend_threshold,
            # Two-element R vector: minimum case count and period cutoff.
            "limit54": r.c(self.min_cases_in_past_periods,
                           self.past_period_cutoff),
            "powertrans": self.power_transform,
            "pastWeeksNotIncluded": self.past_weeks_not_included,
            "thresholdMethod": self.threshold_method,
        }
        r_control = r.list(**control_args)
        return surveillance.farringtonFlexible(sts, control=r_control)
Beispiel #10
0
    def _call_surveillance_algo(self, sts, detection_range):
        """Call ``surveillance.glrnb`` on ``sts`` and return its result.

        The control entries are collected in a dict first because ``c.ARL``
        is not a valid Python keyword name and can only be passed through
        ``**`` unpacking.  ``m0`` and ``theta`` are always sent as R
        ``NULL``.
        """
        control_args = {
            "range": detection_range,
            "c.ARL": self.glr_test_threshold,
            "m0": robjects.NULL,
            "alpha": self.alpha,
            # Fixed to 1: the only valid value for "epi" and "intercept".
            "Mtilde": 1,
            "M": self.m,
            "change": self.change,
            "theta": robjects.NULL,
            "dir": r.c(*self.direction),
            "ret": self.upperbound_statistic,
            "xMax": self.x_max,
        }
        r_control = r.list(**control_args)
        return surveillance.glrnb(sts, control=r_control)
Beispiel #11
0
 def _call_surveillance_algo(self, sts, detection_range):
     """Call ``surveillance.boda`` on ``sts`` and return its result.

     The R package INLA is loaded with ``importr`` before the control list
     is built.  The control list carries ``detection_range`` as ``range``,
     ``X`` as R ``NULL`` and this instance's ``trend``, ``season``,
     ``prior``, ``alpha``, ``mc_munu`` (``mc.munu``), ``mc_y`` (``mc.y``),
     ``sampling_method`` and ``quantile_method``.

     Raises:
         ImportError: if loading INLA fails with ``RRuntimeError``; the
             message contains the R command to install the package.
     """
     try:
         importr("INLA")
     except RRuntimeError:
         raise ImportError(
             "For the Boda algorithm to run you need the INLA package (http://www.r-inla.org/). "
             'Install it by running install.packages("INLA", repos = c(getOption("repos"), INLA = "https://inla.r-inla-download.org/R/stable"), dep = TRUE) '
             "in the R console.")
     # Dotted names such as "mc.munu" are not valid Python keywords, so the
     # entries are passed through ``**`` unpacking.
     control = r.list(
         **{
             "range": detection_range,
             "X": robjects.NULL,
             "trend": self.trend,
             "season": self.season,
             "prior": self.prior,
             "alpha": self.alpha,
             "mc.munu": self.mc_munu,
             "mc.y": self.mc_y,
             "samplingMethod": self.sampling_method,
             "quantileMethod": self.quantile_method,
         })
     surv = surveillance.boda(sts, control=control)
     return surv
Beispiel #12
0
def convert_genes_2_GO(genes_2_GO):
    """Turn the ``genes_2_GO`` mapping into an R list.

    Every key of the mapping becomes an element name of the list and every
    value the matching element.

    @return: An R object that can be used to map genes to GO identifiers.
    """
    r_mapping = r.list(**genes_2_GO)
    return r_mapping
Beispiel #13
0
def convert_genes_2_GO(genes_2_GO):
    """Build an R list from the ``genes_2_GO`` mapping.

    The mapping is unpacked into keyword arguments of ``r.list``, so its
    keys end up as the names of the list elements.

    @return: An R object that can be used to map genes to GO identifiers.
    """
    as_r_list = r.list(**genes_2_GO)
    return as_r_list
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Steps, as driven by the shell statements built below:

    1. each of the two input GTF files (``--gtf-a``, ``--gtf-b``) is piped
       through ``gtf2gtf.py --merge-transcripts`` into
       ``<prefix>.merged.gtf.gz``;
    2. the two merged files are intersected with ``intersectBed`` into
       ``<prefix_a>_vs_<prefix_b>.intersection.gtf.gz``;
    3. unless ``--no-venn`` is set, the entries of the three files are
       counted and passed to the R function ``venn.diagram2`` (sourced
       from ``venn_diagram.R`` in the scripts directory), with
       ``<prefix_a>_vs_<prefix_b>.overlap.png`` as its second argument.

    Raises ValueError if an input name ends in neither ``.gtf`` nor
    ``.gtf.gz``.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = E.OptionParser( version = "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $", 
                                    usage = globals()["__doc__"] )

    parser.add_option("-a", "--gtf-a", dest="gtf_a", type="string",
                      help="supply a gtf file - will compress uncompressed files"  )
    parser.add_option("-b", "--gtf-b", dest = "gtf_b", type = "string",
                      help="supply a second gtf file - will compress uncompressed files")
    parser.add_option("-s", "--scripts-dir", dest = "scripts_dir", type = "string",
                      help="supply a location for accessory scripts")
    parser.add_option( "--no-venn", dest = "no_venn", action="store_true", 
                      help="set if no venn is to be drawn")

    
    ## add common options (-h/--help, ...) and parse command line 
    (options, args) = E.Start( parser, argv = argv )

    # NOTE(review): no default is given for --gtf-a/--gtf-b; if either is
    # omitted the value is presumably None and gtf.endswith() below fails
    # with AttributeError - confirm and consider validating here.
    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = P.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (gtf, options.scripts_dir, outfile, outfile)
            # NOTE(review): P.run() is called without arguments; presumably
            # it reads the local variable ``statement`` from this frame -
            # confirm. Do not rename ``statement``.
            P.run()
        elif gtf.endswith(".gtf"):
            outfile = P.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf,".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip  > %s''' % (gtf, options.scripts_dir, outfile, outfile)
            P.run()
        else:
            raise ValueError("cannot perform merge on %s: is not a gtf file" % gtf)

    # Map each prefix back to the "a" or "b" slot by substring search.
    # NOTE(review): a prefix that occurs inside options.gtf_a is always
    # assigned to the "a" slot, even if it was derived from gtf_b (e.g. one
    # name being a substring of the other, or both files sharing a prefix).
    # In that case gtf_b/prefix_b are never bound and the code below fails
    # with NameError.
    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")
    # intersect the resulting merged files

    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b]) + ".intersection.gtf.gz" 
    # The %(name)s placeholders are not interpolated here; presumably
    # P.run() fills them from the local variables gtf_a, gtf_b, scriptsdir
    # and intersection_out - TODO confirm. Keep those names unchanged.
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                 | python %(scriptsdir)s/gtf2gtf.py --merge-transcripts --log=log | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." % (options.gtf_a, options.gtf_b))
        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print "counts for gtf-a: ",count_gtf_merged_a

        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print "counts for gtf-b: ",count_gtf_merged_b

        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print "counts for intersection: ", count_intersection

        # The venn function is fed two lists of labels, not the real entries:
        # - "gtf-b" is the strings "0" .. str(count_gtf_merged_b - 1);
        # - "gtf-a" is "0" .. str(count_intersection - 1), i.e. labels shared
        #   with "gtf-b", followed by one stringified random float for each
        #   remaining entry of gtf-a, standing in for the non-overlapping part.
        # If count_intersection exceeds count_gtf_merged_a no random labels
        # are added (the range is empty).

        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {"gtf-b" : map(str,xrange(count_gtf_merged_b))  , "gtf-a" : map(str,xrange(count_intersection)) + map(str, [random.random() for i in range(count_intersection,count_gtf_merged_a)]  )}

        R_source = os.path.join(os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)

        # Replace "." and "-" in the prefixes before they are used as R
        # category names and in the output file name.
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")
        
        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b) 
        E.info("drawing venn diagram to %s" % (prefix_a + "_vs_" + prefix_b + ".overlap.png"))
        
        # Argument names containing dots cannot be written as Python
        # keywords, hence the ** dict.
        R["venn.diagram2"](R.list( A = result[gtf_pair]["gtf-a"], B = result[gtf_pair]["gtf-b"])
        , prefix_a + "_vs_" + prefix_b + ".overlap.png"
        , **{'cat.cex': 1.5
             , 'main.fontfamily': "Arial"
             , 'cat.pos':FloatVector((0,0))
             , 'cat.fontfamily':"Arial"
             , 'main.cex':1.8                                                                                                                                                                                                              
             , 'height':1000
             , 'width':1000
             , 'cex':2                                                                                                                                                                                                                      
             , 'fontfamily':"Arial"                                                                                                                                                                                                         
             , 'lwd':R.c(1,1)                                                                                                                                                                                                               
             , 'fill':R.c(R.rgb(0,0,0.5,0.5), R.rgb(0.5,0,0,0.5))                                                                                                                                                         
             , 'category.names':R.c(prefix_a, prefix_b) 
             , 'margin' : R.c(0.1,0.1,0.1,0.1)
             })

    ## write footer and output benchmark information.
    E.Stop()
Beispiel #15
0
# get reference window data for training
print("\nGetting reference windows...\n")
rw, tw = getRefWindow(1, 7, test_date)

# move the data over to R

# flatten the reference window to one dimension (length = number of rows)
rw = rw.reshape((rw.shape[0], ))

r_rw = ro.numpy2ri.py2ri(rw)
r_tw = pandas2ri.py2ri(tw)

#note: need to swap order of these later!
# N is the length of the flattened reference window; it used to be
# hard-coded to 780, which is only right for a window of exactly that size.
# NOTE(review): assumes rw holds a single sequence - confirm.
r_rwlist = r.list(N=rw.shape[0], x=r_rw)

# convert R code into python...

# train <- simulate(model, c(100,200), seed=1234, rand.emis=rnorm.hsmm)
#
# plot(train,xlim=c(0,100))
# init0 <- rep(1/J,J)
# P0 <- matrix(1/J,nrow=J,ncol=J)
# b0 <- list(mu=c(-3,1,3),sigma=c(1,1,1))
# startval <- hmmspec(init=init0, trans=P0,parms.emission=b0,dens.emission=dnorm.hsmm)
#
# h1 = hmmfit(train,startval,mstep=mstep.norm)

# rstring="""
#     function(rw_list){
Beispiel #16
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Steps, as driven by the shell statements built below:

    1. each of the two input GTF files (``--first-gtf-file``,
       ``--second-gtf-file``) is piped through
       ``gtf2gtf.py --method=merge-transcripts`` into
       ``<prefix>.merged.gtf.gz``;
    2. the two merged files are intersected with ``intersectBed`` into
       ``<prefix_a>_vs_<prefix_b>.intersection.gtf.gz``;
    3. unless ``--no-venn`` is set, the entries of the three files are
       counted and passed to the R function ``venn.diagram2`` (sourced
       from ``venn_diagram.R`` in the scripts directory), with
       ``<prefix_a>_vs_<prefix_b>.overlap.png`` as its second argument.

    Raises ValueError if an input name ends in neither ``.gtf`` nor
    ``.gtf.gz``.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a",
        "--first-gtf-file",
        dest="gtf_a",
        type="string",
        help="supply a gtf file - will compress uncompressed files")
    parser.add_option(
        "-b",
        "--second-gtf-file",
        dest="gtf_b",
        type="string",
        help="supply a second gtf file - will compress uncompressed files")
    parser.add_option("-s",
                      "--scripts-dir",
                      dest="scripts_dir",
                      type="string",
                      help="supply a location for accessory scripts")
    parser.add_option("--no-venn",
                      dest="no_venn",
                      action="store_true",
                      help="set if no venn is to be drawn")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # NOTE(review): no default is given for the two gtf options; if either
    # is omitted the value is presumably None and gtf.endswith() below
    # fails with AttributeError - confirm and consider validating here.
    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = IOTools.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(IOTools.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --method=merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.execute(statement)
        elif gtf.endswith(".gtf"):
            outfile = IOTools.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(IOTools.snip(gtf, ".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --method=merge-transcripts --log=%s.log | gzip  > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            # NOTE(review): this branch runs the statement through
            # E.execute while the .gtf.gz branch uses P.execute - confirm
            # that E provides execute() and which module is intended.
            E.execute(statement)
        else:
            raise ValueError("cannot perform merge on %s: is not a gtf file" %
                             gtf)

    # Map each prefix back to the "a" or "b" slot by substring search.
    # NOTE(review): a prefix that occurs inside options.gtf_a is always
    # assigned to the "a" slot, even if it was derived from gtf_b (e.g. one
    # name being a substring of the other, or both files sharing a prefix).
    # In that case gtf_b/prefix_b are never bound and the code below fails
    # with NameError.
    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")
    # intersect the resulting merged files

    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b
                                    ]) + ".intersection.gtf.gz"
    # The %(name)s placeholders are not interpolated here and P.run() gets
    # no argument, unlike the execute(statement) calls above. Presumably
    # P.run() reads ``statement`` and the locals gtf_a, gtf_b, scriptsdir
    # and intersection_out from this frame - TODO confirm against the P
    # module in use. Keep those names unchanged.
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                 | python %(scriptsdir)s/gtf2gtf.py --method=merge-transcripts --log=log | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." %
               (options.gtf_a, options.gtf_b))
        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(
            IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print("counts for gtf-a: ", count_gtf_merged_a)

        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print("counts for gtf-b: ", count_gtf_merged_b)

        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print("counts for intersection: ", count_intersection)

        # The venn function is fed two lists of labels, not the real entries:
        # - "gtf-b" is the strings "0" .. str(count_gtf_merged_b - 1);
        # - "gtf-a" is "0" .. str(count_intersection - 1), i.e. labels shared
        #   with "gtf-b", followed by one stringified random float for each
        #   remaining entry of gtf-a, standing in for the non-overlapping part.
        # If count_intersection exceeds count_gtf_merged_a no random labels
        # are added (the range is empty).

        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {
            "gtf-b":
            list(map(str, range(count_gtf_merged_b))),
            "gtf-a":
            list(map(str, range(count_intersection))) + list(
                map(str, [
                    random.random()
                    for i in range(count_intersection, count_gtf_merged_a)
                ]))
        }

        R_source = os.path.join(os.path.abspath(options.scripts_dir),
                                "venn_diagram.R")
        R.source(R_source)

        # Replace "." and "-" in the prefixes before they are used as R
        # category names and in the output file name.
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")

        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)
        E.info("drawing venn diagram to %s" %
               (prefix_a + "_vs_" + prefix_b + ".overlap.png"))

        # Argument names containing dots cannot be written as Python
        # keywords, hence the ** dict.
        R["venn.diagram2"](R.list(A=result[gtf_pair]["gtf-a"],
                                  B=result[gtf_pair]["gtf-b"]),
                           prefix_a + "_vs_" + prefix_b + ".overlap.png", **{
                               'cat.cex':
                               1.5,
                               'main.fontfamily':
                               "Arial",
                               'cat.pos':
                               FloatVector((0, 0)),
                               'cat.fontfamily':
                               "Arial",
                               'main.cex':
                               1.8,
                               'height':
                               1000,
                               'width':
                               1000,
                               'cex':
                               2,
                               'fontfamily':
                               "Arial",
                               'lwd':
                               R.c(1, 1),
                               'fill':
                               R.c(R.rgb(0, 0, 0.5, 0.5),
                                   R.rgb(0.5, 0, 0, 0.5)),
                               'category.names':
                               R.c(prefix_a, prefix_b),
                               'margin':
                               R.c(0.1, 0.1, 0.1, 0.1)
                           })

    # write footer and output benchmark information.
    E.Stop()
Beispiel #17
0
def dict_to_named_list(dct):
    """Convert a mapping-like object into an R list with named elements.

    ``dict``, ``Parameter`` and pandas ``Series`` instances are expanded
    through their ``items()`` into keyword arguments of ``r.list``.  Any
    other object is returned unchanged.
    """
    mapping_types = (dict, Parameter, pd.core.series.Series)
    if not isinstance(dct, mapping_types):
        return dct
    named_entries = dict(dct.items())
    return r.list(**named_entries)