Python runCsiEm Examples

Programming Language: Python

Namespace/Package Name: csi

Method/Function: runCsiEm

Examples at hotexamples.com: 2

Python runCsiEm - 2 examples found. These are the top-rated real-world Python examples of csi.runCsiEm, extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: __main__.py Project: ktpolanski/ip2

def main(args=None):
    """Command-line entry point: run CSI EM on one input file and write results.

    Parses the command line with ``cmdparser``, configures logging, validates
    the options, loads the data with ``csi.loadData`` and then writes every
    result yielded by ``csi.runCsiEm`` to the CSV writer (and to the HDF5
    file when one was requested).  When a JSON output file was requested,
    all collected results are dumped to it at the end.

    Args:
        args: Command-line arguments without the program name.  Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        SystemExit: With status 1 when an option fails validation or a
            requested gene is not present in the input.
        AssertionError: If the input columns are not monotonically
            increasing.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line arguments
    (op, fname) = cmdparser(args)

    # extract out the logging level early
    log_level = logging.WARNING
    if op.verbose == 1:
        log_level = logging.INFO
    elif op.verbose >= 2:
        log_level = logging.DEBUG

    # configure logger
    logging.basicConfig(level=log_level)
    logging.getLogger('GP').setLevel(logging.WARNING)
    logging.getLogger('parameters changed meta').setLevel(logging.WARNING)

    # make sure we're only processing a single file
    if len(fname) != 1:
        if len(fname) == 0:
            sys.stderr.write("Error: Please specify the filename to process, or run with '-h' for more options\n")
        else:
            sys.stderr.write("Error: Only one input filename currently supported\n")
        sys.exit(1)

    # pull out the parental set truncation depth and validate
    depth = op.depth
    if depth < 1:
        sys.stderr.write("Error: truncation depth must be greater than or equal to one\n")
        sys.exit(1)

    # sanity check!
    if depth == 1:
        logger.info("Truncation depth of 1 may not be very useful")

    numprocs = op.numprocs
    if numprocs is not None and numprocs < 1:
        sys.stderr.write("Error: must have one or more worker process\n")
        sys.exit(1)

    # figure out where our output is going ('-' or no name means stdout)
    if op.csvoutput is None or op.csvoutput == '-':
        csvout = csv.writer(sys.stdout)
    else:
        csvout = csv.writer(open(op.csvoutput, 'w'))

    if op.jsonoutput:
        jsonoutput = open(op.jsonoutput, 'w')
    else:
        jsonoutput = None

    if op.hdf5output:
        hdf5output = h5.File(op.hdf5output, 'w')
    else:
        hdf5output = None

    # load the data from disk
    inp = csi.loadData(fname[0])

    # check whether the second level is sorted (currently check whether all
    # levels are sorted, need to fix!)
    assert (inp.columns.is_monotonic_increasing)
    # not sure whether I can do anything similar for the rows

    if op.verbose:
        logger.info("Genes: %s",
                    ", ".join([repr(x) for x in inp.index]))
        logger.info("Treatments: %s",
                    ", ".join([repr(x) for x in inp.columns.levels[0]]))
        logger.info("Time: %s",
                    ", ".join([repr(x) for x in inp.columns.levels[1]]))

    # figure out which genes/rows we're going to process
    genes = op.genes
    if genes is None:
        logger.debug("No genes specified, assuming all")
        genes = list(inp.index)
    else:
        # any requested gene that is not a row of the input is an error
        missing = np.setdiff1d(genes, inp.index)
        if len(missing) > 0:
            sys.stderr.write("Error: The following genes were not found: {missing}\n".format(
                missing=', '.join(missing)))
            sys.exit(1)

    # TODO: how does the user specify the parental set?

    cc = csi.Csi(inp)
    em = cc.getEm()

    if hdf5output:
        cc.write_hdf5(hdf5output)
        hdf5output.flush()

    if op.weighttrunc:
        val = float(op.weighttrunc)
        # only values strictly between zero and one are accepted
        if not (0 < val < 1):
            sys.stderr.write("Error: The weight truncation must be between zero and one\n")
            sys.exit(1)

        if val > 0.01:
            logger.warning("weight truncation should probably be less than 0.01")

        # NOTE(review): attribute name assumed to be 'weighttrunc', matching
        # the command-line option -- confirm against the EM class.
        em.weighttrunc = val

    if op.initweights:
        if op.initweights == 'uniform':
            em.sampleinitweights = False
        elif op.initweights == 'weighted':
            em.sampleinitweights = True
        else:
            sys.stderr.write("Error: Unrecognised initial weight mode: {initweights}\n".format(
                initweights=op.initweights))
            sys.exit(1)

    # results are written out as they arrive and also kept for the JSON dump
    results = []
    for i, res in enumerate(csi.runCsiEm(em, genes, lambda gene: cc.allParents(gene, depth), numprocs)):
        res.writeCsv(csvout)
        results.append(res)
        if hdf5output:
            res.write_hdf5(hdf5output, i)
            hdf5output.flush()

    if jsonoutput is not None:
        json.dump(cc.to_dom(results), jsonoutput)

Example #2

Show file

File: main.py Project: cyversewarwick/hcsi

def main(args=None):
    """Command-line entry point: run CSI EM on one input file and write results.

    Parses the command line with ``cmdparser``, configures logging, validates
    the options, loads the data with ``csi.loadData``, optionally normalises
    it, and then writes every result yielded by ``csi.runCsiEm`` to the CSV
    writer and/or the HDF5 file, whichever were requested.

    Args:
        args: Command-line arguments without the program name.  Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        SystemExit: With status 1 when an option fails validation, the GP
            hyperparameter prior cannot be parsed, or a requested gene is
            not present in the input.
        AssertionError: If the input columns are not monotonically
            increasing.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line arguments
    (op, fname) = cmdparser(args)

    # extract out the logging level early
    log_level = logging.WARNING
    if op.verbose == 1:
        log_level = logging.INFO
    elif op.verbose >= 2:
        log_level = logging.DEBUG

    # configure logger
    logging.basicConfig(level=log_level)
    logging.getLogger('GP').setLevel(logging.WARNING)
    logging.getLogger('parameters changed meta').setLevel(logging.WARNING)

    # make sure we're only processing a single file
    if len(fname) != 1:
        if len(fname) == 0:
            sys.stderr.write("Error: Please specify the filename to process, or run with '-h' for more options\n")
        else:
            sys.stderr.write("Error: Only one input filename currently supported\n")
        sys.exit(1)

    # pull out the parental set truncation depth and validate
    depth = op.depth
    if depth < 1:
        sys.stderr.write("Error: truncation depth must be greater than or equal to one\n")
        sys.exit(1)

    # sanity check!
    if depth == 1:
        logger.info("Truncation depth of 1 may not be very useful")

    numprocs = op.numprocs
    if numprocs is not None and numprocs < 1:
        # automatic parallelisation: zero means "one worker per CPU"
        if numprocs == 0:
            numprocs = mp.cpu_count()
        else:
            sys.stderr.write("Error: can't have a negative worker process count\n")
            sys.exit(1)

    if op.gpprior is None or op.gpprior == 'uniform':
        gpprior = None
    else:
        try:
            gpprior = parse_gp_hyperparam_priors(op.gpprior)
        except ValueError as s:
            # report the parser's message rather than a traceback
            sys.stderr.write("Error: " + str(s) + "\n")
            sys.exit(1)

    # figure out where our output is going
    if op.csvoutput is None:
        csvoutput = None
    else:
        # '-' selects stdout, anything else is a file name
        if op.csvoutput == '-':
            fd = sys.stdout
        else:
            fd = open(op.csvoutput, 'w')
        csvoutput = csv.writer(fd)

    if op.hdf5output:
        hdf5output = h5.File(op.hdf5output, 'w')
    else:
        hdf5output = None

    if hdf5output is None and csvoutput is None:
        logger.warning("No output will be saved, "
                       "this is only useful for debugging and benchmarking.")

    # load the data from disk
    inp = csi.loadData(fname[0])

    # check whether the second level is sorted (currently check whether all
    # levels are sorted, need to fix!)
    assert (inp.columns.is_monotonic_increasing)
    # not sure whether I can do anything similar for the rows

    # normalise the data, row by row (axis=1)
    if op.normalise == 'standardise':
        inp[:][:] = sp.stats.mstats.zscore(inp, axis=1, ddof=1)
    elif op.normalise == 'center':
        inp[:][:] = inp[:][:] - np.mean(inp[:][:], axis=1)[:, None]

    if op.verbose:
        logger.info("Genes: %s",
                    ", ".join([repr(x) for x in inp.index]))
        logger.info("Treatments: %s",
                    ", ".join([repr(x) for x in inp.columns.levels[0]]))
        logger.info("Time: %s",
                    ", ".join([repr(x) for x in inp.columns.levels[1]]))
        if gpprior is None:
            logger.info("Hyperparameters: uniform")
        else:
            logger.info("Hyperparameters: Gamma({0},{1})".format(*gpprior))

    # figure out which genes/rows we're going to process
    genes = op.genes
    if genes is None:
        logger.debug("No genes specified, assuming all")
        genes = list(inp.index)
    else:
        # any requested gene that is not a row of the input is an error
        missing = np.setdiff1d(genes, inp.index)
        if len(missing) > 0:
            sys.stderr.write("Error: The following genes were not found: {missing}\n".format(
                missing=', '.join(missing)))
            sys.exit(1)

    # TODO: how does the user specify the parental set?

    cc = csi.Csi(inp)
    em = cc.getEm()

    if gpprior:
        em.set_priors(gpprior[0], gpprior[1])

    if hdf5output:
        cc.write_hdf5(hdf5output)
        hdf5output.flush()

    if op.weighttrunc:
        val = float(op.weighttrunc)
        # only values strictly between zero and one are accepted
        if not (0 < val < 1):
            sys.stderr.write("Error: The weight truncation must be between zero and one\n")
            sys.exit(1)

        if val > 0.01:
            logger.warning("weight truncation should probably be less than 0.01")

        em.weighttrunc = val

    if op.initweights:
        if op.initweights == 'uniform':
            em.sampleinitweights = False
        elif op.initweights == 'weighted':
            em.sampleinitweights = True
        else:
            sys.stderr.write("Error: Unrecognised initial weight mode: {initweights}\n".format(
                initweights=op.initweights))
            sys.exit(1)

    # write each result out as soon as it is produced
    for i, res in enumerate(csi.runCsiEm(em, genes, lambda gene: cc.allParents(gene, depth), numprocs)):
        if csvoutput:
            res.writeCsv(csvoutput)
        if hdf5output:
            res.write_hdf5(hdf5output, i)
            hdf5output.flush()