def main(args=None):
    """Command-line entry point: run CSI over a single input file.

    Parses ``args`` with ``cmdparser``, configures logging from the
    verbosity option, validates the options, loads the data with
    ``csi.loadData`` and writes every result yielded by ``csi.runCsiEm``
    to the CSV writer (and to the HDF5 file when one was requested).
    When a JSON output was requested, all results are dumped to it once
    the run has finished.  Output files opened here are closed before
    the function returns, including on error.

    Args:
        args: Argument list to parse; ``sys.argv[1:]`` is used when None.

    Raises:
        SystemExit: With status 1 when an option fails validation.
        AssertionError: When the input columns are not monotonically
            increasing.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line arguments
    (op, fname) = cmdparser(args)

    # extract out the logging level early
    log_level = logging.WARNING
    if op.verbose == 1:
        log_level = logging.INFO
    elif op.verbose >= 2:
        log_level = logging.DEBUG

    # configure logger
    logging.basicConfig(level=log_level)
    logging.getLogger('GP').setLevel(logging.WARNING)
    logging.getLogger('parameters changed meta').setLevel(logging.WARNING)

    # make sure we're only processing a single file
    if len(fname) != 1:
        if len(fname) == 0:
            sys.stderr.write("Error: Please specify the filename to process, or run with '-h' for more options\n")
        else:
            sys.stderr.write("Error: Only one input filename currently supported\n")
        sys.exit(1)

    # pull out the parental set truncation depth and validate
    depth = op.depth
    if depth < 1:
        sys.stderr.write("Error: truncation depth must be greater than or equal to one\n")
        sys.exit(1)

    # sanity check!
    if depth == 1:
        logger.info("Truncation depth of 1 may not be very useful")

    numprocs = op.numprocs
    if numprocs is not None and numprocs < 1:
        sys.stderr.write("Error: must have one or more worker process\n")
        sys.exit(1)

    # Handles opened by this function; anything still None in the
    # ``finally`` clause below was never opened (sys.stdout is never closed).
    csvfile = None
    jsonoutput = None
    hdf5output = None
    try:
        # figure out where our output is going
        if op.csvoutput is None or op.csvoutput == '-':
            csvout = csv.writer(sys.stdout)
        else:
            csvfile = open(op.csvoutput, 'w')
            csvout = csv.writer(csvfile)

        if op.jsonoutput:
            jsonoutput = open(op.jsonoutput, 'w')

        if op.hdf5output:
            hdf5output = h5.File(op.hdf5output, 'w')

        # load the data from disk
        inp = csi.loadData(fname[0])

        # check whether the second level is sorted (currently check whether
        # all levels are sorted, need to fix!)
        assert (inp.columns.is_monotonic_increasing)
        # not sure whether I can do anything similar for the rows

        if op.verbose:
            logger.info("Genes: %s",
                        ", ".join([repr(x) for x in inp.index]))
            logger.info("Treatments: %s",
                        ", ".join([repr(x) for x in inp.columns.levels[0]]))
            logger.info("Time: %s",
                        ", ".join([repr(x) for x in inp.columns.levels[1]]))

        # figure out which genes/rows we're going to process
        genes = op.genes
        if genes is None:
            logger.debug("No genes specified, assuming all")
            genes = list(inp.index)
        else:
            missing = np.setdiff1d(genes, inp.index)
            if len(missing) > 0:
                # str() so that non-string labels cannot break the join
                sys.stderr.write("Error: The following genes were not found: {missing}\n".format(
                    missing=', '.join(str(g) for g in missing)))
                sys.exit(1)

        # TODO: how does the user specify the parental set?

        cc = csi.Csi(inp)
        em = cc.getEm()

        if hdf5output is not None:
            cc.write_hdf5(hdf5output)
            hdf5output.flush()

        if op.weighttrunc:
            val = float(op.weighttrunc)
            # reject anything outside the open interval (0, 1)
            if not (0 < val < 1):
                sys.stderr.write("Error: The weight truncation must be between zero and one\n")
                sys.exit(1)
            if val > 0.01:
                logger.warning("weight truncation should probably be less than 0.01")
            # NOTE(review): attribute spelled like the option name; confirm
            # this matches the attribute the EM object actually reads.
            em.weighttrunc = val

        if op.initweights:
            if op.initweights == 'uniform':
                em.sampleinitweights = False
            elif op.initweights == 'weighted':
                em.sampleinitweights = True
            else:
                sys.stderr.write("Error: Unrecognised initial weight mode: {initweights}\n".format(
                    initweights=op.initweights))
                sys.exit(1)

        results = []
        for i, res in enumerate(csi.runCsiEm(em, genes, lambda gene: cc.allParents(gene, depth), numprocs)):
            res.writeCsv(csvout)
            results.append(res)
            if hdf5output is not None:
                res.write_hdf5(hdf5output, i)
                hdf5output.flush()

        if jsonoutput is not None:
            json.dump(cc.to_dom(results), jsonoutput)
    finally:
        # release every output we opened, even when exiting via sys.exit()
        for handle in (csvfile, jsonoutput, hdf5output):
            if handle is not None:
                handle.close()
def main(args=None):
    """Command-line entry point: run CSI over a single input file.

    Parses ``args`` with ``cmdparser``, configures logging from the
    verbosity option, validates the options (truncation depth, worker
    count, GP hyperparameter prior, weight truncation, initial weight
    mode), loads the data with ``csi.loadData``, optionally normalises
    each row, and writes every result yielded by ``csi.runCsiEm`` to the
    CSV and/or HDF5 outputs that were requested.  Output files opened
    here are closed before the function returns, including on error.

    Args:
        args: Argument list to parse; ``sys.argv[1:]`` is used when None.

    Raises:
        SystemExit: With status 1 when an option fails validation.
        AssertionError: When the input columns are not monotonically
            increasing.
    """
    if args is None:
        args = sys.argv[1:]

    # parse command line arguments
    (op, fname) = cmdparser(args)

    # extract out the logging level early
    log_level = logging.WARNING
    if op.verbose == 1:
        log_level = logging.INFO
    elif op.verbose >= 2:
        log_level = logging.DEBUG

    # configure logger
    logging.basicConfig(level=log_level)
    logging.getLogger('GP').setLevel(logging.WARNING)
    logging.getLogger('parameters changed meta').setLevel(logging.WARNING)

    # make sure we're only processing a single file
    if len(fname) != 1:
        if len(fname) == 0:
            sys.stderr.write("Error: Please specify the filename to process, or run with '-h' for more options\n")
        else:
            sys.stderr.write("Error: Only one input filename currently supported\n")
        sys.exit(1)

    # pull out the parental set truncation depth and validate
    depth = op.depth
    if depth < 1:
        sys.stderr.write("Error: truncation depth must be greater than or equal to one\n")
        sys.exit(1)

    # sanity check!
    if depth == 1:
        logger.info("Truncation depth of 1 may not be very useful")

    numprocs = op.numprocs
    if numprocs is not None and numprocs < 1:
        # automatic parallelisation: zero means "one worker per CPU"
        if numprocs == 0:
            numprocs = mp.cpu_count()
        else:
            sys.stderr.write("Error: can't have a negative worker process count\n")
            sys.exit(1)

    if op.gpprior is None or op.gpprior == 'uniform':
        gpprior = None
    else:
        try:
            gpprior = parse_gp_hyperparam_priors(op.gpprior)
        except ValueError as err:
            sys.stderr.write("Error: {0}\n".format(err))
            sys.exit(1)

    # Handles opened by this function; anything still None in the
    # ``finally`` clause below was never opened (sys.stdout is never closed).
    csvfile = None
    hdf5output = None
    try:
        # figure out where our output is going
        if op.csvoutput is None:
            csvoutput = None
        elif op.csvoutput == '-':
            csvoutput = csv.writer(sys.stdout)
        else:
            csvfile = open(op.csvoutput, 'w')
            csvoutput = csv.writer(csvfile)

        if op.hdf5output:
            hdf5output = h5.File(op.hdf5output, 'w')

        if hdf5output is None and csvoutput is None:
            logger.warning("No output will be saved, "
                           "this is only useful for debugging and benchmarking.")

        # load the data from disk
        inp = csi.loadData(fname[0])

        # check whether the second level is sorted (currently check whether
        # all levels are sorted, need to fix!)
        assert (inp.columns.is_monotonic_increasing)
        # not sure whether I can do anything similar for the rows

        # normalise the data, row by row.  Assign through ``.iloc`` on the
        # frame itself: the chained ``inp[:][:] = ...`` form writes to a
        # temporary and is not guaranteed to update ``inp``.
        if op.normalise == 'standardise':
            inp.iloc[:] = sp.stats.mstats.zscore(inp, axis=1, ddof=1)
        elif op.normalise == 'center':
            # plain ndarray so the means can be broadcast as a column
            row_means = np.asarray(np.mean(inp, axis=1))
            inp.iloc[:] = np.asarray(inp) - row_means[:, None]

        if op.verbose:
            logger.info("Genes: %s",
                        ", ".join([repr(x) for x in inp.index]))
            logger.info("Treatments: %s",
                        ", ".join([repr(x) for x in inp.columns.levels[0]]))
            logger.info("Time: %s",
                        ", ".join([repr(x) for x in inp.columns.levels[1]]))
            if gpprior is None:
                logger.info("Hyperparameters: uniform")
            else:
                logger.info("Hyperparameters: Gamma({0},{1})".format(*gpprior))

        # figure out which genes/rows we're going to process
        genes = op.genes
        if genes is None:
            logger.debug("No genes specified, assuming all")
            genes = list(inp.index)
        else:
            missing = np.setdiff1d(genes, inp.index)
            if len(missing) > 0:
                # str() so that non-string labels cannot break the join
                sys.stderr.write("Error: The following genes were not found: {missing}\n".format(
                    missing=', '.join(str(g) for g in missing)))
                sys.exit(1)

        # TODO: how does the user specify the parental set?

        cc = csi.Csi(inp)
        em = cc.getEm()

        if gpprior:
            em.set_priors(gpprior[0], gpprior[1])

        if hdf5output is not None:
            cc.write_hdf5(hdf5output)
            hdf5output.flush()

        if op.weighttrunc:
            val = float(op.weighttrunc)
            # reject anything outside the open interval (0, 1)
            if not (0 < val < 1):
                sys.stderr.write("Error: The weight truncation must be between zero and one\n")
                sys.exit(1)
            if val > 0.01:
                logger.warning("weight truncation should probably be less than 0.01")
            em.weighttrunc = val

        if op.initweights:
            if op.initweights == 'uniform':
                em.sampleinitweights = False
            elif op.initweights == 'weighted':
                em.sampleinitweights = True
            else:
                sys.stderr.write("Error: Unrecognised initial weight mode: {initweights}\n".format(
                    initweights=op.initweights))
                sys.exit(1)

        for i, res in enumerate(csi.runCsiEm(em, genes, lambda gene: cc.allParents(gene, depth), numprocs)):
            if csvoutput is not None:
                res.writeCsv(csvoutput)
            if hdf5output is not None:
                res.write_hdf5(hdf5output, i)
                hdf5output.flush()
    finally:
        # release every output we opened, even when exiting via sys.exit()
        for handle in (csvfile, hdf5output):
            if handle is not None:
                handle.close()