def run_mbsum(self, ipyclient, force=False, quiet=False):
    """
    Sums the two replicate mrbayes runs of each locus.

    Every '*.run1.t' file found in {workdir}/{name} is paired with the
    '*.run2.t' file of the same locus and the pair is submitted to
    `call_mbsum`, together with an output path '{index}.sumt' in the same
    directory. The function blocks until all submitted jobs are ready.

    Parameters:
    -----------
    ipyclient (ipyparallel.Client):
        An active ipyparallel client to distribute jobs to.

    force (bool):
        If True then existing '*.sumt' files are removed first. If False
        and such files exist an IPyradError is raised.

    quiet (bool):
        If True the progress bar is suppressed.

    Raises:
    -------
    IPyradError:
        If '.sumt' files exist and force is False, if the run1 and run2
        files do not pair up locus by locus, or if a job did not finish
        successfully.
    """
    minidir = os.path.realpath(os.path.join(self.workdir, self.name))

    ## glob returns paths in arbitrary order, so both lists are sorted to
    ## guarantee that index i of each list refers to the same locus.
    trees1 = sorted(glob.glob(os.path.join(minidir, "*.run1.t")))
    trees2 = sorted(glob.glob(os.path.join(minidir, "*.run2.t")))

    ## refuse to sum replicates that belong to different loci
    roots1 = [path[:-len(".run1.t")] for path in trees1]
    roots2 = [path[:-len(".run2.t")] for path in trees2]
    if roots1 != roots2:
        raise IPyradError(
            "replicate files (*.run1.t and *.run2.t) in {} do not pair up"
            .format(minidir))

    ## clear existing files
    existing = glob.glob(os.path.join(minidir, "*.sumt"))
    if existing:
        if not force:
            path = os.path.join(self.workdir, self.name)
            raise IPyradError(EXISTING_SUMT_FILES.format(path))
        for rfile in existing:
            os.remove(rfile)

    ## load balancer
    lbview = ipyclient.load_balanced_view()

    ## submit each pair of replicates to be processed
    asyncs = []
    for tidx, (rep1, rep2) in enumerate(zip(trees1, trees2)):
        outname = os.path.join(minidir, str(tidx) + ".sumt")
        asyncs.append(lbview.apply(call_mbsum, rep1, rep2, outname))

    ## track progress
    start = time.time()
    printstr = "sum replicate runs"
    while 1:
        ready = [i.ready() for i in asyncs]
        if not quiet:
            progressbar(sum(ready), len(ready), start, printstr)
        if len(ready) == sum(ready):
            if not quiet:
                print("")
            break
        time.sleep(0.1)

    ## check success
    ## NOTE(review): depending on the ipyparallel version, result() may
    ## itself re-raise the remote error before IPyradError is built.
    for rasync in asyncs:
        if not rasync.successful():
            raise IPyradError(rasync.result())
def run_mrbayes(self, ipyclient, force=False, quiet=False):
    """
    Submits every nexus file of this object to `call_mb` and waits.

    Nexus files ('*.nex') are looked up in {workdir}/{name}. Files matching
    '*.nex.*' in that directory are treated as results of an earlier run.

    Parameters:
    -----------
    ipyclient (ipyparallel.Client):
        An active ipyparallel client to distribute jobs to.

    force (bool):
        If True then existing '*.nex.*' files are removed. If False and
        such files exist an IPyradError is raised.

    quiet (bool):
        If True the progress bar is suppressed.
    """
    ## all inputs and outputs live in the object's own directory
    minidir = os.path.realpath(os.path.join(self.workdir, self.name))
    nexus_files = glob.glob(os.path.join(minidir, "*.nex"))

    ## earlier results either block the run or get removed
    leftovers = glob.glob(os.path.join(minidir, "*.nex.*"))
    if leftovers and not force:
        raise IPyradError(EXISTING_NEXdot_FILES.format(minidir))
    for oldfile in leftovers:
        os.remove(oldfile)

    ## one job per nexus file on a load-balanced view
    lbview = ipyclient.load_balanced_view()
    jobs = [lbview.apply(call_mb, nex) for nex in nexus_files]

    ## poll until every job reports ready
    start = time.time()
    message = "infer gene-tree posteriors"
    njobs = len(jobs)
    while True:
        ndone = sum([job.ready() for job in jobs])
        if not quiet:
            progressbar(ndone, njobs, start, message)
        if ndone == njobs:
            if not quiet:
                print("")
            break
        time.sleep(0.1)

    ## surface the first job that did not succeed
    for job in jobs:
        if not job.successful():
            raise IPyradError(job.result())
def batch(baba, ipyclient=None):
    """
    Distributes the tests of a baba object as jobs to the parallel client.

    Each test is submitted as `dstat(loci, test, mincov, nboots)` on a
    load-balanced view. At most len(ipyclient) jobs are submitted up front;
    one further test is submitted each time a finished job is collected.

    Parameters:
    -----------
    baba:
        Object providing `.data` (a generator of simulations, or the path
        of a loci file), `.tests` (a single dict or a sequence of tests),
        `.params.mincov` and `.params.nboots`.

    ipyclient (ipyparallel.Client):
        An active client. Although it defaults to None, a client is
        required as soon as there is at least one test.

    Returns:
    --------
    None if there are no tests. Otherwise a tuple:
      - (DataFrame, boots array) when the last submitted test has 4
        entries; the DataFrame has columns dstat, bootmean, bootstd, Z,
        ABBA, BABA and nloci, with one row per test.
      - (multi-index DataFrame, None) otherwise; rows are labelled
        'p3', 'p4' and 'shared' for each test.

    Raises:
    -------
    IPyradWarningExit: if a job did not finish successfully.
    KeyboardInterrupt: re-raised after trying to abort the client's jobs.

    Notes:
    ------
    Simulated (generator) input is not yet implemented: the data is
    converted with `_msp_to_arr` but no job is submitted for it.
    """
    # parse args
    handle = baba.data
    taxdicts = baba.tests
    mindicts = baba.params.mincov
    nboots = baba.params.nboots

    ## if ms generator make into reusable list
    sims = 0
    if isinstance(handle, types.GeneratorType):
        handle = list(handle)
        sims = 1
    else:
        ## expand locifile path to full path
        handle = os.path.realpath(handle)

    ## a single test is wrapped so that tests are always iterated as a list
    names = []
    if isinstance(taxdicts, dict):
        taxdicts = [taxdicts]

    ## arrays to hold results (len(taxdicts), 7) and (len(taxdicts), nboots)
    tot = len(taxdicts)
    resarr = np.zeros((tot, 7), dtype=np.float64)
    bootsarr = np.zeros((tot, nboots), dtype=np.float64)
    paneldict = {}

    ## submit jobs to run on the cluster queue
    start = time.time()
    asyncs = {}
    idx = 0

    ## prepare data before sending to engines
    ## if it's a str (locifile) then parse it here just once.
    if isinstance(handle, str):
        with open(handle, 'r') as infile:
            loci = infile.read().strip().split("|\n")

    ## iterate over tests (the same mincov setting is reused for every test)
    if not taxdicts:
        print("no tests found")
        return
    itests = iter(taxdicts)
    imdict = itertools.cycle([mindicts])

    ## the view that jobs are submitted to; it was previously referenced
    ## without ever being created inside this function.
    lbview = ipyclient.load_balanced_view()

    ## seed the queue with at most one job per engine
    for _ in range(len(ipyclient)):
        ## next entries unless fewer than len ipyclient, skip
        try:
            test = next(itests)
            mindict = next(imdict)
        except StopIteration:
            continue

        ## if it's sim data then convert to an array
        if sims:
            loci = _msp_to_arr(handle, test)
            args = (loci, test, mindict, nboots)
            print("not yet implemented")
        else:
            args = [loci, test, mindict, nboots]
            asyncs[idx] = lbview.apply(dstat, *args)
        idx += 1

    ## block until finished, print progress if requested.
    finished = 0
    try:
        while 1:
            keys = [i for (i, j) in asyncs.items() if j.ready()]

            for job in keys:
                rasync = asyncs[job]

                ## check for failures
                if not rasync.successful():
                    raise IPyradWarningExit(
                        " error: {}: {}".format(job, rasync.exception()))

                ## enter results for successful jobs
                _res, _bot = rasync.result()

                ## store D4 results (.values replaces the as_matrix()
                ## method that was removed from pandas)
                if _res.shape[0] == 1:
                    resarr[job] = _res.T.values[:, 0]
                    bootsarr[job] = _bot

                ## or store D5 results
                else:
                    paneldict[job] = _res.T

                ## remove old job
                del asyncs[job]
                finished += 1

                ## submit next job if there is one.
                try:
                    test = next(itests)
                    mindict = next(imdict)
                except StopIteration:
                    continue
                if sims:
                    loci = _msp_to_arr(handle, test)
                    args = (loci, test, mindict, nboots)
                    print("not yet implemented")
                else:
                    args = [loci, test, mindict, nboots]
                    asyncs[idx] = lbview.apply(dstat, *args)
                idx += 1

            ## report progress and break if all are done.
            elap = datetime.timedelta(seconds=int(time.time() - start))
            printstr = " calculating D-stats | {} | "
            progressbar(tot, finished, printstr.format(elap), spacer="")
            time.sleep(0.1)
            if not asyncs:
                print("")
                break

    except KeyboardInterrupt:
        ## cancel all jobs (ipy & multiproc modes) and then raise error.
        ## aborting is best effort; the interrupt itself must propagate.
        try:
            ipyclient.abort()
        except Exception:
            pass
        raise

    ## dress up resarr as a Pandas DataFrame if 4-part test
    if len(test) == 4:
        if not names:
            names = list(range(len(taxdicts)))
        resarr = pd.DataFrame(
            resarr,
            index=names,
            columns=[
                "dstat", "bootmean", "bootstd", "Z", "ABBA", "BABA", "nloci",
            ],
        )

        ## sort results and bootsarr to match if test names were supplied
        resarr = resarr.sort_index()
        sorted_index = list(resarr.index)
        order = [sorted_index.index(i) for i in names]
        bootsarr = bootsarr[order]
        return resarr, bootsarr

    ## order results dfs by the index of the test that produced them
    listres = [paneldict[key] for key in range(len(paneldict))]

    ## make into a multi-index dataframe
    ntests = len(paneldict)
    multi_index = [
        np.array([[i] * 3 for i in range(ntests)]).flatten(),
        np.array(['p3', 'p4', 'shared'] * ntests),
    ]
    resarr = pd.DataFrame(
        data=pd.concat(listres).values,
        index=multi_index,
        columns=listres[0].columns,
    )
    return resarr, None
def run_bucky(self, ipyclient, force=False, quiet=False, subname=False):
    """
    Submits one `call_bucky` job per alpha value and waits for all of them.

    The alpha values come from params.bucky_alpha (a single value or a
    list). For each alpha the output path handed to `call_bucky` is
    '{workdir}/{name}/CF-a{alpha}'. If that path already exists the alpha
    is skipped with a printed notice, unless force is True.

    Parameters:
    -----------
    ipyclient (ipyparallel.Client):
        An active ipyparallel client to distribute jobs to.

    force (bool):
        If True then alphas whose output path already exists are run again.

    quiet (bool):
        If True the progress bar is suppressed.

    subname (str):
        Accepted in the signature but not read anywhere in this function.

    Raises:
    -------
    IPyradError: if a submitted job did not finish successfully.
    """
    ## inputs (*.sumt) and outputs share the object's directory
    outroot = os.path.realpath(os.path.join(self.workdir, self.name))
    infiles = glob.glob(os.path.join(outroot, "*.sumt"))

    ## always work with a list of alphas
    alphas = self.params.bucky_alpha
    if not isinstance(alphas, list):
        alphas = [alphas]

    ## load balancer
    lbview = ipyclient.load_balanced_view()

    ## submit a job for every alpha that is not blocked by old results
    jobs = []
    for alpha in alphas:
        pathname = os.path.join(outroot, "CF-a" + str(alpha))
        ## the comparison with True is kept on purpose: only a value equal
        ## to True counts as forcing.
        blocked = os.path.exists(pathname) and (force != True)
        if blocked:
            print("BUCKy results already exist for this filepath. "
                  "Use force to overwrite")
            continue
        jobs.append(
            lbview.apply(
                call_bucky,
                alpha,
                self.params.bucky_nchains,
                self.params.bucky_nreps,
                self.params.bucky_niter,
                pathname,
                infiles,
            )
        )

    ## poll until every job reports ready
    start = time.time()
    message = "infer CF posteriors"
    njobs = len(jobs)
    while True:
        ndone = sum([job.ready() for job in jobs])
        if not quiet:
            progressbar(ndone, njobs, start, message)
        if ndone == njobs:
            if not quiet:
                print("")
            break
        time.sleep(0.1)

    ## surface the first job that did not succeed
    for job in jobs:
        if not job.successful():
            raise IPyradError(job.result())