Example #1
0
    def run_mbsum(self, ipyclient, force=False, quiet=False):
        """
        Sums two replicate mrbayes runs for each locus with mbsum.

        Parameters:
        -----------
        ipyclient (ipyparallel.Client):
            An active ipyparallel client to distribute jobs to.
        force (bool):
            If True then existing .sumt result files are removed first.
        quiet (bool):
            If True the progress bar is suppressed.
        """
        minidir = os.path.realpath(os.path.join(self.workdir, self.name))
        # sort both lists so run1/run2 files pair up by locus; glob returns
        # results in arbitrary filesystem order, which could otherwise sum
        # replicate runs from two different loci together.
        trees1 = sorted(glob.glob(os.path.join(minidir, "*.run1.t")))
        trees2 = sorted(glob.glob(os.path.join(minidir, "*.run2.t")))

        ## clear existing files
        existing = glob.glob(os.path.join(minidir, "*.sumt"))
        if any(existing):
            if force:
                for rfile in existing:
                    os.remove(rfile)
            else:
                raise IPyradError(EXISTING_SUMT_FILES.format(minidir))

        ## load balancer
        lbview = ipyclient.load_balanced_view()

        ## submit each replicate pair to be summed
        asyncs = []
        for tidx, (rep1, rep2) in enumerate(zip(trees1, trees2)):
            outname = os.path.join(minidir, str(tidx) + ".sumt")
            rasync = lbview.apply(call_mbsum, *(rep1, rep2, outname))
            asyncs.append(rasync)

        ## track progress
        start = time.time()
        printstr = "sum replicate runs"
        while 1:
            ready = [i.ready() for i in asyncs]
            if not quiet:
                progressbar(sum(ready), len(ready), start, printstr)
            if len(ready) == sum(ready):
                if not quiet:
                    print("")
                break
            else:
                time.sleep(0.1)

        ## check success; result() re-raises the remote traceback payload
        for rasync in asyncs:
            if not rasync.successful():
                raise IPyradError(rasync.result())
Example #2
0
    def run_mrbayes(self, ipyclient, force=False, quiet=False):
        """
        calls the mrbayes block in each nexus file.
        """
        ## gather the nexus inputs for this analysis object
        minidir = os.path.realpath(os.path.join(self.workdir, self.name))
        nexus_files = glob.glob(os.path.join(minidir, "*.nex"))

        ## remove previous outputs, or complain unless force=True
        existing = glob.glob(os.path.join(minidir, "*.nex.*"))
        if any(existing):
            if not force:
                raise IPyradError(EXISTING_NEXdot_FILES.format(minidir))
            for rfile in existing:
                os.remove(rfile)

        ## load balancer
        lbview = ipyclient.load_balanced_view()

        ## one mrbayes job per nexus file
        asyncs = [lbview.apply(call_mb, nex) for nex in nexus_files]

        ## track progress until every job reports ready
        start = time.time()
        printstr = "infer gene-tree posteriors"
        while True:
            done = [job.ready() for job in asyncs]
            if not quiet:
                progressbar(sum(done), len(done), start, printstr)
            if sum(done) == len(done):
                if not quiet:
                    print("")
                break
            time.sleep(0.1)

        ## check success of each job
        for job in asyncs:
            if not job.successful():
                raise IPyradError(job.result())
Example #3
0
def batch(baba, ipyclient=None):
    """
    Distributes D-statistic (dstat) jobs to the parallel client and
    collects results.

    Parameters:
    -----------
    baba:
        A baba analysis object providing .data (loci file path or an ms
        simulation generator), .tests (a taxdict or list of taxdicts),
        and .params (.mincov, .nboots).
    ipyclient (ipyparallel.Client):
        An active ipyparallel client to distribute jobs to.

    Returns:
    --------
    (resarr, bootsarr) for 4-taxon tests, where resarr is a DataFrame of
    D-stat summaries and bootsarr an array of bootstrap replicates; or
    (resarr, None) for 5-taxon (partitioned) tests, where resarr is a
    multi-index DataFrame. Returns None if no tests are found.
    """
    # parse args
    handle = baba.data
    taxdicts = baba.tests
    mindicts = baba.params.mincov
    nboots = baba.params.nboots

    ## if ms generator make into reusable list
    sims = 0
    if isinstance(handle, types.GeneratorType):
        handle = list(handle)
        sims = 1
    else:
        ## expand locifile path to full path
        handle = os.path.realpath(handle)

    ## wrap a single test dict into a list; names filled in later if empty
    names = []
    if isinstance(taxdicts, dict):
        taxdicts = [taxdicts]

    ## arrays to hold results (len(taxdicts), nboots)
    tot = len(taxdicts)
    resarr = np.zeros((tot, 7), dtype=np.float64)
    bootsarr = np.zeros((tot, nboots), dtype=np.float64)
    paneldict = {}

    ## submit jobs to run on the cluster queue
    start = time.time()
    asyncs = {}
    idx = 0

    ## load-balanced view for job submission.
    ## fix: this was missing, so every lbview.apply() below raised NameError.
    lbview = ipyclient.load_balanced_view()

    ## prepare data before sending to engines:
    ## if it's a str (locifile) then parse it here just once.
    if isinstance(handle, str):
        with open(handle, 'r') as infile:
            loci = infile.read().strip().split("|\n")
    if isinstance(handle, list):
        pass  #sims()

    ## iterate over tests (repeats mindicts if fewer than taxdicts)
    if not taxdicts:
        print("no tests found")
        return
    else:
        itests = iter(taxdicts)
        imdict = itertools.cycle([mindicts])

    ## prime the queue with at most one job per engine
    for i in range(len(ipyclient)):

        ## next entries unless fewer than len ipyclient, skip
        try:
            test = next(itests)
            mindict = next(imdict)
        except StopIteration:
            continue

        ## if it's sim data then convert to an array
        if sims:
            loci = _msp_to_arr(handle, test)
            args = (loci, test, mindict, nboots)
            print("not yet implemented")
            #asyncs[idx] = lbview.apply_async(dstat, *args)
        else:
            args = [loci, test, mindict, nboots]
            asyncs[idx] = lbview.apply(dstat, *args)
        idx += 1

    ## block until finished, print progress if requested.
    finished = 0
    try:
        while 1:
            keys = [i for (i, j) in asyncs.items() if j.ready()]
            ## check for failures
            for job in keys:
                if not asyncs[job].successful():
                    raise IPyradWarningExit(\
                        " error: {}: {}".format(job, asyncs[job].exception()))
                ## enter results for successful jobs
                else:
                    _res, _bot = asyncs[job].result()

                    ## store D4 results
                    ## fix: .as_matrix() was removed from pandas; .values is
                    ## the equivalent ndarray accessor.
                    if _res.shape[0] == 1:
                        resarr[job] = _res.T.values[:, 0]
                        bootsarr[job] = _bot

                    ## or store D5 results
                    else:
                        paneldict[job] = _res.T

                    ## remove old job
                    del asyncs[job]
                    finished += 1

                    ## submit next job if there is one.
                    try:
                        test = next(itests)
                        mindict = next(imdict)
                        if sims:
                            loci = _msp_to_arr(handle, test)
                            args = (loci, test, mindict, nboots)
                            print("not yet implemented")
                            #asyncs[idx] = lbview.apply_async(dstat, *args)
                        else:
                            args = [loci, test, mindict, nboots]
                            asyncs[idx] = lbview.apply(dstat, *args)
                        idx += 1
                    except StopIteration:
                        pass

            ## count finished and break if all are done.
            elap = datetime.timedelta(seconds=int(time.time() - start))
            printstr = " calculating D-stats  | {} | "
            progressbar(tot, finished, printstr.format(elap), spacer="")
            time.sleep(0.1)
            if not asyncs:
                print("")
                break

    except KeyboardInterrupt as inst:
        ## cancel all jobs (ipy & multiproc modes) and then raise error
        try:
            ipyclient.abort()
        except Exception:
            pass
        raise inst

    ## dress up resarr as a Pandas DataFrame if 4-part test
    if len(test) == 4:
        if not names:
            names = range(len(taxdicts))
        resarr = pd.DataFrame(resarr,
                              index=names,
                              columns=[
                                  "dstat", "bootmean", "bootstd", "Z", "ABBA",
                                  "BABA", "nloci"
                              ])

        ## sort results and bootsarr to match if test names were supplied
        resarr = resarr.sort_index()
        order = [list(resarr.index).index(i) for i in names]
        bootsarr = bootsarr[order]
        return resarr, bootsarr
    else:
        ## order results dfs by job index
        listres = []
        for key in range(len(paneldict)):
            listres.append(paneldict[key])

        ## make into a multi-index dataframe: (test index, partition label)
        ntests = len(paneldict)
        multi_index = [
            np.array([[i] * 3 for i in range(ntests)]).flatten(),
            np.array(['p3', 'p4', 'shared'] * ntests),
        ]
        resarr = pd.DataFrame(
            data=pd.concat(listres).values,
            index=multi_index,
            columns=listres[0].columns,
        )
        return resarr, None
Example #4
0
    def run_bucky(self, ipyclient, force=False, quiet=False, subname=False):
        """
        Runs bucky for a given set of parameters and stores the result 
        to the ipa.bucky object. The results will be stored by default
        with the name '{name}-{alpha}' unless a argument is passed for
        'subname' to customize the output name. 

        Parameters:
        -----------
        subname (str):
            A custom name prefix for the output files produced by the bucky
            analysis and output into the {workdir}/{name} directory.
        force (bool):
            If True then existing result files with the same name prefix
            will be overwritten. 
        quiet (bool):
            If True the progress bars will be suppressed. 
        ipyclient (ipyparallel.Client)
            An active ipyparallel client to distribute jobs to.

        """
        ## check for existing results files; minidir is both the input dir
        ## (for .sumt files) and the output root (they were computed twice
        ## with identical values before).
        minidir = os.path.realpath(os.path.join(self.workdir, self.name))
        infiles = glob.glob(os.path.join(minidir, "*.sumt"))

        ## build alpha list
        if isinstance(self.params.bucky_alpha, list):
            alphas = self.params.bucky_alpha
        else:
            alphas = [self.params.bucky_alpha]

        ## load balancer
        lbview = ipyclient.load_balanced_view()

        ## submit one bucky job per alpha value
        asyncs = []
        for alpha in alphas:
            pathname = os.path.join(minidir, "CF-a" + str(alpha))
            if os.path.exists(pathname) and not force:
                print("BUCKy results already exist for this filepath. "
                      "Use force to overwrite")
            else:
                args = [
                    alpha, self.params.bucky_nchains, self.params.bucky_nreps,
                    self.params.bucky_niter, pathname, infiles
                ]
                rasync = lbview.apply(call_bucky, *args)
                asyncs.append(rasync)

        ## track progress
        start = time.time()
        printstr = "infer CF posteriors"
        while 1:
            ready = [i.ready() for i in asyncs]
            if not quiet:
                progressbar(sum(ready), len(ready), start, printstr)
            if len(ready) == sum(ready):
                if not quiet:
                    print("")
                break
            else:
                time.sleep(0.1)

        ## check success; result() re-raises the remote traceback payload
        for rasync in asyncs:
            if not rasync.successful():
                raise IPyradError(rasync.result())