Beispiel #1
0
    def __init__(self, name, data, workdir, mapfile=None):
        """Store i/o paths, default settings and empty result tables.

        Parameters
        ----------
        name :
            Accepted by the signature but not stored by this method.
        data :
            Stored as ``self._datafile``.
        workdir :
            Stored as ``self.workdir``.
        mapfile : optional
            Stored as ``self._mapfile``.

        Notes
        -----
        ``self.popdict`` starts empty, so every result table created here
        has zero rows until populations are assigned elsewhere.
        """
        # i/o paths
        self.workdir = workdir
        self._datafile = data
        self._mapfile = mapfile
        self._check_files()

        # Empty placeholder arrays. np.zeros() requires a shape argument;
        # calling it with no arguments raises TypeError, so an explicit
        # zero-length shape is used.
        self.data = np.zeros(0)
        self.maparr = np.zeros(0)

        # init default param settings
        self.params = Params()
        self.popdict = {}
        self.mindict = {}
        npops = len(self.popdict)
        self.npops = npops
        self.nboots = 100

        # results dataframes
        self.results = Params()

        # NOTE(review): the tables below are allocated with an unsigned
        # integer dtype; if they are meant to hold fractional statistics a
        # float dtype is probably intended -- confirm before changing.

        # pairwise Fst between all populations
        self.results.fst = pd.DataFrame(
            np.zeros((npops, npops), dtype=np.uint64)
        )

        # individual pi: one row per sample across all populations
        nsamples = len(list(chain.from_iterable(self.popdict.values())))
        self.results.pi = pd.DataFrame(np.zeros(nsamples, dtype=np.uint64))

        # population thetas
        self.results.theta = pd.DataFrame(np.zeros(npops, dtype=np.uint64))

        # NOTE(review): the original comment here says "parse samples from
        # the data file", but the call repeats the earlier file check; kept
        # as-is because its side effects are not visible from this method.
        self._check_files()
Beispiel #2
0
    def __init__(
        self,
        data=None,
        imap=None,
        minmap=1,
        newick=None,
        nboots=1000,
    ):
        """Store the inputs, run parameters and cluster settings.

        Parameters
        ----------
        data : str or other
            A string is treated as a file path and expanded to a real,
            user-expanded path; any other value is stored unchanged.
        imap, minmap :
            Stored unchanged on the instance.
        newick :
            A ``toytree.Toytree.ToyTree`` is replaced by its ``.newick``
            attribute; any other value is stored unchanged.
        nboots : int
            Stored as ``self.params.nboots``.

        Raises
        ------
        ImportError
            If ``toytree`` is not present in ``sys.modules``.
        """
        # toytree must already be loaded before anything else happens
        if not sys.modules.get("toytree"):
            raise ImportError(_TOYTREE_IMPORT)

        # data: path string gets normalised, everything else passes through
        self.data = (
            os.path.realpath(os.path.expanduser(data))
            if isinstance(data, str)
            else data
        )

        # tree: reduce a ToyTree object to its newick representation
        got_tree = isinstance(newick, toytree.Toytree.ToyTree)
        self.newick = newick.newick if got_tree else newick

        # test definitions
        self.minmap = minmap
        self.imap = imap

        # run parameters
        params = Params()
        params.nboots = nboots
        params.quiet = False
        params.database = None
        self.params = params

        # result holders, filled in later
        self.results_table = None
        self.results_boots = None

        # default cluster connection settings
        self.ipcluster = dict(
            cluster_id="",
            profile="default",
            engines="Local",
            quiet=0,
            timeout=60,
            cores=0,
            threads=2,
            pids={},
        )
Beispiel #3
0
    def __init__(
        self,
        data,
        name="test",
        workdir="analysis-treemix",
        imap=None,
        minmap=None,
        seed=None,
        quiet=False,
        raise_root_error=False,
        binary=None,
        *args,
        **kwargs):
        """Configure a treemix analysis: binary, params, SNPs and paths.

        Parameters
        ----------
        data :
            Passed to ``SNPsExtracter`` as the data source.
        name : str
            Prefix of the result file names inside ``workdir``.
        workdir : str
            Output directory, created if missing. A falsy value falls back
            to ``analysis-treemix`` under the current directory.
        imap : dict
            Maps each key (presumably a population label) to the sample
            names it contains; used to build ``self.sidxs``.
        minmap :
            Passed to ``SNPsExtracter``.
        seed : int, optional
            Stored as ``self.params.seed``; a random seed is drawn only
            when this is ``None``. The value exactly as given is also
            passed to ``ext.subsample_snps``.
        quiet : bool
            Passed to ``SNPsExtracter``.
        raise_root_error : bool
            Stored on the instance.
        binary : str, optional
            Path to the treemix binary; defaults to
            ``<sys.prefix>/bin/treemix``.
        *args :
            Accepted and ignored.
        **kwargs :
            Remaining keys are written into ``self.params``.
        """
        # path attributes
        self.name = name
        self.data = data

        # if not imap then it will be set to 1
        self.minmap = minmap
        self.imap = imap

        # binary: user supplied path wins over the default location
        default_binary = os.path.join(sys.prefix, "bin", "treemix")
        self.binary = binary if binary else default_binary
        self.raise_root_error = raise_root_error
        self._find_binary()

        # params dict
        self.params = Params()
        self.params.k = 0
        self.params.m = 0
        self.params.g = (None, None)
        self.params.bootstrap = 0
        self.params.cormig = 0
        self.params.climb = 0
        self.params.noss = 0
        # Test against None rather than truthiness so that an explicit
        # seed of 0 is honoured instead of being replaced by a random one.
        if seed is not None:
            self.params.seed = seed
        else:
            self.params.seed = np.random.randint(0, int(1e9))
        self.params.root = None
        self.params.se = 0
        self.params.global_ = 0

        # get snps and snpmap
        ext = SNPsExtracter(self.data, self.imap, self.minmap, quiet=quiet)
        ext.parse_genos_from_hdf5()
        self.snps = ext.subsample_snps(seed)
        self.names = ext.names
        self.nsites = self.snps.shape[1]

        # Row index in self.names of every sample, grouped by imap key.
        # NOTE(review): the default imap=None cannot be iterated here;
        # confirm whether a fallback (e.g. from the extracter) is intended.
        self.sidxs = {
            pop: [self.names.index(sample) for sample in self.imap[pop]]
            for pop in self.imap
        }

        # make workdir if it does not exist
        if workdir:
            self.workdir = os.path.abspath(os.path.expanduser(workdir))
        else:
            self.workdir = os.path.join(
                os.path.abspath(os.path.curdir),
                "analysis-treemix",
            )
        if not os.path.exists(self.workdir):
            os.makedirs(self.workdir)

        # set params: any extra keyword that is not an init argument name
        notparams = set(
            ["workdir", "name", "data", "minmap", "imap", "seed", "quiet"]
        )
        for key in set(kwargs) - notparams:
            self.params[key] = kwargs[key]

        # results files
        self.files = Params()
        self.files.tree = os.path.join(self.workdir, self.name + ".treeout.gz")
        self.files.cov = os.path.join(self.workdir, self.name + ".cov.gz")
        self.files.llik = os.path.join(self.workdir, self.name + ".llik")

        # results
        self.results = Params()
        self.results.tree = ""
        self.results.admixture = []
        self.results.cov = []
        self.results.llik = None
Beispiel #4
0
    def __init__(self, name, data, workdir=None, **kwargs):
        """Store inputs, merge user options with defaults, prepare outputs.

        Parameters
        ----------
        name :
            Stored as ``self.name``.
        data :
            Stored as ``self.data`` and ``self.files.data``. It is tested
            for the substring ``".alleles"`` to set ``self._alleles``.
        workdir : str, optional
            Output directory, created if missing. A falsy value falls back
            to ``analysis-bucky`` under the current directory.
        **kwargs :
            Override the defaults in ``self._kwargs``. Every key except
            ``workdir``, ``samples`` and ``copied`` is copied into
            ``self.params``.
        """
        ## store attributes
        self.name = name
        self.data = data
        self._kwargs = {
            "minsnps": 0,
            "maxloci": None,
            "seed": None,
            "mb_mcmc_ngen": int(1e6),
            "mb_mcmc_burnin": int(1e5),
            "mb_mcmc_sample_freq": int(1e3),
            "bucky_alpha": [0.1, 1.0, 10.0],
            "bucky_nchains": 4,
            "bucky_nreps": 4,
            "bucky_niter": int(1e6),
            "copied": False,
            "samples": None,
        }
        self._kwargs.update(kwargs)

        ## check binaries
        self.check_binaries()

        ## check workdir
        if workdir:
            self.workdir = os.path.realpath(
                os.path.abspath(os.path.expanduser(workdir)))
        else:
            self.workdir = os.path.realpath(
                os.path.join(os.path.curdir, "analysis-bucky"))
        if not os.path.exists(self.workdir):
            os.makedirs(self.workdir)

        ## set bucky parameters with defaults
        self.params = Params()
        notparams = set(["workdir", "samples", "copied"])
        for key in set(self._kwargs) - notparams:
            self.params[key] = self._kwargs[key]

        ## seed: draw a random one, then let an explicit user seed win.
        ## The bound is an int (not the float 1e9), and the user value is
        ## tested against None so that a seed of 0 is not discarded.
        self.params.seed = np.random.randint(int(1e9))
        user_seed = self._kwargs["seed"]
        if user_seed is not None:
            self.params.seed = user_seed

        ## samples: empty list unless the user supplied some
        self.samples = []
        if self._kwargs["samples"]:
            self.samples = set(self._kwargs["samples"])

        ## flag set when the data name contains ".alleles"
        self._alleles = 1 if ".alleles" in self.data else 0

        ## results files
        self.files = Params()
        self.files.data = data
        self.files.nexfiles = []
        self.files.sumfiles = []
        self.files.buckyfiles = []

        ## accessible results
        ## get from property functions
        self.results = Params()
        self.results.concordance_trees = Params()
        self.results.population_trees = Params()
        self.results.concordance_factors = Params()
Beispiel #5
0
    def __init__(self,
                 data,
                 name="test",
                 workdir="analysis-raxml",
                 *args,
                 **kwargs):
        """Merge user options with defaults and set up paths for a run.

        Parameters
        ----------
        data : str
            Path to the input file; stored expanded and absolute as
            ``self.params.s``.
        name : str
            Stored as ``self.params.n`` and used as the suffix of the
            ``RAxML_*`` result file names.
        workdir : str
            Output directory, created if missing. A falsy value falls back
            to the current directory. Stored as ``self.params.w``.
        *args :
            Accepted and ignored.
        **kwargs :
            Override the defaults in ``self._kwargs``. Keys whose value is
            ``None`` are dropped; all others except ``workdir``, ``name``,
            ``data`` and ``binary`` are copied into ``self.params``.
        """
        ## default options
        self._kwargs = {
            "f": "a",
            "T": 4,  # <- change to zero !?
            "m": "GTRGAMMA",
            "N": 100,
            "x": 12345,
            "p": 54321,
            "o": None,
            "binary": "",
        }

        # update kwargs for user args and drop key if value is None
        self._kwargs.update(kwargs)
        self._kwargs = {
            key: val
            for (key, val) in self._kwargs.items() if val is not None
        }

        # check workdir
        if workdir:
            workdir = os.path.abspath(os.path.expanduser(workdir))
        else:
            workdir = os.path.abspath(os.path.curdir)
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        ## entered args
        self.params = Params()
        self.params.n = name
        self.params.w = workdir
        self.params.s = os.path.abspath(os.path.expanduser(data))

        ## find the binary. The key is read with .get() because passing
        ## binary=None removes it in the None-filter above, and indexing
        ## it directly would then raise KeyError.
        ## NOTE(review): a user supplied binary is not copied into
        ## self.params here ("binary" is excluded below); presumably
        ## _get_binary() handles that case -- confirm.
        if not self._kwargs.get("binary"):
            self.params.binary = _find_binary()

        ## set params
        notparams = set(["workdir", "name", "data", "binary"])
        for key in set(self._kwargs) - notparams:
            self.params[key] = self._kwargs[key]

        ## check binary
        self._get_binary()

        ## attributes filled in when a job runs
        self.rasync = None
        self.stdout = None
        self.stderr = None

        ## results files
        self.trees = Params()
        self.trees.bestTree = OPJ(workdir, "RAxML_bestTree." + name)
        self.trees.bipartitionsBranchLabels = OPJ(
            workdir, "RAxML_bipartitionsBranchLabels." + name)
        self.trees.bipartitions = OPJ(workdir, "RAxML_bipartitions." + name)
        self.trees.bootstrap = OPJ(workdir, "RAxML_bootstrap." + name)
        self.trees.info = OPJ(workdir, "RAxML_info." + name)
Beispiel #6
0
    def __init__(self,
                 data,
                 name="test",
                 workdir="analysis-fasttree",
                 *args,
                 **kwargs):
        """Merge user options with defaults and prepare a fasta input file.

        Parameters
        ----------
        data : str
            Path to the phylip input; stored expanded and absolute as
            ``self.params.s``.
        name : str
            Stored as ``self.params.n`` and used to name the fasta, tree
            and log files inside ``workdir``.
        workdir : str
            Output directory, created if missing. A falsy value falls back
            to the current directory. Stored as ``self.params.w``.
        *args :
            Accepted and ignored.
        **kwargs :
            Override the defaults in ``self._kwargs``. Keys whose value is
            ``None`` are dropped; all others except ``workdir``, ``name``,
            ``data`` and ``binary`` are copied into ``self.params``.

        Side effects
        ------------
        Writes ``<workdir>/<name>.fasta`` unless it already exists and
        ``overwrite`` is false, prints progress messages, and sets the
        ``OMP_NUM_THREADS`` environment variable when ``T`` is above 1.
        """
        ## default options
        self._kwargs = {
            "T": 1,
            "t": "nt",
            "m": "gtr",
            "binary": "",
            "advanced": "",
            "gamma": False,
            "overwrite": False,
        }

        # update kwargs for user args and drop key if value is None
        self._kwargs.update(kwargs)
        self._kwargs = {
            key: val
            for (key, val) in self._kwargs.items() if val is not None
        }

        # Options passed as None were removed above, so read the ones used
        # below with .get() and their defaults instead of indexing, which
        # would raise KeyError.
        threads = self._kwargs.get("T", 1)
        overwrite = self._kwargs.get("overwrite", False)

        # check workdir
        if workdir:
            workdir = os.path.abspath(os.path.expanduser(workdir))
        else:
            workdir = os.path.abspath(os.path.curdir)
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        ## entered args
        self.params = Params()
        self.params.n = name
        self.params.w = workdir
        self.params.s = os.path.abspath(os.path.expanduser(data))
        self.params.f = OPJ(workdir, name + ".fasta")

        ## find the binary
        ## NOTE(review): a user supplied binary is not copied into
        ## self.params here ("binary" is excluded below); presumably
        ## _get_binary() handles that case -- confirm.
        if not self._kwargs.get("binary"):
            self.params.binary = _find_binary(threads)

        ## set params
        notparams = set(["workdir", "name", "data", "binary"])
        for key in set(self._kwargs) - notparams:
            self.params[key] = self._kwargs[key]

        ## check binary
        self._get_binary()

        ## attributes filled in when a job runs
        self.rasync = None
        self.stdout = None
        self.stderr = None

        ## results files
        self.tree = OPJ(workdir, "FastTree_Tree." + name)
        self.log = OPJ(workdir, "FastTree." + name + ".log")

        # Convert phylip to fasta: per the original author's note, fasttree
        # reads interleaved phylip or fasta but not sequential phylip.
        phy_path = str(self.params.s)
        fasta_path = str(self.params.f)
        if os.path.exists(fasta_path) and not overwrite:
            print("Fasta file already exist in: {}".format(fasta_path),
                  end='\n')
        else:
            print("Converting phylip file to fasta...", end='\r')
            # Open the output once in write mode: this truncates any old
            # file and avoids reopening it for every input line.
            with open(phy_path, 'r') as phy, open(fasta_path, 'w') as fasta:
                for nline, line in enumerate(phy, start=1):
                    # first line is the phylip header
                    if nline == 1:
                        continue
                    # first field is the name, second the sequence (a name
                    # containing spaces will be split incorrectly)
                    fields = line.split()
                    if not fields:
                        # skip blank lines, e.g. at the end of the file
                        continue
                    fasta.write(">" + fields[0] + "\n" + fields[1] + "\n")
            print("Temporal fasta file saved in: {}".format(fasta_path),
                  end='\n')

        # Set environment variable to control OpenMP (OMP_NUM_THREADS)
        if threads > 1:
            os.environ["OMP_NUM_THREADS"] = str(threads)
            print(
                "FastTree will use {} threads, be sure have installed OpenMP".
                format(threads),
                end='\n')