コード例 #1
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def add_minsq_partitions(self, partitions, batchsize=1, background=False):
        """
        Compute MinSq results for the clusters of the given partition(s) and
        store them in `self.minsq_cache`, keyed by membership tuple.

        Clusters whose key is already present in the cache are skipped.

        :param partitions: a single Partition or an iterable of Partitions
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned
            without updating the cache
        :return: None, or the `parallel_map` result in background mode
        """
        if isinstance(partitions, Partition):
            partitions = (partitions, )

        # Every cluster key requested, minus the ones already cached
        requested = {key for part in partitions
                     for key in part.get_membership()}
        pending = requested.difference(self.minsq_cache.keys())
        if not pending:
            return

        # One argument entry per uncached cluster
        task_args = [
            self.concatenate(key).get_tree_collection_strings()
            for key in pending
        ]

        # Run through the parallel client when there is one, else in-process
        msg = 'Adding MinSq cluster trees'
        client = get_client()
        if client is not None:
            map_result = parallel_map(client, tasks.minsq_task, task_args,
                                      msg, batchsize, background)
            if background:
                return map_result
        else:
            map_result = sequential_map(tasks.minsq_task, task_args, msg)

        # Results come back in the same order the keys were iterated above
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, (key, outcome) in enumerate(zip(pending, map_result)):
            self.minsq_cache[key] = outcome
            pbar.update(position)
        pbar.finish()
コード例 #2
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def add_minsq_partitions(self, partitions, batchsize=1, background=False):
        """
        Fill `self.minsq_cache` with MinSq results for each cluster of the
        supplied partition(s) that has no cache entry yet.

        :param partitions: a single Partition or an iterable of Partitions
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned
            and the cache is left untouched
        :return: None, or the `parallel_map` result in background mode
        """
        if isinstance(partitions, Partition):
            partitions = (partitions,)

        # Cluster keys that still need computing
        all_keys = {key for part in partitions for key in part.get_membership()}
        missing = all_keys.difference(self.minsq_cache.keys())
        if not missing:
            return

        # Build the per-cluster task arguments
        task_args = [self.concatenate(key).get_tree_collection_strings() for key in missing]

        # Dispatch, in parallel if a client is available
        msg = 'Adding MinSq cluster trees'
        client = get_client()
        if client is not None:
            map_result = parallel_map(client, tasks.minsq_task, task_args, msg, batchsize, background)
            if background:
                return map_result
        else:
            map_result = sequential_map(tasks.minsq_task, task_args, msg)

        # Store results; `missing` is iterated in the same order as above
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, (key, outcome) in enumerate(zip(missing, map_result)):
            self.minsq_cache[key] = outcome
            pbar.update(position)
        pbar.finish()
コード例 #3
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
def _get_inter_tree_distances(metric,
                              trees,
                              normalise=False,
                              batchsize=100,
                              background=False):
    """
    Run the distance task for `metric` on every unordered pair of trees.

    :param metric: one of 'euc', 'geo', 'rf', 'wrf'; any other value raises
        KeyError
    :param trees: iterable of trees; pairs are generated with
        `itertools.combinations(trees, 2)`
    :param normalise: passed to the task as the third element of each
        argument tuple
    :param batchsize: forwarded to `parallel_map`
    :param background: forwarded to `parallel_map`; when true and a client
        is available, the `parallel_map` result is returned directly
    :return: `squareform` of the mapped results, or the `parallel_map`
        result in background mode
    """
    # One (tree, tree, normalise) tuple per pair
    pairs = itertools.combinations(trees, 2)
    args = [(first, second, normalise) for first, second in pairs]

    # Select the task implementing the requested metric
    tasks_dict = {
        'euc': tasks.eucdist_task,
        'geo': tasks.geodist_task,
        'rf': tasks.rfdist_task,
        'wrf': tasks.wrfdist_task,
    }
    task = tasks_dict[metric]

    msg = 'Inter-tree distances ({})'.format(metric)
    client = get_client()
    if client is None:
        return squareform(sequential_map(task, args, msg))

    map_result = parallel_map(client, task, args, msg, batchsize, background)
    if background:
        return map_result
    return squareform(list(map_result))
コード例 #4
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def calc_trees(self,
                   model=None,
                   threads=1,
                   indices=None,
                   batchsize=1,
                   output_dir=None,
                   background=False):
        """
        Use pllpy to calculate maximum-likelihood trees

        :param model: model name written into each record's partition
            string. When None, the model is chosen separately for every
            record ('DNA' if `rec.is_dna()` else 'LGX'). 'AUTOX' is
            replaced by 'AUTO'.
        :param threads: passed to `tasks.pll_task` in each argument tuple
        :param indices: indices of the records to process; defaults to all
            records
        :param batchsize: forwarded to `parallel_map`
        :param output_dir: if this is an existing directory, each task also
            receives None and the path '<output_dir>/<rec.name>.json'
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no results are processed here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # Assemble argument lists
        if indices is None:
            indices = list(range(len(self)))
        # The directory check does not depend on the record, so do it once
        write_output = output_dir is not None and os.path.isdir(output_dir)
        args = []
        to_delete = []
        for i in indices:
            rec = self[i]
            filename, delete = rec.get_alignment_file(as_phylip=True)
            if delete:
                to_delete.append(filename)
            # Resolve the model per record. Overwriting `model` itself would
            # make the first record's choice stick for all later records.
            if model is None:
                rec_model = 'DNA' if rec.is_dna() else 'LGX'
            elif model == 'AUTOX':
                rec_model = 'AUTO'
            else:
                rec_model = model
            partition = '{}, {} = 1 - {}'.format(rec_model, rec.name, len(rec))
            # Use the record's NJ tree if it has one; otherwise pass True
            nj_tree = rec.parameters.nj_tree
            tree = nj_tree if nj_tree is not None else True
            if write_output:
                output_file = os.path.join(output_dir,
                                           '{}.json'.format(rec.name))
                curr_args = (filename, partition, tree, threads,
                             PLL_RANDOM_SEED, None, output_file)
            else:
                curr_args = (filename, partition, tree, threads,
                             PLL_RANDOM_SEED)
            args.append(curr_args)

        # Dispatch work
        msg = 'Calculating ML trees'
        client = get_client()
        if client is None:
            map_result = sequential_map(tasks.pll_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.pll_task, args, msg,
                                      batchsize, background)
            if background:
                # NOTE(review): the files in `to_delete` are not handed to
                # TempFileList on this path -- confirm who cleans them up.
                return map_result

        # Process results
        with fileIO.TempFileList(to_delete):
            pbar = setup_progressbar('Processing results', len(map_result))
            pbar.start()
            for done, (i, result) in enumerate(zip(indices, map_result),
                                               start=1):
                self[i].parameters.construct_from_dict(result)
                pbar.update(done)
            # Close the progress bar, as the sibling methods do
            pbar.finish()
コード例 #5
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def add_lnl_partitions(self,
                           partitions,
                           threads=1,
                           use_calculated_freqs=True,
                           batchsize=1,
                           background=False):
        """
        Run `tasks.pll_task` for the clusters of the given partition(s) and
        store the results in `self.lnl_cache`, keyed by membership tuple.

        `add_minsq_partitions` is called first because each task is given
        the tree stored in `self.minsq_cache` for its cluster. Clusters
        already present in `self.lnl_cache` are skipped.

        :param partitions: a single Partition or an iterable of Partitions
        :param threads: passed to the task in each argument tuple
        :param use_calculated_freqs: when true the task receives
            `conc.frequencies`, otherwise None
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned
            without updating the cache
        :return: None, or the `parallel_map` result in background mode
        """
        self.add_minsq_partitions(partitions)
        if isinstance(partitions, Partition):
            partitions = (partitions, )

        # Cluster keys that have no likelihood result yet
        requested = {key for part in partitions
                     for key in part.get_membership()}
        pending = requested.difference(self.lnl_cache.keys())
        if not pending:
            return

        # Build one argument tuple per pending cluster
        task_args = []
        temp_files = []
        for key in pending:
            conc = self.concatenate(key)
            filename, is_temp = conc.alignment.get_alignment_file(
                as_phylip=True)
            if is_temp:
                temp_files.append(filename)
            qfile = conc.qfile(dna_model="GTR",
                               protein_model="LG",
                               ml_freqs=True)
            start_tree = self.minsq_cache[key]['tree']
            freqs = conc.frequencies if use_calculated_freqs else None
            task_args.append((filename, qfile, start_tree, threads,
                              PLL_RANDOM_SEED, freqs))

        # Dispatch while the alignment files are held by TempFileList
        with fileIO.TempFileList(temp_files):
            msg = 'Adding ML cluster trees'
            client = get_client()
            if client is not None:
                map_result = parallel_map(client, tasks.pll_task, task_args,
                                          msg, batchsize, background)
                if background:
                    return map_result
            else:
                map_result = sequential_map(tasks.pll_task, task_args, msg)

        # Store results; `pending` is iterated in the same order as above
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, (key, outcome) in enumerate(zip(pending, map_result)):
            self.lnl_cache[key] = outcome
            pbar.update(position)
        pbar.finish()
コード例 #6
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def calc_distances(self, batchsize=1, background=False):
        """
        Calculate fast approximate intra-alignment pairwise distances and variances using
        ML (requires ML models to have been set up using `calc_trees`).

        Each record's `parameters.partitions` supplies `alpha` and
        `frequencies` (plus `rates` for DNA records) as task input. The
        results are written back to `parameters.partitions.distances`,
        `parameters.partitions.variances` and `parameters.nj_tree`.

        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no record is updated here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # Assemble argument lists
        args = []
        to_delete = []
        for rec in self:
            filename, delete = rec.get_alignment_file(as_phylip=True)
            if delete:
                to_delete.append(filename)
            # Get input dict
            data = {
                'alpha': rec.parameters.partitions.alpha,
                'frequencies': rec.parameters.partitions.frequencies
            }
            if rec.is_dna():
                data['rates'] = rec.parameters.partitions.rates
            model = {'partitions': {0: data}}
            args.append((model, filename))

        # Dispatch
        msg = 'Calculating ML distances'
        client = get_client()
        if client is None:
            map_result = sequential_map(tasks.calc_distances_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.calc_distances_task, args,
                                      msg, batchsize, background)
            if background:
                # NOTE(review): the files in `to_delete` are not handed to
                # TempFileList on this path -- confirm who cleans them up.
                return map_result

        # Process results
        with fileIO.TempFileList(to_delete):
            pbar = setup_progressbar('Processing results', len(map_result))
            pbar.start()
            for i, result in enumerate(map_result):
                rec = self[i]
                fitted = result['partitions'][0]
                rec.parameters.partitions.distances = fitted['distances']
                rec.parameters.partitions.variances = fitted['variances']
                rec.parameters.nj_tree = result['nj_tree']
                pbar.update(i + 1)
            # Close the progress bar, as the sibling methods do
            pbar.finish()
コード例 #7
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def calc_trees(self, model=None, threads=1, indices=None, batchsize=1, output_dir=None, background=False):
        """
        Use pllpy to calculate maximum-likelihood trees

        :param model: model name written into each record's partition
            string. When None, the model is chosen separately for every
            record ('DNA' if `rec.is_dna()` else 'LGX'). 'AUTOX' is
            replaced by 'AUTO'.
        :param threads: passed to `tasks.pll_task` in each argument tuple
        :param indices: indices of the records to process; defaults to all
            records
        :param batchsize: forwarded to `parallel_map`
        :param output_dir: if this is an existing directory, each task also
            receives None and the path '<output_dir>/<rec.name>.json'
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no results are processed here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # Assemble argument lists
        if indices is None:
            indices = list(range(len(self)))
        # The directory check does not depend on the record, so do it once
        write_output = output_dir is not None and os.path.isdir(output_dir)
        args = []
        to_delete = []
        for i in indices:
            rec = self[i]
            filename, delete = rec.get_alignment_file(as_phylip=True)
            if delete:
                to_delete.append(filename)
            # Resolve the model per record. Overwriting `model` itself would
            # make the first record's choice stick for all later records.
            if model is None:
                rec_model = 'DNA' if rec.is_dna() else 'LGX'
            elif model == 'AUTOX':
                rec_model = 'AUTO'
            else:
                rec_model = model
            partition = '{}, {} = 1 - {}'.format(rec_model, rec.name, len(rec))
            # Use the record's NJ tree if it has one; otherwise pass True
            nj_tree = rec.parameters.nj_tree
            tree = nj_tree if nj_tree is not None else True
            if write_output:
                output_file = os.path.join(output_dir, '{}.json'.format(rec.name))
                curr_args = (filename, partition, tree, threads, PLL_RANDOM_SEED, None, output_file)
            else:
                curr_args = (filename, partition, tree, threads, PLL_RANDOM_SEED)
            args.append(curr_args)

        # Dispatch work
        msg = 'Calculating ML trees'
        client = get_client()
        if client is None:
            map_result = sequential_map(tasks.pll_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.pll_task, args, msg, batchsize, background)
            if background:
                # NOTE(review): the files in `to_delete` are not handed to
                # TempFileList on this path -- confirm who cleans them up.
                return map_result

        # Process results
        with fileIO.TempFileList(to_delete):
            pbar = setup_progressbar('Processing results', len(map_result))
            pbar.start()
            for done, (i, result) in enumerate(zip(indices, map_result), start=1):
                self[i].parameters.construct_from_dict(result)
                pbar.update(done)
            # Close the progress bar, as the sibling methods do
            pbar.finish()
コード例 #8
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def fast_calc_distances(self, batchsize=1, background=False):
        """
        Calculate fast approximate intra-alignment pairwise distances and variances using
        Jukes-Cantor closed formulae.

        For every record the task result's 'tree' is stored as
        `parameters.nj_tree`, and its 'distances' and 'variances' are stored
        on the record's partition parameters (a new PartitionParameters is
        created when the record has none).

        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no record is updated here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # One single-element argument tuple per record
        task_args = []
        temp_files = []
        for rec in self:
            filename, is_temp = rec.get_alignment_file(as_phylip=True)
            if is_temp:
                temp_files.append(filename)
            task_args.append((filename, ))

        # Dispatch work (either sequentially or in parallel)
        msg = 'Calculating fast distances'
        with fileIO.TempFileList(temp_files):
            client = get_client()
            if client is not None:
                map_result = parallel_map(client,
                                          tasks.fast_calc_distances_task,
                                          task_args, msg, batchsize,
                                          background)
                if background:
                    return map_result
            else:
                map_result = sequential_map(tasks.fast_calc_distances_task,
                                            task_args, msg)

        # Copy each result onto the record at the same position
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, outcome in enumerate(map_result):
            rec = self[position]
            distances, variances, tree = (outcome['distances'],
                                          outcome['variances'],
                                          outcome['tree'])
            rec.parameters.nj_tree = tree
            part_params = rec.parameters.partitions
            if part_params is None:
                part_params = PartitionParameters()
                rec.parameters.partitions = [part_params]
            part_params.distances = distances
            part_params.variances = variances
            pbar.update(position)
        pbar.finish()
コード例 #9
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def fast_calc_distances(self, batchsize=1, background=False):
        """
        Calculate fast approximate intra-alignment pairwise distances and variances using
        Jukes-Cantor closed formulae.

        For every record the task result's 'tree' is stored as
        `parameters.nj_tree`, and its 'distances' and 'variances' are stored
        on the record's partition parameters (a new PartitionParameters is
        created when the record has none).

        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no record is updated here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # One single-element argument tuple per record
        task_args = []
        temp_files = []
        for rec in self:
            filename, is_temp = rec.get_alignment_file(as_phylip=True)
            if is_temp:
                temp_files.append(filename)
            task_args.append((filename,))

        # Dispatch work (either sequentially or in parallel)
        msg = 'Calculating fast distances'
        with fileIO.TempFileList(temp_files):
            client = get_client()
            if client is not None:
                map_result = parallel_map(client, tasks.fast_calc_distances_task, task_args, msg, batchsize, background)
                if background:
                    return map_result
            else:
                map_result = sequential_map(tasks.fast_calc_distances_task, task_args, msg)

        # Copy each result onto the record at the same position
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, outcome in enumerate(map_result):
            rec = self[position]
            distances, variances, tree = outcome['distances'], outcome['variances'], outcome['tree']
            rec.parameters.nj_tree = tree
            part_params = rec.parameters.partitions
            if part_params is None:
                part_params = PartitionParameters()
                rec.parameters.partitions = [part_params]
            part_params.distances = distances
            part_params.variances = variances
            pbar.update(position)
        pbar.finish()
コード例 #10
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def add_lnl_partitions(self, partitions, threads=1, use_calculated_freqs=True, batchsize=1, background=False):
        """
        Run `tasks.pll_task` for the clusters of the given partition(s) and
        store the results in `self.lnl_cache`, keyed by membership tuple.

        `add_minsq_partitions` is called first because each task is given
        the tree stored in `self.minsq_cache` for its cluster. Clusters
        already present in `self.lnl_cache` are skipped.

        :param partitions: a single Partition or an iterable of Partitions
        :param threads: passed to the task in each argument tuple
        :param use_calculated_freqs: when true the task receives
            `conc.frequencies`, otherwise None
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned
            without updating the cache
        :return: None, or the `parallel_map` result in background mode
        """
        self.add_minsq_partitions(partitions)
        if isinstance(partitions, Partition):
            partitions = (partitions,)

        # Cluster keys that have no likelihood result yet
        requested = {key for part in partitions for key in part.get_membership()}
        pending = requested.difference(self.lnl_cache.keys())
        if not pending:
            return

        # Build one argument tuple per pending cluster
        task_args = []
        temp_files = []
        for key in pending:
            conc = self.concatenate(key)
            filename, is_temp = conc.alignment.get_alignment_file(as_phylip=True)
            if is_temp:
                temp_files.append(filename)
            qfile = conc.qfile(dna_model="GTR", protein_model="LG", ml_freqs=True)
            start_tree = self.minsq_cache[key]['tree']
            freqs = conc.frequencies if use_calculated_freqs else None
            task_args.append((filename, qfile, start_tree, threads, PLL_RANDOM_SEED, freqs))

        # Dispatch while the alignment files are held by TempFileList
        with fileIO.TempFileList(temp_files):
            msg = 'Adding ML cluster trees'
            client = get_client()
            if client is not None:
                map_result = parallel_map(client, tasks.pll_task, task_args, msg, batchsize, background)
                if background:
                    return map_result
            else:
                map_result = sequential_map(tasks.pll_task, task_args, msg)

        # Store results; `pending` is iterated in the same order as above
        pbar = setup_progressbar('Processing results', len(map_result))
        pbar.start()
        for position, (key, outcome) in enumerate(zip(pending, map_result)):
            self.lnl_cache[key] = outcome
            pbar.update(position)
        pbar.finish()
コード例 #11
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def calc_distances(self, batchsize=1, background=False):
        """
        Calculate fast approximate intra-alignment pairwise distances and variances using
        ML (requires ML models to have been set up using `calc_trees`).

        Each record's `parameters.partitions` supplies `alpha` and
        `frequencies` (plus `rates` for DNA records) as task input. The
        results are written back to `parameters.partitions.distances`,
        `parameters.partitions.variances` and `parameters.nj_tree`.

        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no record is updated here
        :return: None (all side effects), or the `parallel_map` result in
            background mode
        """
        # Assemble argument lists
        args = []
        to_delete = []
        for rec in self:
            filename, delete = rec.get_alignment_file(as_phylip=True)
            if delete:
                to_delete.append(filename)
            # Get input dict
            data = {'alpha': rec.parameters.partitions.alpha, 'frequencies': rec.parameters.partitions.frequencies}
            if rec.is_dna():
                data['rates'] = rec.parameters.partitions.rates
            model = {'partitions': {0: data}}
            args.append((model, filename))

        # Dispatch
        msg = 'Calculating ML distances'
        client = get_client()
        if client is None:
            map_result = sequential_map(tasks.calc_distances_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.calc_distances_task, args, msg, batchsize, background)
            if background:
                # NOTE(review): the files in `to_delete` are not handed to
                # TempFileList on this path -- confirm who cleans them up.
                return map_result

        # Process results
        with fileIO.TempFileList(to_delete):
            pbar = setup_progressbar('Processing results', len(map_result))
            pbar.start()
            for i, result in enumerate(map_result):
                rec = self[i]
                fitted = result['partitions'][0]
                rec.parameters.partitions.distances = fitted['distances']
                rec.parameters.partitions.variances = fitted['variances']
                rec.parameters.nj_tree = result['nj_tree']
                pbar.update(i + 1)
            # Close the progress bar, as the sibling methods do
            pbar.finish()
コード例 #12
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def simulate(self, partition, outdir, batchsize=1, background=False, **kwargs):
        """
        Simulate a set of alignments from the parameters inferred on a partition

        The likelihood results for the partition's clusters are computed (or
        taken from `self.lnl_cache`), one simulation task is built per
        record, and each simulated alignment is written to
        '<outdir>/<record name>.phy'.

        :param partition: the Partition whose clusters provide the parameters
        :param outdir: directory the simulated alignments are written to
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no files are written here. It is not forwarded to
            `add_lnl_partitions`, whose results are needed immediately.
        :param kwargs: forwarded to `add_lnl_partitions`
        :return: None, or the `parallel_map` result in background mode
        """
        indices = partition.get_membership()
        self.add_lnl_partitions(partition, **kwargs)
        results = [self.lnl_cache[ix] for ix in indices]
        # Map record name -> position in the collection
        places = {
            rec.name: i
            for i, rec in enumerate(self.collection.records)
        }

        # Collect argument list
        # NOTE(review): positions not named by any result stay None and are
        # passed to the task as-is -- confirm every record is covered.
        args = [None] * len(self.collection)
        for result in results:
            # `part` rather than `partition`, to avoid shadowing the parameter
            for part in result['partitions'].values():
                place = places[part['name']]
                rates = part['rates'] if 'rates' in part else None
                args[place] = (len(self.collection[place]),
                               model_translate(part['model']),
                               part['frequencies'], part['alpha'],
                               result['ml_tree'], rates)

        # Distribute work
        msg = 'Simulating'
        client = get_client()
        if client is None:
            # sequential_map takes (task, args, msg), with no client argument
            map_result = sequential_map(tasks.simulate_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.simulate_task, args, msg,
                                      batchsize, background)
            if background:
                return map_result

        # Process results
        for i, result in enumerate(map_result):
            orig = self.collection[i]
            simseqs = gapmask(result, orig.get_sequences())
            al = Alignment(simseqs, 'protein' if orig.is_protein() else 'dna')
            outfile = os.path.join(outdir, orig.name + '.phy')
            al.write_alignment(outfile, 'phylip', True)
コード例 #13
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
def _get_inter_tree_distances(metric, trees, normalise=False, batchsize=100, background=False):
    """
    Run the distance task for `metric` on every unordered pair of trees.

    :param metric: one of 'euc', 'geo', 'rf', 'wrf'; any other value raises
        KeyError
    :param trees: iterable of trees; pairs are generated with
        `itertools.combinations(trees, 2)`
    :param normalise: passed to the task as the third element of each
        argument tuple
    :param batchsize: forwarded to `parallel_map`
    :param background: forwarded to `parallel_map`; when true and a client
        is available, the `parallel_map` result is returned directly
    :return: `squareform` of the mapped results, or the `parallel_map`
        result in background mode
    """
    # One (tree, tree, normalise) tuple per pair
    pairs = itertools.combinations(trees, 2)
    args = [(first, second, normalise) for first, second in pairs]

    # Select the task implementing the requested metric
    tasks_dict = {
        'euc': tasks.eucdist_task,
        'geo': tasks.geodist_task,
        'rf': tasks.rfdist_task,
        'wrf': tasks.wrfdist_task,
    }
    task = tasks_dict[metric]

    msg = 'Inter-tree distances ({})'.format(metric)
    client = get_client()
    if client is None:
        return squareform(sequential_map(task, args, msg))

    map_result = parallel_map(client, task, args, msg, batchsize, background)
    if background:
        return map_result
    return squareform(list(map_result))
コード例 #14
0
ファイル: collection.py プロジェクト: xypan1232/treeCl
    def simulate(self, partition, outdir, batchsize=1, background=False, **kwargs):
        """
        Simulate a set of alignments from the parameters inferred on a partition

        The likelihood results for the partition's clusters are computed (or
        taken from `self.lnl_cache`), one simulation task is built per
        record, and each simulated alignment is written to
        '<outdir>/<record name>.phy'.

        :param partition: the Partition whose clusters provide the parameters
        :param outdir: directory the simulated alignments are written to
        :param batchsize: forwarded to `parallel_map`
        :param background: forwarded to `parallel_map`; when true and a
            client is available, the `parallel_map` result is returned and
            no files are written here. It is not forwarded to
            `add_lnl_partitions`, whose results are needed immediately.
        :param kwargs: forwarded to `add_lnl_partitions`
        :return: None, or the `parallel_map` result in background mode
        """
        indices = partition.get_membership()
        self.add_lnl_partitions(partition, **kwargs)
        results = [self.lnl_cache[ix] for ix in indices]
        # Map record name -> position in the collection
        places = {rec.name: i for i, rec in enumerate(self.collection.records)}

        # Collect argument list
        # NOTE(review): positions not named by any result stay None and are
        # passed to the task as-is -- confirm every record is covered.
        args = [None] * len(self.collection)
        for result in results:
            # `part` rather than `partition`, to avoid shadowing the parameter
            for part in result['partitions'].values():
                place = places[part['name']]
                args[place] = (len(self.collection[place]),
                               model_translate(part['model']),
                               part['frequencies'],
                               part['alpha'],
                               result['ml_tree'],
                               part['rates'] if 'rates' in part else None)

        # Distribute work
        msg = 'Simulating'
        client = get_client()
        if client is None:
            # sequential_map takes (task, args, msg), with no client argument
            map_result = sequential_map(tasks.simulate_task, args, msg)
        else:
            map_result = parallel_map(client, tasks.simulate_task, args, msg, batchsize, background)
            if background:
                return map_result

        # Process results
        for i, result in enumerate(map_result):
            orig = self.collection[i]
            simseqs = gapmask(result, orig.get_sequences())
            al = Alignment(simseqs, 'protein' if orig.is_protein() else 'dna')
            outfile = os.path.join(outdir, orig.name + '.phy')
            al.write_alignment(outfile, 'phylip', True)