Example #1
0
    def align_experiments(self, names=None, verbose=False, randomize=False,
                          rnd_method='interpolate', rnd_num=1000, **kwargs):
        """
        Align the predicted TAD boundaries of several experiments. The
        resulting alignment is stored in self.alignment, under a key made of
        the sorted tuple of the names of the aligned experiments.

        :param None names: list of names of the experiments to align. If None
            (or empty), all the experiments in self.experiments are aligned
        :param False verbose: if True, print the resulting alignment (the flag
            is also forwarded to the aligner and to the randomization test)
        :param False randomize: check the alignment quality by comparing
            randomized boundaries over Chromosomes of the same size. When
            True, the returned value changes (see below)
        :param interpolate rnd_method: by default uses the interpolation of TAD
           distribution. The alternative method is 'shuffle', where TADs are
           simply shuffled
        :param 1000 rnd_num: number of randomizations to do
        :param kwargs: extra keyword arguments, forwarded unchanged to both
            the aligner and the randomization test. Documented options are:

             - -0.1 penalty: penalty for inserting a gap in the alignment
             - 100000 max_dist: maximum distance between two boundaries
               allowing match (100Kb seems fair with HUMAN chromosomes)
             - reciprocal method: if global, Needleman-Wunsch is used to
               align (see
               :func:`pytadbit.boundary_aligner.globally.needleman_wunsch`);
               if reciprocal, a method based on reciprocal closest boundaries
               is used (see
               :func:`pytadbit.boundary_aligner.reciprocally.reciprocal`)

        :raises Exception: if one of the selected experiments has no TADs

        :returns: the Alignment object if randomize is False; otherwise a
            tuple (score, p_value), where p_value is the value returned by
            the randomization test
        """
        if names:
            xpers = ExperimentList([self.get_experiment(n) for n in names],
                                   self)
        else:
            xpers = self.experiments
        tads = []
        for xpr in xpers:
            if not xpr.tads:
                raise Exception('No TADs defined, use find_tad function.\n')
            # each TAD break index is scaled by the experiment resolution
            tads.append([xpr.tads[x]['brk'] * xpr.resolution for x in xpr.tads])
        aligneds, score = align(tads, verbose=verbose, **kwargs)
        # sorted names make the storage key independent of the input order
        name = tuple(sorted([x.name for x in xpers]))
        ali = Alignment(name, aligneds, xpers, score=score)
        self.alignment[name] = ali
        if verbose:
            # single-argument call form: same output under Python 2, and
            # valid syntax under Python 3
            print(self.alignment[name])
        if not randomize:
            return ali
        p_value = randomization_test(xpers, score=score, rnd_method=rnd_method,
                                     verbose=verbose, r_size=self.r_size,
                                     num=rnd_num, **kwargs)
        return score, p_value
Example #2
0
    def align_experiments(self, names=None, verbose=False, randomize=False,
                          rnd_method='interpolate', rnd_num=1000,
                          get_score=False, **kwargs):
        """
        Align the predicted TAD boundaries of several experiments and store
        the resulting alignment in self.alignment, under a key made of the
        sorted tuple of the names of the aligned experiments.

        :param None names: list of names of the experiments to align. If None
            (or empty), all the experiments in self.experiments are aligned
        :param False verbose: if True, print the resulting alignment (the flag
            is also forwarded to the aligner and to the randomization test)
        :param False randomize: check the alignment quality by comparing
            randomized boundaries over Chromosomes of the same size. When
            True, a tuple of statistics is returned next to the alignment
        :param False get_score: when randomize is False, also return the
            alignment score and the two proportions of aligned borders. It
            has no effect when randomize is True
        :param interpolate rnd_method: by default uses the interpolation of TAD
           distribution. The alternative method is 'shuffle', where TADs are
           simply shuffled
        :param 1000 rnd_num: number of randomizations to do
        :param kwargs: extra keyword arguments, forwarded unchanged to both
            the aligner and the randomization test. Documented options are:

             - -0.1 penalty: penalty for inserting a gap in the alignment
             - 100000 max_dist: maximum distance between two boundaries
               allowing match (100Kb seems fair with HUMAN chromosomes)
             - reciprocal method: if global, Needleman-Wunsch is used to
               align (see
               :func:`pytadbit.boundary_aligner.globally.needleman_wunsch`);
               if reciprocal, a method based on reciprocal closest boundaries
               is used (see
               :func:`pytadbit.boundary_aligner.reciprocally.reciprocal`)

        :raises Exception: if one of the selected experiments has no TADs

        :returns: an alignment object or, if the randomization was invoked,
           an alignment object and a tuple of statistics: the alignment
           score, the p-value given by the randomization test, the proportion
           of borders from the first experiment found aligned in the second
           experiment and the proportion of borders from the second
           experiment found aligned in the first experiment.
           Returned values can be caught like this:

               ali = crm.align_experiments()

           with the scores, and no randomization test:

               ali, score, prop1, prop2 = crm.align_experiments(get_score=True)

           or, with randomization test:

               ali, (score, pval, prop1, prop2) = crm.align_experiments(randomize=True)

        """
        experiments = (
            ExperimentList([self.get_experiment(n) for n in names], self)
            if names else self.experiments)

        # one list of scaled boundary positions per experiment
        boundaries = []
        for exp in experiments:
            if not exp.tads:
                raise Exception('No TADs defined, use find_tad function.\n')
            kept = []
            for key in exp.tads:
                # boundaries with a negative score are left out
                if exp.tads[key]['score'] >= 0:
                    kept.append(exp.tads[key]['brk'] * exp.resolution)
            boundaries.append(kept)

        result, consensus = align(boundaries, verbose=verbose, **kwargs)
        aligneds, score, perc1, perc2 = result

        # sorted names make the storage key independent of the input order
        label = tuple(sorted(exp.name for exp in experiments))
        alignment = Alignment(label, aligneds, experiments, consensus,
                              score=score)
        self.alignment[label] = alignment
        if verbose:
            print(self.alignment[label])

        if randomize:
            p_value = randomization_test(
                experiments, score=score, rnd_method=rnd_method,
                verbose=verbose, r_size=self.r_size, num=rnd_num, **kwargs)
            return alignment, (score, p_value, perc1, perc2)
        if get_score:
            return alignment, score, perc1, perc2
        return alignment
Example #3
0
    def align_experiments(self, names=None, verbose=False, randomize=False,
                          rnd_method='interpolate', rnd_num=1000,
                          get_score=False, **kwargs):
        """
        Align the predicted TAD boundaries of several experiments. The
        resulting alignment is stored in self.alignment, under a key made of
        the sorted tuple of the names of the aligned experiments.

        :param None names: list of names of the experiments to align. If None
            (or empty), all the experiments in self.experiments are aligned
        :param False verbose: if True, print the resulting alignment (the flag
            is also forwarded to the aligner and to the randomization test)
        :param False randomize: check the alignment quality by comparing
            randomized boundaries over Chromosomes of the same size. When
            True, a tuple of statistics is returned next to the alignment
        :param False get_score: when randomize is False, return the alignment
           object, the alignment score and the percentage of identity from
           one side and from the other. Ignored when randomize is True
        :param interpolate rnd_method: by default uses the interpolation of TAD
           distribution. The alternative method is 'shuffle', where TADs are
           simply shuffled
        :param 1000 rnd_num: number of randomizations to do
        :param kwargs: extra keyword arguments, forwarded unchanged to both
            the aligner and the randomization test. Documented options are:

             - -0.1 penalty: penalty for inserting a gap in the alignment
             - 100000 max_dist: maximum distance between two boundaries
               allowing match (100Kb seems fair with HUMAN chromosomes)
             - reciprocal method: if global, Needleman-Wunsch is used to
               align (see
               :func:`pytadbit.boundary_aligner.globally.needleman_wunsch`);
               if reciprocal, a method based on reciprocal closest boundaries
               is used (see
               :func:`pytadbit.boundary_aligner.reciprocally.reciprocal`)

        :raises Exception: if one of the selected experiments has no TADs

        :returns: an alignment object or, if the randomization was invoked,
           an alignment object and a tuple of statistics: the alignment
           score, the p-value given by the randomization test, the proportion
           of borders from the first experiment found aligned in the second
           experiment and the proportion of borders from the second
           experiment found aligned in the first experiment.
           Returned values can be caught like this:

               ali = crm.align_experiments()

           with the scores, and no randomization test:

               ali, score, prop1, prop2 = crm.align_experiments(get_score=True)

           or, with randomization test:

               ali, (score, pval, prop1, prop2) = crm.align_experiments(randomize=True)

        """
        if names:
            xpers = ExperimentList([self.get_experiment(n) for n in names],
                                   self)
        else:
            xpers = self.experiments
        tads = []
        for xpr in xpers:
            if not xpr.tads:
                raise Exception('No TADs defined, use find_tad function.\n')
            # each TAD break index is scaled by the experiment resolution;
            # boundaries with a negative score are left out of the alignment
            tads.append([xpr.tads[x]['brk'] * xpr.resolution for x in xpr.tads
                         if xpr.tads[x]['score'] >= 0])
        (aligneds, score, perc1, perc2), consensus = align(tads, verbose=verbose, **kwargs)
        # sorted names make the storage key independent of the input order
        name = tuple(sorted([x.name for x in xpers]))
        ali = Alignment(name, aligneds, xpers, consensus, score=score)
        self.alignment[name] = ali
        if verbose:
            # single-argument call form: same output under Python 2, and
            # valid syntax under Python 3
            print(self.alignment[name])
        if not randomize:
            if get_score:
                return ali, score, perc1, perc2
            return ali
        p_value = randomization_test(xpers, score=score, rnd_method=rnd_method,
                                     verbose=verbose, r_size=self.r_size,
                                     num=rnd_num, **kwargs)
        return ali, (score, p_value, perc1, perc2)