Ejemplo n.º 1
0
def randomization_test_old(num_sequences, mean, std, score, chr_len, bin_size, num=1000, verbose=False):
    """
    Estimate a p-value for an observed alignment score by comparing it with
    the scores obtained when aligning randomly generated TADs.

    Each randomization generates num_sequences random sequences with
    generate_random_tads, aligns them with align, and records the resulting
    score.

    :argument num_sequences: number of random sequences generated and
       aligned in each randomization
    :argument mean: forwarded to generate_random_tads (presumably the mean
       TAD length -- confirm against that function)
    :argument std: forwarded to generate_random_tads (presumably the
       standard deviation of TAD length -- confirm against that function)
    :argument score: observed alignment score the random scores are
       compared to
    :argument chr_len: forwarded to generate_random_tads and to align as
       chr_len
    :argument bin_size: forwarded to generate_random_tads and to align as
       bin_size
    :argument 1000 num: number of randomizations to run
    :argument False verbose: write progress to stdout, then print the
       best-scoring random alignment and a summary

    :returns: fraction of randomizations whose score is strictly higher
       than score

    :raises ValueError: if num is lower than 1
    """
    if num < 1:
        # Without at least one randomization the p-value is undefined (it
        # used to surface as an incidental ZeroDivisionError further down).
        raise ValueError("num must be at least 1, got {}".format(num))
    rand_distr = []
    rand_len = []
    best_score, best_tads = None, None
    for n in xrange(num):
        # Report progress every 5%.  100 * n / num is a multiple of 5 exactly
        # when 20 * n is a multiple of num; integer arithmetic avoids float
        # rounding (e.g. 0.55 * 100 == 55.00000000000001) that made the
        # float-modulo check silently skip some steps.
        if verbose and not (20 * n) % num:
            stdout.write("\r" + " " * 10 + " randomizing: {:.2%} completed".format(float(n) / num))
            stdout.flush()
        random_tads = [generate_random_tads(chr_len, mean, std, bin_size) for _ in xrange(num_sequences)]
        # Mean number of items per random sequence in this randomization.
        rand_len.append(float(sum(len(r) for r in random_tads)) / len(random_tads))
        rand_score = align(random_tads, bin_size=bin_size, chr_len=chr_len, verbose=False)[1]
        rand_distr.append(rand_score)
        # Keep the best-scoring random set so it can be displayed below; the
        # explicit None test replaces a comparison of a number against None.
        if best_score is None or rand_score > best_score:
            best_score, best_tads = rand_score, random_tads
    p_value = float(sum(1 for rand_score in rand_distr if rand_score > score)) / len(rand_distr)
    if verbose:
        stdout.write("\n {} randomizations finished.".format(num))
        stdout.flush()
        # Re-align the best random set verbosely so that it gets printed.
        align(best_tads, bin_size=bin_size, chr_len=chr_len, verbose=True)
        print("Observed alignment score: {}".format(score))
        print("  Randomized scores between {} and {}".format(min(rand_distr), max(rand_distr)))
        # A p-value of 0 only means "below the resolution of the test".
        print("p-value: {}".format(p_value if p_value else "<{}".format(1.0 / num)))
        print(sum(rand_len) / len(rand_len))
    return p_value
Ejemplo n.º 2
0
 def randomization_test(self, num_sequences, distr, score=None, num=1000, verbose=False):
     """
     Estimate a p-value for an observed alignment score by comparing it
     with the scores of alignments of randomly generated boundaries.

     :argument num_sequences: number of random sequences generated and
        aligned in each randomization
     :argument distr: the function to interpolate TAD lengths from
        probability; forwarded to generate_random_tads
     :argument None score: observed alignment score. It is compared with
        every randomized score to compute the p-value (and printed when
        verbose), so it has to be provided for the result to be meaningful
     :argument 1000 num: number of random alignments to generate for
        comparison
     :argument False verbose: write progress to stdout and print a summary

     :returns: fraction of random alignments whose score is strictly
        higher than score

     :raises ValueError: if num is lower than 1
     """
     if num < 1:
         # Without at least one randomization the p-value is undefined (it
         # used to surface as an incidental ZeroDivisionError further down).
         raise ValueError("num must be at least 1, got {}".format(num))
     rand_distr = []
     rand_len = []
     for n in xrange(num):
         # Report progress every 5%.  100 * n / num is a multiple of 5
         # exactly when 20 * n is a multiple of num; integer arithmetic
         # avoids float rounding (e.g. 0.55 * 100 == 55.00000000000001)
         # that made the float-modulo check silently skip some steps.
         if verbose and not (20 * n) % num:
             stdout.write("\r" + " " * 10 + " randomizing: {:.2%} completed".format(float(n) / num))
             stdout.flush()
         random_tads = [generate_random_tads(self.r_size, distr, self.resolution) for _ in xrange(num_sequences)]
         # Mean number of items per random sequence in this randomization.
         rand_len.append(float(sum(len(r) for r in random_tads)) / len(random_tads))
         rand_distr.append(align(random_tads, bin_size=self.resolution, chr_len=self.r_size, verbose=False)[1])
     p_value = float(sum(1 for rand_score in rand_distr if rand_score > score)) / len(rand_distr)
     if verbose:
         stdout.write("\n {} randomizations finished.".format(num))
         stdout.flush()
         print("  Observed alignment score: {}".format(score))
         observed_counts = [len(self.experiments[e]["brks"]) for e in self.experiments]
         print("  Mean number of boundaries: {}; observed: {}".format(
             sum(rand_len) / len(rand_len), str(observed_counts)
         ))
         print("Randomized scores between {} and {}; observed: {}".format(min(rand_distr), max(rand_distr), score))
         # A p-value of 0 only means "below the resolution of the test".
         print("p-value: {}".format(p_value if p_value else "<{}".format(1.0 / num)))
     return p_value
Ejemplo n.º 3
0
    def align_experiments(self, names=None, verbose=False, randomize=False, **kwargs):
        """
        Align the predicted boundaries of several experiments and store, for
        each of them, its aligned boundaries under the "align" key of
        self.experiments.

        :argument None names: list of names of experiments to align. If None
           align all.
        :argument False verbose: print the alignment
        :argument False randomize: check alignment quality by comparing it
           with alignments of randomized boundaries over chromosomes of the
           same size (see randomization_test)
        :argument kwargs: extra keyword arguments forwarded unchanged to
           align, e.g.:

           * -0.1 penalty: penalty of inserting a gap in the alignment
           * 500000 max_dist: maximum distance between 2 boundaries
             allowing match

        :returns: a tuple (alignment, score) when randomize is False, where
           alignment is the result of self.get_alignment(names); a tuple
           (score, p_value) when randomize is True, where p_value is the
           value returned by randomization_test

        :raises Exception: if one of the experiments has no TADs defined
        """
        experiments = names or self.experiments.keys()
        tads = []
        for name in experiments:
            experiment = self.experiments[name]
            if not experiment["tads"]:
                raise Exception("No TADs defined, use find_TAD function.\n")
            tads.append(experiment["brks"])
        aligneds, score = align(tads, bin_size=self.resolution, chr_len=self.r_size, **kwargs)
        for name, ali in zip(experiments, aligneds):
            self.experiments[name]["align"] = ali
        if verbose:
            self.print_alignment(experiments)
        if not randomize:
            return self.get_alignment(names), score
        distr = self.interpolation(experiments)
        # NOTE(review): the same kwargs were already passed to align above, so
        # every extra keyword has to be accepted by both callables -- confirm
        # against their signatures before relying on kwargs with randomize.
        p_value = self.randomization_test(len(experiments), distr, score, verbose=verbose, **kwargs)
        return score, p_value