Exemple #1
0
def _small_file_innerdist(start,
                          fastq_file,
                          pair_file,
                          ref_file,
                          out_base,
                          out_dir,
                          config,
                          remove_workdir=False):
    """Align a subset of paired reads and summarize their inner distances.

    A fresh ``innerdist_estimate`` directory is created under ``out_dir``
    (any existing one is deleted first) and the selected bowtie runner is
    invoked there with the extra arguments ``-s <start> -u 250000``
    (presumably bowtie's skip/stop-after read options -- confirm against
    the aligner documentation).

    Returns a ``(mean, standard_deviation)`` tuple of rounded integers
    computed over the proper-pair first reads, or ``(None, None)`` when no
    such reads were found.

    NOTE(review): ``remove_workdir`` is accepted but never read here.
    """
    estimate_dir = os.path.join(out_dir, "innerdist_estimate")
    # Always start from an empty working directory.
    if os.path.exists(estimate_dir):
        shutil.rmtree(estimate_dir)
    safe_makedir(estimate_dir)

    subset_args = ["-s", str(start), "-u", "250000"]
    aligner = _select_bowtie_version(config)
    sam_path = aligner.align(fastq_file, pair_file, ref_file, out_base,
                             estimate_dir, config, subset_args)

    inner_sizes = []
    with closing(pysam.Samfile(sam_path)) as sam_handle:
        for aln in sam_handle:
            # Count each properly paired fragment once, via its first read.
            if not (aln.is_proper_pair and aln.is_read1):
                continue
            inner_sizes.append(abs(aln.isize) - 2 * aln.rlen)

    if not inner_sizes:
        return None, None
    summary = Stats(inner_sizes)
    mean_dist = int(round(summary.mean()))
    stdev_dist = int(round(summary.standard_deviation()))
    return mean_dist, stdev_dist
Exemple #2
0
def _estimate_paired_innerdist(fastq_file, pair_file, ref_file, out_base,
                               out_dir, config):
    """Estimate the inner distance of paired reads with Bowtie.

    Returns a ``(mean, standard_deviation)`` tuple of rounded integers
    computed from the distances reported by ``_bowtie_for_innerdist``.
    """
    # First attempt skips the initial reads, which suits large files.
    inner_dists = _bowtie_for_innerdist("1000000", fastq_file, pair_file,
                                        ref_file, out_base, out_dir, config)
    if len(inner_dists) == 0:
        # Nothing came back: the file is smaller, so retry from the start.
        inner_dists = _bowtie_for_innerdist("1", fastq_file, pair_file,
                                            ref_file, out_base, out_dir,
                                            config, True)
    summary = Stats(inner_dists)
    mean_dist = int(round(summary.mean()))
    stdev_dist = int(round(summary.standard_deviation()))
    return mean_dist, stdev_dist
Exemple #3
0
def _estimate_paired_innerdist(fastq_file, pair_file, ref_file, out_base,
                               out_dir, config):
    """Estimate the inner distance of paired reads with Bowtie.

    Two alignment attempts are made in order; the second only runs when
    the first yields no distances. Returns a ``(mean, standard_deviation)``
    tuple of rounded integers.
    """
    # (start offset, trailing positional args) for each attempt: skip the
    # initial reads for a large file, then fall back to the beginning for
    # a smaller one.
    attempts = (
        ("1000000", ()),
        ("1", (True,)),
    )
    for start, trailing in attempts:
        dists = _bowtie_for_innerdist(start, fastq_file, pair_file, ref_file,
                                      out_base, out_dir, config, *trailing)
        if len(dists) != 0:
            break
    summary = Stats(dists)
    mean_dist = int(round(summary.mean()))
    stdev_dist = int(round(summary.standard_deviation()))
    return mean_dist, stdev_dist
Exemple #4
0
def _bowtie_for_innerdist(start, fastq_file, pair_file, ref_file, out_base,
                          out_dir, data, remove_workdir=False):
    """Run a bowtie alignment on a read subset and summarize inner distances.

    The alignment runs in a freshly created ``innerdist_estimate`` directory
    under ``out_dir`` (an existing one is deleted first), with the extra
    arguments ``-s <start> -u 250000``. The aligner and the reference it
    should use are chosen from ``data["config"]``.

    Returns ``(mean, standard_deviation)`` as rounded integers over the
    proper-pair first reads, or ``(None, None)`` if there are none.

    NOTE(review): ``remove_workdir`` is accepted but never read here.
    """
    scratch_dir = os.path.join(out_dir, "innerdist_estimate")
    # Discard results from any previous estimate before aligning again.
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)
    safe_makedir(scratch_dir)

    subset_args = ["-s", str(start), "-u", "250000"]
    ref_file, aligner = _determine_aligner_and_reference(ref_file,
                                                         data["config"])
    sam_path = aligner.align(fastq_file, pair_file, ref_file,
                             {"lane": out_base}, scratch_dir, data,
                             subset_args)

    with closing(pysam.Samfile(sam_path)) as sam_handle:
        # One measurement per properly paired fragment, taken from read 1.
        inner_sizes = [abs(aln.isize) - 2 * aln.rlen
                       for aln in sam_handle
                       if aln.is_proper_pair and aln.is_read1]

    if not inner_sizes:
        return None, None
    summary = Stats(inner_sizes)
    mean_dist = int(round(summary.mean()))
    stdev_dist = int(round(summary.standard_deviation()))
    return mean_dist, stdev_dist
Exemple #5
0
class TestPyDescriptiveStatistics(unittest.TestCase):
    """Check ``Enum``'s summary statistics on a fixed eleven-value sample."""

    def setUp(self):
        # A fresh instance per test keeps the cases independent.
        self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

    def test_number(self):
        self.assertEqual(self.enum.number(), 11)

    def test_sum(self):
        self.assertEqual(self.enum.sum(), 54)

    # The mean, variance and standard deviation are non-terminating
    # fractions, so their last digits depend on the order of floating-point
    # operations. Compare them with a tolerance (7 decimal places by
    # default) instead of exact equality.
    def test_mean(self):
        self.assertAlmostEqual(self.enum.mean(), 4.909090909090909)

    def test_median(self):
        self.assertEqual(self.enum.median(), 5.0)

    def test_variance(self):
        self.assertAlmostEqual(self.enum.variance(), 7.7190082644628095)

    def test_standard_deviation(self):
        self.assertAlmostEqual(self.enum.standard_deviation(),
                               2.778310325442932)

    def test_percentile(self):
        self.assertEqual(self.enum.percentile(70), 6.0)
class TestPyDescriptiveStatistics(unittest.TestCase):
    """Check ``Enum``'s summary statistics on a fixed eleven-value sample."""

    def setUp(self):
        # A fresh instance per test keeps the cases independent.
        self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

    def test_number(self):
        self.assertEqual(self.enum.number(), 11)

    def test_sum(self):
        self.assertEqual(self.enum.sum(), 54)

    # The mean, variance and standard deviation are non-terminating
    # fractions, so their last digits depend on the order of floating-point
    # operations. Compare them with a tolerance (7 decimal places by
    # default) instead of exact equality.
    def test_mean(self):
        self.assertAlmostEqual(self.enum.mean(), 4.909090909090909)

    def test_median(self):
        self.assertEqual(self.enum.median(), 5.0)

    def test_variance(self):
        self.assertAlmostEqual(self.enum.variance(), 7.7190082644628095)

    def test_standard_deviation(self):
        self.assertAlmostEqual(self.enum.standard_deviation(),
                               2.778310325442932)

    def test_percentile(self):
        self.assertEqual(self.enum.percentile(70), 6.0)
import sys
sys.path.append("..")

from py_descriptive_statistics import Enum

# Demo: build an Enum over a small sample and print each summary statistic.
enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

# Parenthesized single-argument print produces the same output as the
# Python 2 print statement and is also valid as the Python 3 function call.
print(enum.number())
print(enum.sum())
print(enum.mean())
print(enum.median())
print(enum.variance())
print(enum.standard_deviation())
print(enum.percentile(70))
print(enum.percentile(95))
print(enum.percentile(99))

Exemple #8
0
import sys
sys.path.append("..")

from py_descriptive_statistics import Enum

# Demo: build an Enum over a small sample and print each summary statistic.
enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

# Parenthesized single-argument print produces the same output as the
# Python 2 print statement and is also valid as the Python 3 function call.
print(enum.number())
print(enum.sum())
print(enum.mean())
print(enum.median())
print(enum.variance())
print(enum.standard_deviation())
print(enum.percentile(70))
print(enum.percentile(95))
print(enum.percentile(99))