Exemple #1
0
def _small_file_innerdist(start,
                          fastq_file,
                          pair_file,
                          ref_file,
                          out_base,
                          out_dir,
                          config,
                          remove_workdir=False):
    """Estimate paired-read inner distance from a trial alignment.

    A fresh ``innerdist_estimate`` directory is created under ``out_dir``
    (any existing one is deleted first), the aligner returned by
    ``_select_bowtie_version(config)`` is run with the extra arguments
    ``-s <start> -u 250000``, and ``abs(isize) - 2 * rlen`` is collected for
    every record in the resulting SAM file that is a proper pair and read 1.

    Returns:
        ``(mean, standard_deviation)`` of the collected values, each rounded
        to an int, or ``(None, None)`` when no record qualified.

    Note:
        ``remove_workdir`` is accepted but not referenced in this function.
    """
    work_dir = os.path.join(out_dir, "innerdist_estimate")
    # Always start from an empty working directory.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    safe_makedir(work_dir)

    subset_args = ["-s", str(start), "-u", "250000"]
    aligner = _select_bowtie_version(config)
    sam_path = aligner.align(fastq_file, pair_file, ref_file, out_base,
                             work_dir, config, subset_args)

    with closing(pysam.Samfile(sam_path)) as sam_handle:
        inner_sizes = [abs(aln.isize) - 2 * aln.rlen
                       for aln in sam_handle
                       if aln.is_proper_pair and aln.is_read1]

    # Guard clause: nothing usable was aligned.
    if not inner_sizes:
        return None, None
    summary = Stats(inner_sizes)
    mean_dist = int(round(summary.mean()))
    std_dist = int(round(summary.standard_deviation()))
    return mean_dist, std_dist
Exemple #2
0
def _estimate_paired_innerdist(fastq_file, pair_file, ref_file, out_base,
                               out_dir, config):
    """Use Bowtie to estimate the inner distance of paired reads.

    ``_bowtie_for_innerdist`` is first called with start ``"1000000"``; when
    that yields zero distances it is called again with start ``"1"`` and an
    extra trailing ``True`` argument.

    Returns:
        ``(mean, standard_deviation)`` of the distances, each rounded to an
        int.
    """
    common = (fastq_file, pair_file, ref_file, out_base, out_dir, config)
    # Skip initial reads for a large file ...
    dists = _bowtie_for_innerdist("1000000", *common)
    if len(dists) == 0:
        # ... but not for a smaller one.
        retry_args = common + (True,)
        dists = _bowtie_for_innerdist("1", *retry_args)
    summary = Stats(dists)
    mean_dist = int(round(summary.mean()))
    std_dist = int(round(summary.standard_deviation()))
    return mean_dist, std_dist
Exemple #3
0
def _estimate_paired_innerdist(fastq_file, pair_file, ref_file, out_base,
                               out_dir, config):
    """Use Bowtie to estimate the inner distance of paired reads.

    Up to two calls to ``_bowtie_for_innerdist`` are made: the first with
    start ``"1000000"``, and, only if that returns zero distances, a second
    with start ``"1"`` plus a trailing ``True`` argument.

    Returns:
        ``(mean, standard_deviation)`` of the distances, each rounded to an
        int.
    """
    # Skip initial reads for a large file, but not for a smaller one: each
    # entry is (start, extra trailing positional arguments).
    attempts = (("1000000", ()), ("1", (True,)))
    for start, trailing in attempts:
        dists = _bowtie_for_innerdist(start, fastq_file, pair_file, ref_file,
                                      out_base, out_dir, config, *trailing)
        if len(dists) != 0:
            break
    summary = Stats(dists)
    mean_dist = int(round(summary.mean()))
    std_dist = int(round(summary.standard_deviation()))
    return mean_dist, std_dist
Exemple #4
0
def _bowtie_for_innerdist(start, fastq_file, pair_file, ref_file, out_base,
                          out_dir, data, remove_workdir=False):
    """Run a trial alignment and summarise the paired-read inner distance.

    A fresh ``innerdist_estimate`` directory is created under ``out_dir``
    (any existing one is deleted first). The aligner and reference returned
    by ``_determine_aligner_and_reference(ref_file, data["config"])`` are used
    with the extra arguments ``-s <start> -u 250000``, and
    ``abs(isize) - 2 * rlen`` is collected for every record in the resulting
    SAM file that is a proper pair and read 1.

    Returns:
        ``(mean, standard_deviation)`` of the collected values, each rounded
        to an int, or ``(None, None)`` when no record qualified.

    Note:
        ``remove_workdir`` is accepted but not referenced in this function.
    """
    work_dir = os.path.join(out_dir, "innerdist_estimate")
    # Always start from an empty working directory.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    safe_makedir(work_dir)

    subset_args = ["-s", str(start), "-u", "250000"]
    ref_file, aligner = _determine_aligner_and_reference(ref_file, data["config"])
    names = {"lane": out_base}
    sam_path = aligner.align(fastq_file, pair_file, ref_file, names,
                             work_dir, data, subset_args)

    with closing(pysam.Samfile(sam_path)) as sam_handle:
        inner_sizes = [abs(aln.isize) - 2 * aln.rlen
                       for aln in sam_handle
                       if aln.is_proper_pair and aln.is_read1]

    # Guard clause: nothing usable was aligned.
    if not inner_sizes:
        return None, None
    summary = Stats(inner_sizes)
    mean_dist = int(round(summary.mean()))
    std_dist = int(round(summary.standard_deviation()))
    return mean_dist, std_dist
Exemple #5
0
 def descriptive_stats(xs):
     """Format a min / percentile / max summary of ``xs`` as text.

     When ``xs`` has fewer than two items it is returned unchanged.
     Otherwise the result is a newline-joined string with one
     ``"  <label>: <value>"`` line each for min, the 5th, 25th, 50th
     (labelled ``median``), 75th, 95th and 99th percentiles reported by
     ``Stats(xs).percentile`` and max.
     """
     if len(xs) < 2:
         return xs
     summary = Stats(xs)
     labelled = [("min", min(xs))]
     for label, pct in (("5%", 5), ("25%", 25), ("median", 50),
                        ("75%", 75), ("95%", 95), ("99%", 99)):
         labelled.append((label, summary.percentile(pct)))
     labelled.append(("max", max(xs)))
     # `%` binds tighter than `+`, so only the value is interpolated.
     return "\n".join("  " + label + ": %s" % value
                      for label, value in labelled)
Exemple #6
0
 def descriptive_stats(xs):
     """Build a text report of the spread of ``xs``.

     An input with fewer than two items is handed back as-is. For longer
     inputs the report has eight lines, each indented by two spaces: min,
     the 5%, 25%, median (50th), 75%, 95% and 99% percentiles taken from
     ``Stats(xs).percentile``, and max.
     """
     if len(xs) < 2:
         return xs
     stats = Stats(xs)
     lines = ["  min: %s" % min(xs)]
     for pct, label in ((5, "5%"), (25, "25%"), (50, "median"),
                        (75, "75%"), (95, "95%"), (99, "99%")):
         # `%` binds tighter than `+`, so only the value is interpolated.
         lines.append("  " + label + ": %s" % stats.percentile(pct))
     lines.append("  max: %s" % max(xs))
     return "\n".join(lines)
Exemple #7
0
class TestPyDescriptiveStatistics(unittest.TestCase):
    """Checks the summary statistics ``Enum`` reports for a fixed sample.

    Float-valued results are compared with ``assertAlmostEqual`` so the tests
    do not depend on the exact rounding of the implementation's arithmetic;
    integer results are still compared exactly.
    """

    def setUp(self):
        self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

    def test_number(self):
        self.assertEqual(self.enum.number(), 11)

    def test_sum(self):
        self.assertEqual(self.enum.sum(), 54)

    def test_mean(self):
        self.assertAlmostEqual(self.enum.mean(), 4.909090909090909)

    def test_median(self):
        self.assertAlmostEqual(self.enum.median(), 5.0)

    def test_variance(self):
        self.assertAlmostEqual(self.enum.variance(), 7.7190082644628095)

    def test_standard_deviation(self):
        self.assertAlmostEqual(self.enum.standard_deviation(),
                               2.778310325442932)

    def test_percentile(self):
        self.assertAlmostEqual(self.enum.percentile(70), 6.0)
class TestPyDescriptiveStatistics(unittest.TestCase):
    """Checks the summary statistics ``Enum`` reports for a fixed sample.

    Float-valued results are compared with ``assertAlmostEqual`` so the tests
    do not depend on the exact rounding of the implementation's arithmetic;
    integer results are still compared exactly.
    """

    def setUp(self):
        self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

    def test_number(self):
        self.assertEqual(self.enum.number(), 11)

    def test_sum(self):
        self.assertEqual(self.enum.sum(), 54)

    def test_mean(self):
        self.assertAlmostEqual(self.enum.mean(), 4.909090909090909)

    def test_median(self):
        self.assertAlmostEqual(self.enum.median(), 5.0)

    def test_variance(self):
        self.assertAlmostEqual(self.enum.variance(), 7.7190082644628095)

    def test_standard_deviation(self):
        self.assertAlmostEqual(self.enum.standard_deviation(),
                               2.778310325442932)

    def test_percentile(self):
        self.assertAlmostEqual(self.enum.percentile(70), 6.0)
Exemple #9
0
#! /usr/bin/python
import subprocess
from py_descriptive_statistics import Enum  # calculates variance using https://github.com/gleicon/py_descriptive_statistics

city = 'cincinnati'
pair_path = "{}pair.txt".format(city)

# `head -n 1` / `tail -n 1` give the first and last line of the pair file;
# each is parsed as a float.
last = subprocess.check_output(["head", "-n", "1", pair_path])
now = subprocess.check_output(["tail", "-n", "1", pair_path])
last = float(last)
now = float(now)

# Variance of the two values, rounded for display.
enum = Enum([last, now])
rounded = round(enum.variance())
# Call form of print: valid on both Python 2 and Python 3.
print(rounded)
import sys
sys.path.append("..")

from py_descriptive_statistics import Enum

# Demo: print each summary statistic Enum offers for a fixed sample.
enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

# Call form of print: valid on both Python 2 and Python 3.
print(enum.number())
print(enum.sum())
print(enum.mean())
print(enum.median())
print(enum.variance())
print(enum.standard_deviation())
print(enum.percentile(70))
print(enum.percentile(95))
print(enum.percentile(99))

Exemple #11
0
 def setUp(self):
     """Store an ``Enum`` built from a fixed sample list on ``self.enum``."""
     samples = [2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2]
     self.enum = Enum(samples)
#! /usr/bin/python
import subprocess
from py_descriptive_statistics import Enum   # calculates variance using https://github.com/gleicon/py_descriptive_statistics

city = 'cincinnati'
pair_path = "{}pair.txt".format(city)

# `head -n 1` / `tail -n 1` give the first and last line of the pair file;
# each is parsed as a float.
last = subprocess.check_output(["head", "-n", "1", pair_path])
now = subprocess.check_output(["tail", "-n", "1", pair_path])
last = float(last)
now = float(now)

# Variance of the two values, rounded for display.
enum = Enum([last, now])
rounded = round(enum.variance())
# Call form of print: valid on both Python 2 and Python 3.
print(rounded)
Exemple #13
0
import sys
sys.path.append("..")

from py_descriptive_statistics import Enum

# Demo: print each summary statistic Enum offers for a fixed sample.
enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

# Call form of print: valid on both Python 2 and Python 3.
print(enum.number())
print(enum.sum())
print(enum.mean())
print(enum.median())
print(enum.variance())
print(enum.standard_deviation())
print(enum.percentile(70))
print(enum.percentile(95))
print(enum.percentile(99))
#! /usr/bin/python
import subprocess
from py_descriptive_statistics import Enum  # calculates standard deviation (root of variance?)  using https://github.com/gleicon/py_descriptive_statistics

city = 'cincinnati'
pair_path = "{}pair.txt".format(city)

# `head -n 1` / `tail -n 1` give the first and last line of the pair file;
# each is parsed as a float.
last = subprocess.check_output(["head", "-n", "1", pair_path])
now = subprocess.check_output(["tail", "-n", "1", pair_path])
last = float(last)
now = float(now)

# Standard deviation of the two values, rounded for display.
enum = Enum([last, now])
rounded = round(enum.standard_deviation())
# Call form of print: valid on both Python 2 and Python 3.
print(rounded)
 def setUp(self):
     """Store an ``Enum`` built from a fixed sample list on ``self.enum``."""
     samples = [2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2]
     self.enum = Enum(samples)
#! /usr/bin/python
import subprocess
from py_descriptive_statistics import Enum # calculates standard deviation (root of variance?)  using https://github.com/gleicon/py_descriptive_statistics

city = 'cincinnati'
pair_path = "{}pair.txt".format(city)

# `head -n 1` / `tail -n 1` give the first and last line of the pair file;
# each is parsed as a float.
last = subprocess.check_output(["head", "-n", "1", pair_path])
now = subprocess.check_output(["tail", "-n", "1", pair_path])
last = float(last)
now = float(now)

# Standard deviation of the two values, rounded for display.
enum = Enum([last, now])
rounded = round(enum.standard_deviation())
# Call form of print: valid on both Python 2 and Python 3.
print(rounded)