Example #1
0
def convert_files():
    """Convert the downloaded MNIST IDX files to CSV, then delete the originals."""
    # print() call form works identically under Python 2 (single argument)
    # and Python 3; the original `print 'x'` statement is a Py3 syntax error.
    print('Converting files to CSV...')
    # list() so the result is indexable under Python 3 too, where map()
    # returns a lazy iterator rather than a list.
    filenames = list(map(utils.datafile, get_filenames()))

    # assumes get_filenames() returns [train-images, train-labels,
    # test-images, test-labels] in that order -- TODO confirm
    convert(filenames[0], filenames[1], utils.datafile('mnist-train.csv'), 60000)
    convert(filenames[2], filenames[3], utils.datafile('mnist-test.csv'), 10000)

    # The raw IDX files are no longer needed once the CSVs exist.
    for fn in filenames:
        os.remove(fn)
Example #2
0
def download_files():
    """Download each gzipped MNIST archive into the local data directory."""
    base_url = 'http://yann.lecun.com/exdb/mnist/'
    for fn in get_filenames():
        file_url = base_url + fn + '.gz'
        local_path = utils.datafile(fn + '.gz')
        # print() call form is valid in both Python 2 and 3.
        print('Saving {0} to {1}'.format(file_url, local_path))
        # NOTE(review): urllib.urlretrieve exists only in Python 2; under
        # Python 3 this must be urllib.request.urlretrieve -- confirm the
        # target interpreter before porting further.
        urllib.urlretrieve(file_url, local_path)
Example #3
0
def ensure_datadir_exists():
    """Create the data directory if it does not already exist."""
    # utils.datafile('') resolves to the data directory itself.
    datadir = utils.datafile('')
    if os.path.exists(datadir):
        # print() call form works under both Python 2 and 3; the original
        # print statement is a Py3 syntax error.
        print('Directory {0} already exists; skipping'.format(datadir))
    else:
        print('Creating directory {0}'.format(datadir))
        os.makedirs(datadir)
Example #4
0
def extract_files():
    """Decompress each downloaded .gz archive, then remove the archive."""
    for fn in map(utils.datafile, get_filenames()):
        # Both handles are closed automatically, even if read/write raises.
        with gzip.open(fn + '.gz', 'rb') as gzf:
            with open(utils.datafile(fn), 'wb') as f:
                # print() call form is valid in both Python 2 and 3.
                print('Extracting {0}'.format(gzf.name))
                f.write(gzf.read())

        print('Deleting {0}.gz'.format(fn))
        os.remove(fn + '.gz')
Example #5
0
 def get_Y_depth(self, N=5):
     """Estimate chrY read depth as the median over up to N usable regions.

     Reads the per-region table for the reference's chrY (resolved from
     self.ref), skips a fixed blacklist of region indices, measures depth
     via self.region_depth for up to N remaining regions, and returns the
     median of those depths.
     """
     UNIQY = datafile("chrY.{}.unique_ccn.gc".format(
         self.ref.split('_')[0]))
     depths = []
     # 'with' guarantees the table file is closed on every exit path;
     # the original opened it and never closed the handle.
     with open(UNIQY) as fp:
         for i, row in enumerate(fp):
             # Some regions still have mapped reads, exclude a few
             if i in (1, 4, 6, 7, 10, 11, 13, 16, 18, 19):
                 continue
             if len(depths) >= N:
                 break
             # Columns: chrom, start, end, gc -- gc is unused here.
             c, start, end, gc = row.split()
             start, end = int(start), int(end)
             d = self.region_depth(c, start, end)
             depths.append(d)
     self.logger.debug("Y depths (first {} regions): {}"
                       .format(N, np.array(depths)))
     return np.median(depths)
Example #6
0
import numpy as np

from math import exp
from collections import defaultdict

from bam_parser import PEextractor, FLANKMATCH, SPAN
from utils import datafile, listify
from scipy.stats import gaussian_kde, poisson


# Global settings
MAX_PERIOD = 6  # repeat-unit periods 1..MAX_PERIOD; one model line is read per period
SMALL_VALUE = exp(-10)  # presumably a small-probability floor -- TODO confirm usage
REALLY_SMALL_VALUE = exp(-100)  # presumably a near-zero probability floor -- TODO confirm usage
MODEL_PREFIX = "illumina_v3.pcrfree"
# Model file paths are resolved through the package data directory helper.
STEPMODEL = datafile(MODEL_PREFIX + ".stepmodel")
NOISEMODEL = datafile(MODEL_PREFIX + ".stuttermodel")
MIN_SPANNING_PAIRS = 5  # NOTE(review): minimum spanning read pairs threshold -- verify against callers


class StepModel:
    """
    Contains information about step size distributions
    """
    def __init__(self, filename=STEPMODEL):
        self.non_unit_step_by_period = {}
        self.step_size_by_period = {}
        fp = open(filename, "r")
        for i in range(MAX_PERIOD):
            line = fp.readline()
            self.non_unit_step_by_period[i + 1] = float(line.strip())