예제 #1
0
def writeMatlabScript(sample, species, tempDir, lowessDir, segmentDir):
    """Write a sample-specific MATLAB script and return its base name.

    The generated file starts with a header of per-sample variable
    definitions (``refFile``, ``binFile``, ``saveFile``, ``alpha``) and is
    followed by a verbatim copy of the file at ``cfg.Segment().matlabBase``.

    Args:
        sample: Sample name. Used as-is in the lowess/segment file names and,
            with every '_', '-' and '.' removed, as the script name.
        species: Key into ``cfg.Segment().binDict`` selecting the reference
            file written to ``refFile``.
        tempDir: Prefix for the generated ``.m`` file. It is concatenated
            directly with the name, so it must already end with a separator.
        lowessDir: Prefix for ``<sample>.lowess.txt`` (same concatenation
            rule as ``tempDir``).
        segmentDir: Prefix for ``<sample>.segments.txt`` (same concatenation
            rule as ``tempDir``).

    Returns:
        The sanitized script name, i.e. the file name without ``.m``.

    Raises:
        KeyError: If ``species`` is not present in ``binDict``.
        IOError/OSError: If either file cannot be opened.
    """
    segVars = cfg.Segment()

    # Remove '_', '-' and '.' from the sample name -- presumably so the
    # result is usable as a MATLAB script name (confirm with the caller).
    matlabName = sample.replace('_', '').replace('-', '').replace('.', '')
    scriptFile = tempDir + matlabName + '.m'

    # Build the header before touching the filesystem so that a bad species
    # key fails without leaving an empty script file behind.
    headerLines = [
        '%Sample specific variable definitions\n',
        "refFile = '" + segVars.binDict[species] + "';\n",
        "binFile = '" + lowessDir + sample + ".lowess.txt';\n",
        "saveFile = '" + segmentDir + sample + ".segments.txt';\n",
        "alpha = " + str(segVars.CBSalpha) + ";\n",
        '\n\n\n\n\n%Generic processing code\n',
    ]

    # Context managers guarantee both handles are closed even when reading
    # the base script or writing the output raises.
    with open(scriptFile, 'w') as scriptOut:
        scriptOut.writelines(headerLines)
        with open(segVars.matlabBase, 'r') as baseIn:
            for line in baseIn:
                scriptOut.write(line)

    return matlabName
예제 #2
0
def runNormalizeOne(species, infile, methodRef, outfile):
    """Lowess-normalize one sample's bin counts and save them to ``outfile``.

    Reads the counts from column index 3 of ``infile``, passes them with the
    per-bin ``gc`` values of the species reference to ``runLowess``,
    optionally subtracts a method reference, and writes the result with
    ``np.savetxt``.

    Args:
        species: Key into ``cfg.Segment().binDict`` selecting the bin
            reference file.
        infile: Path of a text file whose fourth column holds integer counts.
        methodRef: Values to subtract from the lowess output, or a falsy
            value (``False``, ``None``, empty) to skip the subtraction.
            A NumPy array, as returned by ``runMakeMethodRef``, is accepted.
        outfile: Destination path handed to ``np.savetxt``.
    """
    normVars = cfg.Segment()

    binArray = common.importInfoFile(normVars.binDict[species], [0, 2, 6],
                                     'normref',
                                     skiprows=1)

    data = np.loadtxt(infile, usecols=[3], dtype='int')
    lowessData = runLowess(data, binArray['gc'])

    # A bare ``if methodRef:`` raises ValueError for a multi-element NumPy
    # array (ambiguous truth value), so arrays are tested by size instead.
    # Every other value keeps plain truthiness, so False/None still skip.
    if isinstance(methodRef, np.ndarray):
        hasMethodRef = methodRef.size > 0
    else:
        hasMethodRef = bool(methodRef)

    if hasMethodRef:
        lowessData = lowessData - methodRef

    np.savetxt(outfile, lowessData)
예제 #3
0
def runMakeMethodRef(species, sampleList, methodName, lowessDir):
    """Build and save a pooled lowess reference for an amplification method.

    Bin counts (column index 3) of every file in ``sampleList`` whose total
    count is at least 500000 are summed bin by bin. The pooled counts are
    passed through ``adjustSexChroms`` and ``runLowess``; when
    ``adjustSexChroms`` reports no male sample, the chrY bins of the result
    are set to zero. The result is saved to
    ``lowessDir + methodName + '.methodRef.lowess.txt'`` and a one-line
    summary is printed.

    Args:
        species: Key into ``cfg.Segment().binDict`` selecting the bin
            reference file.
        sampleList: Paths of per-sample bin count files.
        methodName: Name used in the output file name and printed summary.
        lowessDir: Prefix for the output file. It is concatenated directly,
            so it must already end with a path separator.

    Returns:
        The lowess-normalized reference values, or ``False`` when fewer than
        10 samples are supplied or fewer than 10 pass the count threshold.
    """
    minSamples = 10      # samples required to build a reference
    minTotalCount = 500000  # samples with a lower total count are ignored

    if len(sampleList) < minSamples:
        return False

    normVars = cfg.Segment()

    binArray = common.importInfoFile(normVars.binDict[species], [0, 2, 6],
                                     'normref',
                                     skiprows=1)
    chromNames = binArray['chrom']
    xLocs = [idx for idx, chrom in enumerate(chromNames) if chrom == 'chrX']
    yLocs = [idx for idx, chrom in enumerate(chromNames) if chrom == 'chrY']

    mergeArray = np.zeros(len(binArray), dtype='int')

    sampleCount = 0
    for countFile in sampleList:
        data = np.loadtxt(countFile, usecols=[3], dtype='int')

        # Vectorized sum instead of the builtin sum(), which iterates the
        # array element by element; int64 accumulator avoids overflow.
        if data.sum(dtype=np.int64) < minTotalCount:
            continue

        mergeArray = mergeArray + data
        sampleCount += 1

    if sampleCount < minSamples:
        return False

    mergeArray, maleTest = adjustSexChroms(mergeArray, xLocs, yLocs,
                                           sampleCount)

    lowessData = runLowess(mergeArray, binArray['gc'])

    if not maleTest:
        # No male sample in the pool: zero out the chrY bins.
        lowessData[yLocs] = 0.

    np.savetxt(lowessDir + methodName + '.methodRef.lowess.txt', lowessData)

    printText = ('\tAmplification method reference for ' + methodName +
                 ' has been generated from ' + str(sampleCount) + ' samples ')
    if maleTest:
        printText += 'which included at least one male\n'
    else:
        # Fixed wording: the original message read "with included no male".
        printText += 'which included no male samples\n'
    print(printText)

    return lowessData
#!/usr/bin/python
import sys
import os
import inspect
import numpy as np

# Resolve the directory containing this file and put its parent directory at
# the front of sys.path -- presumably so the project-level `common` and
# `config` modules imported right after this can be found (confirm layout).
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 
import common
import config as cfg

normVars = cfg.Segment()
from statsmodels.nonparametric.smoothers_lowess import lowess as lowess









#Lowess regression to correct bin counts for gc-content bias#
def runLowess(counts, gc):
	counts = counts + 1
	counts = counts / np.median(counts)
	counts = np.log(counts)
	
	lowessModel = lowess(counts, gc, frac=0.05, return_sorted=False)