def writeMatlabScript(sample, species, tempDir, lowessDir, segmentDir):
    """Write a sample-specific MATLAB script and return its base name.

    The generated file starts with variable definitions for this sample
    (reference bin file, lowess input file, segment output file and the
    ``CBSalpha`` value from ``cfg.Segment()``), followed by a verbatim copy
    of the generic processing code stored in ``segVars.matlabBase``.

    Args:
        sample: Sample name; used to build the input and output file names.
        species: Key into ``cfg.Segment().binDict`` selecting the bin file.
        tempDir: Directory prefix where the ``.m`` script is written. It is
            concatenated as-is, so it must already end with a path separator.
        lowessDir: Directory prefix of ``<sample>.lowess.txt`` (concatenated
            as-is).
        segmentDir: Directory prefix of ``<sample>.segments.txt``
            (concatenated as-is).

    Returns:
        The script name without the ``.m`` extension: ``sample`` with every
        ``_``, ``-`` and ``.`` removed.
    """
    segVars = cfg.Segment()

    # Presumably stripped so the result is a valid MATLAB script name -- confirm.
    matlabName = sample
    for char in ('_', '-', '.'):
        matlabName = matlabName.replace(char, '')

    scriptFile = tempDir + matlabName + '.m'

    # NOTE: a "chromNum" definition (from segVars.chromNumDict) used to be
    # emitted here as well; it was disabled in the original code and is
    # intentionally still not written.
    header = [
        '%Sample specific variable definitions\n',
        "refFile = '" + segVars.binDict[species] + "';\n",
        "binFile = '" + lowessDir + sample + ".lowess.txt';\n",
        "saveFile = '" + segmentDir + sample + ".segments.txt';\n",
        "alpha = " + str(segVars.CBSalpha) + ";\n",
        '\n\n\n\n\n%Generic processing code\n',
    ]

    # Context managers guarantee both handles are closed even if reading the
    # base script or writing the output fails part-way through.
    with open(scriptFile, 'w') as out:
        out.writelines(header)
        with open(segVars.matlabBase, 'r') as base:
            for line in base:
                out.write(line)

    return matlabName
def runNormalizeOne(species, infile, methodRef, outfile):
    """Lowess-normalize one sample's bin counts and save the result.

    Column index 3 of ``infile`` is read as integer bin counts, passed to
    ``runLowess`` together with the ``gc`` column of the species bin file,
    optionally corrected by subtracting an amplification-method reference,
    and written to ``outfile`` with ``np.savetxt``.

    Args:
        species: Key into ``cfg.Segment().binDict`` selecting the bin file.
        infile: Path of the bin-count file for the sample.
        methodRef: Reference profile to subtract from the lowess output, or
            ``False``/``None`` (or an empty sequence) when no reference is
            available. Presumably the array returned by ``runMakeMethodRef``
            -- confirm against the caller.
        outfile: Path the normalized values are written to.
    """
    normVars = cfg.Segment()
    binArray = common.importInfoFile(normVars.binDict[species], [0, 2, 6], 'normref', skiprows=1)

    data = np.loadtxt(infile, usecols=[3], dtype='int')
    lowessData = runLowess(data, binArray['gc'])

    # A plain `if methodRef:` raises ValueError for a multi-element NumPy
    # array (ambiguous truth value), so test for "no reference" explicitly.
    if methodRef is not None and methodRef is not False:
        reference = np.asarray(methodRef)
        if reference.size:
            lowessData = lowessData - reference

    np.savetxt(outfile, lowessData)
def runMakeMethodRef(species, sampleList, methodName, lowessDir):
    """Build, save and return an amplification-method reference profile.

    Bin counts (column index 3) of every sufficiently deep sample in
    ``sampleList`` are summed, adjusted by ``adjustSexChroms``, passed
    through ``runLowess`` and written to
    ``lowessDir + methodName + '.methodRef.lowess.txt'``.

    Args:
        species: Key into ``cfg.Segment().binDict`` selecting the bin file.
        sampleList: Paths of per-sample bin-count files.
        methodName: Name of the amplification method; used in the output
            file name and in the printed summary.
        lowessDir: Directory prefix for the output file (concatenated as-is,
            so it must already end with a path separator).

    Returns:
        The lowess-processed reference array, or ``False`` when fewer than
        10 samples were supplied or fewer than 10 passed the count filter.
    """
    minSamples = 10          # fewer usable samples than this -> no reference
    minTotalCount = 500000   # samples whose counts sum below this are skipped

    if len(sampleList) < minSamples:
        return False

    normVars = cfg.Segment()
    binArray = common.importInfoFile(normVars.binDict[species], [0, 2, 6], 'normref', skiprows=1)

    chroms = binArray['chrom']
    xLocs = [idx for idx, chrom in enumerate(chroms) if chrom == 'chrX']
    yLocs = [idx for idx, chrom in enumerate(chroms) if chrom == 'chrY']

    mergeArray = np.zeros(len(binArray), dtype='int')
    sampleCount = 0
    for countFile in sampleList:
        data = np.loadtxt(countFile, usecols=[3], dtype='int')
        # ndarray.sum() instead of the builtin sum(): same total without
        # iterating the array element by element in Python.
        if data.sum() < minTotalCount:
            continue
        mergeArray = mergeArray + data
        sampleCount += 1

    if sampleCount < minSamples:
        return False

    mergeArray, maleTest = adjustSexChroms(mergeArray, xLocs, yLocs, sampleCount)

    lowessData = runLowess(mergeArray, binArray['gc'])
    if not maleTest:
        # No male sample contributed, so the chrY bins are zeroed out.
        lowessData[yLocs] = 0.

    np.savetxt(lowessDir + methodName + '.methodRef.lowess.txt', lowessData)

    printText = ('\tAmplification method reference for ' + methodName +
                 ' has been generated from ' + str(sampleCount) + ' samples ')
    if maleTest:
        printText += 'which included at least one male\n'
    else:
        printText += 'which included no male samples\n'
    print(printText)

    return lowessData
#!/usr/bin/python import sys import os import inspect import numpy as np currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(currentdir) sys.path.insert(0,parentdir) import common import config as cfg normVars = cfg.Segment() from statsmodels.nonparametric.smoothers_lowess import lowess as lowess #Lowess regression to correct bin counts for gc-content bias# def runLowess(counts, gc): counts = counts + 1 counts = counts / np.median(counts) counts = np.log(counts) lowessModel = lowess(counts, gc, frac=0.05, return_sorted=False)