コード例 #1
0
ファイル: runSoma.py プロジェクト: LeeMendelowitz/SOMA-V3
def adjustOpticalMap(contigMapFile, opticalMapFileOrig, opticalMapFileNew,
                     minHits=10, maxMissRate=0.10):
    """Write an adjusted optical map derived from high-quality, unique alignments.

    The contig maps are aligned against the original optical map, the
    resulting matches are filtered by quality and by uniqueness per contig,
    and the surviving matches are handed to the adjustOpticalMaps module,
    which is given opticalMapFileNew as its output argument.

    Args:
        contigMapFile: Path to the contig map file.
        opticalMapFileOrig: Path to the original optical map file.
        opticalMapFileNew: Path passed to adjustOpticalMaps.run as the new
            optical map file.
        minHits: Minimum value of mr.contigHits for a match to be kept.
        maxMissRate: Maximum allowed value of the larger of
            mr.contigMissRate and mr.opticalMissRate.

    Returns:
        None. Progress messages are written to stderr.
    """
    # Perform an initial alignment of the contigMapFile against the original
    # optical map.
    # NOTE(review): the prefix embeds opticalMapFileOrig verbatim, so a path
    # containing directory separators ends up inside the prefix - confirm
    # callers only pass names for which this is acceptable.
    outputPfx = '%s.%s.alignForAdjustment' % (contigMapFile, opticalMapFileOrig)
    res = makeAlignments(opticalMapFileOrig, contigMapFile, outputPfx)
    ml = parseSomaMatch.parseMatchFileXML(res['xmlFile'])

    sys.stderr.write('adjustOpticalMap: Parsed %i matches from file %s\n' % (len(ml), res['xmlFile']))

    # Filter the match list on hit count and on the worse of the two miss rates.
    def matchOK(mr):
        hitsOK = mr.contigHits >= minHits
        missRateOK = max(mr.contigMissRate, mr.opticalMissRate) <= maxMissRate
        return hitsOK and missRateOK

    goodMatches = [mr for mr in ml if matchOK(mr)]
    # Each message ends with a newline so consecutive log lines do not run together.
    sys.stderr.write('adjustOpticalMap: Filtered to %i matches based on quality\n' % len(goodMatches))

    # Count the number of good alignments per contig and keep only contigs
    # that have exactly one good alignment.
    contigAlignmentCounts = Counter(mr.contigId for mr in goodMatches)
    uniqueAlignments = [mr for mr in goodMatches if contigAlignmentCounts[mr.contigId] == 1]
    sys.stderr.write('adjustOpticalMap: Filtered to %i matches based on uniqueness\n' % len(uniqueAlignments))

    # Create a matched chunk file and use it to produce the new optical map.
    matchedChunkFile = '%s.matchedChunks' % outputPfx
    adjustOpticalMaps.makeMatchedChunkFile(uniqueAlignments, matchedChunkFile)
    adjustOpticalMaps.run(opticalMapFileOrig, matchedChunkFile, opticalMapFileNew)
コード例 #2
0
def parseRandomAlignments(xmlFile):
    """Parse a match XML file and collect its matches and scores.

    Args:
        xmlFile: Path to the match XML file, passed to
            parseSomaMatch.parseMatchFileXML.

    Returns:
        A dict with three entries:
            'matchDict': match results keyed by mr.contigId (if several
                matches share a contigId, the last one parsed is kept).
            'chunkScores': flat list of every value returned by
                mr.getChunkScores() across all matches, in match order.
            'scores': list of mr.score for every match.
    """
    # The file name is deliberately not parsed here: an earlier revision
    # computed an unused fragment count from xmlFile.split('.')[1], which
    # raised on valid paths such as './dir/random.100.xml'.
    matchResults = parseSomaMatch.parseMatchFileXML(xmlFile)
    return {
        'matchDict': dict((mr.contigId, mr) for mr in matchResults),
        'chunkScores': [chunkScore
                        for mr in matchResults
                        for chunkScore in mr.getChunkScores()],
        'scores': [mr.score for mr in matchResults],
    }
コード例 #3
0
ファイル: runSoma.py プロジェクト: LeeMendelowitz/SOMA-V3
def postProcess(xmlFile, opticalMapFile, contigMapFile, outputPfx, pvalCutoff=0.05):
    """Post-process a match XML file: significance filtering, reports, scaffolds.

    Steps, in order:
      1. Parse the matches from xmlFile and run the significance test on them.
      2. Select matches with mr.pval <= pvalCutoff, and from those the
         matches whose contig has exactly one significant match.
      3. Pickle the full, significant and unique-significant match lists.
      4. Write the info file, the contig status summary and the alignment
         text files.
      5. Create scaffold files for the significant and unique-significant
         matches, each with and without overlaps allowed.

    All output paths are built from outputPfx.

    Args:
        xmlFile: Path to the match XML file to parse.
        opticalMapFile: Path to the optical map file.
        contigMapFile: Path to the contig map file.
        outputPfx: Prefix used for every output file name.
        pvalCutoff: Largest mr.pval for a match to count as significant.

    Returns:
        None.
    """
    banner = '*' * 50

    # Parse results
    print('\n' + banner)
    print('Parsing SOMA OUTPUT...')
    print(banner + '\n')

    pickleFileAll = '%s.matchList.all.pickle' % outputPfx
    pickleFileSig = '%s.matchList.sig.pickle' % outputPfx
    pickleFileSigUnique = '%s.matchList.sig.unique.pickle' % outputPfx

    # Parse match results and run the significance test.
    # numThreads is not a parameter of this function; it is resolved from
    # module scope at call time.
    # NOTE(review): mr.pval is read below, so runSignificanceTest presumably
    # sets it on each match - confirm.
    ml = parseSomaMatch.parseMatchFileXML(xmlFile)
    significanceTest.runSignificanceTest(ml, contigMapFile, opticalMapFile, numThreads=numThreads)

    # Select significant results, then those that are unique for their contig.
    sigMatches = [mr for mr in ml if mr.pval <= pvalCutoff]
    sigMatchDict = parseSomaMatch.collectMatchResultsByContig(sigMatches)
    sigUniqueMatches = [matches[0] for contigId, matches in sigMatchDict.iteritems() if len(matches) == 1]
    sigUniqueMatchDict = parseSomaMatch.collectMatchResultsByContig(sigUniqueMatches)
    sys.stdout.write('Found %i significant matches (%i bp)\n' % (len(sigMatches), sum(mr.cAlignedBases for mr in sigMatches)))
    sys.stdout.write('Found %i unique significant matches (%i bp)\n' % (len(sigUniqueMatches), sum(mr.cAlignedBases for mr in sigUniqueMatches)))

    # Pickle the match results. Each file is closed as soon as it is written,
    # even if pickling raises. The mode is kept as 'w' so the files are
    # written exactly as before.
    pickleJobs = (
        (ml, pickleFileAll),
        (sigMatches, pickleFileSig),
        (sigUniqueMatches, pickleFileSigUnique),
    )
    for matchList, pickleFile in pickleJobs:
        with open(pickleFile, 'w') as pickleOut:
            cPickle.dump(matchList, pickleOut)

    infoFileOut = '%s.info' % outputPfx
    parseSomaMatch.writeInfoFile2(ml, infoFileOut)

    # Summarize alignment status for the contigs in the contig map file.
    contigMapDict = SOMAMap.readMaps(contigMapFile)
    opMapDict = SOMAMap.readMaps(opticalMapFile)
    summarizeContigStatus.summarizeContigStatus(outputPfx, sigMatchDict, contigMapDict)

    # Print the alignments to text files.
    with open('%s.SigUniqueAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigUniqueMatches, fout)
    with open('%s.AllSigAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigMatches, fout)

    # Create scaffolds
    print('\n' + banner)
    print('Creating Scaffolds...')
    print(banner + '\n')

    createScaffolds.createScaffolds(sigMatchDict, opMapDict, '%s.scaffold_sigMatches_withOverlaps.txt' % outputPfx, allowOverlaps=True)
    createScaffolds.createScaffolds(sigMatchDict, opMapDict, '%s.scaffold_sigMatches_noOverlaps.txt' % outputPfx, allowOverlaps=False)
    createScaffolds.createScaffolds(sigUniqueMatchDict, opMapDict, '%s.scaffold_sigUniqueMatches_withOverlaps.txt' % outputPfx, allowOverlaps=True)
    createScaffolds.createScaffolds(sigUniqueMatchDict, opMapDict, '%s.scaffold_sigUniqueMatches_noOverlaps.txt' % outputPfx, allowOverlaps=False)