def convertOpticalMaps(opticalMapFile, outputPfx):
    """Convert an optical map file to SOMA format.

    The maps are read from `opticalMapFile` with
    make_opt.readMapDataSchwartz, checked to share a single enzyme,
    renamed so that their ids contain no whitespace, and written to
    '<outputPfx>.opt' with SOMAMap.writeMaps. Progress banners are written
    to stderr.

    Args:
        opticalMapFile: input file handed to make_opt.readMapDataSchwartz.
        outputPfx: prefix used to build the output file name.

    Returns:
        A dict with the keys:
            'enzyme'         -- the enzyme shared by all maps,
            'opMapList'      -- the map objects (their mapId is modified
                                in place),
            'opticalMapFile' -- name of the SOMA file that was written.

    Raises:
        RuntimeError: if the input contains no maps, or if the maps were
            not all produced with the same enzyme.
    """
    banner = '*' * 50
    opMapFileOut = '%s.opt' % outputPfx

    sys.stderr.write('\n%s\nReading Optical Map File %s\n%s\n'
                     % (banner, opticalMapFile, banner))

    opMapList = make_opt.readMapDataSchwartz(opticalMapFile)

    # Fail with a clear message instead of an IndexError on opMapList[0].
    if not opMapList:
        raise RuntimeError('No optical maps found in the input optical map file %s!'
                           % opticalMapFile)

    enzymeSet = set(om.enzyme for om in opMapList)
    if len(enzymeSet) > 1:
        raise RuntimeError('Different enzymes used in the input optical map set!')
    enzyme = opMapList[0].enzyme

    sys.stderr.write('\n%s\nConverting Optical Map to SOMA Format\n%s\n'
                     % (banner, banner))

    # Optical maps for chromosomes
    # Remove all white space from restriction map names
    for opMap in opMapList:
        opMap.mapId = ''.join(opMap.mapId.split())
    SOMAMap.writeMaps(opMapList, opMapFileOut)

    return {
        'enzyme': enzyme,
        'opMapList': opMapList,
        'opticalMapFile': opMapFileOut,
    }
def postProcess(xmlFile, opticalMapFile, contigMapFile, outputPfx):
    """Parse SOMA match output, select significant matches and build scaffolds.

    Steps, in order:
      1. Parse the matches in `xmlFile` and run the significance test on
         them against the contig and optical map files.
      2. Keep the matches with pval <= 0.05 ("significant") and, among
         those, the matches of contigs that have exactly one significant
         match ("unique significant").
      3. Pickle all / significant / unique-significant match lists.
      4. Write the info file, the contig status summary and the two
         alignment text files.
      5. Create scaffolds from the significant and the unique-significant
         matches, each with and without overlaps allowed.

    Every output file name starts with `outputPfx`. Progress messages are
    written to stdout.

    Args:
        xmlFile: SOMA match file handed to parseSomaMatch.parseMatchFileXML.
        opticalMapFile: optical map file, read with SOMAMap.readMaps.
        contigMapFile: contig map file, read with SOMAMap.readMaps.
        outputPfx: prefix for all generated output files.
    """
    banner = '*' * 50

    # Parse results
    sys.stdout.write('\n%s\nParsing SOMA OUTPUT...\n%s\n\n' % (banner, banner))

    pickleFileAll = '%s.matchList.all.pickle' % outputPfx
    pickleFileSig = '%s.matchList.sig.pickle' % outputPfx
    pickleFileSigUnique = '%s.matchList.sig.unique.pickle' % outputPfx

    # Parse Match Results. Write Pickle Files
    ml = parseSomaMatch.parseMatchFileXML(xmlFile)
    # NOTE(review): numThreads is not a parameter of this function; it has to
    # resolve from the enclosing module scope -- confirm it is defined there.
    significanceTest.runSignificanceTest(ml, contigMapFile, opticalMapFile,
                                         numThreads=numThreads)

    # Select significant results
    pvalCutoff = 0.05
    sigMatches = [mr for mr in ml if mr.pval <= pvalCutoff]
    sigMatchDict = parseSomaMatch.collectMatchResultsByContig(sigMatches)
    # A match is "unique" when it is its contig's only significant match.
    sigUniqueMatches = [matches[0] for matches in sigMatchDict.itervalues()
                        if len(matches) == 1]
    sigUniqueMatchDict = parseSomaMatch.collectMatchResultsByContig(sigUniqueMatches)

    sys.stdout.write('Found %i significant matches (%i bp)\n'
                     % (len(sigMatches),
                        sum(mr.cAlignedBases for mr in sigMatches)))
    sys.stdout.write('Found %i unique significant matches (%i bp)\n'
                     % (len(sigUniqueMatches),
                        sum(mr.cAlignedBases for mr in sigUniqueMatches)))

    # Pickle the matchResults. The files are opened in a `with` block so the
    # handles are flushed and closed even if pickling raises.
    for matchList, pickleFile in ((ml, pickleFileAll),
                                  (sigMatches, pickleFileSig),
                                  (sigUniqueMatches, pickleFileSigUnique)):
        with open(pickleFile, 'w') as pickleOut:
            cPickle.dump(matchList, pickleOut)

    infoFileOut = '%s.info' % outputPfx
    parseSomaMatch.writeInfoFile2(ml, infoFileOut)

    # Summarize alignment status for contigs in the silicoFile
    contigMapDict = SOMAMap.readMaps(contigMapFile)
    opMapDict = SOMAMap.readMaps(opticalMapFile)
    summarizeContigStatus.summarizeContigStatus(outputPfx, sigMatchDict, contigMapDict)

    # Print all of the alignments to a textFile
    with open('%s.SigUniqueAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigUniqueMatches, fout)
    with open('%s.AllSigAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigMatches, fout)

    # Create scaffolds
    sys.stdout.write('\n%s\nCreating Scaffolds...\n%s\n\n' % (banner, banner))

    # Same four calls, in the same order, as the file names spell out:
    # <pfx>.scaffold_<matchSet>_<overlapMode>.txt
    for matchSetName, matchDict in (('sigMatches', sigMatchDict),
                                    ('sigUniqueMatches', sigUniqueMatchDict)):
        for overlapName, allowOverlaps in (('withOverlaps', True),
                                           ('noOverlaps', False)):
            scaffoldFile = '%s.scaffold_%s_%s.txt' % (outputPfx, matchSetName, overlapName)
            createScaffolds.createScaffolds(matchDict, opMapDict, scaffoldFile,
                                            allowOverlaps=allowOverlaps)
def addFragsFromMap(self, mapFileName):
    """Add the fragments of every map stored in `mapFileName`.

    The file is read with SOMAMap.readMaps. For each map that has at least
    one fragment, all of its fragments are passed to self.addFrags; when
    the map has more than two fragments, the fragments between the first
    and the last are additionally passed to self.addInteriorFrags.

    Args:
        mapFileName: name of the map file to open and read.
    """
    # Close the file deterministically instead of leaking the handle.
    # NOTE(review): assumes readMaps fully consumes the file before
    # returning -- confirm against SOMAMap.readMaps.
    with open(mapFileName) as mapFile:
        mapDict = SOMAMap.readMaps(mapFile)

    for mapObj in mapDict.itervalues():
        frags = mapObj.frags
        if not frags:
            continue
        self.addFrags(frags)
        # Only maps with more than two fragments have interior ones.
        if len(frags) > 2:
            self.addInteriorFrags(frags[1:-1])