Esempio n. 1
0
def convertOpticalMaps(opticalMapFile, outputPfx):
    """Read an optical map file and rewrite its maps in SOMA format.

    The maps are loaded with make_opt.readMapDataSchwartz, every map id has
    all of its whitespace removed (the map objects are modified in place),
    and the maps are written to '<outputPfx>.opt' via SOMAMap.writeMaps.
    Progress banners are written to stderr.

    Args:
        opticalMapFile: input handed to make_opt.readMapDataSchwartz
            (presumably a file path -- confirm against that helper).
        outputPfx: prefix used to build the output file name.

    Returns:
        A dict with the keys:
            'enzyme'         -- the single enzyme shared by all input maps
            'opMapList'      -- the list of map objects that was written
            'opticalMapFile' -- the name of the SOMA file that was written

    Raises:
        RuntimeError: if the input yields no maps, or if the maps do not all
            use the same enzyme.
    """
    opMapFileOut = '%s.opt' % outputPfx
    banner = '*' * 50

    msg = '\n' + banner + \
          '\nReading Optical Map File %s\n' % opticalMapFile + \
          banner + '\n'
    sys.stderr.write(msg)

    opMapList = make_opt.readMapDataSchwartz(opticalMapFile)
    enzymeSet = set(om.enzyme for om in opMapList)
    if not enzymeSet:
        # Without this guard an empty input surfaces as a bare IndexError
        # on opMapList[0] below, which gives the user no hint of the cause.
        raise RuntimeError('No optical maps found in %s!' % opticalMapFile)
    if len(enzymeSet) > 1:
        raise RuntimeError('Different enzymes used in the input optical map set!')
    enzyme = opMapList[0].enzyme

    msg = '\n' + banner + \
          '\nConverting Optical Map to SOMA Format\n' + \
          banner + '\n'
    sys.stderr.write(msg)

    # Remove all white space from the restriction map names before writing.
    for opMap in opMapList:
        opMap.mapId = ''.join(opMap.mapId.split())
    SOMAMap.writeMaps(opMapList, opMapFileOut)

    return {'enzyme': enzyme,
            'opMapList': opMapList,
            'opticalMapFile': opMapFileOut}
Esempio n. 2
0
def postProcess(xmlFile, opticalMapFile, contigMapFile, outputPfx):
    """Parse SOMA match output, select significant matches, build scaffolds.

    Steps, in order:
      1. Parse the match results from xmlFile and run the significance test
         on them.
      2. Keep matches with pval <= 0.05 ("significant"); among those, keep
         the matches of contigs that have exactly one significant match
         ("unique significant").
      3. Pickle the three match lists, write the info file, summarize the
         contig status and write the alignments as text.
      4. Create scaffolds from the significant and the unique significant
         matches, each with and without overlaps allowed.

    All output files are named '<outputPfx>.<suffix>'.

    Args:
        xmlFile: match file handed to parseSomaMatch.parseMatchFileXML.
        opticalMapFile: optical map file handed to SOMAMap.readMaps and to
            the significance test.
        contigMapFile: contig map file handed to SOMAMap.readMaps and to
            the significance test.
        outputPfx: prefix for every output file name.
    """
    banner = '*' * 50

    # Parse results
    sys.stdout.write('\n' + banner + '\n')
    sys.stdout.write('Parsing SOMA OUTPUT...\n')
    sys.stdout.write(banner + '\n\n')

    pickleFileAll = '%s.matchList.all.pickle' % outputPfx
    pickleFileSig = '%s.matchList.sig.pickle' % outputPfx
    pickleFileSigUnique = '%s.matchList.sig.unique.pickle' % outputPfx

    # Parse Match Results.
    ml = parseSomaMatch.parseMatchFileXML(xmlFile)
    # NOTE(review): numThreads is not a parameter of this function; it must
    # be defined at module scope or this call raises NameError -- confirm.
    significanceTest.runSignificanceTest(ml, contigMapFile, opticalMapFile, numThreads=numThreads)

    # Select significant results
    pvalCutoff = 0.05
    sigMatches = [mr for mr in ml if mr.pval <= pvalCutoff]
    sigMatchDict = parseSomaMatch.collectMatchResultsByContig(sigMatches)
    # A contig's match is "unique" when it is that contig's only significant match.
    sigUniqueMatches = [matches[0] for contigId, matches in sigMatchDict.iteritems() if len(matches) == 1]
    sigUniqueMatchDict = parseSomaMatch.collectMatchResultsByContig(sigUniqueMatches)
    sys.stdout.write('Found %i significant matches (%i bp)\n' % (len(sigMatches), sum(mr.cAlignedBases for mr in sigMatches)))
    sys.stdout.write('Found %i unique significant matches (%i bp)\n' % (len(sigUniqueMatches), sum(mr.cAlignedBases for mr in sigUniqueMatches)))

    # Pickle the matchResults. Each file is opened in a with-block so the
    # handle is flushed and closed deterministically instead of being leaked.
    # The mode stays 'w' (the default pickle protocol is ASCII) so the files
    # are written exactly as before.
    pickleJobs = ((pickleFileAll, ml),
                  (pickleFileSig, sigMatches),
                  (pickleFileSigUnique, sigUniqueMatches))
    for pickleFile, matchList in pickleJobs:
        with open(pickleFile, 'w') as fout:
            cPickle.dump(matchList, fout)

    infoFileOut = '%s.info' % outputPfx
    parseSomaMatch.writeInfoFile2(ml, infoFileOut)

    # Summarize alignment status for contigs in the silicoFile
    contigMapDict = SOMAMap.readMaps(contigMapFile)
    opMapDict = SOMAMap.readMaps(opticalMapFile)
    summarizeContigStatus.summarizeContigStatus(outputPfx, sigMatchDict, contigMapDict)

    # Print all of the alignments to text files; with-blocks close the
    # files even if printAlignments raises.
    with open('%s.SigUniqueAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigUniqueMatches, fout)
    with open('%s.AllSigAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigMatches, fout)

    # Create scaffolds
    sys.stdout.write('\n' + banner + '\n')
    sys.stdout.write('Creating Scaffolds...\n')
    sys.stdout.write(banner + '\n\n')

    createScaffolds.createScaffolds(sigMatchDict, opMapDict, '%s.scaffold_sigMatches_withOverlaps.txt' % outputPfx, allowOverlaps=True)
    createScaffolds.createScaffolds(sigMatchDict, opMapDict, '%s.scaffold_sigMatches_noOverlaps.txt' % outputPfx, allowOverlaps=False)
    createScaffolds.createScaffolds(sigUniqueMatchDict, opMapDict, '%s.scaffold_sigUniqueMatches_withOverlaps.txt' % outputPfx, allowOverlaps=True)
    createScaffolds.createScaffolds(sigUniqueMatchDict, opMapDict, '%s.scaffold_sigUniqueMatches_noOverlaps.txt' % outputPfx, allowOverlaps=False)
Esempio n. 3
0
 def addFragsFromMap(self, mapFileName):
     """Add the fragments of every map stored in mapFileName.

     The maps are read with SOMAMap.readMaps. For each map whose frags
     attribute is non-empty, all of its fragments are passed to
     self.addFrags; when the map has more than two fragments, the fragments
     other than the first and the last are additionally passed to
     self.addInteriorFrags.

     Args:
         mapFileName: path of the map file to open and read.
     """
     # Open the file in a with-block so the handle is closed once the maps
     # are loaded instead of being leaked.
     # NOTE(review): assumes SOMAMap.readMaps consumes the handle fully
     # before returning -- confirm against that helper.
     with open(mapFileName) as mapFile:
         mapDict = SOMAMap.readMaps(mapFile)
     for mapId, mapObj in mapDict.iteritems():
         frags = mapObj.frags
         if not frags:
             continue
         self.addFrags(frags)
         if len(frags) > 2:
             # Interior fragments: everything except the two end fragments.
             self.addInteriorFrags(frags[1:-1])