Exemple #1
0
                used.append(node)
                next = getStrongestEdge(node, direction, used)
                if next is None:
                    break
                path.append(next)
                if next.qstrand == '-':
                    if direction == SUPPORTFLAGS.left:
                        direction = SUPPORTFLAGS.right
                    elif direction == SUPPORTFLAGS.right:
                        direction = SUPPORTFLAGS.left
                node = next.qname
            paths.append(path)
    for p in paths:
        if len(p) == 0:
            continue
        for i in p:
            if not i.tname.startswith("ref"):
                sys.stdout.write(i.tname.split('/')[1], i.tstrand, "\t")
            else:
                sys.stdout.write(i.tname, i.tstrand, "\t")
        sys.stdout.write('\n')


# Script entry point: load the reads FASTA named on the command line, build a
# graph from its keys and a pre-existing "out.m5" file, simplify the graph and
# write it to "ovl.gml".
if __name__ == '__main__':
    # First command-line argument: path to the reads FASTA file (required;
    # a missing argument raises IndexError here).
    reads = sys.argv[1]
    fasta = FastaFile(reads)
    # The alignment step below is disabled, so "out.m5" must already exist in
    # the working directory -- presumably produced by an earlier blasr run of
    # the reads against themselves (TODO confirm).
    #blasr(reads, reads, 4)
    ovl = m5ToOvlGraph(fasta.keys(), "out.m5")
    # NOTE(review): the return value is ignored, so ovlSimplify presumably
    # modifies ovl in place -- confirm against its definition.
    ovlSimplify(ovl)
    # nx is presumably networkx; the graph is written in GML format.
    nx.write_gml(ovl, "ovl.gml")
Exemple #2
0
import sys, random
from pbsuite.utils.FileHandlers import FastaFile, revComp, wrap

def getRandomSeq(length):
    """Return a random nucleotide string of `length` characters.

    Each character is drawn independently with `random.choice` from the
    four letters A, T, C and G, using the module-level `random` generator.
    A `length` of zero or less yields an empty string.
    """
    bases = ['A', 'T', 'C', 'G']
    # range() instead of xrange(): xrange does not exist on Python 3, while
    # range() iterates the same values on both Python 2 and Python 3.
    return "".join([random.choice(bases) for _ in range(length)])
    
if __name__ == '__main__':
    fasta = FastaFile(sys.argv[1])
    key = fasta.keys()[0]
    ref = list(fasta[key])
    
    #800bp insertion in the sample (deletion in the reference) 
    ref[5000:5800] = ""
    #5000 Insertion

    #Inversion in the sample (inversion in the reference) tails
    ref[9000:12000] = list("".join(ref[10000:13000]).translate(revComp)[::-1])
    #9000-12000 - Inversion
    
    #1kb deletion in sample (insert into the reference) tails
    seq = getRandomSeq(1000)
    ref[20000:20000] = list(seq)
    #20000-21000 -- Deletion 
    
    #100bp insertion in sample (deletion in the reference) spots
    ref[30000:30100] = ""
    #30000 - Insertion

    #200bp deletion in sample (insert into the reference) spots
    seq = getRandomSeq(200)
    ref[35000:35000] = list(seq)