Esempio n. 1
0
    def generateReference(self):
        """Build the reference sequences with a shared repeat and write them.

        Creates ``self.m`` random sequences of length ``self.G`` and registers
        one repeat copy per sequence, starting at the midpoint of the sequence
        and spanning ``self.repeatLen`` positions, before handing both to
        ``dataGenLib.insertRepeat``.  The first two resulting sequences then
        get two single-character substitutions each (one third and two thirds
        of the way through the repeat) so the two copies differ at exactly
        those positions: 'A'/'G' in the first sequence, 'C'/'T' in the second.
        The two edited sequences are written to ``reference.fasta`` in
        ``self.folderName``.

        Only the sequences at index 0 and 1 are kept; fewer than two sequences
        raises ``IndexError``.
        """
        lrepeat = self.repeatLen

        # Floor division keeps every coordinate an int, which slicing
        # requires; "/" would yield floats under Python 3 true division.
        startLoc = self.G // 2
        endLoc = startLoc + lrepeat

        locListTmp = dataGenLib.locList()

        repeatInfoTmp = dataGenLib.repeatInfo()

        for i in range(self.m):
            repeatInfoTmp.addRepeatCopy(i, startLoc, endLoc)

        locListTmp.addRepeatInfo(repeatInfoTmp)

        seqList = [dataGenLib.randomStringGen(self.G) for _ in range(self.m)]

        seqList = dataGenLib.insertRepeat(seqList, locListTmp)

        # Positions inside the repeat that receive the distinguishing bases.
        pt1 = startLoc + lrepeat // 3
        pt2 = startLoc + 2 * lrepeat // 3
        # Same output as the Python 2 statement "print pt1, pt2", but also
        # valid syntax on Python 3.
        print("%s %s" % (pt1, pt2))

        newSeqList = ["", ""]
        # Replace the single character at pt1 and at pt2 in each sequence.
        newSeqList[0] = (seqList[0][0:pt1] + 'A' + seqList[0][pt1 + 1:pt2] +
                         'G' + seqList[0][pt2 + 1:])
        newSeqList[1] = (seqList[1][0:pt1] + 'C' + seqList[1][pt1 + 1:pt2] +
                         'T' + seqList[1][pt2 + 1:])

        dataGenLib.writeListToFile(self.folderName, 'reference.fasta',
                                   newSeqList)
Esempio n. 2
0
    def genearteReads(self):
        """Draw noisy reads from each reference sequence and save them.

        Loads the sequences stored in ``reference.fasta`` under
        ``self.folderName``.  For every value ``c`` in ``self.abun`` a read
        count ``int(self.G * c / self.L)`` is computed; the sequence at the
        same index contributes that many reads, each produced by
        ``dataGenLib.createANoisyRead(self.L, self.p, sequence)``.  All reads
        are written, in generation order, to ``raw_reads.fasta``.
        """
        refPath = self.folderName + "reference.fasta"
        references = dataGenLib.readFromFasta(refPath)

        # One read count per abundance entry, matched to sequences by index.
        readCounts = [int(self.G * share / self.L) for share in self.abun]

        reads = []
        for idx, genome in enumerate(references):
            for _ in range(readCounts[idx]):
                noisyRead = dataGenLib.createANoisyRead(self.L, self.p, genome)
                reads.append(noisyRead)

        dataGenLib.writeListToFile(self.folderName, "raw_reads.fasta", reads)
Esempio n. 3
0
    def generateContigs(self):
        """Cut the reference at three fixed positions and write one contig.

        Reads ``reference.fasta`` from ``self.folderName`` and asks
        ``dataGenLib.contigBreakDown`` to break sequence 0 at 1x, 2x and 3x
        ``10**6``.  The first four returned segments are concatenated in the
        order 0, 2, 1, 3 (the two middle segments swapped) and the single
        resulting sequence is written to ``contigs.fasta``.
        """
        copyCount = 3
        spacing = 10**6

        reference = dataGenLib.readFromFasta(
            self.folderName + "reference.fasta")

        # Each break point is a [sequence index, position] pair.
        cutPoints = [[0, k * spacing] for k in range(1, copyCount + 1)]

        pieces = dataGenLib.contigBreakDown(reference, cutPoints)

        # Swap the two interior segments.
        rearranged = pieces[0] + pieces[2] + pieces[1] + pieces[3]

        dataGenLib.writeListToFile(self.folderName, 'contigs.fasta',
                                   [rearranged])
Esempio n. 4
0
    def generateContigs(self):
        """Split each reference sequence at its midpoint and cross-join halves.

        Reads ``reference.fasta`` from ``self.folderName``, requests one break
        point per sequence at half its length, and passes them to
        ``dataGenLib.contigBreakDown``.  Two contigs are then assembled from
        the returned segments as ``segList[0] + segList[3]`` and
        ``segList[2] + segList[1]``, their lengths are printed, and both are
        written to ``contigs.fasta``.

        At least four segments must come back from ``contigBreakDown``,
        otherwise ``IndexError`` is raised.
        """
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Generate contigs")
        genList = dataGenLib.readFromFasta(self.folderName + "reference.fasta")

        # Each break point is [sequence index, position]. Floor division keeps
        # the position an int; "/" would produce a float on Python 3.
        breakPoints = [[i, len(seq) // 2] for i, seq in enumerate(genList)]

        segList = dataGenLib.contigBreakDown(genList, breakPoints)

        # NOTE(review): presumably segments 0/1 are the halves of the first
        # sequence and 2/3 those of the second - confirm against
        # contigBreakDown's output ordering.
        seqListNew = [segList[0] + segList[3], segList[2] + segList[1]]

        for eachitem in seqListNew:
            print(len(eachitem))

        dataGenLib.writeListToFile(self.folderName, 'contigs.fasta',
                                   seqListNew)
Esempio n. 5
0
    def generateReference(self):
        """Create one random reference carrying three repeat copies.

        Registers three repeat copies on sequence 0, each 1000 positions long
        and starting at 1x, 2x and 3x ``10**6``.  A single random sequence of
        length ``self.G`` is generated, passed with those locations to
        ``dataGenLib.insertRepeat``, and the result is written to
        ``reference.fasta`` in ``self.folderName``.
        """
        copyCount = 3
        spacing = 10**6
        repeatLength = 1000

        locations = dataGenLib.locList()
        repeats = dataGenLib.repeatInfo()

        for k in range(1, copyCount + 1):
            begin = k * spacing
            repeats.addRepeatCopy(0, begin, begin + repeatLength)

        locations.addRepeatInfo(repeats)

        genome = dataGenLib.insertRepeat(
            [dataGenLib.randomStringGen(self.G)], locations)

        dataGenLib.writeListToFile(self.folderName, 'reference.fasta', genome)
Esempio n. 6
0
    def generateReference(self):
        """Build ``self.m`` random references that share one repeat and save them.

        Registers one repeat copy per sequence, starting at the midpoint
        ``self.G // 2`` and ending 5000 positions later.  ``self.m`` random
        sequences of length ``self.G`` are generated, passed with those
        locations to ``dataGenLib.insertRepeat``, and the result is written to
        ``reference.fasta`` in ``self.folderName``.
        """
        lrepeat = 5000

        # Floor division keeps the coordinates ints; "/" would yield a float
        # midpoint under Python 3 true division.
        startLoc = self.G // 2
        endLoc = startLoc + lrepeat

        locListTmp = dataGenLib.locList()

        repeatInfoTmp = dataGenLib.repeatInfo()

        for i in range(self.m):
            repeatInfoTmp.addRepeatCopy(i, startLoc, endLoc)

        locListTmp.addRepeatInfo(repeatInfoTmp)

        seqList = [dataGenLib.randomStringGen(self.G) for _ in range(self.m)]

        seqList = dataGenLib.insertRepeat(seqList, locListTmp)

        dataGenLib.writeListToFile(self.folderName, 'reference.fasta', seqList)