def generateReference(self):
    """Write a two-sequence ``reference.fasta`` with two point substitutions.

    Reads ``self.repeatLen``, ``self.G``, ``self.m`` and ``self.folderName``.

    For every sequence index in ``range(self.m)`` one repeat copy spanning
    ``[self.G // 2, self.G // 2 + self.repeatLen)`` is registered, ``self.m``
    strings are obtained from ``dataGenLib.randomStringGen(self.G)`` and the
    whole list is passed through ``dataGenLib.insertRepeat``.

    Only the first two resulting sequences are written.  In both of them the
    characters at offsets ``pt1`` (one third into the repeat span) and ``pt2``
    (two thirds into it) are overwritten: ``A``/``G`` in the first sequence,
    ``C``/``T`` in the second.

    NOTE(review): requires ``self.m >= 2``; with fewer sequences the access
    to the second one raises ``IndexError``.  Sequences beyond the second
    are generated but not written.
    """
    repeatLength = self.repeatLen
    # Explicit floor division: these values are used as slice indices, so
    # they must stay integers regardless of the interpreter's "/" semantics.
    repeatStart = self.G // 2
    repeatEnd = repeatStart + repeatLength

    repeatLocations = dataGenLib.locList()
    repeatCopies = dataGenLib.repeatInfo()
    for genomeIndex in range(self.m):
        repeatCopies.addRepeatCopy(genomeIndex, repeatStart, repeatEnd)
    repeatLocations.addRepeatInfo(repeatCopies)

    sequences = [dataGenLib.randomStringGen(self.G) for _ in range(self.m)]
    sequences = dataGenLib.insertRepeat(sequences, repeatLocations)

    # Positions of the two substitutions inside the repeat span.
    pt1 = repeatStart + repeatLength // 3
    pt2 = repeatStart + 2 * repeatLength // 3
    # Same text as the former ``print pt1, pt2`` statement, but valid syntax
    # on both Python 2 and Python 3.
    print("%s %s" % (pt1, pt2))

    first, second = sequences[0], sequences[1]
    # Slicing around pt1 / pt2 replaces exactly one character at each offset.
    mutated = [
        first[:pt1] + 'A' + first[pt1 + 1:pt2] + 'G' + first[pt2 + 1:],
        second[:pt1] + 'C' + second[pt1 + 1:pt2] + 'T' + second[pt2 + 1:],
    ]

    dataGenLib.writeListToFile(self.folderName, 'reference.fasta', mutated)
def genearteReads(self):
    """Create reads from ``reference.fasta`` and write ``raw_reads.fasta``.

    Reads ``self.folderName``, ``self.abun``, ``self.G``, ``self.L`` and
    ``self.p``.

    The sequences are loaded with ``dataGenLib.readFromFasta``.  For the
    sequence at index ``i`` the number of reads produced is
    ``int(self.G * self.abun[i] / self.L)``; each read comes from one call
    to ``dataGenLib.createANoisyRead(self.L, self.p, sequence)``.
    All reads are collected in order and written in a single call.

    NOTE(review): ``self.abun`` must have at least as many entries as there
    are sequences in the file, otherwise the count lookup raises
    ``IndexError``.  Presumably ``L`` is the read length and ``p`` the noise
    rate -- confirm against ``dataGenLib.createANoisyRead``.
    """
    references = dataGenLib.readFromFasta(self.folderName + "reference.fasta")

    # One read count per abundance entry.
    readCounts = [int(self.G * fraction / self.L) for fraction in self.abun]

    noisyReads = []
    for index, genome in enumerate(references):
        noisyReads.extend(
            dataGenLib.createANoisyRead(self.L, self.p, genome)
            for _ in range(readCounts[index])
        )

    dataGenLib.writeListToFile(self.folderName, "raw_reads.fasta", noisyReads)
def generateContigs(self):
    """Write ``contigs.fasta`` holding one rearranged contig.

    Reads ``self.folderName``.

    The sequences from ``reference.fasta`` are handed to
    ``dataGenLib.contigBreakDown`` together with the break points
    ``[0, 10**6]``, ``[0, 2 * 10**6]`` and ``[0, 3 * 10**6]``.  The first
    four returned segments are concatenated in the order 0, 2, 1, 3 (the two
    middle segments swapped) and written as a single entry.

    NOTE(review): each break point is presumably a
    ``[sequence index, offset]`` pair -- confirm against
    ``dataGenLib.contigBreakDown``.
    """
    references = dataGenLib.readFromFasta(self.folderName + "reference.fasta")

    copyCount = 3
    spacing = 10**6
    cutPoints = [
        [0, copyIndex * spacing] for copyIndex in range(1, copyCount + 1)
    ]

    pieces = dataGenLib.contigBreakDown(references, cutPoints)
    rearranged = pieces[0] + pieces[2] + pieces[1] + pieces[3]

    dataGenLib.writeListToFile(self.folderName, 'contigs.fasta', [rearranged])
def generateContigs(self):
    """Write ``contigs.fasta`` with two contigs built from swapped segments.

    Reads ``self.folderName``.

    Every sequence in ``reference.fasta`` gets one break point
    ``[index, len(sequence) // 2]``.  The segments returned by
    ``dataGenLib.contigBreakDown`` are recombined into two contigs,
    ``segment 0 + segment 3`` and ``segment 2 + segment 1``.  The length of
    each contig is printed before the file is written.

    NOTE(review): the recombination indexes segments 0..3, so it expects at
    least four segments back (presumably two sequences cut once each) --
    confirm against ``dataGenLib.contigBreakDown``.
    """
    # Single-argument print calls emit the same text on Python 2 and 3.
    print("Generate contigs")

    references = dataGenLib.readFromFasta(self.folderName + "reference.fasta")

    # Floor division keeps the cut offset an integer whatever "/" means
    # in the running interpreter.
    cutPoints = [
        [index, len(sequence) // 2]
        for index, sequence in enumerate(references)
    ]

    pieces = dataGenLib.contigBreakDown(references, cutPoints)
    contigs = [pieces[0] + pieces[3], pieces[2] + pieces[1]]

    for contig in contigs:
        print(len(contig))

    dataGenLib.writeListToFile(self.folderName, 'contigs.fasta', contigs)
def generateReference(self):
    """Write a single-sequence ``reference.fasta`` with three repeat copies.

    Reads ``self.G`` and ``self.folderName``.

    Three repeat copies are registered for sequence index 0, each 1000 long,
    starting at ``10**6``, ``2 * 10**6`` and ``3 * 10**6``.  One string from
    ``dataGenLib.randomStringGen(self.G)`` is passed through
    ``dataGenLib.insertRepeat`` with those locations and the result is
    written out.

    NOTE(review): the fixed offsets presumably require ``self.G`` to exceed
    ``3 * 10**6 + 1000`` -- confirm against ``dataGenLib.insertRepeat``.
    """
    repeatLength = 1000
    copyCount = 3
    spacing = 10**6

    repeatLocations = dataGenLib.locList()
    repeatCopies = dataGenLib.repeatInfo()
    for copyIndex in range(1, copyCount + 1):
        copyStart = copyIndex * spacing
        repeatCopies.addRepeatCopy(0, copyStart, copyStart + repeatLength)
    repeatLocations.addRepeatInfo(repeatCopies)

    sequences = dataGenLib.insertRepeat(
        [dataGenLib.randomStringGen(self.G)], repeatLocations)

    dataGenLib.writeListToFile(self.folderName, 'reference.fasta', sequences)
def generateReference(self):
    """Write ``reference.fasta`` with ``self.m`` sequences and one repeat.

    Reads ``self.G``, ``self.m`` and ``self.folderName``.

    For every sequence index in ``range(self.m)`` one repeat copy spanning
    ``[self.G // 2, self.G // 2 + 5000)`` is registered.  ``self.m`` strings
    are obtained from ``dataGenLib.randomStringGen(self.G)``, passed through
    ``dataGenLib.insertRepeat`` with those locations, and the result is
    written out.
    """
    repeatLength = 5000
    # Explicit floor division so the repeat coordinates stay integers
    # regardless of the interpreter's "/" semantics.
    repeatStart = self.G // 2
    repeatEnd = repeatStart + repeatLength

    repeatLocations = dataGenLib.locList()
    repeatCopies = dataGenLib.repeatInfo()
    for genomeIndex in range(self.m):
        repeatCopies.addRepeatCopy(genomeIndex, repeatStart, repeatEnd)
    repeatLocations.addRepeatInfo(repeatCopies)

    sequences = [dataGenLib.randomStringGen(self.G) for _ in range(self.m)]
    sequences = dataGenLib.insertRepeat(sequences, repeatLocations)

    dataGenLib.writeListToFile(self.folderName, 'reference.fasta', sequences)