def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Run the two external read-simulation scripts one after the other.

    First ``gensimreads.py`` is invoked on ``bed12`` with the read length,
    the expression-level file ``explv`` and the read count; its stdout is
    redirected to ``bedReads``. Then ``getseqfrombed.py`` is invoked on
    ``bedReads`` and ``bed12Fasta`` with the error rate ``seqError`` and the
    read length; its stdout is redirected to ``faReads``. In both cases
    stderr is sent to /dev/null.

    Whatever ``shell`` returns for each command is printed only when it is
    longer than 5 characters after stripping whitespace.
    """
    # NOTE: an earlier variant of the first command additionally passed
    # "-b <rNASeqReadSimulatorPath>demo/input/sampleposbias.txt"; it is disabled.
    simulateCommand = " ".join([
        getBinary("gensimreads.py"),
        "-l", str(readLength),
        "-e", explv,
        "-n", str(readCount),
        "--stranded", bed12,
        "2> /dev/null >", bedReads,
    ])
    simulateOutput = shell(simulateCommand)
    if len(simulateOutput.strip()) > 5:
        print(simulateOutput)

    extractCommand = " ".join([
        getBinary("getseqfrombed.py"),
        "-f",
        "-r", str(seqError),
        "-l", str(readLength),
        bedReads,
        bed12Fasta,
        "2> /dev/null >", faReads,
    ])
    extractOutput = shell(extractCommand)
    if len(extractOutput.strip()) > 5:
        print(extractOutput)
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Generate simulated reads by chaining two external scripts.

    Step 1 calls ``gensimreads.py`` (options ``-l``, ``-e``, ``-n``,
    ``--stranded``) on ``bed12`` and redirects stdout to ``bedReads``.
    Step 2 calls ``getseqfrombed.py`` (options ``-f``, ``-r``, ``-l``) on
    ``bedReads`` and ``bed12Fasta`` and redirects stdout to ``faReads``.
    stderr of both commands goes to /dev/null.

    The value returned by ``shell`` is echoed only if, once stripped of
    surrounding whitespace, it is more than 5 characters long.
    """

    def runAndReport(command):
        # Short (<= 5 stripped characters) results are treated as noise.
        result = shell(command)
        if len(result.strip()) > 5:
            print(result)

    # NOTE: a previous form of the first command also supplied
    # "-b <rNASeqReadSimulatorPath>demo/input/sampleposbias.txt"; not used now.
    redirect = " 2> /dev/null > "

    runAndReport("".join((
        getBinary("gensimreads.py"),
        " -l ", str(readLength),
        " -e ", explv,
        " -n ", str(readCount),
        " --stranded ", bed12,
        redirect, bedReads,
    )))

    runAndReport("".join((
        getBinary("getseqfrombed.py"),
        " -f -r ", str(seqError),
        " -l ", str(readLength),
        " ", bedReads,
        " ", bed12Fasta,
        redirect, faReads,
    )))
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Prepare the per-UTR FASTA, VCF, BED12 and expression-profile files.

    Steps, in order:

    1. Parse the UTRs from ``bed`` with ``parseUtrBedFile``.
    2. Write a VCFv4.1 header to ``vcfFile`` and hand the open handle to
       ``simulateUTR`` for every sequence.
    3. Extract the UTR sequences from ``referenceFasta`` (stranded, named)
       and write them to ``bed12Fasta``: header lines are copied verbatim,
       sequence lines are replaced by the result of ``simulateUTR``.
    4. Write one 12-column record per UTR to ``bed12``. Each record covers
       the last ``fragmentLength`` bases of the UTR, where the fragment
       length is drawn from ``random.randrange(150, 450)`` and capped at
       the UTR length.
    5. Run ``genexplvprofile.py --geometric 1`` on ``bed12`` with stdout
       redirected to ``explv``.

    ``readLength`` is accepted for interface compatibility but is not used
    in this function.

    Returns:
        The sum of ``end - start`` over all records written to ``bed12``.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers guarantee the handles are closed even when
    # sequence extraction or simulateUTR raises part-way through.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)
        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # An empty line carries neither a header nor a sequence;
                    # skip it instead of failing on line[0].
                    continue
                if line.startswith(">"):
                    print(line, file=bed12FastaFile)
                    # The header text after ">" is the key into utrs.
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()
            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength, 1)
            # A fragment can never be longer than the UTR it is taken from.
            fragmentLength = min(fragmentLength, utrLength)

            # The fragment is anchored at the end of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength

            totalLength += (end - start)

            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Prepare the per-UTR FASTA, VCF, BED12 and expression-profile files.

    Steps, in order:

    1. Parse the UTRs from ``bed`` with ``parseUtrBedFile``.
    2. Write a VCFv4.1 header to ``vcfFile`` and hand the open handle to
       ``simulateUTR`` for every sequence.
    3. Extract the UTR sequences from ``referenceFasta`` (stranded, named)
       and write them to ``bed12Fasta``: each header line is cut at the
       first ``"::"`` and stripped of trailing whitespace, each sequence
       line is replaced by the right-stripped result of ``simulateUTR``.
    4. Write one 12-column record per UTR to ``bed12``. Each record covers
       the last ``fragmentLength`` bases of the UTR, where the fragment
       length is drawn from ``random.randrange(150, 450)`` and capped at
       the UTR length.
    5. Run ``genexplvprofile.py --geometric 1`` on ``bed12`` with stdout
       redirected to ``explv``.

    ``readLength`` is accepted for interface compatibility but is not used
    in this function.

    Returns:
        The sum of ``end - start`` over all records written to ``bed12``.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers guarantee the handles are closed even when
    # sequence extraction or simulateUTR raises part-way through.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)
        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        # The lines are normalised in memory; no intermediate temporary file
        # is created, so nothing is left behind on disk after the call.
        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # An empty line carries neither a header nor a sequence;
                    # skip it instead of failing on line[0].
                    continue
                if line.startswith(">"):
                    # Keep only the part before "::" (presumably a coordinate
                    # suffix added by the sequence extraction - TODO confirm)
                    # so the remaining name can be used as the key into utrs.
                    header = line.split("::")[0].rstrip()
                    print(header, file=bed12FastaFile)
                    utrName = header[1:]
                else:
                    simulated = simulateUTR(line.rstrip(), utrs[utrName], polyALength, snpRate, vcf)
                    print(simulated.rstrip(), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()
            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength, 1)
            # A fragment can never be longer than the UTR it is taken from.
            fragmentLength = min(fragmentLength, utrLength)

            # The fragment is anchored at the end of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength

            totalLength += (end - start)

            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    output = shell(getRNASeqReadSimulator("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength