Ejemplo n.º 1
0
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Simulate reads by chaining two external scripts.

    ``gensimreads.py`` is run on ``bed12`` first and its standard output is
    redirected into ``bedReads``. ``getseqfrombed.py`` is then run on
    ``bedReads`` and ``bed12Fasta`` and its standard output is redirected into
    ``faReads``. Standard error of both commands is sent to ``/dev/null``.

    Args:
        bed12: Path of the BED file given to ``gensimreads.py``.
        bed12Fasta: Path of the FASTA file given to ``getseqfrombed.py``.
        explv: Path passed to ``gensimreads.py`` through ``-e``.
        bedReads: Path written by the first command and read by the second.
        faReads: Path written by the second command.
        readLength: Value passed as ``-l`` to both commands.
        readCount: Value passed as ``-n`` to ``gensimreads.py``.
        seqError: Value passed as ``-r`` to ``getseqfrombed.py``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    def run(command):
        # Echo whatever shell() hands back, ignoring results of five
        # characters or fewer (same threshold for both commands).
        output = shell(command)
        if len(output.strip()) > 5:
            print(output)

    # Paths are quoted so that names containing spaces or shell metacharacters
    # reach the scripts (and the redirections) as single words; paths made of
    # ordinary characters are passed through unchanged by shlex.quote.
    # NOTE: an earlier variant also passed a position-bias profile via "-b";
    # that option is not used here.
    run(" ".join([
        getBinary("gensimreads.py"),
        "-l", str(readLength),
        "-e", shlex.quote(explv),
        "-n", str(readCount),
        "--stranded", shlex.quote(bed12),
        "2> /dev/null >", shlex.quote(bedReads),
    ]))

    run(" ".join([
        getBinary("getseqfrombed.py"),
        "-f",
        "-r", str(seqError),
        "-l", str(readLength),
        shlex.quote(bedReads),
        shlex.quote(bed12Fasta),
        "2> /dev/null >", shlex.quote(faReads),
    ]))
Ejemplo n.º 2
0
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength,
                  readCount, seqError):
    """Generate simulated reads with gensimreads.py and getseqfrombed.py.

    The first command reads ``bed12`` and redirects its standard output to
    ``bedReads``; the second reads ``bedReads`` plus ``bed12Fasta`` and
    redirects its standard output to ``faReads``. Both commands have their
    standard error redirected to ``/dev/null``.

    Args:
        bed12: Path of the BED file given to ``gensimreads.py``.
        bed12Fasta: Path of the FASTA file given to ``getseqfrombed.py``.
        explv: Path passed to ``gensimreads.py`` through ``-e``.
        bedReads: Intermediate path written by the first command and read by
            the second.
        faReads: Path written by the second command.
        readLength: Value passed as ``-l`` to both commands.
        readCount: Value passed as ``-n`` to ``gensimreads.py``.
        seqError: Value passed as ``-r`` to ``getseqfrombed.py``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    quote = shlex.quote

    # Every path is quoted before it is spliced into the command line, so a
    # file name with spaces or shell metacharacters cannot split an argument
    # or redirect target. Ordinary paths are left unchanged by quote().
    gensimreadsCommand = " ".join([
        getBinary("gensimreads.py"),
        "-l", str(readLength),
        "-e", quote(explv),
        "-n", str(readCount),
        "--stranded", quote(bed12),
        "2> /dev/null >", quote(bedReads),
    ])
    output = shell(gensimreadsCommand)
    # Only echo results longer than five characters after stripping.
    if len(output.strip()) > 5:
        print(output)

    getseqCommand = " ".join([
        getBinary("getseqfrombed.py"),
        "-f",
        "-r", str(seqError),
        "-l", str(readLength),
        quote(bedReads),
        quote(bed12Fasta),
        "2> /dev/null >", quote(faReads),
    ])
    output = shell(getseqCommand)
    if len(output.strip()) > 5:
        print(output)
Ejemplo n.º 3
0
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Write the simulated UTR FASTA, VCF, BED12 and expression-profile files.

    Steps, in order:

    1. Parse the UTRs from ``bed`` with ``parseUtrBedFile``.
    2. Create ``vcfFile`` with a VCFv4.1 header; the open handle is passed to
       ``simulateUTR`` for every sequence.
    3. Extract the UTR sequences from ``referenceFasta`` and write one
       simulated sequence per UTR to ``bed12Fasta``.
    4. Write one single-block BED12 record per UTR to ``bed12``. Coordinates
       are relative to the UTR itself (the UTR name is used as the reference
       name) and cover the trailing ``fragmentLength`` bases of the UTR.
    5. Run ``genexplvprofile.py`` on ``bed12`` and redirect its standard
       output to ``explv``.

    Args:
        bed: Path of the BED file describing the UTRs.
        bed12: Output path of the BED12 file.
        bed12Fasta: Output path of the simulated UTR FASTA file.
        referenceFasta: Reference FASTA the UTR sequences are extracted from.
        readLength: Accepted for interface compatibility; not used here.
        polyALength: Forwarded unchanged to ``simulateUTR``.
        explv: Output path for the ``genexplvprofile.py`` result.
        snpRate: Forwarded unchanged to ``simulateUTR``.
        vcfFile: Output path of the VCF file.

    Returns:
        The summed length (``end - start``) of all fragments written to
        ``bed12``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers guarantee both handles are closed even if bedtools or
    # simulateUTR raises part-way through.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFasta = BedTool(bed).sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if line[0] == ">":
                    # Newer bedtools releases emit headers of the form
                    # ">name::chrom:start-end(strand)". Keep only the name so
                    # the header matches the keys of `utrs` and the reference
                    # names written to the BED12 file below.
                    # NOTE(review): assumes `utrs` is keyed by the BED name
                    # column - confirm against parseUtrBedFile.
                    header = line.split("::")[0]
                    print(header, file=bed12FastaFile)
                    utrName = header[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450

    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()

            # randrange excludes the upper bound: lengths are drawn from
            # [minFragmentLength, maxFragmentLength - 1] and then capped at
            # the UTR length.
            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength)
            fragmentLength = min(fragmentLength, utrLength)

            # The fragment always ends at the last base of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength

            totalLength += (end - start)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    # Paths are quoted so that names with spaces or shell metacharacters stay
    # single words; ordinary paths are left unchanged by shlex.quote.
    output = shell(" ".join([
        getBinary("genexplvprofile.py"),
        "--geometric 1",
        shlex.quote(bed12),
        "2> /dev/null >", shlex.quote(explv),
    ]))
    # Only echo results longer than five characters after stripping.
    if len(output.strip()) > 5:
        print(output)

    return totalLength
Ejemplo n.º 4
0
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength,
                polyALength, explv, snpRate, vcfFile):
    """Write the simulated UTR FASTA, VCF, BED12 and expression-profile files.

    Steps, in order:

    1. Parse the UTRs from ``bed`` with ``parseUtrBedFile``.
    2. Create ``vcfFile`` with a VCFv4.1 header; the open handle is passed to
       ``simulateUTR`` for every sequence.
    3. Extract the UTR sequences from ``referenceFasta``, reduce each FASTA
       header to the part before ``"::"`` and write one simulated sequence
       per UTR to ``bed12Fasta``.
    4. Write one single-block BED12 record per UTR to ``bed12``. Coordinates
       are relative to the UTR itself (the UTR name is used as the reference
       name) and cover the trailing ``fragmentLength`` bases of the UTR.
    5. Run ``genexplvprofile.py`` on ``bed12`` and redirect its standard
       output to ``explv``.

    Args:
        bed: Path of the BED file describing the UTRs.
        bed12: Output path of the BED12 file.
        bed12Fasta: Output path of the simulated UTR FASTA file.
        referenceFasta: Reference FASTA the UTR sequences are extracted from.
        readLength: Accepted for interface compatibility; not used here.
        polyALength: Forwarded unchanged to ``simulateUTR``.
        explv: Output path for the ``genexplvprofile.py`` result.
        snpRate: Forwarded unchanged to ``simulateUTR``.
        vcfFile: Output path of the VCF file.

    Returns:
        The summed length (``end - start``) of all fragments written to
        ``bed12``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers guarantee both handles are closed even if bedtools or
    # simulateUTR raises part-way through.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFasta = BedTool(bed).sequence(fi=referenceFasta,
                                         s=True,
                                         name=True)

        # Headers and sequences are normalised in a single in-memory pass.
        # This yields the same lines as staging them in a temporary file
        # first, without leaving an undeleted temp file behind on every call.
        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if line[0] == ">":
                    # Drop the "::..." suffix so the header is just the name.
                    # NOTE(review): assumes `utrs` is keyed by that name -
                    # confirm against parseUtrBedFile.
                    header = line.split("::")[0].rstrip()
                    print(header, file=bed12FastaFile)
                    utrName = header[1:]
                else:
                    simulated = simulateUTR(line.rstrip(), utrs[utrName],
                                            polyALength, snpRate, vcf)
                    print(simulated.rstrip(), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450

    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()

            # randrange excludes the upper bound: lengths are drawn from
            # [minFragmentLength, maxFragmentLength - 1] and then capped at
            # the UTR length.
            fragmentLength = random.randrange(minFragmentLength,
                                              maxFragmentLength)
            fragmentLength = min(fragmentLength, utrLength)

            # The fragment always ends at the last base of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength

            totalLength += (end - start)
            print(utr.name,
                  start,
                  end,
                  utr.name,
                  utr.score,
                  "+",
                  start,
                  end,
                  "255,0,0",
                  "1", (end - start),
                  0,
                  sep="\t",
                  file=bed12File)

    # Paths are quoted so that names with spaces or shell metacharacters stay
    # single words; ordinary paths are left unchanged by shlex.quote.
    output = shell(" ".join([
        getRNASeqReadSimulator("genexplvprofile.py"),
        "--geometric 1",
        shlex.quote(bed12),
        "2> /dev/null >", shlex.quote(explv),
    ]))
    # Only echo results longer than five characters after stripping.
    if len(output.strip()) > 5:
        print(output)

    return totalLength