Beispiel #1
0
def __main__():
    """Randomly sub-sample reads from a FASTQ file into a bzip2 file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input FASTQ file
        2. path of the bzip2-compressed output file
        3. number of reads to keep (sub-sample count)

    The input is read twice: once to count the reads and once to select
    them. Selection is sequential: each read is kept with probability
    (reads still needed) / (reads not yet seen), so exactly the requested
    number of reads is written provided both passes see the same number
    of records.

    Exits with status -1 after printing the module docstring when an
    argument is missing or the count is not an integer. Exits with an
    error message when the requested count exceeds the number of reads or
    the output file cannot be created.
    """
    try:
        fastqFile = sys.argv[1]
        outFile = sys.argv[2]
        subCnt = int(sys.argv[3])  # sub sample count
    except (IndexError, ValueError):
        print(__doc__)
        sys.exit(-1)

    ## first pass: count the number of reads in the fastq file
    ffh = helper._open_file(fastqFile)
    try:
        read_cnt = sum(1 for _ in SeqIO.parse(ffh, 'fastq'))
    finally:
        ffh.close()
    # The doubled space reproduces the output of the former two-argument
    # print statement; the single-argument call form is valid Python 2.
    print('Number of reads in FASTQ:  %d' % read_cnt)

    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let an impossible request through.
    if subCnt > read_cnt:
        sys.exit(
            str(subCnt) +
            ' (sub-sample count) should be less than total read count ' +
            str(read_cnt))

    ## outfile directory check for creating the new file
    try:
        subFile = bz2.BZ2File(outFile, 'wb')
    except Exception as error:
        sys.exit(error)

    cnt, sub_cnt = 0, 0
    print('Writing compressed file...')

    ## second pass: select and write the reads
    try:
        ffh = helper._open_file(fastqFile)
        try:
            for rec in SeqIO.parse(ffh, 'fastq'):
                cnt += 1
                needed = subCnt - sub_cnt
                unseen = read_cnt - cnt + 1  # counts the current read too
                # random.random() lies in [0, 1): a read is always kept
                # once needed == unseen and never kept once needed == 0.
                if random.random() * unseen < needed:
                    sub_cnt += 1
                    subFile.write(rec.format("fastq"))
                if subCnt == sub_cnt:
                    print('...done')
                    break
        finally:
            ffh.close()
    finally:
        subFile.close()

    print('Number of reads scanned:  %d' % cnt)
    print('Number of reads in:  %d' % sub_cnt)
Beispiel #2
0
def __main__():
    """Randomly sub-sample reads from a FASTQ file into a bzip2 file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input FASTQ file
        2. path of the bzip2-compressed output file
        3. number of reads to keep (sub-sample count)

    The input is read twice: once to count the reads and once to select
    them. Selection is sequential: each read is kept with probability
    (reads still needed) / (reads not yet seen), so exactly the requested
    number of reads is written provided both passes see the same number
    of records.

    Exits with status -1 after printing the module docstring when an
    argument is missing or the count is not an integer. Exits with an
    error message when the requested count exceeds the number of reads or
    the output file cannot be created.
    """
    try:
        fastqFile = sys.argv[1]
        outFile = sys.argv[2]
        subCnt = int(sys.argv[3])  # sub sample count
    except (IndexError, ValueError):
        print(__doc__)
        sys.exit(-1)

    ## first pass: count the number of reads in the fastq file
    ffh = helper._open_file(fastqFile)
    try:
        read_cnt = sum(1 for _ in SeqIO.parse(ffh, "fastq"))
    finally:
        ffh.close()
    # The doubled space reproduces the output of the former two-argument
    # print statement; the single-argument call form is valid Python 2.
    print("Number of reads in FASTQ:  %d" % read_cnt)

    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let an impossible request through.
    if subCnt > read_cnt:
        sys.exit(
            str(subCnt)
            + " (sub-sample count) should be less than total read count "
            + str(read_cnt)
        )

    ## outfile directory check for creating the new file
    try:
        subFile = bz2.BZ2File(outFile, "wb")
    except Exception as error:
        sys.exit(error)

    cnt, sub_cnt = 0, 0
    print("Writing compressed file...")

    ## second pass: select and write the reads
    try:
        ffh = helper._open_file(fastqFile)
        try:
            for rec in SeqIO.parse(ffh, "fastq"):
                cnt += 1
                needed = subCnt - sub_cnt
                unseen = read_cnt - cnt + 1  # counts the current read too
                # random.random() lies in [0, 1): a read is always kept
                # once needed == unseen and never kept once needed == 0.
                if random.random() * unseen < needed:
                    sub_cnt += 1
                    subFile.write(rec.format("fastq"))
                if subCnt == sub_cnt:
                    print("...done")
                    break
        finally:
            ffh.close()
    finally:
        subFile.close()

    print("Number of reads scanned:  %d" % cnt)
    print("Number of reads in:  %d" % sub_cnt)
Beispiel #3
0
def __main__():
    """Print per-record lengths and summary statistics for a FASTA file.

    The FASTA path is taken from ``sys.argv[1]``. Every record's id and
    sequence length is printed as it is read, followed by the number of
    distinct record ids, the longest and shortest entries, and the mean
    length rounded down to an integer. The longest/shortest/mean lines are
    omitted when the file contains no records.

    Exits with status -1 after printing the module docstring when the
    file argument is missing.
    """
    try:
        fa_name = sys.argv[1]
    except IndexError:
        print(__doc__)
        sys.exit(-1)

    # Keyed by record id, so a repeated id keeps only its last length.
    seq_info = dict()
    fah = helper._open_file(fa_name)
    try:
        for rec in SeqIO.parse(fah, "fasta"):
            seq_info[rec.id] = len(rec.seq)
            print('%s %s' % (rec.id, len(rec.seq)))
    finally:
        fah.close()

    # The doubled spaces below reproduce the output of the former
    # two-argument print statements.
    print('')
    print('Number of FASTA entries:  %d' % len(seq_info))
    if seq_info:
        by_length = itemgetter(1)
        long_one = max(seq_info.items(), key=by_length)
        print('Long contig length (bp):  %s %s' % (long_one[0], long_one[1]))
        short_one = min(seq_info.items(), key=by_length)
        print('Short contig length (bp):  %s %s' % (short_one[0], short_one[1]))
        flength = sum(seq_info.values())
        # Floor division keeps the integer result on every Python version.
        print('Average length of FASTA contig (bp):  %d'
              % (flength // len(seq_info)))
    print('')