Example #1
0
        # Remove wt entries
        tmp = seqarray.copy()
        tmp[:,wtrow] = 0

        # Store results from this chunk
        mutarray_lil[startrow:(endrow+1),:] = tmp

        # Increment rows
        startrow = endrow+1
        endrow = startrow + chunksize - 1

    # Convert to csr matrix
    mutarray_csr = mutarray_lil.tocsr()

    # Return vararray as well as binary representation of wt seq
    return mutarray_csr, wtrow


# Demo driver: simulate a mutant library around one wild-type sequence,
# encode it twice (via dataset2seqarray and via dataset2mutarray), and
# compare the sizes that nbytes reports for the two encodings.
wtseq = 'AAAAAAAGTGAGATGGCAATCTAATTCGGCACCCCAGGTTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGG'

# 10000 simulated sequences with mutrate=.1 and tags enabled.
dataset_df = simulate_library(
    wtseq,
    numseq=10000,
    mutrate=.1,
    tags=True,
)

# Both encodings are built from the same dataset with modeltype='MAT'.
seqarray = dataset2seqarray(dataset_df, modeltype='MAT')
mutarray, wtrow = dataset2mutarray(dataset_df, modeltype='MAT')

# Size of each representation, as reported by the nbytes helper.
seqarray_size = nbytes(seqarray)
mutarray_size = nbytes(mutarray)

# Report both sizes and their ratio. A single parenthesized argument prints
# the same text whether print is a statement or a function.
print('size of seqarray = %d' % seqarray_size)
print('size of mutarray = %d' % mutarray_size)
# The leading 1.* forces true division even when both sizes are ints.
print('compression ratio = %.1f' % (1.*seqarray_size/mutarray_size))