예제 #1
0
    def test_insertOneTime(self):
        """Insert a single k-mer into a new MQF kDataFrame and read it back.

        The frame must report empty before the insert, non-empty after it,
        and return the inserted count for the inserted k-mer.
        """
        print(self._testMethodName)
        kmer_size = 31
        # The k-mer/count pair is drawn before the frame is constructed.
        kmer, expected_count = self.params.generate_singleKmer()

        frame = kp.kDataFrameMQF(kmer_size)
        self.assertTrue(frame.empty())

        frame.insert(kmer, expected_count)
        self.assertFalse(frame.empty())
        self.assertEqual(frame.getCount(kmer), expected_count)
예제 #2
0
import kProcessor as kp
import sys

# Both input paths are required; exit with a usage hint instead of an
# IndexError traceback when either one is missing.
if len(sys.argv) < 3:
    sys.exit(f"run: python {sys.argv[0]} <fasta_file> <names_file>")

fasta_file = sys.argv[1]
names_file = sys.argv[2]

# NOTE(review): 21 is presumably the k-mer size and 29 / 1 the MQF Q value
# and hashing mode -- confirm against the kProcessor documentation.
kf = kp.kDataFrameMQF(21, 29, 1)

# Mode 1 activates the kmers mode
# NOTE(review): 10000 is presumably the parsing chunk size -- confirm.
ckf = kp.index(kf, {"mode": 1}, fasta_file, 10000, names_file)

print("Serializing the index ...")

# The output prefix is relative to the current working directory.
# NOTE(review): the "index" directory presumably has to exist already --
# confirm how kProcessor's save() treats a missing directory.
ckf.save("index/idx_gencode.v32.transcripts")
예제 #3
0
import kProcessor as kp
from glob import glob
import os
import sys

# Count the k-mers of every entry in <genomes_dir> and save one kDataFrame
# per entry under <kDataframes_output_dir>.
if len(sys.argv) < 3:
    sys.exit(
        "run: python genomes_kmerCounting.py <genomes_dir> <kDataframes_output_dir>"
    )

genomes_dir = sys.argv[1]
output_directory = sys.argv[2]

# Create the output directory up front so that a missing directory is not
# discovered only at the first save, after counting has already been done.
os.makedirs(output_directory, exist_ok=True)

kSize = 21
chunk_size = 10000
hashing_mode = 1  # Integer hashing

# Every entry matched by "*" is handed to kProcessor as a genome file.
# Sorted so the processing order is reproducible (glob's order is arbitrary).
genomes = sorted(glob(f"{genomes_dir}/*"))

for genome_fasta in genomes:
    print(f"counting {genome_fasta} kmers ...")
    # A new frame per genome, so counts are not shared between genomes.
    kf = kp.kDataFrameMQF(kSize)
    kp.countKmersFromFile(kf, {"mode": hashing_mode}, genome_fasta, chunk_size)
    print(f"finished and counted {kf.size()} kmers...")
    print("saving ...")
    # Output prefix: <output_directory>/idx_<input file name>
    output_prefix = os.path.join(output_directory,
                                 "idx_" + os.path.basename(genome_fasta))
    kf.save(output_prefix)
예제 #4
0
import os
import sys

import kProcessor as kp

# Three positional arguments are required: FASTA path, names file, k-mer size.
if len(sys.argv) < 4:
    sys.exit("run: python genes_indexing.py <fasta> <namesFiles> <kSize>")

fasta_file, namesFile = sys.argv[1], sys.argv[2]
kSize = int(sys.argv[3])

# The index is named after the FASTA file's base name, with any ".fa.names"
# substring removed.
fasta_basename = os.path.basename(fasta_file)
idx_suffix = fasta_basename.replace(".fa.names", "")
print(f"Indexing {idx_suffix} ...", file=sys.stderr)

kmers_mode = 1
hashing_mode = 1
chunk_size = 10000

# The frame is MQF-backed; kp.kDataFramePHMAP(kSize, hashing_mode) is the
# alternative that was previously left commented out here.
mqf_frame = kp.kDataFrameMQF(kSize, 29, hashing_mode)
built_index = kp.index(
    mqf_frame, {"mode": kmers_mode}, fasta_file, chunk_size, namesFile
)
built_index.save(f"idx_{idx_suffix}")