def test_kmer_generator(self):
    '''
    Check kmer_generator against hand-written expectations: empty input,
    a single base, a list of sequences with the default settings, and the
    same list with extension=False.
    '''
    # Each case is (positional args, keyword args, expected result).
    # The cases run in order and the first mismatch fails the test.
    cases = [
        (("", 0), {}, {}),
        (("A", 1), {}, {0: ['A', 'T']}),
        (
            (["ATTCG", "GGGG"], 3),
            {},
            {
                0: [
                    'ATT', 'TTC', 'TCG', 'CGA', 'GAT',
                    'ATC', 'TCG', 'CGA', 'GAA', 'AAT',
                ],
                1: ['GGG', 'GGG', 'GGG', 'GGG', 'CCC', 'CCC', 'CCC', 'CCC'],
            },
        ),
        (
            (["ATTCG", "GGGG"], 3),
            {"extension": False},
            {
                0: ['ATT', 'TTC', 'TCG', 'CGA', 'GAA', 'AAT'],
                1: ['GGG', 'GGG', 'CCC', 'CCC'],
            },
        ),
    ]
    for positional, keywords, expected in cases:
        self.assertEqual(kmer_generator(*positional, **keywords), expected)
def compute_match_score(query, template_obj, kmerLength, primers):
    '''
    Score one set of reads against the templates.

    Inputs:
    - query: forwarded to get_reads_records, which opens the read records
    - (dict) template_obj: its "Kmers" entry is handed to coarse_filtering and
      the whole object is handed to fine_filtering
    - kmerLength: k-mer length forwarded to kmer_generator, coarse_filtering
      and fine_filtering
    - (dict) primers: each value is an iterable of primer sequences; the
      whole dict is also handed to fine_filtering
    Outputs:
    - (list) matchScore: each raw score from fine_filtering divided by
      (1 + flanking_coverage) at the same index
    - filename: the file name reported by get_reads_records
    '''
    record1, record2, gzip1, gzip2, filename = get_reads_records(query)

    try:
        # Run reads through Coarse Filtering to drastically reduce
        # computation for Fine Filtering
        reads = combine_records(record1, record2)
        primer_sequences = [
            sequence
            for primer_set in primers.values()
            for sequence in primer_set
        ]
        primer_kmers = kmer_generator(
            primer_sequences, kmerLength, extension=False)
        flanking_sequences = {
            kmer for kmers in primer_kmers.values() for kmer in kmers
        }
        # The first return value (nucleotides seen) is not needed for scoring.
        _nucleotides_seen, recruitedReads = coarse_filtering(
            reads, kmerLength, template_obj["Kmers"], flanking_sequences)
    finally:
        # Close all the things, even when filtering raised, so the handles
        # opened by get_reads_records are never leaked.
        record1.close()
        if record2 is not None:
            record2.close()
        if gzip1 is not None:
            gzip1.close()
        if gzip2 is not None:
            gzip2.close()

    # Run reads through Fine Filtering to get score for each template
    matchScore, flanking_coverage = fine_filtering(
        template_obj, recruitedReads, kmerLength, primers)
    print(matchScore)
    print(flanking_coverage)
    # Normalise each score by its flanking coverage; the +1 avoids a
    # division by zero when the coverage is 0.
    matchScore = [
        score / float(1 + flanking_coverage[i])
        for i, score in enumerate(matchScore)
    ]
    print(matchScore)
    print("\n")

    return matchScore, filename
def setUp(self):
    '''
    Build the fixtures: the path prefix, the template object (names,
    sequences and their 25-mers) and the primers loaded from the packaged
    JSON resource.
    '''
    self.PATH_PREFIX = PATH_PREFIX

    # Parse the packaged template FASTA once, then split the records into
    # ids and plain-string sequences.
    fasta_path = resource_filename('prince.resources', "templates.fasta")
    records = list(SeqIO.parse(fasta_path, "fasta"))
    names = [record.id for record in records]
    sequences = [str(record.seq) for record in records]

    # Generate k-mers
    kmers = kmer_generator(sequences, 25)
    self.template_obj = {
        "Names": names,
        "Sequences": sequences,
        "Kmers": kmers,
    }

    primers_path = resource_filename(
        'prince.resources', "TB_primers_extended.json")
    with open(primers_path) as primers_file:
        self.primers = json.load(primers_file)